diff --git a/.openvswitch.metadata b/.openvswitch.metadata index af1aabc..0db19ca 100644 --- a/.openvswitch.metadata +++ b/.openvswitch.metadata @@ -1,5 +1,5 @@ 002450621b33c5690060345b0aac25bc2426d675 SOURCES/docutils-0.12.tar.gz -e704a36f712c1c81f253f77d1bd7c60d85b8a7ff SOURCES/dpdk-19.11.1.tar.xz +435b0b3a5da6d7417d318050e5b50ac400354c60 SOURCES/dpdk-19.11.tar.xz 0c5f78212173d2cac286f8f78aa95ebdea9e2444 SOURCES/openvswitch-2.13.0.tar.gz d34f96421a86004aa5d26ecf975edefd09f948b1 SOURCES/Pygments-1.4.tar.gz 6beb30f18ffac3de7689b7fd63e9a8a7d9c8df3a SOURCES/Sphinx-1.1.3.tar.gz diff --git a/SOURCES/arm64-armv8a-linuxapp-gcc-config b/SOURCES/arm64-armv8a-linuxapp-gcc-config index c219def..06a3d70 100644 --- a/SOURCES/arm64-armv8a-linuxapp-gcc-config +++ b/SOURCES/arm64-armv8a-linuxapp-gcc-config @@ -1,4 +1,4 @@ -# -*- cfg-sha: c5b6330ff61c71cf3196f55aad5cc3766b44dd62560396f67c2fee4f7ab46780 +# -*- cfg-sha: bfd08c718502ce9a9d75d102e9b680c4ecf9fb2b14b112aa45899a016d3bc7bb # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2015 Cavium, Inc # SPDX-License-Identifier: BSD-3-Clause @@ -12,7 +12,7 @@ CONFIG_RTE_VER_PREFIX="DPDK" # Version information completed when this file is processed for a build CONFIG_RTE_VER_YEAR=19 CONFIG_RTE_VER_MONTH=11 -CONFIG_RTE_VER_MINOR=1 +CONFIG_RTE_VER_MINOR=3 CONFIG_RTE_VER_SUFFIX="" CONFIG_RTE_VER_RELEASE=99 # RTE_EXEC_ENV values are the directories in mk/exec-env/ @@ -604,4 +604,3 @@ CONFIG_RTE_ARCH_ARM64_MEMCPY=n #CONFIG_RTE_ARM64_MEMCPY_STRICT_ALIGN=n # NXP PFE PMD Driver CONFIG_RTE_TOOLCHAIN_GCC=y -CONFIG_RTE_LIBRTE_PMD_XENVIRT=n diff --git a/SOURCES/openvswitch-2.13.0.patch b/SOURCES/openvswitch-2.13.0.patch index 930e9e4..2776047 100644 --- a/SOURCES/openvswitch-2.13.0.patch +++ b/SOURCES/openvswitch-2.13.0.patch @@ -1,1615 +1,41744 @@ -From 4ee0f6af9e601cbb5f69a486526d1011314bbfed Mon Sep 17 00:00:00 2001 -From: Ben Pfaff <blp@ovn.org> -Date: Thu, 19 Mar 2020 17:53:10 -0700 -Subject: [PATCH 01/15] ofproto-dpif-xlate: Fix recirculation when in_port is - OFPP_CONTROLLER. - -[ upstream commit c5a910dd92ecbad24f86b4c59b4ff8105b5149fd ] - -Recirculation usually requires finding the pre-recirculation input port. -Packets sent by the controller, with in_port of OFPP_CONTROLLER or -OFPP_NONE, do not have a real input port data structure, only a port -number. The code in xlate_lookup_ofproto_() mishandled this case, -failing to return the ofproto data structure. This commit fixes the -problem and adds a test to guard against regression. 
- -Reported-by: Numan Siddique <numans@ovn.org> -Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-March/368642.html -Tested-by: Numan Siddique <numans@ovn.org> -Acked-by: Numan Siddique <numans@ovn.org> -Signed-off-by: Ben Pfaff <blp@ovn.org> - -Resolves: #1775160 -Signed-off-by: Numan Siddique <nusiddiq@redhat.com> ---- - ofproto/ofproto-dpif-xlate.c | 25 +++++++++++++++++++++---- - tests/ofproto-dpif.at | 30 ++++++++++++++++++++++++++++++ - 2 files changed, 51 insertions(+), 4 deletions(-) - -diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c -index 4407f9c97a..54cfbfbdff 100644 ---- a/ofproto/ofproto-dpif-xlate.c -+++ b/ofproto/ofproto-dpif-xlate.c -@@ -1516,15 +1516,32 @@ xlate_lookup_ofproto_(const struct dpif_backer *backer, - return NULL; - } +diff --git a/.cirrus.yml b/.cirrus.yml +index 1b32f55d65..9428164eee 100644 +--- a/.cirrus.yml ++++ b/.cirrus.yml +@@ -16,6 +16,7 @@ freebsd_build_task: -- /* If recirculation was initiated due to bond (in_port = OFPP_NONE) -- * then frozen state is static and xport_uuid is not defined, so xport -- * cannot be restored from frozen state. */ -- if (recirc_id_node->state.metadata.in_port != OFPP_NONE) { -+ ofp_port_t in_port = recirc_id_node->state.metadata.in_port; -+ if (in_port != OFPP_NONE && in_port != OFPP_CONTROLLER) { - struct uuid xport_uuid = recirc_id_node->state.xport_uuid; - xport = xport_lookup_by_uuid(xcfg, &xport_uuid); - if (xport && xport->xbridge && xport->xbridge->ofproto) { - goto out; - } -+ } else { -+ /* OFPP_NONE and OFPP_CONTROLLER are not real ports. They indicate -+ * that the packet originated from the controller via an OpenFlow -+ * "packet-out". The right thing to do is to find just the -+ * ofproto. There is no xport, which is OK. -+ * -+ * OFPP_NONE can also indicate that a bond caused recirculation. */ -+ struct uuid uuid = recirc_id_node->state.ofproto_uuid; -+ const struct xbridge *bridge = xbridge_lookup_by_uuid(xcfg, &uuid); -+ if (bridge && bridge->ofproto) { -+ if (errorp) { -+ *errorp = NULL; -+ } -+ *xportp = NULL; -+ if (ofp_in_port) { -+ *ofp_in_port = in_port; -+ } -+ return bridge->ofproto; -+ } - } - } + prepare_script: + - sysctl -w kern.coredump=0 ++ - pkg update -f + - pkg install -y ${DEPENDENCIES} -diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at -index ff1cc93707..d444cf0844 100644 ---- a/tests/ofproto-dpif.at -+++ b/tests/ofproto-dpif.at -@@ -5171,6 +5171,36 @@ AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 2 - OVS_VSWITCHD_STOP - AT_CLEANUP + configure_script: +diff --git a/.travis.yml b/.travis.yml +index abd2a9117a..a59371c496 100644 +--- a/.travis.yml ++++ b/.travis.yml +@@ -52,6 +52,18 @@ matrix: + compiler: clang + env: OPTS="--disable-ssl" -+# Checks for regression against a bug in which OVS dropped packets -+# with in_port=CONTROLLER when they were recirculated (because -+# CONTROLLER isn't a real port and could not be looked up). 
-+AT_SETUP([ofproto-dpif - packet-out recirculation]) -+OVS_VSWITCHD_START -+add_of_ports br0 1 2 -+ -+AT_DATA([flows.txt], [dnl -+table=0 ip actions=mod_dl_dst:83:83:83:83:83:83,ct(table=1) -+table=1 ip actions=ct(commit),output:2 -+]) -+AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) ++matrix: ++ include: ++ - env: DEB_PACKAGE=1 ++ addons: ++ apt: ++ packages: ++ - linux-headers-$(uname -r) ++ - build-essential ++ - fakeroot ++ - devscripts ++ - equivs + -+packet=ffffffffffff00102030405008004500001c00000000401100000a000002ffffffff0035111100080000 -+AT_CHECK([ovs-ofctl packet-out br0 "in_port=controller packet=$packet actions=table"]) -+ -+# Dumps out the flow table, extracts the number of packets that have gone -+# through the (single) flow in table 1, and returns success if it's exactly 1. -+# -+# If this remains 0, then the recirculation isn't working properly since the -+# packet never goes through flow in table 1. -+check_flows () { -+ n=$(ovs-ofctl dump-flows br0 table=1 | sed -n 's/.*n_packets=\([[0-9]]\{1,\}\).*/\1/p') -+ echo "n_packets=$n" -+ test "$n" = 1 -+} -+OVS_WAIT_UNTIL([check_flows], [ovs dump-flows br0]) + script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS + + notifications: +diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh +index bb47b3ee19..dd89eab5f8 100755 +--- a/.travis/linux-build.sh ++++ b/.travis/linux-build.sh +@@ -159,13 +159,24 @@ function build_ovs() + fi + } + ++if [ "$DEB_PACKAGE" ]; then ++ mk-build-deps --install --root-cmd sudo --remove debian/control ++ dpkg-checkbuilddeps ++ DEB_BUILD_OPTIONS='parallel=4 nocheck' fakeroot debian/rules binary ++ # Not trying to install ipsec package as there are issues with system-wide ++ # installed python3-openvswitch package and the pyenv used by Travis. ++ packages=$(ls $(pwd)/../*.deb | grep -v ipsec) ++ sudo apt install ${packages} ++ exit 0 ++fi + -+OVS_VSWITCHD_STOP -+AT_CLEANUP + if [ "$KERNEL" ]; then + install_kernel $KERNEL + fi - AT_SETUP([ofproto-dpif - debug_slow action]) - OVS_VSWITCHD_START --- -2.25.1 - - -From 71f25b7920093daa59827a0a4be4095309aec6ff Mon Sep 17 00:00:00 2001 -From: Timothy Redaelli <tredaelli@redhat.com> -Date: Thu, 19 Mar 2020 20:05:39 +0100 -Subject: [PATCH 02/15] bugtool: Fix for Python3. - -Currently ovs-bugtool tool doesn't start on Python 3. -This commit fixes ovs-bugtool to make it works on Python 3. - -Replaced StringIO.StringIO with io.BytesIO since the script is -processing binary data. - -Reported-at: https://bugzilla.redhat.com/1809241 -Reported-by: Flavio Leitner <fbl@sysclose.org> -Signed-off-by: Timothy Redaelli <tredaelli@redhat.com> -Co-authored-by: William Tu <u9012063@gmail.com> -Signed-off-by: William Tu <u9012063@gmail.com> -(cherry picked from commit 9e6c00bca9af29031d0e160d33174b7ae99b9244) ---- - utilities/bugtool/ovs-bugtool.in | 48 +++++++++++++++++--------------- - 1 file changed, 25 insertions(+), 23 deletions(-) - -diff --git a/utilities/bugtool/ovs-bugtool.in b/utilities/bugtool/ovs-bugtool.in -index e55bfc2ed5..47f3c4629f 100755 ---- a/utilities/bugtool/ovs-bugtool.in -+++ b/utilities/bugtool/ovs-bugtool.in -@@ -33,8 +33,7 @@ - # or func_output(). - # + if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then + if [ -z "$DPDK_VER" ]; then +- DPDK_VER="19.11" ++ DPDK_VER="19.11.2" + fi + install_dpdk $DPDK_VER + # Enable pdump support in OVS. 
+diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh +index fda13e7d21..71eb347e89 100755 +--- a/.travis/linux-prepare.sh ++++ b/.travis/linux-prepare.sh +@@ -2,14 +2,22 @@ --import StringIO --import commands -+from io import BytesIO - import fcntl - import getopt - import hashlib -@@ -48,7 +47,7 @@ import warnings - import zipfile - from select import select - from signal import SIGTERM --from subprocess import PIPE, Popen -+from subprocess import PIPE, Popen, check_output + set -ev - from xml.dom.minidom import getDOMImplementation, parse ++if [ "$DEB_PACKAGE" ]; then ++ # We're not using sparse for debian packages, tests are skipped and ++ # all extra dependencies tracked by mk-build-deps. ++ exit 0 ++fi ++ + # Build and install sparse. + # + # Explicitly disable sparse support for llvm because some travis + # environments claim to have LLVM (llvm-config exists and works) but + # linking against it fails. ++# Disabling sqlite support because sindex build fails and we don't ++# really need this utility being installed. + git clone git://git.kernel.org/pub/scm/devel/sparse/sparse.git + cd sparse +-make -j4 HAVE_LLVM= install ++make -j4 HAVE_LLVM= HAVE_SQLITE= install + cd .. -@@ -348,7 +347,7 @@ def collect_data(): - cap = v['cap'] - if 'cmd_args' in v: - if 'output' not in v.keys(): -- v['output'] = StringIOmtime() -+ v['output'] = BytesIOmtime() - if v['repeat_count'] > 0: - if cap not in process_lists: - process_lists[cap] = [] -@@ -373,20 +372,23 @@ def collect_data(): - if 'filename' in v and v['filename'].startswith('/proc/'): - # proc files must be read into memory - try: -- f = open(v['filename'], 'r') -+ f = open(v['filename'], 'rb') - s = f.read() - f.close() - if check_space(cap, v['filename'], len(s)): -- v['output'] = StringIOmtime(s) -+ v['output'] = BytesIOmtime(s) - except: - pass - elif 'func' in v: - try: - s = v['func'](cap) - except Exception as e: -- s = str(e) -+ s = str(e).encode() - if check_space(cap, k, len(s)): -- v['output'] = StringIOmtime(s) -+ if isinstance(s, str): -+ v['output'] = BytesIOmtime(s.encode()) -+ else: -+ v['output'] = BytesIOmtime(s) + pip3 install --disable-pip-version-check --user flake8 hacking +diff --git a/AUTHORS.rst b/AUTHORS.rst +index fe3935fca2..4c8772f63a 100644 +--- a/AUTHORS.rst ++++ b/AUTHORS.rst +@@ -419,6 +419,7 @@ Zhenyu Gao sysugaozhenyu@gmail.com + ZhiPeng Lu luzhipeng@uniudc.com + Zhou Yangchao 1028519445@qq.com + aginwala amginwal@gmail.com ++lzhecheng lzhecheng@vmware.com + parameswaran krishnamurthy parkrish@gmail.com + solomon liwei.solomon@gmail.com + wenxu wenxu@ucloud.cn +@@ -496,6 +497,7 @@ Edwin Chiu echiu@vmware.com + Eivind Bulie Haanaes + Enas Ahmad enas.ahmad@kaust.edu.sa + Eric Lopez ++Frank Wang (王培辉) wangpeihui@inspur.com + Frido Roose fr.roose@gmail.com + Gaetano Catalli gaetano.catalli@gmail.com + Gavin Remaley gavin_remaley@selinc.com +@@ -558,6 +560,7 @@ Krishna Miriyala miriyalak@vmware.com + Krishna Mohan Elluru elluru.kri.mohan@hpe.com + László Sürü laszlo.suru@ericsson.com + Len Gao leng@vmware.com ++Linhaifeng haifeng.lin@huawei.com + Logan Rosen logatronico@gmail.com + Luca Falavigna dktrkranz@debian.org + Luiz Henrique Ozaki luiz.ozaki@gmail.com +@@ -655,6 +658,7 @@ Ying Chen yingchen@vmware.com + Yongqiang Liu liuyq7809@gmail.com + ZHANG Zhiming zhangzhiming@yunshan.net.cn + Zhangguanghui zhang.guanghui@h3c.com ++Zheng Jingzhou glovejmm@163.com + Ziyou Wang ziyouw@vmware.com + ankur dwivedi ankurengg2003@gmail.com + chen zhang 3zhangchen9211@gmail.com +diff --git 
a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst +index 6702c58a2b..41e1315a4c 100644 +--- a/Documentation/faq/releases.rst ++++ b/Documentation/faq/releases.rst +@@ -67,9 +67,10 @@ Q: What Linux kernel versions does each Open vSwitch release work with? + 2.7.x 3.10 to 4.9 + 2.8.x 3.10 to 4.12 + 2.9.x 3.10 to 4.13 +- 2.10.x 3.10 to 4.17 +- 2.11.x 3.10 to 4.18 +- 2.12.x 3.10 to 5.0 ++ 2.10.x 3.16 to 4.17 ++ 2.11.x 3.16 to 4.18 ++ 2.12.x 3.16 to 5.0 ++ 2.13.x 3.16 to 5.0 + ============ ============== + Open vSwitch userspace should also work with the Linux kernel module built +@@ -78,6 +79,10 @@ Q: What Linux kernel versions does each Open vSwitch release work with? + Open vSwitch userspace is not sensitive to the Linux kernel version. It + should build against almost any kernel, certainly against 2.6.32 and later. - def main(argv=None): -@@ -704,7 +706,7 @@ exclude those logs from the archive. ++ Open vSwitch branches 2.10 through 2.13 will still compile against the ++ RHEL and CentOS 7 3.10 based kernels since they have diverged from the ++ Linux kernel.org 3.10 kernels. ++ + Q: Are all features available with all datapaths? - # permit the user to filter out data - # We cannot use iteritems, since we modify 'data' as we pass through -- for (k, v) in sorted(data.items()): -+ for (k, v) in data.items(): - cap = v['cap'] - if 'filename' in v: - key = k[0] -@@ -721,7 +723,7 @@ exclude those logs from the archive. + A: Open vSwitch supports different datapaths on different platforms. Each +@@ -173,9 +178,9 @@ Q: What DPDK version does each Open vSwitch release work with? + A: The following table lists the DPDK version against which the given + versions of Open vSwitch will successfully build. - # include inventory - data['inventory.xml'] = {'cap': None, -- 'output': StringIOmtime(make_inventory(data, subdir))} -+ 'output': BytesIOmtime(make_inventory(data, subdir))} +- ============ ======= ++ ============ ======== + Open vSwitch DPDK +- ============ ======= ++ ============ ======== + 2.2.x 1.6 + 2.3.x 1.6 + 2.4.x 2.0 +@@ -183,11 +188,12 @@ Q: What DPDK version does each Open vSwitch release work with? + 2.6.x 16.07.2 + 2.7.x 16.11.9 + 2.8.x 17.05.2 +- 2.9.x 17.11.4 +- 2.10.x 17.11.4 +- 2.11.x 18.11.5 +- 2.12.x 18.11.5 +- ============ ======= ++ 2.9.x 17.11.10 ++ 2.10.x 17.11.10 ++ 2.11.x 18.11.9 ++ 2.12.x 18.11.9 ++ 2.13.x 19.11.2 ++ ============ ======== - # create archive - if output_fd == -1: -@@ -782,7 +784,7 @@ def dump_scsi_hosts(cap): + Q: Are all the DPDK releases that OVS versions work with maintained? +diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst +index dbf88ec43f..90eaa8aa2c 100644 +--- a/Documentation/intro/install/dpdk.rst ++++ b/Documentation/intro/install/dpdk.rst +@@ -42,7 +42,7 @@ Build requirements + In addition to the requirements described in :doc:`general`, building Open + vSwitch with DPDK will require the following: - def module_info(cap): -- output = StringIO.StringIO() -+ output = BytesIO() - modules = open(PROC_MODULES, 'r') - procs = [] +-- DPDK 19.11 ++- DPDK 19.11.2 -@@ -806,7 +808,7 @@ def multipathd_topology(cap): + - A `DPDK supported NIC`_ +@@ -71,9 +71,9 @@ Install DPDK + #. 
Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``:: - def dp_list(): -- output = StringIO.StringIO() -+ output = BytesIO() - procs = [ProcOutput([OVS_DPCTL, 'dump-dps'], - caps[CAP_NETWORK_STATUS][MAX_TIME], output)] + $ cd /usr/src/ +- $ wget https://fast.dpdk.org/rel/dpdk-19.11.tar.xz +- $ tar xf dpdk-19.11.tar.xz +- $ export DPDK_DIR=/usr/src/dpdk-19.11 ++ $ wget https://fast.dpdk.org/rel/dpdk-19.11.2.tar.xz ++ $ tar xf dpdk-19.11.2.tar.xz ++ $ export DPDK_DIR=/usr/src/dpdk-stable-19.11.2 + $ cd $DPDK_DIR -@@ -828,7 +830,7 @@ def collect_ovsdb(): - if os.path.isfile(OPENVSWITCH_COMPACT_DB): - os.unlink(OPENVSWITCH_COMPACT_DB) + #. (Optional) Configure DPDK as a shared library +diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst +index c6c6fd8bde..4bc5aef59d 100644 +--- a/Documentation/topics/dpdk/vhost-user.rst ++++ b/Documentation/topics/dpdk/vhost-user.rst +@@ -392,9 +392,9 @@ To begin, instantiate a guest as described in :ref:`dpdk-vhost-user` or + DPDK sources to VM and build DPDK:: -- output = StringIO.StringIO() -+ output = BytesIO() - max_time = 5 - procs = [ProcOutput(['ovsdb-tool', 'compact', - OPENVSWITCH_CONF_DB, OPENVSWITCH_COMPACT_DB], -@@ -871,7 +873,7 @@ def fd_usage(cap): + $ cd /root/dpdk/ +- $ wget https://fast.dpdk.org/rel/dpdk-19.11.tar.xz +- $ tar xf dpdk-19.11.tar.xz +- $ export DPDK_DIR=/root/dpdk/dpdk-19.11 ++ $ wget https://fast.dpdk.org/rel/dpdk-19.11.2.tar.xz ++ $ tar xf dpdk-19.11.2.tar.xz ++ $ export DPDK_DIR=/root/dpdk/dpdk-stable-19.11.2 + $ export DPDK_TARGET=x86_64-native-linuxapp-gcc + $ export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET + $ cd $DPDK_DIR +diff --git a/Documentation/topics/userspace-tso.rst b/Documentation/topics/userspace-tso.rst +index 94eddc0b2f..369d70691d 100644 +--- a/Documentation/topics/userspace-tso.rst ++++ b/Documentation/topics/userspace-tso.rst +@@ -91,21 +91,19 @@ The current OvS userspace `TSO` implementation supports flat and VLAN networks + only (i.e. no support for `TSO` over tunneled connection [VxLAN, GRE, IPinIP, + etc.]). ++The NIC driver must support and advertise checksum offload for TCP and UDP. ++However, SCTP is not mandatory because very few drivers advertised support ++and it wasn't a widely used protocol at the moment this feature was introduced ++in Open vSwitch. Currently, if the NIC supports that, then the feature is ++enabled, otherwise TSO can still be enabled but SCTP packets sent to the NIC ++will be dropped. ++ + There is no software implementation of TSO, so all ports attached to the + datapath must support TSO or packets using that feature will be dropped + on ports without TSO support. That also means guests using vhost-user + in client mode will receive TSO packet regardless of TSO being enabled + or disabled within the guest. - def dump_rdac_groups(cap): -- output = StringIO.StringIO() -+ output = BytesIO() - procs = [ProcOutput([MPPUTIL, '-a'], caps[cap][MAX_TIME], output)] +-When the NIC performing the segmentation is using the i40e DPDK PMD, a fix +-must be included in the DPDK build, otherwise TSO will not work. The fix can +-be found on `DPDK patchwork`__. +- +-__ https://patches.dpdk.org/patch/64136/ +- +-This fix is expected to be included in the 19.11.1 release. When OVS migrates +-to this DPDK release, this limitation can be removed. 
+- + ~~~~~~~~~~~~~~~~~~ + Performance Tuning + ~~~~~~~~~~~~~~~~~~ +diff --git a/Makefile.am b/Makefile.am +index b279303d18..27ef9e4b48 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -46,7 +46,7 @@ AM_CPPFLAGS += -DNDEBUG + AM_CFLAGS += -fomit-frame-pointer + endif - run_procs([procs]) -@@ -896,7 +898,7 @@ def load_plugins(just_capabilities=False, filter=None): - for node in nodelist: - if node.nodeType == node.TEXT_NODE: - rc += node.data -- return rc.encode() -+ return rc +-AM_CTAGSFLAGS = $(OVS_CTAGS_IDENTIFIERS_LIST) ++AM_CTAGSFLAGS = -I "$(OVS_CTAGS_IDENTIFIERS_LIST)" - def getBoolAttr(el, attr, default=False): - ret = default -@@ -1037,7 +1039,7 @@ def make_tar(subdir, suffix, output_fd, output_file): - s = os.stat(v['filename']) - ti.mtime = s.st_mtime - ti.size = s.st_size -- tf.addfile(ti, open(v['filename'])) -+ tf.addfile(ti, open(v['filename'], 'rb')) - except: - pass - finally: -@@ -1095,12 +1097,12 @@ def make_inventory(inventory, subdir): - s.setAttribute('date', time.strftime('%c')) - s.setAttribute('hostname', platform.node()) - s.setAttribute('uname', ' '.join(platform.uname())) -- s.setAttribute('uptime', commands.getoutput(UPTIME)) -+ s.setAttribute('uptime', check_output(UPTIME).decode()) - document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(s) + if WIN32 + psep=";" +diff --git a/NEWS b/NEWS +index dab94e924d..128db0f619 100644 +--- a/NEWS ++++ b/NEWS +@@ -1,3 +1,14 @@ ++v2.13.2 - xx xxx xxxx ++--------------------- ++ ++v2.13.1 - 30 Jul 2020 ++--------------------- ++ - Bug fixes ++ - DPDK: ++ * OVS validated with DPDK 19.11.2, due to the inclusion of fixes for ++ CVE-2020-10722, CVE-2020-10723, CVE-2020-10724, CVE-2020-10725 and ++ CVE-2020-10726, this DPDK version is strongly recommended to be used. ++ + v2.13.0 - 14 Feb 2020 + --------------------- + - OVN: +diff --git a/acinclude.m4 b/acinclude.m4 +index c1470ccc6b..7f028836f5 100644 +--- a/acinclude.m4 ++++ b/acinclude.m4 +@@ -250,6 +250,18 @@ AC_DEFUN([OVS_CHECK_LINUX_SCTP_CT], [ + [Define to 1 if SCTP_CONNTRACK_HEARTBEAT_SENT is available.])]) + ]) - map(lambda k_v: inventory_entry(document, subdir, k_v[0], k_v[1]), - inventory.items()) -- return document.toprettyxml() -+ return document.toprettyxml().encode() ++dnl OVS_CHECK_LINUX_VIRTIO_TYPES ++dnl ++dnl Checks for kernels that need virtio_types definition. ++AC_DEFUN([OVS_CHECK_LINUX_VIRTIO_TYPES], [ ++ AC_COMPILE_IFELSE([ ++ AC_LANG_PROGRAM([#include <linux/virtio_types.h>], [ ++ __virtio16 x = 0; ++ ])], ++ [AC_DEFINE([HAVE_VIRTIO_TYPES], [1], ++ [Define to 1 if __virtio16 is available.])]) ++]) ++ + dnl OVS_FIND_DEPENDENCY(FUNCTION, SEARCH_LIBS, NAME_TO_PRINT) + dnl + dnl Check for a function in a library list. 
+@@ -379,7 +391,6 @@ AC_DEFUN([OVS_CHECK_DPDK], [ + [AC_MSG_RESULT([no])]) + AC_CHECK_DECL([RTE_LIBRTE_MLX5_PMD], [dnl found +- OVS_FIND_DEPENDENCY([mnl_attr_put], [mnl], [libmnl]) + AC_CHECK_DECL([RTE_IBVERBS_LINK_DLOPEN], [], [dnl not found + OVS_FIND_DEPENDENCY([mlx5dv_create_wq], [mlx5], [libmlx5]) + OVS_FIND_DEPENDENCY([verbs_init_cq], [ibverbs], [libibverbs]) +@@ -567,9 +578,14 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ + OVS_GREP_IFELSE([$KSRC/include/net/ip6_fib.h], [rt6_get_cookie], + [OVS_DEFINE([HAVE_RT6_GET_COOKIE])]) - def inventory_entry(document, subdir, k, v): -@@ -1301,7 +1303,7 @@ class ProcOutput(object): - line = self.proc.stdout.readline() - else: - line = self.proc.stdout.read(self.bufsize) -- if line == '': -+ if line == b'': - # process exited - self.proc.stdout.close() - self.status = self.proc.wait() -@@ -1391,13 +1393,13 @@ def get_free_disk_space(path): - return s.f_frsize * s.f_bfree ++ OVS_FIND_FIELD_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_stub], ++ [dst_entry]) + OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_dst_lookup.*net], + [OVS_DEFINE([HAVE_IPV6_DST_LOOKUP_NET])]) ++ OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_dst_lookup_flow.*net], ++ [OVS_DEFINE([HAVE_IPV6_DST_LOOKUP_FLOW_NET])]) + OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_stub]) ++ OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_dst_lookup_flow]) + OVS_GREP_IFELSE([$KSRC/include/linux/err.h], [ERR_CAST]) + OVS_GREP_IFELSE([$KSRC/include/linux/err.h], [IS_ERR_OR_NULL]) +@@ -765,6 +781,10 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ + [prandom_u32[[\(]]], + [OVS_DEFINE([HAVE_PRANDOM_U32])]) + OVS_GREP_IFELSE([$KSRC/include/linux/random.h], [prandom_u32_max]) ++ OVS_GREP_IFELSE([$KSRC/include/linux/prandom.h], ++ [prandom_u32[[\(]]], ++ [OVS_DEFINE([HAVE_PRANDOM_U32])]) ++ OVS_GREP_IFELSE([$KSRC/include/linux/prandom.h], [prandom_u32_max]) --class StringIOmtime(StringIO.StringIO): -- def __init__(self, buf=''): -- StringIO.StringIO.__init__(self, buf) -+class BytesIOmtime(BytesIO): -+ def __init__(self, buf=b''): -+ BytesIO.__init__(self, buf) - self.mtime = time.time() + OVS_GREP_IFELSE([$KSRC/include/net/rtnetlink.h], [get_link_net]) + OVS_GREP_IFELSE([$KSRC/include/net/rtnetlink.h], [name_assign_type]) +@@ -1294,11 +1314,11 @@ AC_DEFUN([OVS_ENABLE_SPARSE], - def write(self, s): -- StringIO.StringIO.write(self, s) -+ BytesIO.write(self, s) - self.mtime = time.time() + dnl OVS_CTAGS_IDENTIFIERS + dnl +-dnl ctags ignores symbols with extras identifiers. This builds a list of +-dnl specially handled identifiers to be ignored. ++dnl ctags ignores symbols with extras identifiers. This is a list of ++dnl specially handled identifiers to be ignored. [ctags(1) -I <list>]. + AC_DEFUN([OVS_CTAGS_IDENTIFIERS], + AC_SUBST([OVS_CTAGS_IDENTIFIERS_LIST], +- [`printf %s '-I "'; sed -n 's/^#define \(OVS_[A-Z_]\+\)(\.\.\.)$/\1+/p' ${srcdir}/include/openvswitch/compiler.h | tr \\\n ' ' ; printf '"'`] )) ++ ["OVS_LOCKABLE OVS_NO_THREAD_SAFETY_ANALYSIS OVS_REQ_RDLOCK+ OVS_ACQ_RDLOCK+ OVS_REQ_WRLOCK+ OVS_ACQ_WRLOCK+ OVS_REQUIRES+ OVS_ACQUIRES+ OVS_TRY_WRLOCK+ OVS_TRY_RDLOCK+ OVS_TRY_LOCK+ OVS_GUARDED_BY+ OVS_EXCLUDED+ OVS_RELEASES+ OVS_ACQ_BEFORE+ OVS_ACQ_AFTER+"])) + dnl OVS_PTHREAD_SET_NAME + dnl +diff --git a/configure.ac b/configure.ac +index 92b52f6712..67942bbfb7 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -13,7 +13,7 @@ + # limitations under the License. 
--- -2.25.1 - - -From 914d885061c9f7e7e6e5f921065301e08837e122 Mon Sep 17 00:00:00 2001 -From: Han Zhou <hzhou@ovn.org> -Date: Fri, 28 Feb 2020 18:07:04 -0800 -Subject: [PATCH 03/15] raft-rpc: Fix message format. - -[ upstream commit 78c8011f58daec41ec97440f2e42795699322742 ] - -Signed-off-by: Han Zhou <hzhou@ovn.org> -Signed-off-by: Ben Pfaff <blp@ovn.org> - -Resolves: #1836305 -Signed-off-by: Dumitru Ceara <dceara@redhat.com> ---- - ovsdb/raft-rpc.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/ovsdb/raft-rpc.c b/ovsdb/raft-rpc.c -index 18c83fe9c2..dd14d81091 100644 ---- a/ovsdb/raft-rpc.c -+++ b/ovsdb/raft-rpc.c -@@ -544,8 +544,8 @@ raft_format_install_snapshot_request( - ds_put_format(s, " last_index=%"PRIu64, rq->last_index); - ds_put_format(s, " last_term=%"PRIu64, rq->last_term); - ds_put_format(s, " last_eid="UUID_FMT, UUID_ARGS(&rq->last_eid)); -- ds_put_cstr(s, " last_servers="); - ds_put_format(s, " election_timer=%"PRIu64, rq->election_timer); -+ ds_put_cstr(s, " last_servers="); + AC_PREREQ(2.63) +-AC_INIT(openvswitch, 2.13.0, bugs@openvswitch.org) ++AC_INIT(openvswitch, 2.13.2, bugs@openvswitch.org) + AC_CONFIG_SRCDIR([datapath/datapath.c]) + AC_CONFIG_MACRO_DIR([m4]) + AC_CONFIG_AUX_DIR([build-aux]) +@@ -188,6 +188,7 @@ OVS_CHECK_LINUX + OVS_CHECK_LINUX_NETLINK + OVS_CHECK_LINUX_TC + OVS_CHECK_LINUX_SCTP_CT ++OVS_CHECK_LINUX_VIRTIO_TYPES + OVS_CHECK_DPDK + OVS_CHECK_PRAGMA_MESSAGE + AC_SUBST([OVS_CFLAGS]) +diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c +index 5c9b5c3a0c..ced1d2957d 100644 +--- a/datapath-windows/ovsext/Actions.c ++++ b/datapath-windows/ovsext/Actions.c +@@ -1259,6 +1259,7 @@ OvsActionMplsPush(OvsForwardingContext *ovsFwdCtx, + */ + static __inline NDIS_STATUS + OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx, ++ OvsFlowKey *key, + const struct ovs_key_ethernet *ethAttr) + { + PNET_BUFFER curNb; +@@ -1285,9 +1286,11 @@ OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx, + } + ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb)); - struct hmap servers; - struct ovsdb_error *error = --- -2.25.1 - - -From 8ff30dfee6cb075e36ed38b77695ff03321ce12b Mon Sep 17 00:00:00 2001 -From: Han Zhou <hzhou@ovn.org> -Date: Fri, 28 Feb 2020 18:07:05 -0800 -Subject: [PATCH 04/15] ovsdb-server: Don't disconnect clients after raft - install_snapshot. - -[ upstream commit f0c8b44c5832c36989fad78927407fc14e64ce46 ] - -When "schema" field is found in read_db(), there can be two cases: -1. There is a schema change in clustered DB and the "schema" is the new one. -2. There is a install_snapshot RPC happened, which caused log compaction on the -server and the next log is just the snapshot, which always constains "schema" -field, even though the schema hasn't been changed. - -The current implementation doesn't handle case 2), and always assume the schema -is changed hence disconnect all clients of the server. It can cause stability -problem when there are big number of clients connected when this happens in -a large scale environment. 
- -Signed-off-by: Han Zhou <hzhou@ovn.org> -Signed-off-by: Ben Pfaff <blp@ovn.org> - -Resolves: #1836305 -Signed-off-by: Dumitru Ceara <dceara@redhat.com> ---- - ovsdb/ovsdb-server.c | 3 ++- - tests/ovsdb-cluster.at | 56 ++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 58 insertions(+), 1 deletion(-) - -diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c -index b6957d7300..d416f1b606 100644 ---- a/ovsdb/ovsdb-server.c -+++ b/ovsdb/ovsdb-server.c -@@ -543,7 +543,8 @@ parse_txn(struct server_config *config, struct db *db, - struct ovsdb_schema *schema, const struct json *txn_json, - const struct uuid *txnid) +- RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst, +- sizeof ethHdr->Destination); +- RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source); ++ RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst, ETH_ADDR_LENGTH); ++ RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, ETH_ADDR_LENGTH); ++ /* Update l2 flow key */ ++ RtlCopyMemory(key->l2.dlDst, ethAttr->eth_dst, ETH_ADDR_LENGTH); ++ RtlCopyMemory(key->l2.dlSrc, ethAttr->eth_src, ETH_ADDR_LENGTH); + + return NDIS_STATUS_SUCCESS; + } +@@ -1376,6 +1379,7 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx, + */ + NDIS_STATUS + OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, ++ OvsFlowKey *key, + const struct ovs_key_udp *udpAttr) { -- if (schema) { -+ if (schema && (!db->db->schema || strcmp(schema->version, -+ db->db->schema->version))) { - /* We're replacing the schema (and the data). Destroy the database - * (first grabbing its storage), then replace it with the new schema. - * The transaction must also include the replacement data. -diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at -index 3a0bd4579e..5b6188b96f 100644 ---- a/tests/ovsdb-cluster.at -+++ b/tests/ovsdb-cluster.at -@@ -273,6 +273,62 @@ OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name | grep "Ele + PUINT8 bufferStart; +@@ -1400,15 +1404,19 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, + udpHdr->check = ChecksumUpdate16(udpHdr->check, udpHdr->source, + udpAttr->udp_src); + udpHdr->source = udpAttr->udp_src; ++ key->ipKey.l4.tpSrc = udpAttr->udp_src; + } + if (udpHdr->dest != udpAttr->udp_dst) { + udpHdr->check = ChecksumUpdate16(udpHdr->check, udpHdr->dest, + udpAttr->udp_dst); + udpHdr->dest = udpAttr->udp_dst; ++ key->ipKey.l4.tpDst = udpAttr->udp_dst; + } + } else { + udpHdr->source = udpAttr->udp_src; ++ key->ipKey.l4.tpSrc = udpAttr->udp_src; + udpHdr->dest = udpAttr->udp_dst; ++ key->ipKey.l4.tpDst = udpAttr->udp_dst; + } - AT_CLEANUP + return NDIS_STATUS_SUCCESS; +@@ -1423,6 +1431,7 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, + */ + NDIS_STATUS + OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, ++ OvsFlowKey *key, + const struct ovs_key_tcp *tcpAttr) + { + PUINT8 bufferStart; +@@ -1447,11 +1456,13 @@ OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, + tcpHdr->check = ChecksumUpdate16(tcpHdr->check, tcpHdr->source, + tcpAttr->tcp_src); + tcpHdr->source = tcpAttr->tcp_src; ++ key->ipKey.l4.tpSrc = tcpAttr->tcp_src; + } + if (tcpHdr->dest != tcpAttr->tcp_dst) { + tcpHdr->check = ChecksumUpdate16(tcpHdr->check, tcpHdr->dest, + tcpAttr->tcp_dst); + tcpHdr->dest = tcpAttr->tcp_dst; ++ key->ipKey.l4.tpDst = tcpAttr->tcp_dst; + } -+ -+AT_BANNER([OVSDB cluster install snapshot RPC]) -+ -+AT_SETUP([OVSDB cluster - install snapshot RPC]) -+AT_KEYWORDS([ovsdb server positive unix cluster snapshot]) -+ -+n=3 -+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` 
-+ordinal_schema > schema -+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) -+cid=`ovsdb-tool db-cid s1.db` -+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` -+for i in `seq 2 $n`; do -+ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) -+done -+ -+on_exit 'kill `cat *.pid`' -+for i in `seq $n`; do -+ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) -+done -+for i in `seq $n`; do -+ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) -+done -+ -+# Kill one follower (s2) and write some data to cluster, so that the follower is falling behind -+printf "\ns2: stopping\n" -+OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s2], [s2.pid]) -+ -+AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest", -+ {"op": "insert", -+ "table": "simple", -+ "row": {"i": 1}}]]'], [0], [ignore], [ignore]) -+ -+# Compact leader online to generate snapshot -+AT_CHECK([ovs-appctl -t "`pwd`"/s1 ovsdb-server/compact]) -+ -+# Start the follower s2 again. -+AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s2.log --pidfile=s2.pid --unixctl=s2 --remote=punix:s2.ovsdb s2.db]) -+AT_CHECK([ovsdb_client_wait unix:s2.ovsdb $schema_name connected]) -+ -+# A client transaction through s2. During this transaction, there will be a -+# install_snapshot RPC because s2 detects it is behind and s1 doesn't have the -+# pre_log_index requested by s2 because it is already compacted. -+# After the install_snapshot RPC process, the transaction through s2 should -+# succeed. -+AT_CHECK([ovsdb-client transact unix:s2.ovsdb '[["idltest", -+ {"op": "insert", -+ "table": "simple", -+ "row": {"i": 1}}]]'], [0], [ignore], [ignore]) -+ -+for i in `seq $n`; do -+ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) -+done -+ -+AT_CLEANUP -+ - + return NDIS_STATUS_SUCCESS; +@@ -1579,6 +1590,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, + */ + NDIS_STATUS + OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, ++ OvsFlowKey *key, + const struct ovs_key_ipv4 *ipAttr) + { + PUINT8 bufferStart; +@@ -1632,6 +1644,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, + ipAttr->ipv4_src); + } + ipHdr->saddr = ipAttr->ipv4_src; ++ key->ipKey.nwSrc = ipAttr->ipv4_src; + } + if (ipHdr->daddr != ipAttr->ipv4_dst) { + if (tcpHdr) { +@@ -1647,6 +1660,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, + ipAttr->ipv4_dst); + } + ipHdr->daddr = ipAttr->ipv4_dst; ++ key->ipKey.nwDst = ipAttr->ipv4_dst; + } + if (ipHdr->protocol != ipAttr->ipv4_proto) { + UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00; +@@ -1661,6 +1675,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, + ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto); + } + ipHdr->protocol = ipAttr->ipv4_proto; ++ key->ipKey.nwProto = ipAttr->ipv4_proto; + } + if (ipHdr->ttl != ipAttr->ipv4_ttl) { + UINT16 oldTtl = (ipHdr->ttl) & 0xff; +@@ -1669,6 +1684,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, + ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl); + } + ipHdr->ttl = ipAttr->ipv4_ttl; ++ key->ipKey.nwTtl = ipAttr->ipv4_ttl; + } - OVS_START_SHELL_HELPERS --- -2.25.1 - - -From e732012d7be335650398ff03c2431c64b2c4aaba Mon Sep 17 00:00:00 2001 -From: Han Zhou <hzhou@ovn.org> -Date: Fri, 28 Feb 2020 18:07:06 -0800 -Subject: [PATCH 05/15] raft: Fix 
raft_is_connected() when there is no leader - yet. - -[ upstream commit adc64ab057345f7004c44bf92363b9adda862134 ] - -If there is never a leader known by the current server, it's status -should be "disconnected" to the cluster. Without this patch, when -a server in cluster is restarted, before it successfully connecting -back to the cluster it will appear as connected, which is wrong. - -Signed-off-by: Han Zhou <hzhou@ovn.org> -Signed-off-by: Ben Pfaff <blp@ovn.org> - -Resolves: #1836305 -Signed-off-by: Dumitru Ceara <dceara@redhat.com> ---- - ovsdb/raft.c | 10 ++++++++-- - tests/ovsdb-cluster.at | 35 +++++++++++++++++++++++++++++++++++ - 2 files changed, 43 insertions(+), 2 deletions(-) - -diff --git a/ovsdb/raft.c b/ovsdb/raft.c -index 4789bc4f22..6cd7b0041a 100644 ---- a/ovsdb/raft.c -+++ b/ovsdb/raft.c -@@ -298,6 +298,11 @@ struct raft { - bool had_leader; /* There has been leader elected since last - election initiated. This is to help setting - candidate_retrying. */ -+ -+ /* For all. */ -+ bool ever_had_leader; /* There has been leader elected since the raft -+ is initialized, meaning it is ever -+ connected. */ - }; + return NDIS_STATUS_SUCCESS; +@@ -1691,12 +1707,12 @@ OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, - /* All Raft structures. */ -@@ -1024,7 +1029,8 @@ raft_is_connected(const struct raft *raft) - && !raft->joining - && !raft->leaving - && !raft->left -- && !raft->failed); -+ && !raft->failed -+ && raft->ever_had_leader); - VLOG_DBG("raft_is_connected: %s\n", ret? "true": "false"); - return ret; - } -@@ -2519,7 +2525,7 @@ static void - raft_set_leader(struct raft *raft, const struct uuid *sid) + switch (type) { + case OVS_KEY_ATTR_ETHERNET: +- status = OvsUpdateEthHeader(ovsFwdCtx, ++ status = OvsUpdateEthHeader(ovsFwdCtx, key, + NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet))); + break; + + case OVS_KEY_ATTR_IPV4: +- status = OvsUpdateIPv4Header(ovsFwdCtx, ++ status = OvsUpdateIPv4Header(ovsFwdCtx, key, + NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4))); + break; + +@@ -1709,16 +1725,17 @@ OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, + status = SUCCEEDED(convertStatus) ? 
NDIS_STATUS_SUCCESS : NDIS_STATUS_FAILURE; + ASSERT(status == NDIS_STATUS_SUCCESS); + RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey); ++ RtlCopyMemory(&key->tunKey, &tunKey, sizeof key->tunKey); + break; + } + + case OVS_KEY_ATTR_UDP: +- status = OvsUpdateUdpPorts(ovsFwdCtx, ++ status = OvsUpdateUdpPorts(ovsFwdCtx, key, + NlAttrGetUnspec(a, sizeof(struct ovs_key_udp))); + break; + + case OVS_KEY_ATTR_TCP: +- status = OvsUpdateTcpPorts(ovsFwdCtx, ++ status = OvsUpdateTcpPorts(ovsFwdCtx, key, + NlAttrGetUnspec(a, sizeof(struct ovs_key_tcp))); + break; + +diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h +index fd050d5dd8..bc12e1166d 100644 +--- a/datapath-windows/ovsext/Actions.h ++++ b/datapath-windows/ovsext/Actions.h +@@ -115,14 +115,17 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx, + + NDIS_STATUS + OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, ++ OvsFlowKey *key, + const struct ovs_key_udp *udpAttr); + + NDIS_STATUS + OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, ++ OvsFlowKey *key, + const struct ovs_key_tcp *tcpAttr); + + NDIS_STATUS + OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, ++ OvsFlowKey *key, + const struct ovs_key_ipv4 *ipAttr); + + NDIS_STATUS +diff --git a/datapath-windows/ovsext/Conntrack-other.c b/datapath-windows/ovsext/Conntrack-other.c +index 962cc8ac65..8580415a6b 100644 +--- a/datapath-windows/ovsext/Conntrack-other.c ++++ b/datapath-windows/ovsext/Conntrack-other.c +@@ -49,17 +49,19 @@ OvsConntrackUpdateOtherEntry(OVS_CT_ENTRY *conn_, { - raft->leader_sid = *sid; -- raft->had_leader = true; -+ raft->ever_had_leader = raft->had_leader = true; - raft->candidate_retrying = false; + ASSERT(conn_); + struct conn_other *conn = OvsCastConntrackEntryToOtherEntry(conn_); ++ enum CT_UPDATE_RES ret = CT_UPDATE_VALID; + + if (reply && conn->state != OTHERS_BIDIR) { + conn->state = OTHERS_BIDIR; + } else if (conn->state == OTHERS_FIRST) { + conn->state = OTHERS_MULTIPLE; ++ ret = CT_UPDATE_VALID_NEW; + } + + OvsConntrackUpdateExpiration(&conn->up, now, + other_timeouts[conn->state]); + +- return CT_UPDATE_VALID; ++ return ret; } -diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at -index 5b6188b96f..0aa4564480 100644 ---- a/tests/ovsdb-cluster.at -+++ b/tests/ovsdb-cluster.at -@@ -179,6 +179,41 @@ AT_KEYWORDS([ovsdb server negative unix cluster disconnect]) - ovsdb_test_cluster_disconnect 5 leader yes - AT_CLEANUP + OVS_CT_ENTRY * +diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c +index eda42ac823..a468c3e6bc 100644 +--- a/datapath-windows/ovsext/Conntrack-tcp.c ++++ b/datapath-windows/ovsext/Conntrack-tcp.c +@@ -213,11 +213,17 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_, + return CT_UPDATE_INVALID; + } -+AT_SETUP([OVSDB cluster - initial status should be disconnected]) -+AT_KEYWORDS([ovsdb server negative unix cluster disconnect]) -+ -+n=3 -+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` -+ordinal_schema > schema -+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) -+cid=`ovsdb-tool db-cid s1.db` -+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` -+for i in `seq 2 $n`; do -+ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) -+done -+ -+on_exit 'kill `cat *.pid`' -+for i in `seq $n`; do -+ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log 
--pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) -+done -+for i in `seq $n`; do -+ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) -+done +- if (((tcp_flags & (TCP_SYN|TCP_ACK)) == TCP_SYN) +- && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2 ++ if ((tcp_flags & (TCP_SYN|TCP_ACK)) == TCP_SYN) { ++ if (dst->state >= CT_DPIF_TCPS_FIN_WAIT_2 + && src->state >= CT_DPIF_TCPS_FIN_WAIT_2) { +- src->state = dst->state = CT_DPIF_TCPS_CLOSED; +- return CT_UPDATE_NEW; ++ src->state = dst->state = CT_DPIF_TCPS_CLOSED; ++ return CT_UPDATE_NEW; ++ } else if (src->state <= CT_DPIF_TCPS_SYN_SENT) { ++ src->state = CT_DPIF_TCPS_SYN_SENT; ++ OvsConntrackUpdateExpiration(&conn->up, now, ++ 30 * CT_INTERVAL_SEC); ++ return CT_UPDATE_VALID_NEW; ++ } + } + + if (src->wscale & CT_WSCALE_FLAG +diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c +index ba5611697a..55917c43ff 100644 +--- a/datapath-windows/ovsext/Conntrack.c ++++ b/datapath-windows/ovsext/Conntrack.c +@@ -753,6 +753,9 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx, + return NULL; + } + break; ++ case CT_UPDATE_VALID_NEW: ++ state |= OVS_CS_F_NEW; ++ break; + } + } + if (entry) { +diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h +index bc6580d708..b0932186af 100644 +--- a/datapath-windows/ovsext/Conntrack.h ++++ b/datapath-windows/ovsext/Conntrack.h +@@ -56,6 +56,7 @@ typedef enum CT_UPDATE_RES { + CT_UPDATE_INVALID, + CT_UPDATE_VALID, + CT_UPDATE_NEW, ++ CT_UPDATE_VALID_NEW, + } CT_UPDATE_RES; + + /* Metadata mark for masked write to conntrack mark */ +diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c +index c044b14896..bf995aa83a 100644 +--- a/datapath/linux/compat/geneve.c ++++ b/datapath/linux/compat/geneve.c +@@ -962,14 +962,26 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, + return dst; + } + +-#ifdef HAVE_IPV6_DST_LOOKUP_NET +- if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) { ++#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW) ++#ifdef HAVE_IPV6_DST_LOOKUP_FLOW_NET ++ dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6, ++ NULL); + #else +-#ifdef HAVE_IPV6_STUB ++ dst = ipv6_stub->ipv6_dst_lookup_flow(gs6->sock->sk, fl6, ++ NULL); ++#endif ++ if (IS_ERR(dst)) { ++#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET) ++ if (ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, &dst, ++ fl6)) { ++#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW) ++ if (ipv6_stub->ipv6_dst_lookup_flow(gs6->sock->sk, &dst, fl6)) { ++#elif defined(HAVE_IPV6_DST_LOOKUP_NET) ++ if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) { ++#elif defined(HAVE_IPV6_STUB) + if (ipv6_stub->ipv6_dst_lookup(gs6->sock->sk, &dst, fl6)) { + #else + if (ip6_dst_lookup(gs6->sock->sk, &dst, fl6)) { +-#endif + #endif + netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr); + return ERR_PTR(-ENETUNREACH); +diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c +index 23118e8b63..05ccfb9288 100644 +--- a/datapath/linux/compat/vxlan.c ++++ b/datapath/linux/compat/vxlan.c +@@ -967,7 +967,10 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, + bool use_cache = (dst_cache && ip_tunnel_dst_cache_usable(skb, info)); + struct dst_entry *ndst; + struct flowi6 fl6; ++#if !defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) || \ ++ !defined(HAVE_IPV6_DST_LOOKUP_FLOW) + int err; ++#endif + + if (!sock6) + return ERR_PTR(-EIO); +@@ -990,20 
+993,35 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, + fl6.fl6_dport = dport; + fl6.fl6_sport = sport; + +-#ifdef HAVE_IPV6_DST_LOOKUP_NET +- err = ipv6_stub->ipv6_dst_lookup(vxlan->net, +- sock6->sock->sk, +- &ndst, &fl6); ++#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW) ++#ifdef HAVE_IPV6_DST_LOOKUP_FLOW_NET ++ ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk, ++ &fl6, NULL); + #else +-#ifdef HAVE_IPV6_STUB ++ ndst = ipv6_stub->ipv6_dst_lookup_flow(sock6->sock->sk, &fl6, NULL); ++#endif ++ if (unlikely(IS_ERR(ndst))) { ++#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET) ++ err = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk, ++ &ndst, &fl6); ++#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW) ++ err = ipv6_stub->ipv6_dst_lookup_flow(sock6->sock->sk, &ndst, &fl6); ++#elif defined(HAVE_IPV6_DST_LOOKUP_NET) ++ err = ipv6_stub->ipv6_dst_lookup(vxlan->net, sock6->sock->sk, ++ &ndst, &fl6); ++#elif defined(HAVE_IPV6_STUB) + err = ipv6_stub->ipv6_dst_lookup(vxlan->vn6_sock->sock->sk, + &ndst, &fl6); + #else + err = ip6_dst_lookup(vxlan->vn6_sock->sock->sk, &ndst, &fl6); + #endif +-#endif ++#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW) ++ return ERR_PTR(-ENETUNREACH); ++ } ++#else + if (err < 0) + return ERR_PTR(err); ++#endif + + *saddr = fl6.saddr; + if (use_cache) +diff --git a/debian/changelog b/debian/changelog +index 8e075bc98b..d803cf10d1 100644 +--- a/debian/changelog ++++ b/debian/changelog +@@ -1,3 +1,15 @@ ++openvswitch (2.13.2-1) unstable; urgency=low ++ [ Open vSwitch team ] ++ * New upstream version + -+# Stop all servers, and start the s1 only, to test initial connection status -+# when there is no leader yet. -+for i in `seq 1 $n`; do -+ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) -+done -+i=1 -+AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) ++ -- Open vSwitch team <dev@openvswitch.org> Thu, 30 Jul 2020 00:25:23 +0200 + -+# The initial status should be disconnected. So wait should fail. 
-+AT_CHECK([ovsdb_client_wait --timeout=1 unix:s$i.ovsdb $schema_name connected], [142], [ignore], [ignore]) -+OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) ++openvswitch (2.13.1-1) unstable; urgency=low ++ [ Open vSwitch team] ++ * New upstream version + -+AT_CLEANUP ++ -- Open vSwitch team <dev@openvswitch.org> Thu, 30 Jul 2020 00:25:23 +0200 + - + openvswitch (2.13.0-1) unstable; urgency=low + [ Open vSwitch team] + * New upstream version +diff --git a/debian/control b/debian/control +index a50e97249f..6420b9d3e2 100644 +--- a/debian/control ++++ b/debian/control +@@ -14,8 +14,9 @@ Build-Depends: graphviz, + openssl, + procps, + python3-all, +- python3-twisted-conch, +- python3-zopeinterface, ++ python3-sphinx, ++ python3-twisted, ++ python3-zope.interface, + libunbound-dev, + libunwind-dev + Standards-Version: 3.9.3 +@@ -187,7 +188,7 @@ Description: Python bindings for Open vSwitch + Package: openvswitch-test + Architecture: all + Depends: python3, +- python3-twisted-web, ++ python3-twisted, + ${misc:Depends}, + ${python3:Depends} + Description: Open vSwitch test package +diff --git a/debian/openvswitch-common.manpages b/debian/openvswitch-common.manpages +index 9ac6a1dd6d..95004122cc 100644 +--- a/debian/openvswitch-common.manpages ++++ b/debian/openvswitch-common.manpages +@@ -1,7 +1,7 @@ + ovsdb/ovsdb-client.1 + ovsdb/ovsdb-tool.1 + utilities/bugtool/ovs-bugtool.8 +-utilities/ovs-appctl.8 ++debian/tmp/usr/share/man/man8/ovs-appctl.8 + utilities/ovs-ofctl.8 +-utilities/ovs-parse-backtrace.8 +-utilities/ovs-pki.8 ++debian/tmp/usr/share/man/man8/ovs-parse-backtrace.8 ++debian/tmp/usr/share/man/man8/ovs-pki.8 +diff --git a/debian/openvswitch-switch.manpages b/debian/openvswitch-switch.manpages +index 1161cfda77..7fd7bc55da 100644 +--- a/debian/openvswitch-switch.manpages ++++ b/debian/openvswitch-switch.manpages +@@ -1,12 +1,12 @@ + ovsdb/ovsdb-server.1 + ovsdb/ovsdb-server.5 +-utilities/ovs-ctl.8 ++debian/tmp/usr/share/man/man8/ovs-ctl.8 + utilities/ovs-dpctl-top.8 + utilities/ovs-dpctl.8 + utilities/ovs-kmod-ctl.8 + utilities/ovs-pcap.1 +-utilities/ovs-tcpdump.8 +-utilities/ovs-tcpundump.1 ++debian/tmp/usr/share/man/man8/ovs-tcpdump.8 ++debian/tmp/usr/share/man/man1/ovs-tcpundump.1 + utilities/ovs-vsctl.8 + vswitchd/ovs-vswitchd.8 + vswitchd/ovs-vswitchd.conf.db.5 +diff --git a/debian/openvswitch-test.manpages b/debian/openvswitch-test.manpages +index 3f71858691..eb3a561d01 100644 +--- a/debian/openvswitch-test.manpages ++++ b/debian/openvswitch-test.manpages +@@ -1 +1 @@ +-utilities/ovs-l3ping.8 ++debian/tmp/usr/share/man/man8/ovs-l3ping.8 +diff --git a/dpdk/.ci/linux-setup.sh b/dpdk/.ci/linux-setup.sh +index dfb9d4a206..38bb88e15c 100755 +--- a/dpdk/.ci/linux-setup.sh ++++ b/dpdk/.ci/linux-setup.sh +@@ -1,7 +1,7 @@ + #!/bin/sh -xe - AT_BANNER([OVSDB cluster election timer change]) --- -2.25.1 - - -From 053b78c8d60ffb4d212fd7894f91be52027f291f Mon Sep 17 00:00:00 2001 -From: Han Zhou <hzhou@ovn.org> -Date: Fri, 28 Feb 2020 18:07:07 -0800 -Subject: [PATCH 06/15] raft: Avoid busy loop during leader election. - -[ upstream commit 3ae90e1899c5a05148ea1870d9bb4ac3c05e3a19 ] - -When a server doesn't see a leader yet, e.g. during leader re-election, -if a transaction comes from a client, it will cause 100% CPU busy loop. 
-With debug log enabled it is like: - -2020-02-28T04:04:35.631Z|00059|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 -2020-02-28T04:04:35.631Z|00062|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 -2020-02-28T04:04:35.631Z|00065|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 -2020-02-28T04:04:35.631Z|00068|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 -2020-02-28T04:04:35.631Z|00071|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 -2020-02-28T04:04:35.631Z|00074|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 -2020-02-28T04:04:35.631Z|00077|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 -... - -The problem is that in ovsdb_trigger_try(), all cluster errors are treated -as temporary error and retry immediately. This patch fixes it by introducing -'run_triggers_now', which tells if a retry is needed immediately. When the -cluster error is with detail 'not leader', we don't immediately retry, but -will wait for the next poll event to trigger the retry. When 'not leader' -status changes, there must be a event, i.e. raft RPC that changes the -status, so the trigger is guaranteed to be triggered, without busy loop. - -Signed-off-by: Han Zhou <hzhou@ovn.org> -Signed-off-by: Ben Pfaff <blp@ovn.org> - -Resolves: #1836305 -Signed-off-by: Dumitru Ceara <dceara@redhat.com> ---- - ovsdb/ovsdb.c | 2 +- - ovsdb/ovsdb.h | 1 + - ovsdb/transaction.c | 2 +- - ovsdb/trigger.c | 11 +++++++++-- - 4 files changed, 12 insertions(+), 4 deletions(-) - -diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c -index cfc96b32f8..7e683e6815 100644 ---- a/ovsdb/ovsdb.c -+++ b/ovsdb/ovsdb.c -@@ -414,7 +414,7 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage) - db->storage = storage; - ovs_list_init(&db->monitors); - ovs_list_init(&db->triggers); -- db->run_triggers = false; -+ db->run_triggers_now = db->run_triggers = false; + # need to install as 'root' since some of the unit tests won't run without it +-sudo python3 -m pip install --upgrade meson ++sudo python3 -m pip install --upgrade 'meson==0.47.1' - shash_init(&db->tables); - if (schema) { -diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h -index 32e5333163..5c30a83d92 100644 ---- a/ovsdb/ovsdb.h -+++ b/ovsdb/ovsdb.h -@@ -83,6 +83,7 @@ struct ovsdb { - /* Triggers. */ - struct ovs_list triggers; /* Contains "struct ovsdb_trigger"s. 
*/ - bool run_triggers; -+ bool run_triggers_now; + # setup hugepages + cat /proc/meminfo +diff --git a/dpdk/.travis.yml b/dpdk/.travis.yml +index 8f90d06f28..77ac26dd85 100644 +--- a/dpdk/.travis.yml ++++ b/dpdk/.travis.yml +@@ -15,19 +15,19 @@ addons: + packages: &required_packages + - [libnuma-dev, linux-headers-$(uname -r), python3-setuptools, python3-wheel, python3-pip, ninja-build] - struct ovsdb_table *rbac_role; +-aarch64_packages: &aarch64_packages ++_aarch64_packages: &aarch64_packages + - *required_packages + - [gcc-aarch64-linux-gnu, libc6-dev-arm64-cross, pkg-config-aarch64-linux-gnu] -diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c -index 369436bffb..8ffefcf7c9 100644 ---- a/ovsdb/transaction.c -+++ b/ovsdb/transaction.c -@@ -967,7 +967,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn) +-extra_packages: &extra_packages ++_extra_packages: &extra_packages + - *required_packages +- - [libbsd-dev, libpcap-dev, libcrypto++-dev, libjansson4] ++ - [libbsd-dev, libpcap-dev, libcrypto++-dev, libjansson-dev] + +-build_32b_packages: &build_32b_packages ++_build_32b_packages: &build_32b_packages + - *required_packages + - [gcc-multilib] + +-doc_packages: &doc_packages ++_doc_packages: &doc_packages + - [doxygen, graphviz, python3-sphinx] + + before_install: ./.ci/${TRAVIS_OS_NAME}-setup.sh +@@ -39,7 +39,7 @@ env: + - DEF_LIB="shared" OPTS="-Denable_kmods=false" + - DEF_LIB="shared" RUN_TESTS=1 + +-matrix: ++jobs: + include: + - env: DEF_LIB="static" BUILD_32BIT=1 + compiler: gcc +diff --git a/dpdk/MAINTAINERS b/dpdk/MAINTAINERS +index 4395d8df14..10c4e1a613 100644 +--- a/dpdk/MAINTAINERS ++++ b/dpdk/MAINTAINERS +@@ -370,7 +370,7 @@ F: devtools/test-null.sh + F: doc/guides/prog_guide/switch_representation.rst + + Flow API +-M: Adrien Mazarguil <adrien.mazarguil@6wind.com> ++M: Ori Kam <orika@mellanox.com> + T: git://dpdk.org/next/dpdk-next-net + F: app/test-pmd/cmdline_flow.c + F: doc/guides/prog_guide/rte_flow.rst +@@ -910,7 +910,7 @@ F: drivers/net/null/ + F: doc/guides/nics/features/null.ini + + Fail-safe PMD +-M: Gaetan Rivet <gaetan.rivet@6wind.com> ++M: Gaetan Rivet <grive@u256.net> + F: drivers/net/failsafe/ + F: doc/guides/nics/fail_safe.rst + F: doc/guides/nics/features/failsafe.ini +@@ -1373,7 +1373,7 @@ F: app/test/test_rcu* + F: doc/guides/prog_guide/rcu_lib.rst + + PCI +-M: Gaetan Rivet <gaetan.rivet@6wind.com> ++M: Gaetan Rivet <grive@u256.net> + F: lib/librte_pci/ + + Power management +@@ -1434,6 +1434,7 @@ Unit tests framework + F: app/test/Makefile + F: app/test/autotest* + F: app/test/commands.c ++F: app/test/get-coremask.sh + F: app/test/packet_burst_generator.c + F: app/test/packet_burst_generator.h + F: app/test/process.h +diff --git a/dpdk/VERSION b/dpdk/VERSION +index 22131b00aa..a43c349903 100644 +--- a/dpdk/VERSION ++++ b/dpdk/VERSION +@@ -1 +1 @@ +-19.11.0 ++19.11.3 +diff --git a/dpdk/app/pdump/main.c b/dpdk/app/pdump/main.c +index 903d02f482..c38c53719e 100644 +--- a/dpdk/app/pdump/main.c ++++ b/dpdk/app/pdump/main.c +@@ -151,7 +151,7 @@ static uint8_t multiple_core_capture; + static void + pdump_usage(const char *prgname) { - if (!ovsdb_txn_is_empty(txn)) { +- printf("usage: %s [EAL options]" ++ printf("usage: %s [EAL options] --" + " --["CMD_LINE_OPT_MULTI"]\n" + " --"CMD_LINE_OPT_PDUMP" " + "'(port=<port id> | device_id=<pci id or vdev name>)," +@@ -595,7 +595,7 @@ configure_vdev(uint16_t port_id) + if (ret != 0) + rte_exit(EXIT_FAILURE, "dev config failed\n"); -- txn->db->run_triggers = true; -+ txn->db->run_triggers_now = txn->db->run_triggers = true; - 
ovsdb_monitors_commit(txn->db, txn); - ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs)); - ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit)); -diff --git a/ovsdb/trigger.c b/ovsdb/trigger.c -index 7e62e90ae3..0372302af4 100644 ---- a/ovsdb/trigger.c -+++ b/ovsdb/trigger.c -@@ -141,7 +141,7 @@ ovsdb_trigger_run(struct ovsdb *db, long long int now) - struct ovsdb_trigger *t, *next; +- for (q = 0; q < txRings; q++) { ++ for (q = 0; q < txRings; q++) { + ret = rte_eth_tx_queue_setup(port_id, q, TX_DESC_PER_QUEUE, + rte_eth_dev_socket_id(port_id), NULL); + if (ret < 0) +diff --git a/dpdk/app/test-acl/main.c b/dpdk/app/test-acl/main.c +index 57f23942eb..08f06c1fa3 100644 +--- a/dpdk/app/test-acl/main.c ++++ b/dpdk/app/test-acl/main.c +@@ -12,7 +12,7 @@ + #include <rte_lcore.h> + #include <rte_ip.h> - bool run_triggers = db->run_triggers; -- db->run_triggers = false; -+ db->run_triggers_now = db->run_triggers = false; +-#define PRINT_USAGE_START "%s [EAL options]\n" ++#define PRINT_USAGE_START "%s [EAL options] --\n" - bool disconnect_all = false; + #define RTE_LOGTYPE_TESTACL RTE_LOGTYPE_USER1 + +diff --git a/dpdk/app/test-crypto-perf/main.c b/dpdk/app/test-crypto-perf/main.c +index 52a1860fbf..7bb286ccbe 100644 +--- a/dpdk/app/test-crypto-perf/main.c ++++ b/dpdk/app/test-crypto-perf/main.c +@@ -582,7 +582,8 @@ main(int argc, char **argv) + goto err; + } + +- if (!opts.silent) ++ if (!opts.silent && opts.test != CPERF_TEST_TYPE_THROUGHPUT && ++ opts.test != CPERF_TEST_TYPE_LATENCY) + show_test_vector(t_vec); + + total_nb_qps = nb_cryptodevs * opts.nb_qps; +diff --git a/dpdk/app/test-eventdev/meson.build b/dpdk/app/test-eventdev/meson.build +index 7ff2b786cf..9e588d9ec7 100644 +--- a/dpdk/app/test-eventdev/meson.build ++++ b/dpdk/app/test-eventdev/meson.build +@@ -10,5 +10,8 @@ sources = files('evt_main.c', + 'test_order_queue.c', + 'test_perf_common.c', + 'test_perf_atq.c', +- 'test_perf_queue.c') ++ 'test_perf_queue.c', ++ 'test_pipeline_common.c', ++ 'test_pipeline_atq.c', ++ 'test_pipeline_queue.c') + deps += 'eventdev' +diff --git a/dpdk/app/test-eventdev/test_pipeline_common.c b/dpdk/app/test-eventdev/test_pipeline_common.c +index fa91bf2290..126e2165a3 100644 +--- a/dpdk/app/test-eventdev/test_pipeline_common.c ++++ b/dpdk/app/test-eventdev/test_pipeline_common.c +@@ -385,12 +385,16 @@ pipeline_event_tx_adapter_setup(struct evt_options *opt, + if (!(cap & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT)) { + uint32_t service_id = -1U; + +- rte_event_eth_tx_adapter_service_id_get(consm, +- &service_id); ++ ret = rte_event_eth_tx_adapter_service_id_get(consm, ++ &service_id); ++ if (ret != -ESRCH && ret != 0) { ++ evt_err("Failed to get Tx adptr service ID"); ++ return ret; ++ } + ret = evt_service_setup(service_id); + if (ret) { + evt_err("Failed to setup service core" +- " for Tx adapter\n"); ++ " for Tx adapter"); + return ret; + } + } +diff --git a/dpdk/app/test-pipeline/config.c b/dpdk/app/test-pipeline/config.c +index 28ac9fcc0e..33f3f1c827 100644 +--- a/dpdk/app/test-pipeline/config.c ++++ b/dpdk/app/test-pipeline/config.c +@@ -42,8 +42,6 @@ + + #include "main.h" + +-struct app_params app; +- + static const char usage[] = "\n"; -@@ -160,7 +160,7 @@ ovsdb_trigger_run(struct ovsdb *db, long long int now) void - ovsdb_trigger_wait(struct ovsdb *db, long long int now) +diff --git a/dpdk/app/test-pmd/cmdline.c b/dpdk/app/test-pmd/cmdline.c +index 9f3e0b251b..d508d1e26d 100644 +--- a/dpdk/app/test-pmd/cmdline.c ++++ b/dpdk/app/test-pmd/cmdline.c +@@ -94,7 +94,7 
@@ static void cmd_help_brief_parsed(__attribute__((unused)) void *parsed_result, + " help ports : Configuring ports.\n" + " help registers : Reading and setting port registers.\n" + " help filters : Filters configuration help.\n" +- " help traffic_management : Traffic Management commmands.\n" ++ " help traffic_management : Traffic Management commands.\n" + " help devices : Device related cmds.\n" + " help all : All of the above sections.\n\n" + ); +@@ -1437,7 +1437,7 @@ cmdline_parse_inst_t cmd_set_port_setup_on = { + struct cmd_operate_attach_port_result { + cmdline_fixed_string_t port; + cmdline_fixed_string_t keyword; +- cmdline_fixed_string_t identifier; ++ cmdline_multi_string_t identifier; + }; + + static void cmd_operate_attach_port_parsed(void *parsed_result, +@@ -1460,7 +1460,7 @@ cmdline_parse_token_string_t cmd_operate_attach_port_keyword = + keyword, "attach"); + cmdline_parse_token_string_t cmd_operate_attach_port_identifier = + TOKEN_STRING_INITIALIZER(struct cmd_operate_attach_port_result, +- identifier, NULL); ++ identifier, TOKEN_STRING_MULTI); + + cmdline_parse_inst_t cmd_operate_attach_port = { + .f = cmd_operate_attach_port_parsed, +@@ -1488,10 +1488,12 @@ static void cmd_operate_detach_port_parsed(void *parsed_result, { -- if (db->run_triggers) { -+ if (db->run_triggers_now) { - poll_immediate_wake(); - } else { - long long int deadline = LLONG_MAX; -@@ -319,9 +319,16 @@ ovsdb_trigger_try(struct ovsdb_trigger *t, long long int now) - if (!strcmp(ovsdb_error_get_tag(error), "cluster error")) { - /* Temporary error. Transition back to "initialized" state to - * try again. */ -+ char *err_s = ovsdb_error_to_string(error); -+ VLOG_DBG("cluster error %s", err_s); -+ - jsonrpc_msg_destroy(t->reply); - t->reply = NULL; - t->db->run_triggers = true; -+ if (!strstr(err_s, "not leader")) { -+ t->db->run_triggers_now = true; -+ } -+ free(err_s); - ovsdb_error_destroy(error); - } else { - /* Permanent error. Transition to "completed" state to report --- -2.25.1 - - -From cc3d02699203e2fe9d9fd384d09e268ba614828d Mon Sep 17 00:00:00 2001 -From: Han Zhou <hzhou@ovn.org> -Date: Fri, 28 Feb 2020 18:07:10 -0800 -Subject: [PATCH 07/15] raft: Fix next_index in install_snapshot reply - handling. - -[ upstream commit 877618fc833273d1e29e012b5e925d51cba80ff5 ] - -When a leader handles install_snapshot reply, the next_index for -the follower should be log_start instead of log_end, because there -can be new entries added in leader's log after initiating the -install_snapshot procedure. Also, it should send all the accumulated -entries to follower in the following append-request message, instead -of sending 0 entries, to speed up the converge. - -Without this fix, there is no functional problem, but it takes -uncessary extra rounds of append-requests responsed with "inconsistency" -by follower, although finally will be converged. 
- -Signed-off-by: Han Zhou <hzhou@ovn.org> -Signed-off-by: Ben Pfaff <blp@ovn.org> - -Resolves: #1836305 -Signed-off-by: Dumitru Ceara <dceara@redhat.com> ---- - ovsdb/raft.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/ovsdb/raft.c b/ovsdb/raft.c -index 6cd7b0041a..fa04d8c80b 100644 ---- a/ovsdb/raft.c -+++ b/ovsdb/raft.c -@@ -3998,8 +3998,9 @@ raft_handle_install_snapshot_reply( - VLOG_INFO_RL(&rl, "cluster "CID_FMT": installed snapshot on server %s " - " up to %"PRIu64":%"PRIu64, CID_ARGS(&raft->cid), - s->nickname, rpy->last_term, rpy->last_index); -- s->next_index = raft->log_end; -- raft_send_append_request(raft, s, 0, "snapshot installed"); -+ s->next_index = raft->log_start; -+ raft_send_append_request(raft, s, raft->log_end - s->next_index, -+ "snapshot installed"); + struct cmd_operate_detach_port_result *res = parsed_result; + +- if (!strcmp(res->keyword, "detach")) ++ if (!strcmp(res->keyword, "detach")) { ++ RTE_ETH_VALID_PORTID_OR_RET(res->port_id); + detach_port_device(res->port_id); +- else ++ } else { + printf("Unknown parameter\n"); ++ } } - /* Returns true if 'raft' has grown enough since the last snapshot that --- -2.25.1 - - -From 9c76350e271546eedfeb18720975e35b4e36e1f1 Mon Sep 17 00:00:00 2001 -From: Han Zhou <hzhou@ovn.org> -Date: Thu, 5 Mar 2020 23:48:45 -0800 -Subject: [PATCH 08/15] raft: Fix the problem of stuck in candidate role - forever. - -[ upstream commit 25a7e5547f1e107db0f032ad269f447c57401531 ] - -Sometimes a server can stay in candidate role forever, even if the server -already see the new leader and handles append-requests normally. However, -because of the wrong role, it appears as disconnected from cluster and -so the clients are disconnected. - -This problem happens when 2 servers become candidates in the same -term, and one of them is elected as leader in that term. It can be -reproduced by the test cases added in this patch. - -The root cause is that the current implementation only changes role to -follower when a bigger term is observed (in raft_receive_term__()). -According to the RAFT paper, if another candidate becomes leader with -the same term, the candidate should change to follower. - -This patch fixes it by changing the role to follower when leader -is being updated in raft_update_leader(). 
- -Signed-off-by: Han Zhou <hzhou@ovn.org> -Signed-off-by: Ben Pfaff <blp@ovn.org> - -Resolves: #1836305 -Signed-off-by: Dumitru Ceara <dceara@redhat.com> ---- - ovsdb/raft.c | 19 +++++++++++++-- - tests/ovsdb-cluster.at | 55 ++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 72 insertions(+), 2 deletions(-) - -diff --git a/ovsdb/raft.c b/ovsdb/raft.c -index fa04d8c80b..6452182ba6 100644 ---- a/ovsdb/raft.c -+++ b/ovsdb/raft.c -@@ -73,7 +73,8 @@ enum raft_failure_test { - FT_CRASH_BEFORE_SEND_EXEC_REQ, - FT_CRASH_AFTER_SEND_EXEC_REQ, - FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE, -- FT_DELAY_ELECTION -+ FT_DELAY_ELECTION, -+ FT_DONT_SEND_VOTE_REQUEST - }; - static enum raft_failure_test failure_test; + cmdline_parse_token_string_t cmd_operate_detach_port_port = +@@ -1530,7 +1532,7 @@ static void cmd_operate_detach_device_parsed(void *parsed_result, + struct cmd_operate_detach_device_result *res = parsed_result; -@@ -1647,6 +1648,7 @@ raft_start_election(struct raft *raft, bool leadership_transfer) - } + if (!strcmp(res->keyword, "detach")) +- detach_device(res->identifier); ++ detach_devargs(res->identifier); + else + printf("Unknown parameter\n"); + } +@@ -5120,7 +5122,7 @@ cmd_gso_size_parsed(void *parsed_result, - ovs_assert(raft->role != RAFT_LEADER); -+ - raft->role = RAFT_CANDIDATE; - /* If there was no leader elected since last election, we know we are - * retrying now. */ -@@ -1690,7 +1692,9 @@ raft_start_election(struct raft *raft, bool leadership_transfer) - .leadership_transfer = leadership_transfer, - }, - }; -- raft_send(raft, &rq); -+ if (failure_test != FT_DONT_SEND_VOTE_REQUEST) { -+ raft_send(raft, &rq); -+ } - } + if (test_done == 0) { + printf("Before setting GSO segsz, please first" +- " stop fowarding\n"); ++ " stop forwarding\n"); + return; + } - /* Vote for ourselves. */ -@@ -2966,6 +2970,15 @@ raft_update_leader(struct raft *raft, const struct uuid *sid) - }; - ignore(ovsdb_log_write_and_free(raft->log, raft_record_to_json(&r))); - } -+ if (raft->role == RAFT_CANDIDATE) { -+ /* Section 3.4: While waiting for votes, a candidate may -+ * receive an AppendEntries RPC from another server claiming to -+ * be leader. If the leader’s term (included in its RPC) is at -+ * least as large as the candidate’s current term, then the -+ * candidate recognizes the leader as legitimate and returns to -+ * follower state. */ -+ raft->role = RAFT_FOLLOWER; -+ } - return true; - } +@@ -7078,9 +7080,10 @@ cmd_priority_flow_ctrl_set_parsed(void *parsed_result, + * the RTE_FC_RX_PAUSE, Respond to the pause frame at the Tx side. 
+ */ + static enum rte_eth_fc_mode rx_tx_onoff_2_pfc_mode[2][2] = { +- {RTE_FC_NONE, RTE_FC_RX_PAUSE}, {RTE_FC_TX_PAUSE, RTE_FC_FULL} ++ {RTE_FC_NONE, RTE_FC_TX_PAUSE}, {RTE_FC_RX_PAUSE, RTE_FC_FULL} + }; -@@ -4674,6 +4687,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, - raft_reset_election_timer(raft); - } - } -+ } else if (!strcmp(test, "dont-send-vote-request")) { -+ failure_test = FT_DONT_SEND_VOTE_REQUEST; - } else if (!strcmp(test, "clear")) { - failure_test = FT_NO_TEST; - unixctl_command_reply(conn, "test dismissed"); -diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at -index 0aa4564480..9714545151 100644 ---- a/tests/ovsdb-cluster.at -+++ b/tests/ovsdb-cluster.at -@@ -527,6 +527,61 @@ AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) - ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update - AT_CLEANUP ++ memset(&pfc_conf, 0, sizeof(struct rte_eth_pfc_conf)); + rx_fc_enable = (!strncmp(res->rx_pfc_mode, "on",2)) ? 1 : 0; + tx_fc_enable = (!strncmp(res->tx_pfc_mode, "on",2)) ? 1 : 0; + pfc_conf.fc.mode = rx_tx_onoff_2_pfc_mode[rx_fc_enable][tx_fc_enable]; +@@ -16802,8 +16805,10 @@ cmd_ddp_get_list_parsed( + #ifdef RTE_LIBRTE_I40E_PMD + size = PROFILE_INFO_SIZE * MAX_PROFILE_NUM + 4; + p_list = (struct rte_pmd_i40e_profile_list *)malloc(size); +- if (!p_list) ++ if (!p_list) { + printf("%s: Failed to malloc buffer\n", __func__); ++ return; ++ } -+ -+AT_SETUP([OVSDB cluster - competing candidates]) -+AT_KEYWORDS([ovsdb server negative unix cluster competing-candidates]) -+ -+n=3 -+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` -+ordinal_schema > schema -+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) -+cid=`ovsdb-tool db-cid s1.db` -+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` -+for i in `seq 2 $n`; do -+ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) -+done -+ -+on_exit 'kill `cat *.pid`' -+for i in `seq $n`; do -+ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) -+done -+for i in `seq $n`; do -+ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) -+done + if (ret == -ENOTSUP) + ret = rte_pmd_i40e_get_ddp_list(res->port_id, +diff --git a/dpdk/app/test-pmd/cmdline_flow.c b/dpdk/app/test-pmd/cmdline_flow.c +index 99dade7d8c..deced65016 100644 +--- a/dpdk/app/test-pmd/cmdline_flow.c ++++ b/dpdk/app/test-pmd/cmdline_flow.c +@@ -1005,7 +1005,6 @@ static const enum index item_pppoes[] = { + }; + + static const enum index item_pppoe_proto_id[] = { +- ITEM_PPPOE_PROTO_ID, + ITEM_NEXT, + ZERO, + }; +@@ -2544,11 +2543,14 @@ static const struct token token_list[] = { + session_id)), + }, + [ITEM_PPPOE_PROTO_ID] = { +- .name = "proto_id", ++ .name = "pppoe_proto_id", + .help = "match PPPoE session protocol identifier", + .priv = PRIV_ITEM(PPPOE_PROTO_ID, + sizeof(struct rte_flow_item_pppoe_proto_id)), +- .next = NEXT(item_pppoe_proto_id), ++ .next = NEXT(item_pppoe_proto_id, NEXT_ENTRY(UNSIGNED), ++ item_param), ++ .args = ARGS(ARGS_ENTRY_HTON ++ (struct rte_flow_item_pppoe_proto_id, proto_id)), + .call = parse_vc, + }, + [ITEM_HIGIG2] = { +@@ -4534,7 +4536,9 @@ parse_vc_action_mplsogre_decap(struct context *ctx, const struct token *token, + struct rte_flow_item_gre gre = { + .protocol = rte_cpu_to_be_16(ETHER_TYPE_MPLS_UNICAST), + }; +- 
struct rte_flow_item_mpls mpls; ++ struct rte_flow_item_mpls mpls = { ++ .ttl = 0, ++ }; + uint8_t *header; + int ret; + +@@ -6236,6 +6240,9 @@ flow_item_default_mask(const struct rte_flow_item *item) + case RTE_FLOW_ITEM_TYPE_GTP_PSC: + mask = &rte_flow_item_gtp_psc_mask; + break; ++ case RTE_FLOW_ITEM_TYPE_GENEVE: ++ mask = &rte_flow_item_geneve_mask; ++ break; + case RTE_FLOW_ITEM_TYPE_PPPOE_PROTO_ID: + mask = &rte_flow_item_pppoe_proto_id_mask; + default: +diff --git a/dpdk/app/test-pmd/config.c b/dpdk/app/test-pmd/config.c +index d599682788..42eba68b35 100644 +--- a/dpdk/app/test-pmd/config.c ++++ b/dpdk/app/test-pmd/config.c +@@ -223,11 +223,26 @@ nic_stats_display(portid_t port_id) + void + nic_stats_clear(portid_t port_id) + { ++ int ret; + -+# We need to simulate the situation when 2 candidates starts election with same -+# term. -+# -+# Before triggering leader election, tell follower s2 don't send vote request (simulating -+# vote-request lost or not handled in time), and tell follower s3 to delay -+# election timer to make sure s3 doesn't send vote-request before s2 enters -+# term 2. -+AT_CHECK([ovs-appctl -t "`pwd`"/s2 cluster/failure-test dont-send-vote-request], [0], [ignore]) -+AT_CHECK([ovs-appctl -t "`pwd`"/s3 cluster/failure-test delay-election], [0], [ignore]) + if (port_id_is_invalid(port_id, ENABLED_WARN)) { + print_valid_ports(); + return; + } +- rte_eth_stats_reset(port_id); + -+# Restart leader, which will become follower, and both old followers will start -+# election as candidate. The new follower (old leader) will vote one of them, -+# and the other candidate should step back as follower as again. -+kill -9 `cat s1.pid` -+AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s1.log --pidfile=s1.pid --unixctl=s1 --remote=punix:s1.ovsdb s1.db]) ++ ret = rte_eth_stats_reset(port_id); ++ if (ret != 0) { ++ printf("%s: Error: failed to reset stats (port %u): %s", ++ __func__, port_id, strerror(ret)); ++ return; ++ } + -+# Tell s1 to delay election timer so that it won't start election before s3 -+# becomes candidate. 
-+AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/failure-test delay-election], [0], [ignore]) ++ ret = rte_eth_stats_get(port_id, &ports[port_id].stats); ++ if (ret != 0) { ++ printf("%s: Error: failed to get stats (port %u): %s", ++ __func__, port_id, strerror(ret)); ++ return; ++ } + printf("\n NIC statistics for port %d cleared\n", port_id); + } + +@@ -303,10 +318,19 @@ nic_xstats_clear(portid_t port_id) + print_valid_ports(); + return; + } + -+OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Term: 2"]) + ret = rte_eth_xstats_reset(port_id); + if (ret != 0) { + printf("%s: Error: failed to reset xstats (port %u): %s", + __func__, port_id, strerror(ret)); ++ return; ++ } + -+for i in `seq $n`; do -+ OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "candidate"]) -+ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) -+done ++ ret = rte_eth_stats_get(port_id, &ports[port_id].stats); ++ if (ret != 0) { ++ printf("%s: Error: failed to get stats (port %u): %s", ++ __func__, port_id, strerror(ret)); ++ return; + } + } + +@@ -1216,7 +1240,9 @@ void + port_mtu_set(portid_t port_id, uint16_t mtu) + { + int diag; ++ struct rte_port *rte_port = &ports[port_id]; + struct rte_eth_dev_info dev_info; ++ uint16_t eth_overhead; + int ret; + + if (port_id_is_invalid(port_id, ENABLED_WARN)) +@@ -1232,8 +1258,25 @@ port_mtu_set(portid_t port_id, uint16_t mtu) + return; + } + diag = rte_eth_dev_set_mtu(port_id, mtu); +- if (diag == 0) ++ if (diag == 0 && ++ dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) { ++ /* ++ * Ether overhead in driver is equal to the difference of ++ * max_rx_pktlen and max_mtu in rte_eth_dev_info when the ++ * device supports jumbo frame. ++ */ ++ eth_overhead = dev_info.max_rx_pktlen - dev_info.max_mtu; ++ if (mtu > RTE_ETHER_MAX_LEN - eth_overhead) { ++ rte_port->dev_conf.rxmode.offloads |= ++ DEV_RX_OFFLOAD_JUMBO_FRAME; ++ rte_port->dev_conf.rxmode.max_rx_pkt_len = ++ mtu + eth_overhead; ++ } else ++ rte_port->dev_conf.rxmode.offloads &= ++ ~DEV_RX_OFFLOAD_JUMBO_FRAME; + -+for i in `seq $n`; do -+ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) -+done + return; ++ } + printf("Set MTU failed. diag=%d\n", diag); + } + +@@ -3707,6 +3750,14 @@ mcast_addr_pool_extend(struct rte_port *port) + + } + ++static void ++mcast_addr_pool_append(struct rte_port *port, struct rte_ether_addr *mc_addr) ++{ ++ if (mcast_addr_pool_extend(port) != 0) ++ return; ++ rte_ether_addr_copy(mc_addr, &port->mc_addr_pool[port->mc_addr_nb - 1]); ++} + -+AT_CLEANUP + static void + mcast_addr_pool_remove(struct rte_port *port, uint32_t addr_idx) + { +@@ -3725,7 +3776,7 @@ mcast_addr_pool_remove(struct rte_port *port, uint32_t addr_idx) + sizeof(struct rte_ether_addr) * (port->mc_addr_nb - addr_idx)); + } + +-static void ++static int + eth_port_multicast_addr_list_set(portid_t port_id) + { + struct rte_port *port; +@@ -3734,10 +3785,11 @@ eth_port_multicast_addr_list_set(portid_t port_id) + port = &ports[port_id]; + diag = rte_eth_dev_set_mc_addr_list(port_id, port->mc_addr_pool, + port->mc_addr_nb); +- if (diag == 0) +- return; +- printf("rte_eth_dev_set_mc_addr_list(port=%d, nb=%u) failed. diag=%d\n", +- port->mc_addr_nb, port_id, -diag); ++ if (diag < 0) ++ printf("rte_eth_dev_set_mc_addr_list(port=%d, nb=%u) failed. 
diag=%d\n", ++ port_id, port->mc_addr_nb, diag); + - - AT_BANNER([OVSDB - cluster tests]) ++ return diag; + } --- -2.25.1 - - -From 5c38ccd52fb3925e82eda20f1897ec02abb390d9 Mon Sep 17 00:00:00 2001 -From: Ilya Maximets <i.maximets@ovn.org> -Date: Mon, 4 May 2020 21:55:41 +0200 -Subject: [PATCH 09/15] raft: Fix leak of the incomplete command. - -[ upstream commit 168beb87ca63056e8896b09a60031565b7b60728 ] - -Function raft_command_initiate() returns correctly referenced command -instance. 'n_ref' equals 1 for complete commands and 2 for incomplete -commands because one more reference is in raft->commands list. -raft_handle_execute_command_request__() leaks the reference by not -returning pointer anywhere and not unreferencing incomplete commands. - - 792 bytes in 11 blocks are definitely lost in loss record 258 of 262 - at 0x483BB1A: calloc (vg_replace_malloc.c:762) - by 0x44BA32: xcalloc (util.c:121) - by 0x422E5F: raft_command_create_incomplete (raft.c:2038) - by 0x422E5F: raft_command_initiate (raft.c:2061) - by 0x428651: raft_handle_execute_command_request__ (raft.c:4161) - by 0x428651: raft_handle_execute_command_request (raft.c:4177) - by 0x428651: raft_handle_rpc (raft.c:4230) - by 0x428651: raft_conn_run (raft.c:1445) - by 0x428DEA: raft_run (raft.c:1803) - by 0x407392: main_loop (ovsdb-server.c:226) - by 0x407392: main (ovsdb-server.c:469) - -Fixes: 1b1d2e6daa56 ("ovsdb: Introduce experimental support for clustered databases.") -Signed-off-by: Ilya Maximets <i.maximets@ovn.org> -Acked-by: Han Zhou <hzhou@ovn.org> -Signed-off-by: William Tu <u9012063@gmail.com> - -Resolves: #1836307 -Signed-off-by: Dumitru Ceara <dceara@redhat.com> ---- - ovsdb/raft.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/ovsdb/raft.c b/ovsdb/raft.c -index 6452182ba6..1505814138 100644 ---- a/ovsdb/raft.c -+++ b/ovsdb/raft.c -@@ -4163,9 +4163,7 @@ raft_handle_execute_command_request__( - cmd->sid = rq->common.sid; + void +@@ -3762,10 +3814,10 @@ mcast_addr_add(portid_t port_id, struct rte_ether_addr *mc_addr) + } + } - enum raft_command_status status = cmd->status; -- if (status != RAFT_CMD_INCOMPLETE) { -- raft_command_unref(cmd); -- } -+ raft_command_unref(cmd); - return status; +- if (mcast_addr_pool_extend(port) != 0) +- return; +- rte_ether_addr_copy(mc_addr, &port->mc_addr_pool[i]); +- eth_port_multicast_addr_list_set(port_id); ++ mcast_addr_pool_append(port, mc_addr); ++ if (eth_port_multicast_addr_list_set(port_id) < 0) ++ /* Rollback on failure, remove the address from the pool */ ++ mcast_addr_pool_remove(port, i); } --- -2.25.1 - - -From 3d9b529afb098531190d57d6f35d1622bb4093cd Mon Sep 17 00:00:00 2001 -From: Zhen Wang <zhewang@nvidia.com> -Date: Mon, 30 Mar 2020 17:21:04 -0700 -Subject: [PATCH 10/15] raft: Disable RAFT jsonrpc inactivity probe. - -[ upstream commit 1600e0040caded7eaa9b1f41926f9619d8e0ec8d ] - -With the scale test of 640 nodes k8s cluster, raft DB nodes' jsonrpc -session got closed due to the timeout of default 5 seconds probe. -It will cause disturbance of the raft cluster. Since we already have -the heartbeat for RAFT, just disable the probe between the servers -to avoid the unnecessary jsonrpc inactivity probe. 
- -Acked-by: Han Zhou <hzhou@ovn.org> -Signed-off-by: Zhen Wang <zhewang@nvidia.com> -Signed-off-by: Ilya Maximets <i.maximets@ovn.org> - -Resolves: #1836308 -Signed-off-by: Dumitru Ceara <dceara@redhat.com> ---- - ovsdb/raft.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/ovsdb/raft.c b/ovsdb/raft.c -index 1505814138..395cc56113 100644 ---- a/ovsdb/raft.c -+++ b/ovsdb/raft.c -@@ -938,6 +938,7 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js, - &conn->sid); - conn->incoming = incoming; - conn->js_seqno = jsonrpc_session_get_seqno(conn->js); -+ jsonrpc_session_set_probe_interval(js, 0); + void +@@ -3792,7 +3844,9 @@ mcast_addr_remove(portid_t port_id, struct rte_ether_addr *mc_addr) + } + + mcast_addr_pool_remove(port, i); +- eth_port_multicast_addr_list_set(port_id); ++ if (eth_port_multicast_addr_list_set(port_id) < 0) ++ /* Rollback on failure, add the address back into the pool */ ++ mcast_addr_pool_append(port, mc_addr); } - /* Starts the local server in an existing Raft cluster, using the local copy of --- -2.25.1 - - -From 8b155475749cdb7a1817810d447e4cf6598cb6fa Mon Sep 17 00:00:00 2001 -From: Aaron Conole <aconole@redhat.com> -Date: Fri, 15 May 2020 16:36:18 -0400 -Subject: [PATCH 11/15] netdev-linux: Update LAG in all cases. - -In some cases, when processing a netlink change event, it's possible for -an alternate part of OvS (like the IPv6 endpoint processing) to hold an -active netdev interface. This creates a race-condition, where sometimes -the OvS change processing will take the normal path. This doesn't work -because the netdev device object won't actually be enslaved to the -ovs-system (for instance, a linux bond) and ingress qdisc entries will -be missing. - -To address this, we update the LAG information in ALL cases where -LAG information could come in. - -Fixes: d22f8927c3c9 ("netdev-linux: monitor and offload LAG slaves to TC") -Cc: Marcelo Leitner <mleitner@redhat.com> -Cc: John Hurley <john.hurley@netronome.com> -Acked-by: Roi Dayan <roid@mellanox.com> -Signed-off-by: Aaron Conole <aconole@redhat.com> -Signed-off-by: Ilya Maximets <i.maximets@ovn.org> ---- - lib/netdev-linux.c | 11 +++++------ - 1 file changed, 5 insertions(+), 6 deletions(-) - -diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c -index c6f3d27409..2bf8d4c477 100644 ---- a/lib/netdev-linux.c -+++ b/lib/netdev-linux.c -@@ -659,10 +659,6 @@ netdev_linux_update_lag(struct rtnetlink_change *change) + void +diff --git a/dpdk/app/test-pmd/csumonly.c b/dpdk/app/test-pmd/csumonly.c +index 25091de881..7b92ab1195 100644 +--- a/dpdk/app/test-pmd/csumonly.c ++++ b/dpdk/app/test-pmd/csumonly.c +@@ -139,22 +139,23 @@ parse_ipv6(struct rte_ipv6_hdr *ipv6_hdr, struct testpmd_offload_info *info) + + /* + * Parse an ethernet header to fill the ethertype, l2_len, l3_len and +- * ipproto. This function is able to recognize IPv4/IPv6 with one optional vlan +- * header. The l4_len argument is only set in case of TCP (useful for TSO). ++ * ipproto. This function is able to recognize IPv4/IPv6 with optional VLAN ++ * headers. The l4_len argument is only set in case of TCP (useful for TSO). 
+ */ + static void + parse_ethernet(struct rte_ether_hdr *eth_hdr, struct testpmd_offload_info *info) { - struct linux_lag_slave *lag; + struct rte_ipv4_hdr *ipv4_hdr; + struct rte_ipv6_hdr *ipv6_hdr; ++ struct rte_vlan_hdr *vlan_hdr; -- if (!rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) { -- return; -- } + info->l2_len = sizeof(struct rte_ether_hdr); + info->ethertype = eth_hdr->ether_type; + +- if (info->ethertype == _htons(RTE_ETHER_TYPE_VLAN)) { +- struct rte_vlan_hdr *vlan_hdr = ( +- struct rte_vlan_hdr *)(eth_hdr + 1); - - if (change->slave && netdev_linux_kind_is_lag(change->slave)) { - lag = shash_find_data(&lag_shash, change->ifname); ++ while (info->ethertype == _htons(RTE_ETHER_TYPE_VLAN) || ++ info->ethertype == _htons(RTE_ETHER_TYPE_QINQ)) { ++ vlan_hdr = (struct rte_vlan_hdr *) ++ ((char *)eth_hdr + info->l2_len); + info->l2_len += sizeof(struct rte_vlan_hdr); + info->ethertype = vlan_hdr->eth_proto; + } +diff --git a/dpdk/app/test-pmd/flowgen.c b/dpdk/app/test-pmd/flowgen.c +index 03b72aaa56..68931fdea6 100644 +--- a/dpdk/app/test-pmd/flowgen.c ++++ b/dpdk/app/test-pmd/flowgen.c +@@ -1,35 +1,5 @@ +-/*- +- * BSD LICENSE +- * +- * Copyright(c) 2010-2013 Tilera Corporation. All rights reserved. +- * All rights reserved. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * +- * * Redistributions of source code must retain the above copyright +- * notice, this list of conditions and the following disclaimer. +- * * Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in +- * the documentation and/or other materials provided with the +- * distribution. +- * * Neither the name of Tilera Corporation nor the names of its +- * contributors may be used to endorse or promote products derived +- * from this software without specific prior written permission. +- * +- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +- * ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright 2014-2020 Mellanox Technologies, Ltd + */ -@@ -760,8 +756,11 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) - netdev_linux_update(netdev, nsid, &change); - ovs_mutex_unlock(&netdev->mutex); - } -- else if (!netdev_ && change.ifname) { -- /* Netdev is not present in OvS but its master could be. */ -+ -+ if (change.ifname && -+ rtnetlink_type_is_rtnlgrp_link(change.nlmsg_type)) { -+ -+ /* Need to try updating the LAG information. 
*/ - ovs_mutex_lock(&lag_mutex); - netdev_linux_update_lag(&change); - ovs_mutex_unlock(&lag_mutex); --- -2.25.1 - - -From d14e39f81bec29064a58df0177ce457765305f8b Mon Sep 17 00:00:00 2001 -From: Aaron Conole <aconole@redhat.com> -Date: Fri, 15 May 2020 16:36:19 -0400 -Subject: [PATCH 12/15] netdev-offload-tc: Re-fetch block ID after probing. - -It's possible that block_id could changes after the probe for block -support. Therefore, fetch the block_id again after the probe. - -Fixes: edc2055a2bf7 ("netdev-offload-tc: Flush rules on ingress block when init tc flow api") -Cc: Dmytro Linkin <dmitrolin@mellanox.com> -Acked-by: Roi Dayan <roid@mellanox.com> -Co-authored-by: Marcelo Leitner <mleitner@redhat.com> -Signed-off-by: Marcelo Leitner <mleitner@redhat.com> -Signed-off-by: Aaron Conole <aconole@redhat.com> -Signed-off-by: Ilya Maximets <i.maximets@ovn.org> ---- - lib/netdev-offload-tc.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c -index 550e440b3a..f577311aec 100644 ---- a/lib/netdev-offload-tc.c -+++ b/lib/netdev-offload-tc.c -@@ -1922,6 +1922,8 @@ netdev_tc_init_flow_api(struct netdev *netdev) + #include <stdarg.h> +diff --git a/dpdk/app/test-pmd/macswap.c b/dpdk/app/test-pmd/macswap.c +index 71af916fc3..8428c26d85 100644 +--- a/dpdk/app/test-pmd/macswap.c ++++ b/dpdk/app/test-pmd/macswap.c +@@ -1,34 +1,5 @@ +-/*- +- * BSD LICENSE +- * +- * Copyright(c) 2014 Tilera Corporation. All rights reserved. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * +- * * Redistributions of source code must retain the above copyright +- * notice, this list of conditions and the following disclaimer. +- * * Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in +- * the documentation and/or other materials provided with the +- * distribution. +- * * Neither the name of Tilera Corporation nor the names of its +- * contributors may be used to endorse or promote products derived +- * from this software without specific prior written permission. +- * +- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +- * ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright 2014-2020 Mellanox Technologies, Ltd + */ - if (ovsthread_once_start(&block_once)) { - probe_tc_block_support(ifindex); -+ /* Need to re-fetch block id as it depends on feature availability. 
*/ -+ block_id = get_block_id_from_netdev(netdev); - ovsthread_once_done(&block_once); - } + #include <stdarg.h> +diff --git a/dpdk/app/test-pmd/parameters.c b/dpdk/app/test-pmd/parameters.c +index 2e7a504415..0eb7844783 100644 +--- a/dpdk/app/test-pmd/parameters.c ++++ b/dpdk/app/test-pmd/parameters.c +@@ -49,7 +49,7 @@ + static void + usage(char* progname) + { +- printf("usage: %s " ++ printf("usage: %s [EAL options] -- " + #ifdef RTE_LIBRTE_CMDLINE + "[--interactive|-i] " + "[--cmdline-file=FILENAME] " +diff --git a/dpdk/app/test-pmd/testpmd.c b/dpdk/app/test-pmd/testpmd.c +index b374682236..0b126594b7 100644 +--- a/dpdk/app/test-pmd/testpmd.c ++++ b/dpdk/app/test-pmd/testpmd.c +@@ -2549,32 +2549,17 @@ setup_attached_port(portid_t pi) + printf("Done\n"); + } --- -2.25.1 - - -From fb32a78921e50b1ffa0c52f873167f68622e8723 Mon Sep 17 00:00:00 2001 -From: Ilya Maximets <i.maximets@ovn.org> -Date: Fri, 22 May 2020 18:31:19 +0200 -Subject: [PATCH 13/15] ovsdb: Add raft memory usage to memory report. - -[ upstream commit 3423cd97f88fe6a8de8b649d79fe6ac83bce94d1 ] - -Memory reports could be found in logs or by calling 'memory/show' -appctl command. For ovsdb-server it includes information about db -cells, monitor connections with their backlog size, etc. But it -doesn't contain any information about memory consumed by raft. -Backlogs of raft connections could be insanely large because of -snapshot installation requests that simply contains the whole database. -In not that healthy clusters where one of ovsdb servers is not able to -timely handle all the incoming raft traffic, backlog on a sender's side -could cause significant memory consumption issues. - -Adding new 'raft-connections' and 'raft-backlog' counters to the -memory report to better track such conditions. 
- -Acked-by: Han Zhou <hzhou@ovn.org> -Signed-off-by: Ilya Maximets <i.maximets@ovn.org> - -Related: #1834838 -Signed-off-by: Ilya Maximets <i.maximets@redhat.com> ---- - ovsdb/ovsdb.c | 4 ++++ - ovsdb/raft.c | 16 ++++++++++++++++ - ovsdb/raft.h | 2 ++ - ovsdb/storage.c | 10 ++++++++++ - ovsdb/storage.h | 3 +++ - 5 files changed, 35 insertions(+) - -diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c -index 7e683e6815..2da117cb36 100644 ---- a/ovsdb/ovsdb.c -+++ b/ovsdb/ovsdb.c -@@ -502,6 +502,10 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage) - } +-void +-detach_port_device(portid_t port_id) ++static void ++detach_device(struct rte_device *dev) + { +- struct rte_device *dev; + portid_t sibling; - simap_increase(usage, "cells", cells); -+ -+ if (db->storage) { -+ ovsdb_storage_get_memory_usage(db->storage, usage); -+ } - } +- printf("Removing a device...\n"); +- +- if (port_id_is_invalid(port_id, ENABLED_WARN)) +- return; +- +- dev = rte_eth_devices[port_id].device; + if (dev == NULL) { + printf("Device already removed\n"); + return; + } - struct ovsdb_table * -diff --git a/ovsdb/raft.c b/ovsdb/raft.c -index 395cc56113..6ca63b4352 100644 ---- a/ovsdb/raft.c -+++ b/ovsdb/raft.c -@@ -36,6 +36,7 @@ - #include "ovsdb/log.h" - #include "raft-rpc.h" - #include "random.h" -+#include "simap.h" - #include "socket-util.h" - #include "stream.h" - #include "timeval.h" -@@ -1014,6 +1015,21 @@ raft_get_sid(const struct raft *raft) - return &raft->sid; +- if (ports[port_id].port_status != RTE_PORT_CLOSED) { +- if (ports[port_id].port_status != RTE_PORT_STOPPED) { +- printf("Port not stopped\n"); +- return; +- } +- printf("Port was not closed\n"); +- if (ports[port_id].flow_list) +- port_flow_flush(port_id); +- } ++ printf("Removing a device...\n"); + + if (rte_dev_remove(dev) < 0) { + TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name); +@@ -2592,14 +2577,33 @@ detach_port_device(portid_t port_id) + + remove_invalid_ports(); + +- printf("Device of port %u is detached\n", port_id); ++ printf("Device is detached\n"); + printf("Now total ports is %d\n", nb_ports); + printf("Done\n"); + return; } -+/* Adds memory consumption info to 'usage' for later use by memory_report(). 
*/ -+void -+raft_get_memory_usage(const struct raft *raft, struct simap *usage) + void +-detach_device(char *identifier) ++detach_port_device(portid_t port_id) +{ -+ struct raft_conn *conn; -+ int cnt = 0; ++ if (port_id_is_invalid(port_id, ENABLED_WARN)) ++ return; + -+ LIST_FOR_EACH (conn, list_node, &raft->conns) { -+ simap_increase(usage, "raft-backlog", -+ jsonrpc_session_get_backlog(conn->js)); -+ cnt++; -+ } -+ simap_increase(usage, "raft-connections", cnt); ++ if (ports[port_id].port_status != RTE_PORT_CLOSED) { ++ if (ports[port_id].port_status != RTE_PORT_STOPPED) { ++ printf("Port not stopped\n"); ++ return; ++ } ++ printf("Port was not closed\n"); ++ if (ports[port_id].flow_list) ++ port_flow_flush(port_id); ++ } ++ ++ detach_device(rte_eth_devices[port_id].device); +} + - /* Returns true if 'raft' has completed joining its cluster, has not left or - * initiated leaving the cluster, does not have failed disk storage, and is - * apparently connected to the leader in a healthy way (or is itself the -diff --git a/ovsdb/raft.h b/ovsdb/raft.h -index 3d448995af..99d5307e54 100644 ---- a/ovsdb/raft.h -+++ b/ovsdb/raft.h -@@ -67,6 +67,7 @@ - struct json; - struct ovsdb_log; - struct raft; -+struct simap; - struct sset; - - #define RAFT_MAGIC "CLUSTER" -@@ -113,6 +114,7 @@ const struct uuid *raft_get_cid(const struct raft *); - const struct uuid *raft_get_sid(const struct raft *); - bool raft_is_connected(const struct raft *); - bool raft_is_leader(const struct raft *); -+void raft_get_memory_usage(const struct raft *, struct simap *usage); ++void ++detach_devargs(char *identifier) + { + struct rte_dev_iterator iterator; + struct rte_devargs da; +@@ -2748,7 +2752,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. speed %u Mbps- %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +@@ -2790,6 +2794,7 @@ rmv_port_callback(void *arg) + int need_to_start = 0; + int org_no_link_check = no_link_check; + portid_t port_id = (intptr_t)arg; ++ struct rte_device *dev; - /* Joining a cluster. */ - bool raft_is_joining(const struct raft *); -diff --git a/ovsdb/storage.c b/ovsdb/storage.c -index e26252b066..7b4ad16f60 100644 ---- a/ovsdb/storage.c -+++ b/ovsdb/storage.c -@@ -26,6 +26,7 @@ - #include "ovsdb.h" - #include "raft.h" - #include "random.h" -+#include "simap.h" - #include "timeval.h" - #include "util.h" + RTE_ETH_VALID_PORTID_OR_RET(port_id); -@@ -188,6 +189,15 @@ ovsdb_storage_get_applied_index(const struct ovsdb_storage *storage) - return storage->raft ? 
raft_get_applied_index(storage->raft) : 0; +@@ -2800,8 +2805,12 @@ rmv_port_callback(void *arg) + no_link_check = 1; + stop_port(port_id); + no_link_check = org_no_link_check; ++ ++ /* Save rte_device pointer before closing ethdev port */ ++ dev = rte_eth_devices[port_id].device; + close_port(port_id); +- detach_port_device(port_id); ++ detach_device(dev); /* might be already removed or have more ports */ ++ + if (need_to_start) + start_packet_forwarding(0); } +@@ -3184,6 +3193,8 @@ get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf, + struct rte_eth_dcb_tx_conf *tx_conf = + ð_conf->tx_adv_conf.dcb_tx_conf; -+void -+ovsdb_storage_get_memory_usage(const struct ovsdb_storage *storage, -+ struct simap *usage) -+{ -+ if (storage->raft) { -+ raft_get_memory_usage(storage->raft, usage); -+ } -+} ++ memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf)); + - void - ovsdb_storage_run(struct ovsdb_storage *storage) - { -diff --git a/ovsdb/storage.h b/ovsdb/storage.h -index 8a9bbab709..a223968912 100644 ---- a/ovsdb/storage.h -+++ b/ovsdb/storage.h -@@ -23,6 +23,7 @@ - struct json; - struct ovsdb_schema; - struct ovsdb_storage; -+struct simap; - struct uuid; + rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf); + if (rc != 0) + return rc; +@@ -3570,5 +3581,10 @@ main(int argc, char** argv) + return 1; + } - struct ovsdb_error *ovsdb_storage_open(const char *filename, bool rw, -@@ -39,6 +40,8 @@ bool ovsdb_storage_is_leader(const struct ovsdb_storage *); - const struct uuid *ovsdb_storage_get_cid(const struct ovsdb_storage *); - const struct uuid *ovsdb_storage_get_sid(const struct ovsdb_storage *); - uint64_t ovsdb_storage_get_applied_index(const struct ovsdb_storage *); -+void ovsdb_storage_get_memory_usage(const struct ovsdb_storage *, -+ struct simap *usage); +- return 0; ++ ret = rte_eal_cleanup(); ++ if (ret != 0) ++ rte_exit(EXIT_FAILURE, ++ "EAL cleanup failed: %s\n", strerror(-ret)); ++ ++ return EXIT_SUCCESS; + } +diff --git a/dpdk/app/test-pmd/testpmd.h b/dpdk/app/test-pmd/testpmd.h +index 217d577018..0694e1ef8b 100644 +--- a/dpdk/app/test-pmd/testpmd.h ++++ b/dpdk/app/test-pmd/testpmd.h +@@ -797,7 +797,7 @@ void stop_port(portid_t pid); + void close_port(portid_t pid); + void reset_port(portid_t pid); + void attach_port(char *identifier); +-void detach_device(char *identifier); ++void detach_devargs(char *identifier); + void detach_port_device(portid_t port_id); + int all_ports_stopped(void); + int port_is_stopped(portid_t port_id); +diff --git a/dpdk/app/test-pmd/txonly.c b/dpdk/app/test-pmd/txonly.c +index 3caf281cb8..8ed436def5 100644 +--- a/dpdk/app/test-pmd/txonly.c ++++ b/dpdk/app/test-pmd/txonly.c +@@ -45,8 +45,8 @@ uint16_t tx_udp_src_port = 9; + uint16_t tx_udp_dst_port = 9; - void ovsdb_storage_run(struct ovsdb_storage *); - void ovsdb_storage_wait(struct ovsdb_storage *); --- -2.25.1 - - -From 92a1e56c8a37927441fb1742e6054a9118654ef0 Mon Sep 17 00:00:00 2001 -From: Ilya Maximets <i.maximets@ovn.org> -Date: Thu, 14 May 2020 22:10:45 +0200 -Subject: [PATCH 14/15] ovsdb-server: Fix schema leak while reading db. - -[ upstream commit 16e3a80cf646f6c53d22ef98599d5aecb8310414 ] - -parse_txn() function doesn't always take ownership of the 'schema' -passed. 
So, if the schema of the clustered db has same version as the -one that already in use, parse_txn() will not use it, resulting with a -memory leak: - - 7,827 (56 direct, 7,771 indirect) bytes in 1 blocks are definitely lost - at 0x483BB1A: calloc (vg_replace_malloc.c:762) - by 0x44AD02: xcalloc (util.c:121) - by 0x40E70E: ovsdb_schema_create (ovsdb.c:41) - by 0x40EA6D: ovsdb_schema_from_json (ovsdb.c:217) - by 0x415EDD: ovsdb_storage_read (storage.c:280) - by 0x408968: read_db (ovsdb-server.c:607) - by 0x40733D: main_loop (ovsdb-server.c:227) - by 0x40733D: main (ovsdb-server.c:469) - -While we could put ovsdb_schema_destroy() in a few places inside -'parse_txn()', from the users' point of view it seems better to have a -constant argument and just clone the 'schema' if needed. The caller -will be responsible for destroying the 'schema' it owns. - -Fixes: 1b1d2e6daa56 ("ovsdb: Introduce experimental support for clustered databases.") -Acked-by: Han Zhou <hzhou@ovn.org> -Signed-off-by: Ilya Maximets <i.maximets@ovn.org> - -Related: #1834838 -Signed-off-by: Ilya Maximets <i.maximets@redhat.com> ---- - ovsdb/ovsdb-server.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c -index d416f1b606..ef4e996df2 100644 ---- a/ovsdb/ovsdb-server.c -+++ b/ovsdb/ovsdb-server.c -@@ -540,7 +540,7 @@ close_db(struct server_config *config, struct db *db, char *comment) + /* use RFC5735 / RFC2544 reserved network test addresses */ +-uint32_t tx_ip_src_addr = (192U << 24) | (18 << 16) | (0 << 8) | 1; +-uint32_t tx_ip_dst_addr = (192U << 24) | (18 << 16) | (0 << 8) | 2; ++uint32_t tx_ip_src_addr = (198U << 24) | (18 << 16) | (0 << 8) | 1; ++uint32_t tx_ip_dst_addr = (198U << 24) | (18 << 16) | (0 << 8) | 2; - static struct ovsdb_error * OVS_WARN_UNUSED_RESULT - parse_txn(struct server_config *config, struct db *db, -- struct ovsdb_schema *schema, const struct json *txn_json, -+ const struct ovsdb_schema *schema, const struct json *txn_json, - const struct uuid *txnid) + #define IP_DEFTTL 64 /* from RFC 1340. */ + +@@ -153,7 +153,6 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp, + const uint16_t vlan_tci_outer, const uint64_t ol_flags) { - if (schema && (!db->db->schema || strcmp(schema->version, -@@ -565,7 +565,7 @@ parse_txn(struct server_config *config, struct db *db, - ? 
xasprintf("database %s schema changed", db->db->name) - : xasprintf("database %s connected to storage", db->db->name))); + struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT]; +- uint8_t ip_var = RTE_PER_LCORE(_ip_var); + struct rte_mbuf *pkt_seg; + uint32_t nb_segs, pkt_len; + uint8_t i; +@@ -192,6 +191,7 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp, + copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt, + sizeof(struct rte_ether_hdr)); + if (txonly_multi_flow) { ++ uint8_t ip_var = RTE_PER_LCORE(_ip_var); + struct rte_ipv4_hdr *ip_hdr; + uint32_t addr; -- ovsdb_replace(db->db, ovsdb_create(schema, NULL)); -+ ovsdb_replace(db->db, ovsdb_create(ovsdb_schema_clone(schema), NULL)); +@@ -207,6 +207,7 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp, + */ + addr = (tx_ip_dst_addr | (ip_var++ << 8)) + rte_lcore_id(); + ip_hdr->src_addr = rte_cpu_to_be_32(addr); ++ RTE_PER_LCORE(_ip_var) = ip_var; + } + copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt, + sizeof(struct rte_ether_hdr) + +@@ -314,7 +315,7 @@ pkt_burst_transmit(struct fwd_stream *fs) + fs->tx_packets += nb_tx; - /* Force update to schema in _Server database. */ - db->row_uuid = UUID_ZERO; -@@ -614,6 +614,7 @@ read_db(struct server_config *config, struct db *db) - } else { - error = parse_txn(config, db, schema, txn_json, &txnid); - json_destroy(txn_json); -+ ovsdb_schema_destroy(schema); - if (error) { - break; - } --- -2.25.1 - - -From 3168eba559cbce28937be4e785c3337030694455 Mon Sep 17 00:00:00 2001 -From: Ilya Maximets <i.maximets@ovn.org> -Date: Fri, 22 May 2020 22:36:27 +0200 -Subject: [PATCH 15/15] raft: Avoid sending equal snapshots. - -[ upstream commit 8c2c503bdb0da1ce6044a53d462f905fd4f8acf5 ] - -Snapshots are huge. In some cases we could receive several outdated -append replies from the remote server. This could happen in high -scale cases if the remote server is overloaded and not able to process -all the raft requests in time. As an action to each outdated append -reply we're sending full database snapshot. While remote server is -already overloaded those snapshots will stuck in jsonrpc backlog for -a long time making it grow up to few GB. Since remote server wasn't -able to timely process incoming messages it will likely not able to -process snapshots leading to the same situation with low chances to -recover. Remote server will likely stuck in 'candidate' state, other -servers will grow their memory consumption due to growing jsonrpc -backlogs: - -jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:192.16.0.3:6644, - num of msgs: 3795, backlog: 8838994624. - -This patch is trying to avoid that situation by avoiding sending of -equal snapshot install requests. This helps maintain reasonable memory -consumption and allows the cluster to recover on a larger scale. 
- -Acked-by: Han Zhou <hzhou@ovn.org> -Signed-off-by: Ilya Maximets <i.maximets@ovn.org> - -Related: #1834838 -Signed-off-by: Ilya Maximets <i.maximets@redhat.com> ---- - ovsdb/raft-private.c | 1 + - ovsdb/raft-private.h | 4 ++++ - ovsdb/raft.c | 39 ++++++++++++++++++++++++++++++++++++++- - 3 files changed, 43 insertions(+), 1 deletion(-) - -diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c -index 26d39a087f..9468fdaf4a 100644 ---- a/ovsdb/raft-private.c -+++ b/ovsdb/raft-private.c -@@ -137,6 +137,7 @@ raft_server_destroy(struct raft_server *s) - if (s) { - free(s->address); - free(s->nickname); -+ free(s->last_install_snapshot_request); - free(s); - } - } -diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h -index ac8656d42f..1f366b4ab3 100644 ---- a/ovsdb/raft-private.h -+++ b/ovsdb/raft-private.h -@@ -27,6 +27,7 @@ + if (txonly_multi_flow) +- RTE_PER_LCORE(_ip_var) += nb_tx; ++ RTE_PER_LCORE(_ip_var) -= nb_pkt - nb_tx; - struct ds; - struct ovsdb_parser; -+struct raft_install_snapshot_request; - - /* Formatting server IDs and cluster IDs for use in human-readable logs. Do - * not use these in cases where the whole server or cluster ID is needed; use -@@ -83,6 +84,9 @@ struct raft_server { - bool replied; /* Reply to append_request was received from this - node during current election_timeout interval. - */ -+ /* Copy of the last install_snapshot_request sent to this server. */ -+ struct raft_install_snapshot_request *last_install_snapshot_request; -+ - /* For use in adding and removing servers: */ - struct uuid requester_sid; /* Nonzero if requested via RPC. */ - struct unixctl_conn *requester_conn; /* Only if requested via unixctl. */ -diff --git a/ovsdb/raft.c b/ovsdb/raft.c -index 6ca63b4352..8df386fa19 100644 ---- a/ovsdb/raft.c -+++ b/ovsdb/raft.c -@@ -1421,8 +1421,20 @@ raft_conn_run(struct raft *raft, struct raft_conn *conn) - jsonrpc_session_run(conn->js); + #ifdef RTE_TEST_PMD_RECORD_BURST_STATS + fs->tx_burst_stats.pkt_burst_spread[nb_tx]++; +diff --git a/dpdk/app/test-pmd/util.c b/dpdk/app/test-pmd/util.c +index b514be5e16..4e4ead3075 100644 +--- a/dpdk/app/test-pmd/util.c ++++ b/dpdk/app/test-pmd/util.c +@@ -1,6 +1,6 @@ + /* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation +- * Copyright(c) 2018 Mellanox Technology ++ * Copyright 2018 Mellanox Technologies, Ltd + */ - unsigned int new_seqno = jsonrpc_session_get_seqno(conn->js); -- bool just_connected = (new_seqno != conn->js_seqno -+ bool reconnected = new_seqno != conn->js_seqno; -+ bool just_connected = (reconnected - && jsonrpc_session_is_connected(conn->js)); -+ -+ if (reconnected) { -+ /* Clear 'last_install_snapshot_request' since it might not reach the -+ * destination or server was restarted. 
*/ -+ struct raft_server *server = raft_find_server(raft, &conn->sid); -+ if (server) { -+ free(server->last_install_snapshot_request); -+ server->last_install_snapshot_request = NULL; -+ } -+ } -+ - conn->js_seqno = new_seqno; - if (just_connected) { - if (raft->joining) { -@@ -3296,6 +3308,31 @@ raft_send_install_snapshot_request(struct raft *raft, - .election_timer = raft->election_timer, /* use latest value */ - } - }; -+ -+ if (s->last_install_snapshot_request) { -+ struct raft_install_snapshot_request *old, *new; + #include <stdio.h> +diff --git a/dpdk/app/test/Makefile b/dpdk/app/test/Makefile +index 57930c00b1..1ee1550094 100644 +--- a/dpdk/app/test/Makefile ++++ b/dpdk/app/test/Makefile +@@ -151,8 +151,12 @@ SRCS-y += test_func_reentrancy.c + + SRCS-y += test_service_cores.c + ++ifeq ($(CONFIG_RTE_LIBRTE_PMD_RING),y) ++SRCS-y += sample_packet_forward.c + SRCS-$(CONFIG_RTE_LIBRTE_BITRATE) += test_bitratestats.c + SRCS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS) += test_latencystats.c ++SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) += test_pdump.c ++endif + + SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline.c + SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += test_cmdline_num.c +@@ -181,11 +185,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += test_distributor_perf.c + + SRCS-$(CONFIG_RTE_LIBRTE_REORDER) += test_reorder.c + +-SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) += test_pdump.c +- + SRCS-y += virtual_pmd.c + SRCS-y += packet_burst_generator.c +-SRCS-y += sample_packet_forward.c + SRCS-$(CONFIG_RTE_LIBRTE_ACL) += test_acl.c + + ifeq ($(CONFIG_RTE_LIBRTE_PMD_RING),y) +@@ -215,7 +216,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_EVENTDEV),y) + SRCS-y += test_eventdev.c + SRCS-y += test_event_ring.c + SRCS-y += test_event_eth_rx_adapter.c +-SRCS-y += test_event_eth_tx_adapter.c ++SRCS-$(CONFIG_RTE_LIBRTE_PMD_RING) += test_event_eth_tx_adapter.c + SRCS-y += test_event_timer_adapter.c + SRCS-y += test_event_crypto_adapter.c + endif +@@ -268,13 +269,6 @@ endif + endif + endif + +-# Link against shared libraries when needed +-ifeq ($(CONFIG_RTE_LIBRTE_PMD_BOND),y) +-ifneq ($(CONFIG_RTE_LIBRTE_PMD_RING),y) +-$(error Link bonding tests require CONFIG_RTE_LIBRTE_PMD_RING=y) +-endif +-endif +- + ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),y) + + ifeq ($(CONFIG_RTE_LIBRTE_PMD_BOND),y) +diff --git a/dpdk/app/test/get-coremask.sh b/dpdk/app/test/get-coremask.sh +new file mode 100755 +index 0000000000..bb8cf404d2 +--- /dev/null ++++ b/dpdk/app/test/get-coremask.sh +@@ -0,0 +1,13 @@ ++#! 
/bin/sh -e ++# SPDX-License-Identifier: BSD-3-Clause ++# Copyright(c) 2019 Intel Corporation + -+ old = s->last_install_snapshot_request; -+ new = &rpc.install_snapshot_request; -+ if ( old->term == new->term -+ && old->last_index == new->last_index -+ && old->last_term == new->last_term -+ && old->last_servers == new->last_servers -+ && old->data == new->data -+ && old->election_timer == new->election_timer -+ && uuid_equals(&old->last_eid, &new->last_eid)) { -+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); ++if [ "$(uname)" = "Linux" ] ; then ++ cat /sys/devices/system/cpu/present ++elif [ "$(uname)" = "FreeBSD" ] ; then ++ ncpus=$(/sbin/sysctl -n hw.ncpu) ++ echo 0-$(expr $ncpus - 1) ++else ++# fallback ++ echo 0-3 ++fi +diff --git a/dpdk/app/test/meson.build b/dpdk/app/test/meson.build +index fb49d804ba..8524a986a1 100644 +--- a/dpdk/app/test/meson.build ++++ b/dpdk/app/test/meson.build +@@ -7,13 +7,11 @@ endif + + test_sources = files('commands.c', + 'packet_burst_generator.c', +- 'sample_packet_forward.c', + 'test.c', + 'test_acl.c', + 'test_alarm.c', + 'test_atomic.c', + 'test_barrier.c', +- 'test_bitratestats.c', + 'test_bpf.c', + 'test_byteorder.c', + 'test_cmdline.c', +@@ -43,7 +41,6 @@ test_sources = files('commands.c', + 'test_event_crypto_adapter.c', + 'test_event_eth_rx_adapter.c', + 'test_event_ring.c', +- 'test_event_eth_tx_adapter.c', + 'test_event_timer_adapter.c', + 'test_eventdev.c', + 'test_external_mem.c', +@@ -65,9 +62,7 @@ test_sources = files('commands.c', + 'test_ipsec_sad.c', + 'test_kni.c', + 'test_kvargs.c', +- 'test_latencystats.c', + 'test_link_bonding.c', +- 'test_link_bonding_mode4.c', + 'test_link_bonding_rssconf.c', + 'test_logs.c', + 'test_lpm.c', +@@ -88,11 +83,8 @@ test_sources = files('commands.c', + 'test_metrics.c', + 'test_mcslock.c', + 'test_mp_secondary.c', +- 'test_pdump.c', + 'test_per_lcore.c', + 'test_pmd_perf.c', +- 'test_pmd_ring.c', +- 'test_pmd_ring_perf.c', + 'test_power.c', + 'test_power_cpufreq.c', + 'test_power_kvm_vm.c', +@@ -212,7 +204,6 @@ fast_test_names = [ + 'rib_autotest', + 'rib6_autotest', + 'ring_autotest', +- 'ring_pmd_autotest', + 'rwlock_test1_autotest', + 'rwlock_rda_autotest', + 'rwlock_rds_wrm_autotest', +@@ -227,7 +218,6 @@ fast_test_names = [ + 'timer_autotest', + 'user_delay_us', + 'version_autotest', +- 'bitratestats_autotest', + 'crc_autotest', + 'delay_us_sleep_autotest', + 'distributor_autotest', +@@ -238,10 +228,8 @@ fast_test_names = [ + 'ipsec_autotest', + 'kni_autotest', + 'kvargs_autotest', +- 'latencystats_autotest', + 'member_autotest', + 'metrics_autotest', +- 'pdump_autotest', + 'power_cpufreq_autotest', + 'power_autotest', + 'power_kvm_vm_autotest', +@@ -277,7 +265,6 @@ perf_test_names = [ + 'rcu_qsbr_perf_autotest', + 'red_perf', + 'distributor_perf_autotest', +- 'ring_pmd_perf_autotest', + 'pmd_perf_autotest', + 'stack_perf_autotest', + 'stack_lf_perf_autotest', +@@ -302,7 +289,6 @@ driver_test_names = [ + 'eventdev_selftest_octeontx', + 'eventdev_selftest_sw', + 'link_bonding_autotest', +- 'link_bonding_mode4_autotest', + 'link_bonding_rssconf_autotest', + 'rawdev_autotest', + ] +@@ -339,6 +325,21 @@ if dpdk_conf.has('RTE_LIBRTE_BOND_PMD') + endif + if dpdk_conf.has('RTE_LIBRTE_RING_PMD') + test_deps += 'pmd_ring' ++ test_sources += 'test_pmd_ring_perf.c' ++ test_sources += 'test_pmd_ring.c' ++ test_sources += 'test_event_eth_tx_adapter.c' ++ test_sources += 'test_bitratestats.c' ++ test_sources += 'test_latencystats.c' ++ test_sources += 'test_link_bonding_mode4.c' ++ test_sources 
+= 'sample_packet_forward.c' ++ test_sources += 'test_pdump.c' ++ fast_test_names += 'ring_pmd_autotest' ++ perf_test_names += 'ring_pmd_perf_autotest' ++ fast_test_names += 'event_eth_tx_adapter_autotest' ++ fast_test_names += 'bitratestats_autotest' ++ fast_test_names += 'latencystats_autotest' ++ driver_test_names += 'link_bonding_mode4_autotest' ++ fast_test_names += 'pdump_autotest' + endif + + if dpdk_conf.has('RTE_LIBRTE_POWER') +@@ -398,45 +399,36 @@ dpdk_test = executable('dpdk-test', + timeout_seconds = 600 + timeout_seconds_fast = 10 + +-# Retrieve the number of CPU cores, defaulting to 4. +-num_cores = '0-3' +-if host_machine.system() == 'linux' +- num_cores = run_command('cat', +- '/sys/devices/system/cpu/present' +- ).stdout().strip() +-elif host_machine.system() == 'freebsd' +- snum_cores = run_command('/sbin/sysctl', '-n', +- 'hw.ncpu').stdout().strip() +- inum_cores = snum_cores.to_int() - 1 +- num_cores = '0-@0@'.format(inum_cores) +-endif ++get_coremask = find_program('get-coremask.sh') ++num_cores_arg = '-l ' + run_command(get_coremask).stdout().strip() + +-num_cores_arg = '-l ' + num_cores ++default_test_args = [num_cores_arg] + +-test_args = [num_cores_arg] + foreach arg : fast_test_names +- if host_machine.system() == 'linux' +- test(arg, dpdk_test, +- env : ['DPDK_TEST=' + arg], +- args : test_args + +- ['--file-prefix=@0@'.format(arg)], +- timeout : timeout_seconds_fast, +- is_parallel : false, +- suite : 'fast-tests') +- else +- test(arg, dpdk_test, +- env : ['DPDK_TEST=' + arg], +- args : test_args, ++ test_args = default_test_args + -+ VLOG_WARN_RL(&rl, "not sending exact same install_snapshot_request" -+ " to server %s again", s->nickname); -+ return; -+ } -+ } -+ free(s->last_install_snapshot_request); -+ CONST_CAST(struct raft_server *, s)->last_install_snapshot_request -+ = xmemdup(&rpc.install_snapshot_request, -+ sizeof rpc.install_snapshot_request); ++ if (get_option('default_library') == 'shared' and ++ arg == 'event_eth_tx_adapter_autotest') ++ foreach drv:dpdk_drivers ++ test_args += ['-d', drv.full_path().split('.a')[0] + '.so'] ++ endforeach ++ endif ++ if is_linux ++ test_args += ['--file-prefix=@0@'.format(arg)] ++ endif + - raft_send(raft, &rpc); - } ++ test(arg, dpdk_test, ++ env : ['DPDK_TEST=' + arg], ++ args : test_args, + timeout : timeout_seconds_fast, + is_parallel : false, + suite : 'fast-tests') +- endif + endforeach --- -2.25.1 - -diff --git a/dpdk/drivers/bus/pci/linux/pci_vfio.c b/dpdk/drivers/bus/pci/linux/pci_vfio.c -index 64cd84a689..ba60e7ce99 100644 ---- a/dpdk/drivers/bus/pci/linux/pci_vfio.c -+++ b/dpdk/drivers/bus/pci/linux/pci_vfio.c -@@ -149,6 +149,38 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) - return 0; + foreach arg : perf_test_names + test(arg, dpdk_test, + env : ['DPDK_TEST=' + arg], +- args : test_args, ++ args : default_test_args, + timeout : timeout_seconds, + is_parallel : false, + suite : 'perf-tests') +@@ -445,7 +437,7 @@ endforeach + foreach arg : driver_test_names + test(arg, dpdk_test, + env : ['DPDK_TEST=' + arg], +- args : test_args, ++ args : default_test_args, + timeout : timeout_seconds, + is_parallel : false, + suite : 'driver-tests') +@@ -454,7 +446,7 @@ endforeach + foreach arg : dump_test_names + test(arg, dpdk_test, + env : ['DPDK_TEST=' + arg], +- args : test_args, ++ args : default_test_args, + timeout : timeout_seconds, + is_parallel : false, + suite : 'debug-tests') +diff --git a/dpdk/app/test/process.h b/dpdk/app/test/process.h +index 191d2796a9..c3b3780337 100644 +--- 
a/dpdk/app/test/process.h ++++ b/dpdk/app/test/process.h +@@ -25,10 +25,12 @@ + #endif + + #ifdef RTE_LIBRTE_PDUMP ++#ifdef RTE_LIBRTE_RING_PMD + #include <pthread.h> + extern void *send_pkts(void *empty); + extern uint16_t flag_for_send_pkts; + #endif ++#endif + + /* + * launches a second copy of the test process using the given argv parameters, +@@ -44,7 +46,9 @@ process_dup(const char *const argv[], int numargs, const char *env_value) + int i, status; + char path[32]; + #ifdef RTE_LIBRTE_PDUMP ++#ifdef RTE_LIBRTE_RING_PMD + pthread_t thread; ++#endif + #endif + + pid_t pid = fork(); +@@ -121,17 +125,21 @@ process_dup(const char *const argv[], int numargs, const char *env_value) + } + /* parent process does a wait */ + #ifdef RTE_LIBRTE_PDUMP ++#ifdef RTE_LIBRTE_RING_PMD + if ((strcmp(env_value, "run_pdump_server_tests") == 0)) + pthread_create(&thread, NULL, &send_pkts, NULL); ++#endif + #endif + + while (wait(&status) != pid) + ; + #ifdef RTE_LIBRTE_PDUMP ++#ifdef RTE_LIBRTE_RING_PMD + if ((strcmp(env_value, "run_pdump_server_tests") == 0)) { + flag_for_send_pkts = 0; + pthread_join(thread, NULL); + } ++#endif + #endif + return status; } +diff --git a/dpdk/app/test/test.c b/dpdk/app/test/test.c +index cd7aaf645f..d0826ca69e 100644 +--- a/dpdk/app/test/test.c ++++ b/dpdk/app/test/test.c +@@ -53,7 +53,9 @@ do_recursive_call(void) + } actions[] = { + { "run_secondary_instances", test_mp_secondary }, + #ifdef RTE_LIBRTE_PDUMP ++#ifdef RTE_LIBRTE_RING_PMD + { "run_pdump_server_tests", test_pdump }, ++#endif + #endif + { "test_missing_c_flag", no_action }, + { "test_master_lcore_flag", no_action }, +diff --git a/dpdk/app/test/test.h b/dpdk/app/test/test.h +index ac0c50616c..b07f6c1ef0 100644 +--- a/dpdk/app/test/test.h ++++ b/dpdk/app/test/test.h +@@ -22,8 +22,6 @@ + # define TEST_TRACE_FAILURE(_file, _line, _func) + #endif -+/* enable PCI bus memory space */ -+static int -+pci_vfio_enable_bus_memory(int dev_fd) -+{ -+ uint16_t cmd; -+ int ret; -+ -+ ret = pread64(dev_fd, &cmd, sizeof(cmd), -+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + -+ PCI_COMMAND); +-#define RTE_TEST_TRACE_FAILURE TEST_TRACE_FAILURE +- + #include <rte_test.h> + + #define TEST_ASSERT RTE_TEST_ASSERT +diff --git a/dpdk/app/test/test_acl.c b/dpdk/app/test/test_acl.c +index 9cd9e37dbe..b78b67193a 100644 +--- a/dpdk/app/test/test_acl.c ++++ b/dpdk/app/test/test_acl.c +@@ -1394,16 +1394,18 @@ test_invalid_parameters(void) + } else + rte_acl_free(acx); + +- /* invalid NUMA node */ +- memcpy(¶m, &acl_param, sizeof(param)); +- param.socket_id = RTE_MAX_NUMA_NODES + 1; +- +- acx = rte_acl_create(¶m); +- if (acx != NULL) { +- printf("Line %i: ACL context creation with invalid NUMA " +- "should have failed!\n", __LINE__); +- rte_acl_free(acx); +- return -1; ++ if (rte_eal_has_hugepages()) { ++ /* invalid NUMA node */ ++ memcpy(¶m, &acl_param, sizeof(param)); ++ param.socket_id = RTE_MAX_NUMA_NODES + 1; + -+ if (ret != sizeof(cmd)) { -+ RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); -+ return -1; ++ acx = rte_acl_create(¶m); ++ if (acx != NULL) { ++ printf("Line %i: ACL context creation with invalid " ++ "NUMA should have failed!\n", __LINE__); ++ rte_acl_free(acx); ++ return -1; ++ } + } + + /* NULL name */ +diff --git a/dpdk/app/test/test_common.c b/dpdk/app/test/test_common.c +index 2b856f8ba5..12bd1cad90 100644 +--- a/dpdk/app/test/test_common.c ++++ b/dpdk/app/test/test_common.c +@@ -216,7 +216,19 @@ test_log2(void) + const uint32_t max = 0x10000; + const uint32_t step = 1; + +- for (i = 0; i < max; 
i = i + step) { ++ compare = rte_log2_u32(0); ++ if (compare != 0) { ++ printf("Wrong rte_log2_u32(0) val %x, expected 0\n", compare); ++ return TEST_FAILED; + } + -+ if (cmd & PCI_COMMAND_MEMORY) -+ return 0; -+ -+ cmd |= PCI_COMMAND_MEMORY; -+ ret = pwrite64(dev_fd, &cmd, sizeof(cmd), -+ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + -+ PCI_COMMAND); -+ -+ if (ret != sizeof(cmd)) { -+ RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); -+ return -1; ++ compare = rte_log2_u64(0); ++ if (compare != 0) { ++ printf("Wrong rte_log2_u64(0) val %x, expected 0\n", compare); ++ return TEST_FAILED; + } + -+ return 0; -+} -+ - /* set PCI bus mastering */ - static int - pci_vfio_set_bus_master(int dev_fd, bool op) -@@ -427,6 +459,11 @@ pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd) - return -1; - } ++ for (i = 1; i < max; i = i + step) { + uint64_t i64; -+ if (pci_vfio_enable_bus_memory(vfio_dev_fd)) { -+ RTE_LOG(ERR, EAL, "Cannot enable bus memory!\n"); -+ return -1; -+ } + /* extend range for 64-bit */ +diff --git a/dpdk/app/test/test_compressdev_test_buffer.h b/dpdk/app/test/test_compressdev_test_buffer.h +index c0492f89a2..d241602445 100644 +--- a/dpdk/app/test/test_compressdev_test_buffer.h ++++ b/dpdk/app/test/test_compressdev_test_buffer.h +@@ -1,3 +1,7 @@ ++/* SPDX-License-Identifier: (BSD-3-Clause) ++ * Copyright(c) 2018-2020 Intel Corporation ++ */ + - /* set bus mastering for the device */ - if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { - RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); -diff --git a/dpdk/lib/librte_vhost/vhost_user.c b/dpdk/lib/librte_vhost/vhost_user.c -index 40c4520c08..8954f7930e 100644 ---- a/dpdk/lib/librte_vhost/vhost_user.c -+++ b/dpdk/lib/librte_vhost/vhost_user.c -@@ -206,7 +206,7 @@ vhost_backend_cleanup(struct virtio_net *dev) - dev->inflight_info->addr = NULL; - } + #ifndef TEST_COMPRESSDEV_TEST_BUFFERS_H_ + #define TEST_COMPRESSDEV_TEST_BUFFERS_H_ -- if (dev->inflight_info->fd > 0) { -+ if (dev->inflight_info->fd >= 0) { - close(dev->inflight_info->fd); - dev->inflight_info->fd = -1; - } -@@ -1408,6 +1408,7 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, - "failed to alloc dev inflight area\n"); - return RTE_VHOST_MSG_RESULT_ERR; - } -+ dev->inflight_info->fd = -1; - } +@@ -190,106 +194,104 @@ static const char test_buf_shakespeare[] = + "\n" + "ORLANDO Go apart, Adam, and thou shalt hear how he will\n"; - num_queues = msg->payload.inflight.num_queues; -@@ -1433,6 +1434,16 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, +-/* Snippet of source code in Pascal */ +-static const char test_buf_pascal[] = +- " Ptr = 1..DMem;\n" +- " Loc = 1..IMem;\n" +- " Loc0 = 0..IMem;\n" +- " EdgeT = (hout,lin,hin,lout); {Warning this order is important in}\n" +- " {predicates such as gtS,geS}\n" +- " CardT = (finite,infinite);\n" +- " ExpT = Minexp..Maxexp;\n" +- " ManT = Mininf..Maxinf; \n" +- " Pflag = (PNull,PSoln,PTrace,PPrint);\n" +- " Sreal = record\n" +- " edge:EdgeT;\n" +- " cardinality:CardT;\n" +- " exp:ExpT; {exponent}\n" +- " mantissa:ManT;\n" +- " end;\n" +- " Int = record\n" +- " hi:Sreal;\n" +- " lo:Sreal;\n" +- " end;\n" +- " Instr = record\n" +- " Code:OpType;\n" +- " Pars: array[0..Par] of 0..DMem;\n" +- " end;\n" +- " DataMem= record\n" +- " D :array [Ptr] of Int;\n" +- " S :array [Loc] of State;\n" +- " LastHalve:Loc;\n" +- " RHalve :array [Loc] of real;\n" +- " end;\n" +- " DataFlags=record\n" +- " PF :array [Ptr] of Pflag;\n" +- " end;\n" +- "var\n" +- " Debug : 
(none,activity,post,trace,dump);\n" +- " Cut : (once,all);\n" +- " GlobalEnd,Verifiable:boolean;\n" +- " HalveThreshold:real;\n" +- " I : array [Loc] of Instr; {Memory holding instructions}\n" +- " End : Loc; {last instruction in I}\n" +- " ParN : array [OpType] of -1..Par; {number of parameters for each \n" +- " opcode. -1 means no result}\n" +- " ParIntersect : array [OpType] of boolean ;\n" +- " DInit : DataMem; {initial memory which is cleared and \n" +- " used in first call}\n" +- " DF : DataFlags; {hold flags for variables, e.g. print/trace}\n" +- " MaxDMem:0..DMem;\n" +- " Shift : array[0..Digits] of 1..maxint;{array of constant multipliers}\n" +- " {used for alignment etc.}\n" +- " Dummy :Positive;\n" +- " {constant intervals and Sreals}\n" +- " PlusInfS,MinusInfS,PlusSmallS,MinusSmallS,ZeroS,\n" +- " PlusFiniteS,MinusFiniteS:Sreal;\n" +- " Zero,All,AllFinite:Int;\n" +- "\n" +- "procedure deblank;\n" +- "var Ch:char;\n" +- "begin\n" +- " while (not eof) and (input^ in [' ',' ']) do read(Ch);\n" +- "end;\n" +- "\n" +- "procedure InitialOptions;\n" +- "\n" +- "#include '/user/profs/cleary/bin/options.i';\n" +- "\n" +- " procedure Option;\n" +- " begin\n" +- " case Opt of\n" +- " 'a','A':Debug:=activity;\n" +- " 'd','D':Debug:=dump;\n" +- " 'h','H':HalveThreshold:=StringNum/100;\n" +- " 'n','N':Debug:=none;\n" +- " 'p','P':Debug:=post;\n" +- " 't','T':Debug:=trace;\n" +- " 'v','V':Verifiable:=true;\n" +- " end;\n" +- " end;\n" +- "\n" +- "begin\n" +- " Debug:=trace;\n" +- " Verifiable:=false;\n" +- " HalveThreshold:=67/100;\n" +- " Options;\n" +- " writeln(Debug);\n" +- " writeln('Verifiable:',Verifiable);\n" +- " writeln('Halve threshold',HalveThreshold);\n" +- "end;{InitialOptions}\n" +- "\n" +- "procedure NormalizeUp(E,M:integer;var S:Sreal;var Closed:boolean);\n" +- "begin\n" +- "with S do\n" +- "begin\n" +- " if M=0 then S:=ZeroS else\n" +- " if M>0 then\n"; ++/* Snippet of Alice's Adventures in Wonderland */ ++static const char test_buf_alice2[] = ++ "`Curiouser and curiouser!' cried Alice (she was so much\n" ++ "surprised, that for the moment she quite forgot how to speak good\n" ++ "English); `now I'm opening out like the largest telescope that\n" ++ "ever was! Good-bye, feet!' (for when she looked down at her\n" ++ "feet, they seemed to be almost out of sight, they were getting so\n" ++ "far off). `Oh, my poor little feet, I wonder who will put on\n" ++ "your shoes and stockings for you now, dears? I'm sure _I_ shan't\n" ++ "be able! I shall be a great deal too far off to trouble myself\n" ++ "about you: you must manage the best way you can; --but I must be\n" ++ "kind to them,' thought Alice, `or perhaps they won't walk the\n" ++ "way I want to go! Let me see: I'll give them a new pair of\n" ++ "boots every Christmas.'\n" ++ "\n" ++ " And she went on planning to herself how she would manage it.\n" ++ "`They must go by the carrier,' she thought; `and how funny it'll\n" ++ "seem, sending presents to one's own feet! And how odd the\n" ++ "directions will look!\n" ++ "\n" ++ " ALICE'S RIGHT FOOT, ESQ.\n" ++ " HEARTHRUG,\n" ++ " NEAR THE FENDER,\n" ++ " (WITH ALICE'S LOVE).\n" ++ "\n" ++ "Oh dear, what nonsense I'm talking!'\n" ++ "\n" ++ " Just then her head struck against the roof of the hall: in\n" ++ "fact she was now more than nine feet high, and she at once took\n" ++ "up the little golden key and hurried off to the garden door.\n" ++ "\n" ++ " Poor Alice! 
It was as much as she could do, lying down on one\n" ++ "side, to look through into the garden with one eye; but to get\n" ++ "through was more hopeless than ever: she sat down and began to\n" ++ "cry again.\n" ++ "\n" ++ " `You ought to be ashamed of yourself,' said Alice, `a great\n" ++ "girl like you,' (she might well say this), `to go on crying in\n" ++ "this way! Stop this moment, I tell you!' But she went on all\n" ++ "the same, shedding gallons of tears, until there was a large pool\n" ++ "all round her, about four inches deep and reaching half down the\n" ++ "hall.\n" ++ "\n" ++ " After a time she heard a little pattering of feet in the\n" ++ "distance, and she hastily dried her eyes to see what was coming.\n" ++ "It was the White Rabbit returning, splendidly dressed, with a\n" ++ "pair of white kid gloves in one hand and a large fan in the\n" ++ "other: he came trotting along in a great hurry, muttering to\n" ++ "himself as he came, `Oh! the Duchess, the Duchess! Oh! won't she\n" ++ "be savage if I've kept her waiting!' Alice felt so desperate\n" ++ "that she was ready to ask help of any one; so, when the Rabbit\n" ++ "came near her, she began, in a low, timid voice, `If you please,\n" ++ "sir--' The Rabbit started violently, dropped the white kid\n" ++ "gloves and the fan, and skurried away into the darkness as hard\n" ++ "as he could go.\n" ++ "\n" ++ " Alice took up the fan and gloves, and, as the hall was very\n" ++ "hot, she kept fanning herself all the time she went on talking:\n" ++ "`Dear, dear! How queer everything is to-day! And yesterday\n" ++ "things went on just as usual. I wonder if I've been changed in\n" ++ "the night? Let me think: was I the same when I got up this\n" ++ "morning? I almost think I can remember feeling a little\n" ++ "different. But if I'm not the same, the next question is, Who in\n" ++ "the world am I? Ah, THAT'S the great puzzle!' And she began\n" ++ "thinking over all the children she knew that were of the same age\n" ++ "as herself, to see if she could have been changed for any of\n" ++ "them.\n" ++ "\n" ++ " `I'm sure I'm not Ada,' she said, `for her hair goes in such\n" ++ "long ringlets, and mine doesn't go in ringlets at all; and I'm\n" ++ "sure I can't be Mabel, for I know all sorts of things, and she,\n" ++ "oh! she knows such a very little! Besides, SHE'S she, and I'm I,\n" ++ "and--oh dear, how puzzling it all is! I'll try if I know all the\n" ++ "things I used to know. Let me see: four times five is twelve,\n" ++ "and four times six is thirteen, and four times seven is--oh dear!\n" ++ "I shall never get to twenty at that rate! However, the\n" ++ "Multiplication Table doesn't signify: let's try Geography.\n" ++ "London is the capital of Paris, and Paris is the capital of Rome,\n" ++ "and Rome--no, THAT'S all wrong, I'm certain! I must have been\n" ++ "changed for Mabel! 
I'll try and say ''How doth the little--''\n" ++ "and she crossed her hands on her lap as if she were saying lessons,\n" ++ "and began to repeat it, but her voice sounded hoarse and\n" ++ "strange, and the words did not come the same as they used to do:--\n" ++ "\n" ++ " `How doth the little crocodile\n" ++ " Improve his shining tail,\n" ++ " And pour the waters of the Nile\n" ++ " On every golden scale!\n" ++ "\n" ++ " `How cheerfully he seems to grin,\n" ++ " How neatly spread his claws,\n" ++ " And welcome little fishes in\n" ++ " With gently smiling jaws!'\n"; + + static const char * const compress_test_bufs[] = { + test_buf_alice, + test_buf_shakespeare, +- test_buf_pascal ++ test_buf_alice2 + }; + + #endif /* TEST_COMPRESSDEV_TEST_BUFFERS_H_ */ +diff --git a/dpdk/app/test/test_cryptodev.c b/dpdk/app/test/test_cryptodev.c +index 1b561456d7..db9dd3aecb 100644 +--- a/dpdk/app/test/test_cryptodev.c ++++ b/dpdk/app/test/test_cryptodev.c +@@ -143,7 +143,7 @@ static struct rte_crypto_op * + process_crypto_request(uint8_t dev_id, struct rte_crypto_op *op) + { + if (rte_cryptodev_enqueue_burst(dev_id, 0, &op, 1) != 1) { +- printf("Error sending packet for encryption"); ++ RTE_LOG(ERR, USER1, "Error sending packet for encryption\n"); + return NULL; } - memset(addr, 0, mmap_size); -+ if (dev->inflight_info->addr) { -+ munmap(dev->inflight_info->addr, dev->inflight_info->size); -+ dev->inflight_info->addr = NULL; +@@ -152,6 +152,11 @@ process_crypto_request(uint8_t dev_id, struct rte_crypto_op *op) + while (rte_cryptodev_dequeue_burst(dev_id, 0, &op, 1) == 0) + rte_pause(); + ++ if (op->status != RTE_CRYPTO_OP_STATUS_SUCCESS) { ++ RTE_LOG(DEBUG, USER1, "Operation status %d\n", op->status); ++ return NULL; + } + -+ if (dev->inflight_info->fd >= 0) { -+ close(dev->inflight_info->fd); -+ dev->inflight_info->fd = -1; -+ } + return op; + } + +@@ -2823,9 +2828,18 @@ create_wireless_algo_auth_cipher_session(uint8_t dev_id, + ut_params->sess = rte_cryptodev_sym_session_create( + ts_params->session_mpool); + +- status = rte_cryptodev_sym_session_init(dev_id, ut_params->sess, +- &ut_params->auth_xform, +- ts_params->session_priv_mpool); ++ if (cipher_op == RTE_CRYPTO_CIPHER_OP_DECRYPT) { ++ ut_params->auth_xform.next = NULL; ++ ut_params->cipher_xform.next = &ut_params->auth_xform; ++ status = rte_cryptodev_sym_session_init(dev_id, ut_params->sess, ++ &ut_params->cipher_xform, ++ ts_params->session_priv_mpool); + - dev->inflight_info->addr = addr; - dev->inflight_info->size = msg->payload.inflight.mmap_size = mmap_size; - dev->inflight_info->fd = msg->fds[0] = fd; -@@ -1515,10 +1526,13 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, - "failed to alloc dev inflight area\n"); - return RTE_VHOST_MSG_RESULT_ERR; ++ } else ++ status = rte_cryptodev_sym_session_init(dev_id, ut_params->sess, ++ &ut_params->auth_xform, ++ ts_params->session_priv_mpool); ++ + TEST_ASSERT_EQUAL(status, 0, "session init failed"); + TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); + +@@ -3018,13 +3032,14 @@ create_wireless_algo_cipher_hash_operation(const uint8_t *auth_tag, + } + + static int +-create_wireless_algo_auth_cipher_operation(unsigned int auth_tag_len, ++create_wireless_algo_auth_cipher_operation( ++ const uint8_t *auth_tag, unsigned int auth_tag_len, + const uint8_t *cipher_iv, uint8_t cipher_iv_len, + const uint8_t *auth_iv, uint8_t auth_iv_len, + unsigned int data_pad_len, + unsigned int cipher_len, unsigned int cipher_offset, + unsigned int auth_len, unsigned int auth_offset, 
+- uint8_t op_mode, uint8_t do_sgl) ++ uint8_t op_mode, uint8_t do_sgl, uint8_t verify) + { + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; +@@ -3081,6 +3096,10 @@ create_wireless_algo_auth_cipher_operation(unsigned int auth_tag_len, } -+ dev->inflight_info->fd = -1; } -- if (dev->inflight_info->addr) -+ if (dev->inflight_info->addr) { - munmap(dev->inflight_info->addr, dev->inflight_info->size); -+ dev->inflight_info->addr = NULL; -+ } ++ /* Copy digest for the verification */ ++ if (verify) ++ memcpy(sym_op->auth.digest.data, auth_tag, auth_tag_len); ++ + /* Copy cipher and auth IVs at the end of the crypto operation */ + uint8_t *iv_ptr = rte_crypto_op_ctod_offset( + ut_params->op, uint8_t *, IV_OFFSET); +@@ -4643,7 +4662,7 @@ test_snow3g_auth_cipher(const struct snow3g_test_data *tdata, - addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, - fd, mmap_offset); -@@ -1527,8 +1541,10 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, - return RTE_VHOST_MSG_RESULT_ERR; + /* Create SNOW 3G operation */ + retval = create_wireless_algo_auth_cipher_operation( +- tdata->digest.len, ++ tdata->digest.data, tdata->digest.len, + tdata->cipher_iv.data, tdata->cipher_iv.len, + tdata->auth_iv.data, tdata->auth_iv.len, + (tdata->digest.offset_bytes == 0 ? +@@ -4653,7 +4672,7 @@ test_snow3g_auth_cipher(const struct snow3g_test_data *tdata, + tdata->cipher.offset_bits, + tdata->validAuthLenInBits.len, + tdata->auth.offset_bits, +- op_mode, 0); ++ op_mode, 0, verify); + + if (retval < 0) + return retval; +@@ -4819,7 +4838,7 @@ test_snow3g_auth_cipher_sgl(const struct snow3g_test_data *tdata, + + /* Create SNOW 3G operation */ + retval = create_wireless_algo_auth_cipher_operation( +- tdata->digest.len, ++ tdata->digest.data, tdata->digest.len, + tdata->cipher_iv.data, tdata->cipher_iv.len, + tdata->auth_iv.data, tdata->auth_iv.len, + (tdata->digest.offset_bytes == 0 ? +@@ -4829,7 +4848,7 @@ test_snow3g_auth_cipher_sgl(const struct snow3g_test_data *tdata, + tdata->cipher.offset_bits, + tdata->validAuthLenInBits.len, + tdata->auth.offset_bits, +- op_mode, 1); ++ op_mode, 1, verify); + + if (retval < 0) + return retval; +@@ -4988,7 +5007,7 @@ test_kasumi_auth_cipher(const struct kasumi_test_data *tdata, + + /* Create KASUMI operation */ + retval = create_wireless_algo_auth_cipher_operation( +- tdata->digest.len, ++ tdata->digest.data, tdata->digest.len, + tdata->cipher_iv.data, tdata->cipher_iv.len, + NULL, 0, + (tdata->digest.offset_bytes == 0 ? +@@ -4998,7 +5017,7 @@ test_kasumi_auth_cipher(const struct kasumi_test_data *tdata, + tdata->validCipherOffsetInBits.len, + tdata->validAuthLenInBits.len, + 0, +- op_mode, 0); ++ op_mode, 0, verify); + + if (retval < 0) + return retval; +@@ -5165,7 +5184,7 @@ test_kasumi_auth_cipher_sgl(const struct kasumi_test_data *tdata, + + /* Create KASUMI operation */ + retval = create_wireless_algo_auth_cipher_operation( +- tdata->digest.len, ++ tdata->digest.data, tdata->digest.len, + tdata->cipher_iv.data, tdata->cipher_iv.len, + NULL, 0, + (tdata->digest.offset_bytes == 0 ? 
+@@ -5175,7 +5194,7 @@ test_kasumi_auth_cipher_sgl(const struct kasumi_test_data *tdata, + tdata->validCipherOffsetInBits.len, + tdata->validAuthLenInBits.len, + 0, +- op_mode, 1); ++ op_mode, 1, verify); + + if (retval < 0) + return retval; +@@ -5666,7 +5685,7 @@ test_zuc_auth_cipher(const struct wireless_test_data *tdata, + + /* Create ZUC operation */ + retval = create_wireless_algo_auth_cipher_operation( +- tdata->digest.len, ++ tdata->digest.data, tdata->digest.len, + tdata->cipher_iv.data, tdata->cipher_iv.len, + tdata->auth_iv.data, tdata->auth_iv.len, + (tdata->digest.offset_bytes == 0 ? +@@ -5676,7 +5695,7 @@ test_zuc_auth_cipher(const struct wireless_test_data *tdata, + tdata->validCipherOffsetInBits.len, + tdata->validAuthLenInBits.len, + 0, +- op_mode, 0); ++ op_mode, 0, verify); + + if (retval < 0) + return retval; +@@ -5852,7 +5871,7 @@ test_zuc_auth_cipher_sgl(const struct wireless_test_data *tdata, + + /* Create ZUC operation */ + retval = create_wireless_algo_auth_cipher_operation( +- tdata->digest.len, ++ tdata->digest.data, tdata->digest.len, + tdata->cipher_iv.data, tdata->cipher_iv.len, + NULL, 0, + (tdata->digest.offset_bytes == 0 ? +@@ -5862,7 +5881,7 @@ test_zuc_auth_cipher_sgl(const struct wireless_test_data *tdata, + tdata->validCipherOffsetInBits.len, + tdata->validAuthLenInBits.len, + 0, +- op_mode, 1); ++ op_mode, 1, verify); + + if (retval < 0) + return retval; +@@ -6643,7 +6662,7 @@ test_mixed_auth_cipher(const struct mixed_cipher_auth_test_data *tdata, + + /* Create the operation */ + retval = create_wireless_algo_auth_cipher_operation( +- tdata->digest_enc.len, ++ tdata->digest_enc.data, tdata->digest_enc.len, + tdata->cipher_iv.data, tdata->cipher_iv.len, + tdata->auth_iv.data, tdata->auth_iv.len, + (tdata->digest_enc.offset == 0 ? +@@ -6653,7 +6672,7 @@ test_mixed_auth_cipher(const struct mixed_cipher_auth_test_data *tdata, + tdata->cipher.offset_bits, + tdata->validAuthLen.len_bits, + tdata->auth.offset_bits, +- op_mode, 0); ++ op_mode, 0, verify); + + if (retval < 0) + return retval; +@@ -6827,7 +6846,7 @@ test_mixed_auth_cipher_sgl(const struct mixed_cipher_auth_test_data *tdata, + + /* Create the operation */ + retval = create_wireless_algo_auth_cipher_operation( +- tdata->digest_enc.len, ++ tdata->digest_enc.data, tdata->digest_enc.len, + tdata->cipher_iv.data, tdata->cipher_iv.len, + tdata->auth_iv.data, tdata->auth_iv.len, + (tdata->digest_enc.offset == 0 ? 
+@@ -6837,7 +6856,7 @@ test_mixed_auth_cipher_sgl(const struct mixed_cipher_auth_test_data *tdata, + tdata->cipher.offset_bits, + tdata->validAuthLen.len_bits, + tdata->auth.offset_bits, +- op_mode, 1); ++ op_mode, 1, verify); + + if (retval < 0) + return retval; +@@ -9139,8 +9158,10 @@ test_stats(void) + { + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct rte_cryptodev_stats stats; +- struct rte_cryptodev *dev; +- cryptodev_stats_get_t temp_pfn; ++ ++ if (rte_cryptodev_stats_get(ts_params->valid_devs[0], &stats) ++ == -ENOTSUP) ++ return -ENOTSUP; + + rte_cryptodev_stats_reset(ts_params->valid_devs[0]); + TEST_ASSERT((rte_cryptodev_stats_get(ts_params->valid_devs[0] + 600, +@@ -9148,13 +9169,6 @@ test_stats(void) + "rte_cryptodev_stats_get invalid dev failed"); + TEST_ASSERT((rte_cryptodev_stats_get(ts_params->valid_devs[0], 0) != 0), + "rte_cryptodev_stats_get invalid Param failed"); +- dev = &rte_cryptodevs[ts_params->valid_devs[0]]; +- temp_pfn = dev->dev_ops->stats_get; +- dev->dev_ops->stats_get = (cryptodev_stats_get_t)0; +- TEST_ASSERT((rte_cryptodev_stats_get(ts_params->valid_devs[0], &stats) +- == -ENOTSUP), +- "rte_cryptodev_stats_get invalid Param failed"); +- dev->dev_ops->stats_get = temp_pfn; + + /* Test expected values */ + ut_setup(); +@@ -10818,13 +10832,8 @@ test_authentication_verify_fail_when_data_corruption( + + ut_params->op = process_crypto_request(ts_params->valid_devs[0], + ut_params->op); +- TEST_ASSERT_NOT_NULL(ut_params->op, "failed crypto process"); +- TEST_ASSERT_NOT_EQUAL(ut_params->op->status, +- RTE_CRYPTO_OP_STATUS_SUCCESS, +- "authentication not failed"); + +- ut_params->obuf = ut_params->op->sym->m_src; +- TEST_ASSERT_NOT_NULL(ut_params->obuf, "failed to retrieve obuf"); ++ TEST_ASSERT_NULL(ut_params->op, "authentication not failed"); + + return 0; + } +@@ -10879,13 +10888,8 @@ test_authentication_verify_GMAC_fail_when_corruption( + + ut_params->op = process_crypto_request(ts_params->valid_devs[0], + ut_params->op); +- TEST_ASSERT_NOT_NULL(ut_params->op, "failed crypto process"); +- TEST_ASSERT_NOT_EQUAL(ut_params->op->status, +- RTE_CRYPTO_OP_STATUS_SUCCESS, +- "authentication not failed"); + +- ut_params->obuf = ut_params->op->sym->m_src; +- TEST_ASSERT_NOT_NULL(ut_params->obuf, "failed to retrieve obuf"); ++ TEST_ASSERT_NULL(ut_params->op, "authentication not failed"); + + return 0; + } +@@ -10940,13 +10944,7 @@ test_authenticated_decryption_fail_when_corruption( + ut_params->op = process_crypto_request(ts_params->valid_devs[0], + ut_params->op); + +- TEST_ASSERT_NOT_NULL(ut_params->op, "failed crypto process"); +- TEST_ASSERT_NOT_EQUAL(ut_params->op->status, +- RTE_CRYPTO_OP_STATUS_SUCCESS, +- "authentication not failed"); +- +- ut_params->obuf = ut_params->op->sym->m_src; +- TEST_ASSERT_NOT_NULL(ut_params->obuf, "failed to retrieve obuf"); ++ TEST_ASSERT_NULL(ut_params->op, "authentication not failed"); + + return 0; + } +@@ -11149,6 +11147,7 @@ create_aead_operation_SGL(enum rte_crypto_aead_operation op, + const unsigned int auth_tag_len = tdata->auth_tag.len; + const unsigned int iv_len = tdata->iv.len; + unsigned int aad_len = tdata->aad.len; ++ unsigned int aad_len_pad = 0; + + /* Generate Crypto op data structure */ + ut_params->op = rte_crypto_op_alloc(ts_params->op_mpool, +@@ -11203,8 +11202,10 @@ create_aead_operation_SGL(enum rte_crypto_aead_operation op, + + rte_memcpy(iv_ptr, tdata->iv.data, iv_len); + ++ aad_len_pad = RTE_ALIGN_CEIL(aad_len, 16); ++ + sym_op->aead.aad.data = (uint8_t *)rte_pktmbuf_prepend( +- 
ut_params->ibuf, aad_len); ++ ut_params->ibuf, aad_len_pad); + TEST_ASSERT_NOT_NULL(sym_op->aead.aad.data, + "no room to prepend aad"); + sym_op->aead.aad.phys_addr = rte_pktmbuf_iova( +@@ -11219,7 +11220,7 @@ create_aead_operation_SGL(enum rte_crypto_aead_operation op, } -- if (dev->inflight_info->fd) -+ if (dev->inflight_info->fd >= 0) { - close(dev->inflight_info->fd); -+ dev->inflight_info->fd = -1; + sym_op->aead.data.length = tdata->plaintext.len; +- sym_op->aead.data.offset = aad_len; ++ sym_op->aead.data.offset = aad_len_pad; + + return 0; + } +@@ -11252,7 +11253,7 @@ test_authenticated_encryption_SGL(const struct aead_test_data *tdata, + int ecx = 0; + void *digest_mem = NULL; + +- uint32_t prepend_len = tdata->aad.len; ++ uint32_t prepend_len = RTE_ALIGN_CEIL(tdata->aad.len, 16); + + if (tdata->plaintext.len % fragsz != 0) { + if (tdata->plaintext.len / fragsz + 1 > SGL_MAX_NO) +@@ -11915,6 +11916,8 @@ static struct unit_test_suite cryptodev_qat_testsuite = { + test_AES_GCM_auth_encrypt_SGL_out_of_place_400B_400B), + TEST_CASE_ST(ut_setup, ut_teardown, + test_AES_GCM_auth_encrypt_SGL_out_of_place_1500B_2000B), ++ TEST_CASE_ST(ut_setup, ut_teardown, ++ test_AES_GCM_auth_encrypt_SGL_out_of_place_400B_1seg), + TEST_CASE_ST(ut_setup, ut_teardown, + test_AES_GCM_authenticated_encryption_test_case_1), + TEST_CASE_ST(ut_setup, ut_teardown, +diff --git a/dpdk/app/test/test_cryptodev_blockcipher.c b/dpdk/app/test/test_cryptodev_blockcipher.c +index 5bfe2d009f..2f91d000a2 100644 +--- a/dpdk/app/test/test_cryptodev_blockcipher.c ++++ b/dpdk/app/test/test_cryptodev_blockcipher.c +@@ -93,7 +93,7 @@ test_blockcipher_one_case(const struct blockcipher_test_case *t, + uint64_t feat_flags = dev_info.feature_flags; + uint64_t oop_flag = RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT; + +- if (t->feature_mask && BLOCKCIPHER_TEST_FEATURE_OOP) { ++ if (t->feature_mask & BLOCKCIPHER_TEST_FEATURE_OOP) { + if (!(feat_flags & oop_flag)) { + printf("Device doesn't support out-of-place " + "scatter-gather in input mbuf. 
" +diff --git a/dpdk/app/test/test_cryptodev_hash_test_vectors.h b/dpdk/app/test/test_cryptodev_hash_test_vectors.h +index cff2831185..394bb6b60b 100644 +--- a/dpdk/app/test/test_cryptodev_hash_test_vectors.h ++++ b/dpdk/app/test/test_cryptodev_hash_test_vectors.h +@@ -460,6 +460,7 @@ static const struct blockcipher_test_case hash_test_cases[] = { + .test_data = &sha1_test_vector, + .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN, + .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL | ++ BLOCKCIPHER_TEST_TARGET_PMD_QAT | + BLOCKCIPHER_TEST_TARGET_PMD_CCP | + BLOCKCIPHER_TEST_TARGET_PMD_MVSAM | + #if IMB_VERSION_NUM >= IMB_VERSION(0, 52, 0) +@@ -473,6 +474,7 @@ static const struct blockcipher_test_case hash_test_cases[] = { + .test_data = &sha1_test_vector, + .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY, + .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL | ++ BLOCKCIPHER_TEST_TARGET_PMD_QAT | + BLOCKCIPHER_TEST_TARGET_PMD_CCP | + BLOCKCIPHER_TEST_TARGET_PMD_MVSAM | + #if IMB_VERSION_NUM >= IMB_VERSION(0, 52, 0) +@@ -540,6 +542,7 @@ static const struct blockcipher_test_case hash_test_cases[] = { + .test_data = &sha224_test_vector, + .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN, + .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL | ++ BLOCKCIPHER_TEST_TARGET_PMD_QAT | + BLOCKCIPHER_TEST_TARGET_PMD_CCP | + BLOCKCIPHER_TEST_TARGET_PMD_MVSAM | + #if IMB_VERSION_NUM >= IMB_VERSION(0, 52, 0) +@@ -553,6 +556,7 @@ static const struct blockcipher_test_case hash_test_cases[] = { + .test_data = &sha224_test_vector, + .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY, + .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL | ++ BLOCKCIPHER_TEST_TARGET_PMD_QAT | + BLOCKCIPHER_TEST_TARGET_PMD_CCP | + BLOCKCIPHER_TEST_TARGET_PMD_MVSAM | + #if IMB_VERSION_NUM >= IMB_VERSION(0, 52, 0) +@@ -596,6 +600,7 @@ static const struct blockcipher_test_case hash_test_cases[] = { + .test_data = &sha256_test_vector, + .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN, + .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL | ++ BLOCKCIPHER_TEST_TARGET_PMD_QAT | + BLOCKCIPHER_TEST_TARGET_PMD_CCP | + BLOCKCIPHER_TEST_TARGET_PMD_MVSAM | + #if IMB_VERSION_NUM >= IMB_VERSION(0, 52, 0) +@@ -609,6 +614,7 @@ static const struct blockcipher_test_case hash_test_cases[] = { + .test_data = &sha256_test_vector, + .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY, + .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL | ++ BLOCKCIPHER_TEST_TARGET_PMD_QAT | + BLOCKCIPHER_TEST_TARGET_PMD_CCP | + BLOCKCIPHER_TEST_TARGET_PMD_MVSAM | + #if IMB_VERSION_NUM >= IMB_VERSION(0, 52, 0) +@@ -654,6 +660,7 @@ static const struct blockcipher_test_case hash_test_cases[] = { + .test_data = &sha384_test_vector, + .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN, + .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL | ++ BLOCKCIPHER_TEST_TARGET_PMD_QAT | + BLOCKCIPHER_TEST_TARGET_PMD_CCP | + BLOCKCIPHER_TEST_TARGET_PMD_MVSAM | + #if IMB_VERSION_NUM >= IMB_VERSION(0, 52, 0) +@@ -667,6 +674,7 @@ static const struct blockcipher_test_case hash_test_cases[] = { + .test_data = &sha384_test_vector, + .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY, + .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL | ++ BLOCKCIPHER_TEST_TARGET_PMD_QAT | + BLOCKCIPHER_TEST_TARGET_PMD_CCP | + BLOCKCIPHER_TEST_TARGET_PMD_MVSAM | + #if IMB_VERSION_NUM >= IMB_VERSION(0, 52, 0) +@@ -712,6 +720,7 @@ static const struct blockcipher_test_case hash_test_cases[] = { + .test_data = &sha512_test_vector, + .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN, + .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL | ++ BLOCKCIPHER_TEST_TARGET_PMD_QAT | + BLOCKCIPHER_TEST_TARGET_PMD_CCP | 
+ BLOCKCIPHER_TEST_TARGET_PMD_MVSAM | + #if IMB_VERSION_NUM >= IMB_VERSION(0, 52, 0) +@@ -724,6 +733,7 @@ static const struct blockcipher_test_case hash_test_cases[] = { + .test_data = &sha512_test_vector, + .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY, + .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL | ++ BLOCKCIPHER_TEST_TARGET_PMD_QAT | + BLOCKCIPHER_TEST_TARGET_PMD_CCP | + BLOCKCIPHER_TEST_TARGET_PMD_MVSAM | + #if IMB_VERSION_NUM >= IMB_VERSION(0, 52, 0) +diff --git a/dpdk/app/test/test_eventdev.c b/dpdk/app/test/test_eventdev.c +index 427dbbf77f..43ccb1ce97 100644 +--- a/dpdk/app/test/test_eventdev.c ++++ b/dpdk/app/test/test_eventdev.c +@@ -996,9 +996,13 @@ test_eventdev_common(void) + static int + test_eventdev_selftest_impl(const char *pmd, const char *opts) + { +- rte_vdev_init(pmd, opts); ++ int ret = 0; ++ + if (rte_event_dev_get_dev_id(pmd) == -ENODEV) ++ ret = rte_vdev_init(pmd, opts); ++ if (ret) + return TEST_SKIPPED; ++ + return rte_event_dev_selftest(rte_event_dev_get_dev_id(pmd)); + } + +@@ -1017,7 +1021,7 @@ test_eventdev_selftest_octeontx(void) + static int + test_eventdev_selftest_octeontx2(void) + { +- return test_eventdev_selftest_impl("otx2_eventdev", ""); ++ return test_eventdev_selftest_impl("event_octeontx2", ""); + } + + static int +diff --git a/dpdk/app/test/test_fib_perf.c b/dpdk/app/test/test_fib_perf.c +index 573087c3c0..dd2e54db8b 100644 +--- a/dpdk/app/test/test_fib_perf.c ++++ b/dpdk/app/test/test_fib_perf.c +@@ -35,7 +35,7 @@ struct route_rule { + uint8_t depth; + }; + +-struct route_rule large_route_table[MAX_RULE_NUM]; ++static struct route_rule large_route_table[MAX_RULE_NUM]; + + static uint32_t num_route_entries; + #define NUM_ROUTE_ENTRIES num_route_entries +diff --git a/dpdk/app/test/test_flow_classify.c b/dpdk/app/test/test_flow_classify.c +index ff5265c6af..ef0b6fdd5c 100644 +--- a/dpdk/app/test/test_flow_classify.c ++++ b/dpdk/app/test/test_flow_classify.c +@@ -23,7 +23,7 @@ + + #define FLOW_CLASSIFY_MAX_RULE_NUM 100 + #define MAX_PKT_BURST 32 +-#define NB_SOCKETS 1 ++#define NB_SOCKETS 4 + #define MEMPOOL_CACHE_SIZE 256 + #define MBUF_SIZE 512 + #define NB_MBUF 512 +diff --git a/dpdk/app/test/test_hash.c b/dpdk/app/test/test_hash.c +index 0052dce2de..2ac298e21e 100644 +--- a/dpdk/app/test/test_hash.c ++++ b/dpdk/app/test/test_hash.c +@@ -1142,8 +1142,11 @@ fbk_hash_unit_test(void) + handle = rte_fbk_hash_create(&invalid_params_7); + RETURN_IF_ERROR_FBK(handle != NULL, "fbk hash creation should have failed"); + +- handle = rte_fbk_hash_create(&invalid_params_8); +- RETURN_IF_ERROR_FBK(handle != NULL, "fbk hash creation should have failed"); ++ if (rte_eal_has_hugepages()) { ++ handle = rte_fbk_hash_create(&invalid_params_8); ++ RETURN_IF_ERROR_FBK(handle != NULL, ++ "fbk hash creation should have failed"); + } - dev->inflight_info->fd = fd; - dev->inflight_info->addr = addr; -@@ -2059,10 +2075,10 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg, - size = msg->payload.log.mmap_size; - off = msg->payload.log.mmap_offset; + handle = rte_fbk_hash_create(&invalid_params_same_name_1); + RETURN_IF_ERROR_FBK(handle == NULL, "fbk hash creation should have succeeded"); +diff --git a/dpdk/app/test/test_ipsec.c b/dpdk/app/test/test_ipsec.c +index 7dc83fee7e..79d00d7e02 100644 +--- a/dpdk/app/test/test_ipsec.c ++++ b/dpdk/app/test/test_ipsec.c +@@ -237,7 +237,7 @@ fill_crypto_xform(struct ipsec_unitest_params *ut_params, + } -- /* Don't allow mmap_offset to point outside the mmap region */ -- if (off > size) { -+ /* Check for 
mmap size and offset overflow. */ -+ if (off >= -size) { - RTE_LOG(ERR, VHOST_CONFIG, -- "log offset %#"PRIx64" exceeds log size %#"PRIx64"\n", -+ "log offset %#"PRIx64" and log size %#"PRIx64" overflow\n", - off, size); - return RTE_VHOST_MSG_RESULT_ERR; + static int +-check_cryptodev_capablity(const struct ipsec_unitest_params *ut, ++check_cryptodev_capability(const struct ipsec_unitest_params *ut, + uint8_t dev_id) + { + struct rte_cryptodev_sym_capability_idx cap_idx; +@@ -302,7 +302,7 @@ testsuite_setup(void) + + /* Find first valid crypto device */ + for (i = 0; i < nb_devs; i++) { +- rc = check_cryptodev_capablity(ut_params, i); ++ rc = check_cryptodev_capability(ut_params, i); + if (rc == 0) { + ts_params->valid_dev = i; + ts_params->valid_dev_found = 1; +@@ -1167,6 +1167,34 @@ test_ipsec_dump_buffers(struct ipsec_unitest_params *ut_params, int i) } -@@ -2526,7 +2542,7 @@ static int - vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, - struct VhostUserMsg *msg) + } + ++static void ++destroy_dummy_sec_session(struct ipsec_unitest_params *ut, ++ uint32_t j) ++{ ++ rte_security_session_destroy(&dummy_sec_ctx, ++ ut->ss[j].security.ses); ++ ut->ss[j].security.ctx = NULL; ++} ++ ++static void ++destroy_crypto_session(struct ipsec_unitest_params *ut, ++ uint8_t crypto_dev, uint32_t j) ++{ ++ rte_cryptodev_sym_session_clear(crypto_dev, ut->ss[j].crypto.ses); ++ rte_cryptodev_sym_session_free(ut->ss[j].crypto.ses); ++ memset(&ut->ss[j], 0, sizeof(ut->ss[j])); ++} ++ ++static void ++destroy_session(struct ipsec_unitest_params *ut, ++ uint8_t crypto_dev, uint32_t j) ++{ ++ if (ut->ss[j].type == RTE_SECURITY_ACTION_TYPE_NONE) ++ return destroy_crypto_session(ut, crypto_dev, j); ++ else ++ return destroy_dummy_sec_session(ut, j); ++} ++ + static void + destroy_sa(uint32_t j) { -- uint16_t vring_idx; -+ uint32_t vring_idx; +@@ -1175,9 +1203,8 @@ destroy_sa(uint32_t j) - switch (msg->request.master) { - case VHOST_USER_SET_VRING_KICK: -diff --git a/dpdk/lib/librte_vhost/virtio_net.c b/dpdk/lib/librte_vhost/virtio_net.c -index ac2842b2d2..33f10258cf 100644 ---- a/dpdk/lib/librte_vhost/virtio_net.c -+++ b/dpdk/lib/librte_vhost/virtio_net.c -@@ -1086,6 +1086,8 @@ virtio_dev_rx_batch_packed(struct virtio_net *dev, - VHOST_ACCESS_RW); + rte_ipsec_sa_fini(ut->ss[j].sa); + rte_free(ut->ss[j].sa); +- rte_cryptodev_sym_session_clear(ts->valid_dev, ut->ss[j].crypto.ses); +- rte_cryptodev_sym_session_free(ut->ss[j].crypto.ses); +- memset(&ut->ss[j], 0, sizeof(ut->ss[j])); ++ ++ destroy_session(ut, ts->valid_dev, j); + } - vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { -+ if (unlikely(!desc_addrs[i])) -+ return -1; - if (unlikely(lens[i] != descs[avail_idx + i].len)) - return -1; + static int +diff --git a/dpdk/app/test/test_kvargs.c b/dpdk/app/test/test_kvargs.c +index a42056f361..2a2dae43a0 100644 +--- a/dpdk/app/test/test_kvargs.c ++++ b/dpdk/app/test/test_kvargs.c +@@ -142,7 +142,7 @@ static int test_valid_kvargs(void) + valid_keys = valid_keys_list; + kvlist = rte_kvargs_parse(args, valid_keys); + if (kvlist == NULL) { +- printf("rte_kvargs_parse() error"); ++ printf("rte_kvargs_parse() error\n"); + goto fail; } -@@ -1841,6 +1843,8 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev, + if (strcmp(kvlist->pairs[0].value, "[0,1]") != 0) { +@@ -157,6 +157,40 @@ static int test_valid_kvargs(void) } + rte_kvargs_free(kvlist); - vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { -+ if (unlikely(!desc_addrs[i])) -+ return -1; - if (unlikely((lens[i] != descs[avail_idx + i].len))) - 
return -1; ++ /* test using empty string (it is valid) */ ++ args = ""; ++ kvlist = rte_kvargs_parse(args, NULL); ++ if (kvlist == NULL) { ++ printf("rte_kvargs_parse() error\n"); ++ goto fail; ++ } ++ if (rte_kvargs_count(kvlist, NULL) != 0) { ++ printf("invalid count value\n"); ++ goto fail; ++ } ++ rte_kvargs_free(kvlist); ++ ++ /* test using empty elements (it is valid) */ ++ args = "foo=1,,check=value2,,"; ++ kvlist = rte_kvargs_parse(args, NULL); ++ if (kvlist == NULL) { ++ printf("rte_kvargs_parse() error\n"); ++ goto fail; ++ } ++ if (rte_kvargs_count(kvlist, NULL) != 2) { ++ printf("invalid count value\n"); ++ goto fail; ++ } ++ if (rte_kvargs_count(kvlist, "foo") != 1) { ++ printf("invalid count value for 'foo'\n"); ++ goto fail; ++ } ++ if (rte_kvargs_count(kvlist, "check") != 1) { ++ printf("invalid count value for 'check'\n"); ++ goto fail; ++ } ++ rte_kvargs_free(kvlist); ++ + return 0; + + fail: +@@ -179,11 +213,11 @@ static int test_invalid_kvargs(void) + const char *args_list[] = { + "wrong-key=x", /* key not in valid_keys_list */ + "foo=1,foo=", /* empty value */ +- "foo=1,,foo=2", /* empty key/value */ + "foo=1,foo", /* no value */ + "foo=1,=2", /* no key */ + "foo=[1,2", /* no closing bracket in value */ + ",=", /* also test with a smiley */ ++ "foo=[", /* no value in list and no closing bracket */ + NULL }; + const char **args; + const char *valid_keys_list[] = { "foo", "check", NULL }; +@@ -197,8 +231,8 @@ static int test_invalid_kvargs(void) + rte_kvargs_free(kvlist); + goto fail; + } +- return 0; + } ++ return 0; + + fail: + printf("while processing <%s>", *args); +diff --git a/dpdk/app/test/test_lpm_perf.c b/dpdk/app/test/test_lpm_perf.c +index a2578fe90e..489719c40b 100644 +--- a/dpdk/app/test/test_lpm_perf.c ++++ b/dpdk/app/test/test_lpm_perf.c +@@ -34,7 +34,7 @@ struct route_rule { + uint8_t depth; + }; + +-struct route_rule large_route_table[MAX_RULE_NUM]; ++static struct route_rule large_route_table[MAX_RULE_NUM]; + + static uint32_t num_route_entries; + #define NUM_ROUTE_ENTRIES num_route_entries +diff --git a/dpdk/app/test/test_malloc.c b/dpdk/app/test/test_malloc.c +index a16e28cc32..57f796f9e5 100644 +--- a/dpdk/app/test/test_malloc.c ++++ b/dpdk/app/test/test_malloc.c +@@ -746,6 +746,18 @@ test_malloc_bad_params(void) + if (bad_ptr != NULL) + goto err_return; + ++ /* rte_malloc expected to return null with size will cause overflow */ ++ align = RTE_CACHE_LINE_SIZE; ++ size = (size_t)-8; ++ ++ bad_ptr = rte_malloc(type, size, align); ++ if (bad_ptr != NULL) ++ goto err_return; ++ ++ bad_ptr = rte_realloc(NULL, size, align); ++ if (bad_ptr != NULL) ++ goto err_return; ++ + return 0; + + err_return: +diff --git a/dpdk/app/test/test_mbuf.c b/dpdk/app/test/test_mbuf.c +index 61ecffc184..f2922e05e0 100644 +--- a/dpdk/app/test/test_mbuf.c ++++ b/dpdk/app/test/test_mbuf.c +@@ -1144,7 +1144,7 @@ test_refcnt_mbuf(void) + tref += refcnt_lcore[slave]; + + if (tref != refcnt_lcore[master]) +- rte_panic("refernced mbufs: %u, freed mbufs: %u\n", ++ rte_panic("referenced mbufs: %u, freed mbufs: %u\n", + tref, refcnt_lcore[master]); + + rte_mempool_dump(stdout, refcnt_pool); +diff --git a/dpdk/app/test/test_pmd_perf.c b/dpdk/app/test/test_pmd_perf.c +index d61be58bb3..352cd47156 100644 +--- a/dpdk/app/test/test_pmd_perf.c ++++ b/dpdk/app/test/test_pmd_perf.c +@@ -151,7 +151,7 @@ check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + if (link_mbps == 0) + link_mbps = link.link_speed; + } else +diff --git a/dpdk/app/test/test_table_pipeline.c b/dpdk/app/test/test_table_pipeline.c +index 441338ac01..bc412c3081 100644 +--- a/dpdk/app/test/test_table_pipeline.c ++++ b/dpdk/app/test/test_table_pipeline.c +@@ -190,11 +190,13 @@ check_pipeline_invalid_params(void) + goto fail; + } + +- p = rte_pipeline_create(&pipeline_params_3); +- if (p != NULL) { +- RTE_LOG(INFO, PIPELINE, "%s: Configure pipeline with invalid " +- "socket\n", __func__); +- goto fail; ++ if (rte_eal_has_hugepages()) { ++ p = rte_pipeline_create(&pipeline_params_3); ++ if (p != NULL) { ++ RTE_LOG(INFO, PIPELINE, "%s: Configure pipeline with " ++ "invalid socket\n", __func__); ++ goto fail; ++ } } + + /* Check pipeline consistency */ +diff --git a/dpdk/buildtools/meson.build b/dpdk/buildtools/meson.build +index 6ef2c5721c..cd6f4c1af0 100644 +--- a/dpdk/buildtools/meson.build ++++ b/dpdk/buildtools/meson.build +@@ -3,9 +3,11 @@ + + subdir('pmdinfogen') + ++pkgconf = find_program('pkg-config', 'pkgconf', required: false) + pmdinfo = find_program('gen-pmdinfo-cfile.sh') + + check_experimental_syms = find_program('check-experimental-syms.sh') ++ldflags_ibverbs_static = find_program('options-ibverbs-static.sh') + + # set up map-to-def script using python, either built-in or external + python3 = import('python').find_installation(required: false) +diff --git a/dpdk/buildtools/options-ibverbs-static.sh b/dpdk/buildtools/options-ibverbs-static.sh +index 0f285a343b..0740a711ff 100755 +--- a/dpdk/buildtools/options-ibverbs-static.sh ++++ b/dpdk/buildtools/options-ibverbs-static.sh +@@ -9,6 +9,13 @@ + # + # PKG_CONFIG_PATH may be required to be set if libibverbs.pc is not installed. + +-pkg-config --libs-only-l --static libibverbs | ++lib='libibverbs' ++deps='pthread|nl' ++ ++pkg-config --libs --static $lib | + tr '[:space:]' '\n' | +- sed -r '/^-l(pthread|nl)/! s,(^-l)(.*),\1:lib\2.a,' ++ sed -r "/^-l($deps)/! s,(^-l)(.*),\1:lib\2.a," | # explicit .a ++ sed -n '/^-[Ll]/p' | # extra link options may break with make ++ tac | ++ awk "/^-l:$lib.a/&&c++ {next} 1" | # drop first duplicates of main lib ++ tac +diff --git a/dpdk/config/common_base b/dpdk/config/common_base +index 7dec7ed457..861f7d1a0b 100644 +--- a/dpdk/config/common_base ++++ b/dpdk/config/common_base +@@ -328,7 +328,6 @@ CONFIG_RTE_LIBRTE_ICE_PMD=y + CONFIG_RTE_LIBRTE_ICE_DEBUG_RX=n + CONFIG_RTE_LIBRTE_ICE_DEBUG_TX=n + CONFIG_RTE_LIBRTE_ICE_DEBUG_TX_FREE=n +-CONFIG_RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC=y + CONFIG_RTE_LIBRTE_ICE_16BYTE_RX_DESC=n + + # Compile burst-oriented IAVF PMD driver +@@ -352,7 +351,7 @@ CONFIG_RTE_LIBRTE_MLX4_DEBUG=n + + # + # Compile burst-oriented Mellanox ConnectX-4, ConnectX-5, +-# ConnectX-6 & Bluefield (MLX5) PMD ++# ConnectX-6 & BlueField (MLX5) PMD + # + CONFIG_RTE_LIBRTE_MLX5_PMD=n + CONFIG_RTE_LIBRTE_MLX5_DEBUG=n +diff --git a/dpdk/config/meson.build b/dpdk/config/meson.build +index 364a8d7394..78bfdf3094 100644 +--- a/dpdk/config/meson.build ++++ b/dpdk/config/meson.build +@@ -14,6 +14,10 @@ foreach env:supported_exec_envs + set_variable('is_' + env, exec_env == env) + endforeach + ++# MS linker requires special treatment. 
++# TODO: use cc.get_linker_id() with Meson >= 0.54 ++is_ms_linker = is_windows and (cc.get_id() == 'clang') ++ + # set the major version, which might be used by drivers and libraries + # depending on the configuration options + pver = meson.project_version().split('.') +@@ -98,14 +102,18 @@ dpdk_conf.set('RTE_TOOLCHAIN_' + toolchain.to_upper(), 1) + + dpdk_conf.set('RTE_ARCH_64', cc.sizeof('void *') == 8) + +-add_project_link_arguments('-Wl,--no-as-needed', language: 'c') ++if not is_windows ++ add_project_link_arguments('-Wl,--no-as-needed', language: 'c') ++endif + +-# use pthreads +-add_project_link_arguments('-pthread', language: 'c') +-dpdk_extra_ldflags += '-pthread' ++# use pthreads if available for the platform ++if not is_ms_linker ++ add_project_link_arguments('-pthread', language: 'c') ++ dpdk_extra_ldflags += '-pthread' ++endif + + # on some OS, maths functions are in a separate library +-if cc.find_library('libm', required : false).found() ++if cc.find_library('m', required : false).found() + # some libs depend on maths lib + add_project_link_arguments('-lm', language: 'c') + dpdk_extra_ldflags += '-lm' +@@ -183,6 +191,10 @@ warning_flags = [ + '-Wno-packed-not-aligned', + '-Wno-missing-field-initializers' + ] ++if cc.get_id() == 'gcc' and cc.version().version_compare('>=10.0') ++# FIXME: Bugzilla 396 ++ warning_flags += '-Wno-zero-length-bounds' ++endif + if not dpdk_conf.get('RTE_ARCH_64') + # for 32-bit, don't warn about casting a 32-bit pointer to 64-bit int - it's fine!! + warning_flags += '-Wno-pointer-to-int-cast' +@@ -231,6 +243,16 @@ if is_freebsd + add_project_arguments('-D__BSD_VISIBLE', language: 'c') + endif + ++if is_windows ++ # Minimum supported API is Windows 7. ++ add_project_arguments('-D_WIN32_WINNT=0x0601', language: 'c') ++ ++ # Use MinGW-w64 stdio, because DPDK assumes ANSI-compliant formatting. ++ if cc.get_id() == 'gcc' ++ add_project_arguments('-D__USE_MINGW_ANSI_STDIO', language: 'c') ++ endif ++endif ++ + if get_option('b_lto') + if cc.has_argument('-ffat-lto-objects') + add_project_arguments('-ffat-lto-objects', language: 'c') +diff --git a/dpdk/config/x86/meson.build b/dpdk/config/x86/meson.build +index 8b0fa3e6f1..adc857ba28 100644 +--- a/dpdk/config/x86/meson.build ++++ b/dpdk/config/x86/meson.build +@@ -15,11 +15,9 @@ if not is_windows + endif + + # we require SSE4.2 for DPDK +-sse_errormsg = '''SSE4.2 instruction set is required for DPDK. +-Please set the machine type to "nehalem" or "corei7" or higher value''' +- + if cc.get_define('__SSE4_2__', args: machine_args) == '' +- error(sse_errormsg) ++ message('SSE 4.2 not enabled by default, explicitly enabling') ++ machine_args += '-msse4' + endif + + base_flags = ['SSE', 'SSE2', 'SSE3','SSSE3', 'SSE4_1', 'SSE4_2'] +diff --git a/dpdk/devtools/check-symbol-change.sh b/dpdk/devtools/check-symbol-change.sh +index c5434f3bb0..ed2178e36e 100755 +--- a/dpdk/devtools/check-symbol-change.sh ++++ b/dpdk/devtools/check-symbol-change.sh +@@ -17,13 +17,11 @@ build_map_changes() + # map files are altered, and all section/symbol names + # appearing between a triggering of this rule and the + # next trigger of this rule are associated with this file +- /[-+] a\/.*\.map/ {map=$2; in_map=1} ++ /[-+] [ab]\/.*\.map/ {map=$2; in_map=1; next} + +- # Same pattern as above, only it matches on anything that +- # does not end in 'map', indicating we have left the map chunk. 
+- # When we hit this, turn off the in_map variable, which +- # supresses the subordonate rules below +- /[-+] a\/.*\.[^map]/ {in_map=0} ++ # The previous rule catches all .map files, anything else ++ # indicates we left the map chunk. ++ /[-+] [ab]\// {in_map=0} + + # Triggering this rule, which starts a line and ends it + # with a { identifies a versioned section. The section name is +diff --git a/dpdk/devtools/checkpatches.sh b/dpdk/devtools/checkpatches.sh +index b16bace927..9902e2a9bc 100755 +--- a/dpdk/devtools/checkpatches.sh ++++ b/dpdk/devtools/checkpatches.sh +@@ -70,6 +70,14 @@ check_forbidden_additions() { # <patch> + -f $(dirname $(readlink -f $0))/check-forbidden-tokens.awk \ + "$1" || res=1 + ++ # links must prefer https over http ++ awk -v FOLDERS='doc' \ ++ -v EXPRESSIONS='http://.*dpdk.org' \ ++ -v RET_ON_FAIL=1 \ ++ -v MESSAGE='Using non https link to dpdk.org' \ ++ -f $(dirname $(readlink -f $0))/check-forbidden-tokens.awk \ ++ "$1" || res=1 ++ + return $res + } + +diff --git a/dpdk/devtools/cocci.sh b/dpdk/devtools/cocci.sh +index 8b17a8ceba..ab9a6efe9a 100755 +--- a/dpdk/devtools/cocci.sh ++++ b/dpdk/devtools/cocci.sh +@@ -1,34 +1,6 @@ + #! /bin/sh +- +-# BSD LICENSE +-# +-# Copyright 2015 EZchip Semiconductor Ltd. +-# +-# Redistribution and use in source and binary forms, with or without +-# modification, are permitted provided that the following conditions +-# are met: +-# +-# * Redistributions of source code must retain the above copyright +-# notice, this list of conditions and the following disclaimer. +-# * Redistributions in binary form must reproduce the above copyright +-# notice, this list of conditions and the following disclaimer in +-# the documentation and/or other materials provided with the +-# distribution. +-# * Neither the name of EZchip Semiconductor nor the names of its +-# contributors may be used to endorse or promote products derived +-# from this software without specific prior written permission. +-# +-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++# SPDX-License-Identifier: BSD-3-Clause ++# Copyright 2015-2020 Mellanox Technologies, Ltd + + # Apply coccinelle transforms. 
+ +diff --git a/dpdk/devtools/git-log-fixes.sh b/dpdk/devtools/git-log-fixes.sh +index e37ee22600..6d468d6731 100755 +--- a/dpdk/devtools/git-log-fixes.sh ++++ b/dpdk/devtools/git-log-fixes.sh +@@ -94,11 +94,23 @@ stable_tag () # <hash> + fi + } + ++# print a marker for fixes tag presence ++fixes_tag () # <hash> ++{ ++ if git log --format='%b' -1 $1 | grep -qi '^Fixes: *' ; then ++ echo 'F' ++ else ++ echo '-' ++ fi ++} ++ + git log --oneline --reverse $range | + while read id headline ; do + origins=$(origin_filter $id) + stable=$(stable_tag $id) +- [ "$stable" = "S" ] || [ -n "$origins" ] || echo "$headline" | grep -q fix || continue ++ fixes=$(fixes_tag $id) ++ [ "$stable" = "S" ] || [ "$fixes" = "F" ] || [ -n "$origins" ] || \ ++ echo "$headline" | grep -q fix || continue + version=$(commit_version $id) + if [ -n "$origins" ] ; then + origver="$(origin_version $origins)" +@@ -108,5 +120,5 @@ while read id headline ; do + else + origver='N/A' + fi +- printf '%s %7s %s %s (%s)\n' $version $id $stable "$headline" "$origver" ++ printf '%s %7s %s %s %s (%s)\n' $version $id $stable $fixes "$headline" "$origver" + done +diff --git a/dpdk/devtools/test-build.sh b/dpdk/devtools/test-build.sh +index be565a1bea..52305fbb8c 100755 +--- a/dpdk/devtools/test-build.sh ++++ b/dpdk/devtools/test-build.sh +@@ -149,7 +149,7 @@ config () # <directory> <target> <options> + ! echo $3 | grep -q '+debug' || ( \ + sed -ri="" 's,(RTE_LOG_DP_LEVEL=).*,\1RTE_LOG_DEBUG,' $1/.config + sed -ri="" 's,(_DEBUG.*=)n,\1y,' $1/.config +- sed -ri="" 's,(_STAT.*=)n,\1y,' $1/.config ++ sed -ri="" 's,(_STAT)([S_].*=|=)n,\1\2y,' $1/.config + sed -ri="" 's,(TEST_PMD_RECORD_.*=)n,\1y,' $1/.config ) + + # Automatic configuration +diff --git a/dpdk/doc/api/doxy-api-index.md b/dpdk/doc/api/doxy-api-index.md +index dff496be09..d7c8bd24db 100644 +--- a/dpdk/doc/api/doxy-api-index.md ++++ b/dpdk/doc/api/doxy-api-index.md +@@ -1,4 +1,4 @@ +-API {#index} ++API + === + + <!-- +diff --git a/dpdk/doc/api/doxy-api.conf.in b/dpdk/doc/api/doxy-api.conf.in +index 1c4392eecc..12f0a26a90 100644 +--- a/dpdk/doc/api/doxy-api.conf.in ++++ b/dpdk/doc/api/doxy-api.conf.in +@@ -3,6 +3,7 @@ + + PROJECT_NAME = DPDK + PROJECT_NUMBER = @VERSION@ ++USE_MDFILE_AS_MAINPAGE = @TOPDIR@/doc/api/doxy-api-index.md + INPUT = @TOPDIR@/doc/api/doxy-api-index.md \ + @TOPDIR@/drivers/bus/vdev \ + @TOPDIR@/drivers/crypto/scheduler \ +diff --git a/dpdk/doc/api/meson.build b/dpdk/doc/api/meson.build +index 1c48b7672e..c72b880e10 100644 +--- a/dpdk/doc/api/meson.build ++++ b/dpdk/doc/api/meson.build +@@ -3,53 +3,54 @@ + + doxygen = find_program('doxygen', required: get_option('enable_docs')) + +-if doxygen.found() +- # due to the CSS customisation script, which needs to run on a file that +- # is in a subdirectory that is created at build time and thus it cannot +- # be an individual custom_target, we need to wrap the doxygen call in a +- # script to run the CSS modification afterwards +- generate_doxygen = find_program('generate_doxygen.sh') +- generate_examples = find_program('generate_examples.sh') +- generate_css = find_program('doxy-html-custom.sh') +- +- inputdir = join_paths(meson.source_root(), 'examples') +- htmldir = join_paths('share', 'doc', 'dpdk') +- +- # due to the following bug: https://github.com/mesonbuild/meson/issues/4107 +- # if install is set to true it will override build_by_default and it will +- # cause the target to always be built. If install were to be always set to +- # false it would be impossible to install the docs. 
+- # So use a configure option for now. +- example = custom_target('examples.dox', +- input: inputdir, +- output: 'examples.dox', +- command: [generate_examples, '@INPUT@', '@OUTPUT@'], +- install: get_option('enable_docs'), +- install_dir: htmldir, +- build_by_default: get_option('enable_docs')) +- +- cdata = configuration_data() +- cdata.set('VERSION', meson.project_version()) +- cdata.set('API_EXAMPLES', join_paths(meson.build_root(), 'doc', 'api', 'examples.dox')) +- cdata.set('OUTPUT', join_paths(meson.build_root(), 'doc', 'api')) +- cdata.set('HTML_OUTPUT', 'api') +- cdata.set('TOPDIR', meson.source_root()) +- cdata.set('STRIP_FROM_PATH', meson.source_root()) +- +- doxy_conf = configure_file(input: 'doxy-api.conf.in', +- output: 'doxy-api.conf', +- configuration: cdata, +- install: false) +- +- doxy_build = custom_target('doxygen', +- depends: example, +- input: doxy_conf, +- output: 'api', +- command: [generate_doxygen, '@INPUT@', '@OUTPUT@', generate_css], +- install: get_option('enable_docs'), +- install_dir: htmldir, +- build_by_default: get_option('enable_docs')) +- +- doc_targets += doxy_build +- doc_target_names += 'Doxygen_API' ++if not doxygen.found() ++ subdir_done() + endif ++ ++# due to the CSS customisation script, which needs to run on a file that ++# is in a subdirectory that is created at build time and thus it cannot ++# be an individual custom_target, we need to wrap the doxygen call in a ++# script to run the CSS modification afterwards ++generate_doxygen = find_program('generate_doxygen.sh') ++generate_examples = find_program('generate_examples.sh') ++generate_css = find_program('doxy-html-custom.sh') ++ ++inputdir = join_paths(meson.source_root(), 'examples') ++htmldir = join_paths('share', 'doc', 'dpdk') ++ ++# due to the following bug: https://github.com/mesonbuild/meson/issues/4107 ++# if install is set to true it will override build_by_default and it will ++# cause the target to always be built. If install were to be always set to ++# false it would be impossible to install the docs. ++# So use a configure option for now. ++example = custom_target('examples.dox', ++ input: inputdir, ++ output: 'examples.dox', ++ command: [generate_examples, '@INPUT@', '@OUTPUT@'], ++ install: get_option('enable_docs'), ++ install_dir: htmldir, ++ build_by_default: get_option('enable_docs')) ++ ++cdata = configuration_data() ++cdata.set('VERSION', meson.project_version()) ++cdata.set('API_EXAMPLES', join_paths(meson.build_root(), 'doc', 'api', 'examples.dox')) ++cdata.set('OUTPUT', join_paths(meson.build_root(), 'doc', 'api')) ++cdata.set('HTML_OUTPUT', 'api') ++cdata.set('TOPDIR', meson.source_root()) ++cdata.set('STRIP_FROM_PATH', meson.source_root()) ++ ++doxy_conf = configure_file(input: 'doxy-api.conf.in', ++ output: 'doxy-api.conf', ++ configuration: cdata) ++ ++doxy_build = custom_target('doxygen', ++ depends: example, ++ input: doxy_conf, ++ output: 'api', ++ command: [generate_doxygen, '@INPUT@', '@OUTPUT@', generate_css], ++ install: get_option('enable_docs'), ++ install_dir: htmldir, ++ build_by_default: get_option('enable_docs')) ++ ++doc_targets += doxy_build ++doc_target_names += 'Doxygen_API' +diff --git a/dpdk/doc/guides/conf.py b/dpdk/doc/guides/conf.py +index e2b52e2df9..c1a82be95b 100644 +--- a/dpdk/doc/guides/conf.py ++++ b/dpdk/doc/guides/conf.py +@@ -237,7 +237,7 @@ def generate_overview_table(output_filename, table_id, section, table_name, titl + ini_filename)) + continue + +- if value is not '': ++ if value: + # Get the first letter only. 
+ ini_data[ini_filename][name] = value[0] + +@@ -314,16 +314,22 @@ def print_table_css(outfile, table_id): + cursor: default; + overflow: hidden; + } ++ table#idx p { ++ margin: 0; ++ line-height: inherit; ++ } + table#idx th, table#idx td { + text-align: center; ++ border: solid 1px #ddd; + } + table#idx th { +- font-size: 72%; ++ padding: 0.5em 0; ++ } ++ table#idx th, table#idx th p { ++ font-size: 11px; + white-space: pre-wrap; + vertical-align: top; +- padding: 0.5em 0; + min-width: 0.9em; +- width: 2em; + } + table#idx col:first-child { + width: 0; +@@ -332,9 +338,11 @@ def print_table_css(outfile, table_id): + vertical-align: bottom; + } + table#idx td { +- font-size: 70%; + padding: 1px; + } ++ table#idx td, table#idx td p { ++ font-size: 11px; ++ } + table#idx td:first-child { + padding-left: 1em; + text-align: left; +@@ -410,4 +418,8 @@ def setup(app): + # Process the numref references once the doctree has been created. + app.connect('doctree-resolved', process_numref) + +- app.add_stylesheet('css/custom.css') ++ try: ++ # New function in sphinx 1.8 ++ app.add_css_file('css/custom.css') ++ except: ++ app.add_stylesheet('css/custom.css') +diff --git a/dpdk/doc/guides/contributing/abi_policy.rst b/dpdk/doc/guides/contributing/abi_policy.rst +index 05ca95980b..2198519d9b 100644 +--- a/dpdk/doc/guides/contributing/abi_policy.rst ++++ b/dpdk/doc/guides/contributing/abi_policy.rst +@@ -220,19 +220,18 @@ Examples of ABI Changes + The following are examples of allowable ABI changes occurring between + declarations of major ABI versions. + +-* DPDK 19.11 release, defines the function ``rte_foo()``, and ``rte_foo()`` +- as part of the major ABI version ``20``. ++* DPDK 19.11 release defines the function ``rte_foo()`` ; ``rte_foo()`` ++ is part of the major ABI version ``20``. + +-* DPDK 20.02 release defines a new function ``rte_foo(uint8_t bar)``, and +- this is not a problem as long as the symbol ``rte_foo@DPDK20`` is ++* DPDK 20.02 release defines a new function ``rte_foo(uint8_t bar)``. ++ This is not a problem as long as the symbol ``rte_foo@DPDK20`` is + preserved through :ref:`abi_versioning`. + + - The new function may be marked with the ``__rte_experimental`` tag for a + number of releases, as described in the section :ref:`experimental_apis`. + +- - Once ``rte_foo(uint8_t bar)`` becomes non-experimental ``rte_foo()`` is then +- declared as ``__rte_depreciated``, with an associated deprecation notice +- provided. ++ - Once ``rte_foo(uint8_t bar)`` becomes non-experimental, ``rte_foo()`` is ++ declared as ``__rte_deprecated`` and an deprecation notice is provided. + + * DPDK 19.11 is not re-released to include ``rte_foo(uint8_t bar)``, the new + version of ``rte_foo`` only exists from DPDK 20.02 onwards as described in the +@@ -242,13 +241,13 @@ declarations of major ABI versions. + rte_baz()``. This function may or may not exist in the DPDK 20.05 release. + + * An application ``dPacket`` wishes to use ``rte_foo(uint8_t bar)``, before the +- declaration of the DPDK ``21`` major API version. The application can only ++ declaration of the DPDK ``21`` major ABI version. The application can only + ensure its runtime dependencies are met by specifying ``DPDK (>= 20.2)`` as +- an explicit package dependency, as the soname only may only indicate the ++ an explicit package dependency, as the soname can only indicate the + supported major ABI version. 
+ + * At the release of DPDK 20.11, the function ``rte_foo(uint8_t bar)`` becomes +- formally part of then new major ABI version DPDK 21.0 and ``rte_foo()`` may be ++ formally part of then new major ABI version DPDK ``21`` and ``rte_foo()`` may be + removed. + + .. _deprecation_notices: +@@ -322,6 +321,6 @@ Libraries + + Libraries marked as ``experimental`` are entirely not considered part of an ABI + version, and may change without warning at any time. Experimental libraries +-always have a major version of ``0`` to indicate they exist outside of ++always have a major ABI version of ``0`` to indicate they exist outside of + :ref:`abi_versioning` , with the minor version incremented with each ABI change + to library. +diff --git a/dpdk/doc/guides/contributing/abi_versioning.rst b/dpdk/doc/guides/contributing/abi_versioning.rst +index a21f4e7a41..ea9d99606b 100644 +--- a/dpdk/doc/guides/contributing/abi_versioning.rst ++++ b/dpdk/doc/guides/contributing/abi_versioning.rst +@@ -200,7 +200,7 @@ private, is safe), but it also requires modifying the code as follows + Note also that, being a public function, the header file prototype must also be + changed, as must all the call sites, to reflect the new ABI footprint. We will + maintain previous ABI versions that are accessible only to previously compiled +-binaries ++binaries. + + The addition of a parameter to the function is ABI breaking as the function is + public, and existing application may use it in its current form. However, the +@@ -266,12 +266,12 @@ This file needs to be modified as follows + + } DPDK_20; + +-The addition of the new block tells the linker that a new version node is +-available (DPDK_21), which contains the symbol rte_acl_create, and inherits ++The addition of the new block tells the linker that a new version node ++``DPDK_21`` is available, which contains the symbol rte_acl_create, and inherits + the symbols from the DPDK_20 node. This list is directly translated into a +-list of exported symbols when DPDK is compiled as a shared library ++list of exported symbols when DPDK is compiled as a shared library. + +-Next, we need to specify in the code which function map to the rte_acl_create ++Next, we need to specify in the code which function maps to the rte_acl_create + symbol at which versions. First, at the site of the initial symbol definition, + we need to update the function so that it is uniquely named, and not in conflict + with the public symbol name +@@ -288,24 +288,29 @@ with the public symbol name + ... + + Note that the base name of the symbol was kept intact, as this is conducive to +-the macros used for versioning symbols and we have annotated the function as an +-implementation of versioned symbol. That is our next step, mapping this new +-symbol name to the initial symbol name at version node 20. Immediately after +-the function, we add this line of code ++the macros used for versioning symbols and we have annotated the function as ++``__vsym``, an implementation of a versioned symbol . That is our next step, ++mapping this new symbol name to the initial symbol name at version node 20. ++Immediately after the function, we add the VERSION_SYMBOL macro. + + .. code-block:: c + ++ #include <rte_function_versioning.h> ++ ++ ... + VERSION_SYMBOL(rte_acl_create, _v20, 20); + + Remembering to also add the rte_function_versioning.h header to the requisite c +-file where these changes are being made. The above macro instructs the linker to ++file where these changes are being made. 
The macro instructs the linker to + create a new symbol ``rte_acl_create@DPDK_20``, which matches the symbol created + in older builds, but now points to the above newly named function. We have now + mapped the original rte_acl_create symbol to the original function (but with a + new name). + +-Next, we need to create the 21 version of the symbol. We create a new function +-name, with a different suffix, and implement it appropriately ++Please see the section :ref:`Enabling versioning macros ++<enabling_versioning_macros>` to enable this macro in the meson/ninja build. ++Next, we need to create the new ``v21`` version of the symbol. We create a new ++function name, with the ``v21`` suffix, and implement it appropriately. + + .. code-block:: c + +@@ -320,35 +325,58 @@ name, with a different suffix, and implement it appropriately + } + + This code serves as our new API call. Its the same as our old call, but adds the +-new parameter in place. Next we need to map this function to the symbol +-``rte_acl_create@DPDK_21``. To do this, we modify the public prototype of the +-call in the header file, adding the macro there to inform all including +-applications, that on re-link, the default rte_acl_create symbol should point to +-this function. Note that we could do this by simply naming the function above +-rte_acl_create, and the linker would chose the most recent version tag to apply +-in the version script, but we can also do this in the header file ++new parameter in place. Next we need to map this function to the new default ++symbol ``rte_acl_create@DPDK_21``. To do this, immediately after the function, ++we add the BIND_DEFAULT_SYMBOL macro. ++ ++.. code-block:: c ++ ++ #include <rte_function_versioning.h> ++ ++ ... ++ BIND_DEFAULT_SYMBOL(rte_acl_create, _v21, 21); ++ ++The macro instructs the linker to create the new default symbol ++``rte_acl_create@DPDK_21``, which points to the above newly named function. ++ ++We finally modify the prototype of the call in the public header file, ++such that it contains both versions of the symbol and the public API. + + .. code-block:: c + + struct rte_acl_ctx * +- -rte_acl_create(const struct rte_acl_param *param); +- +rte_acl_create_v21(const struct rte_acl_param *param, int debug); +- +BIND_DEFAULT_SYMBOL(rte_acl_create, _v21, 21); +- +-The BIND_DEFAULT_SYMBOL macro explicitly tells applications that include this +-header, to link to the rte_acl_create_v21 function and apply the DPDK_21 +-version node to it. This method is more explicit and flexible than just +-re-implementing the exact symbol name, and allows for other features (such as +-linking to the old symbol version by default, when the new ABI is to be opt-in +-for a period. +- +-One last thing we need to do. Note that we've taken what was a public symbol, +-and duplicated it into two uniquely and differently named symbols. We've then +-mapped each of those back to the public symbol ``rte_acl_create`` with different +-version tags. This only applies to dynamic linking, as static linking has no +-notion of versioning. That leaves this code in a position of no longer having a +-symbol simply named ``rte_acl_create`` and a static build will fail on that +-missing symbol. 
++ rte_acl_create(const struct rte_acl_param *param); ++ ++ struct rte_acl_ctx * __vsym ++ rte_acl_create_v20(const struct rte_acl_param *param); ++ ++ struct rte_acl_ctx * __vsym ++ rte_acl_create_v21(const struct rte_acl_param *param, int debug); ++ ++ ++And that's it, on the next shared library rebuild, there will be two versions of ++rte_acl_create, an old DPDK_20 version, used by previously built applications, ++and a new DPDK_21 version, used by future built applications. ++ ++.. note:: ++ ++ **Before you leave**, please take care reviewing the sections on ++ :ref:`mapping static symbols <mapping_static_symbols>`, ++ :ref:`enabling versioning macros <enabling_versioning_macros>`, ++ and :ref:`ABI deprecation <abi_deprecation>`. ++ ++ ++.. _mapping_static_symbols: ++ ++Mapping static symbols ++______________________ ++ ++Now we've taken what was a public symbol, and duplicated it into two uniquely ++and differently named symbols. We've then mapped each of those back to the ++public symbol ``rte_acl_create`` with different version tags. This only applies ++to dynamic linking, as static linking has no notion of versioning. That leaves ++this code in a position of no longer having a symbol simply named ++``rte_acl_create`` and a static build will fail on that missing symbol. + + To correct this, we can simply map a function of our choosing back to the public + symbol in the static build with the ``MAP_STATIC_SYMBOL`` macro. Generally the +@@ -369,15 +397,31 @@ defined, we add this + That tells the compiler that, when building a static library, any calls to the + symbol ``rte_acl_create`` should be linked to ``rte_acl_create_v21`` + +-That's it, on the next shared library rebuild, there will be two versions of +-rte_acl_create, an old DPDK_20 version, used by previously built applications, +-and a new DPDK_21 version, used by future built applications. + ++.. _enabling_versioning_macros: ++ ++Enabling versioning macros ++__________________________ ++ ++Finally, we need to indicate to the meson/ninja build system ++to enable versioning macros when building the ++library or driver. In the libraries or driver where we have added symbol ++versioning, in the ``meson.build`` file we add the following ++ ++.. code-block:: none ++ ++ use_function_versioning = true ++ ++at the start of the head of the file. This will indicate to the tool-chain to ++enable the function version macros when building. There is no corresponding ++directive required for the ``make`` build system. ++ ++.. _abi_deprecation: + + Deprecating part of a public API + ________________________________ + +-Lets assume that you've done the above update, and in preparation for the next ++Lets assume that you've done the above updates, and in preparation for the next + major ABI version you decide you would like to retire the old version of the + function. After having gone through the ABI deprecation announcement process, + removal is easy. Start by removing the symbol from the requisite version map +@@ -421,8 +465,8 @@ Next remove the corresponding versioned export. + + + Note that the internal function definition could also be removed, but its used +-in our example by the newer version v21, so we leave it in place and declare it +-as static. This is a coding style choice. ++in our example by the newer version ``v21``, so we leave it in place and declare ++it as static. This is a coding style choice. + + .. 
_deprecating_entire_abi: + +diff --git a/dpdk/doc/guides/contributing/documentation.rst b/dpdk/doc/guides/contributing/documentation.rst +index 27e4b13be1..3924771cf0 100644 +--- a/dpdk/doc/guides/contributing/documentation.rst ++++ b/dpdk/doc/guides/contributing/documentation.rst +@@ -82,7 +82,7 @@ added to by the developer. + * **API documentation** + + The API documentation explains how to use the public DPDK functions. +- The `API index page <http://doc.dpdk.org/api/>`_ shows the generated API documentation with related groups of functions. ++ The `API index page <https://doc.dpdk.org/api/>`_ shows the generated API documentation with related groups of functions. + + The API documentation should be updated via Doxygen comments when new functions are added. + +@@ -561,14 +561,14 @@ Hyperlinks + ~~~~~~~~~~ + + * Links to external websites can be plain URLs. +- The following is rendered as http://dpdk.org:: ++ The following is rendered as https://dpdk.org:: + +- http://dpdk.org ++ https://dpdk.org + + * They can contain alternative text. +- The following is rendered as `Check out DPDK <http://dpdk.org>`_:: ++ The following is rendered as `Check out DPDK <https://dpdk.org>`_:: + +- `Check out DPDK <http://dpdk.org>`_ ++ `Check out DPDK <https://dpdk.org>`_ + + * An internal link can be generated by placing labels in the document with the format ``.. _label_name``. + +@@ -666,7 +666,7 @@ The following are some guidelines for use of Doxygen in the DPDK API documentati + */ + + In the API documentation the functions will be rendered as links, see the +- `online section of the rte_ethdev.h docs <http://doc.dpdk.org/api/rte__ethdev_8h.html>`_ that contains the above text. ++ `online section of the rte_ethdev.h docs <https://doc.dpdk.org/api/rte__ethdev_8h.html>`_ that contains the above text. + + * The ``@see`` keyword can be used to create a *see also* link to another file or library. + This directive should be placed on one line at the bottom of the documentation section. +diff --git a/dpdk/doc/guides/contributing/patches.rst b/dpdk/doc/guides/contributing/patches.rst +index 0686450e45..5ca037757e 100644 +--- a/dpdk/doc/guides/contributing/patches.rst ++++ b/dpdk/doc/guides/contributing/patches.rst +@@ -28,9 +28,9 @@ The DPDK development process has the following features: + * All sub-repositories are merged into main repository for ``-rc1`` and ``-rc2`` versions of the release. + * After the ``-rc2`` release all patches should target the main repository. + +-The mailing list for DPDK development is `dev@dpdk.org <http://mails.dpdk.org/archives/dev/>`_. +-Contributors will need to `register for the mailing list <http://mails.dpdk.org/listinfo/dev>`_ in order to submit patches. +-It is also worth registering for the DPDK `Patchwork <http://patches.dpdk.org/project/dpdk/list/>`_ ++The mailing list for DPDK development is `dev@dpdk.org <https://mails.dpdk.org/archives/dev/>`_. ++Contributors will need to `register for the mailing list <https://mails.dpdk.org/listinfo/dev>`_ in order to submit patches. ++It is also worth registering for the DPDK `Patchwork <https://patches.dpdk.org/project/dpdk/list/>`_ + + If you are using the GitHub service, you can link your repository to + the ``travis-ci.org`` build service. 
When you push patches to your GitHub +@@ -130,12 +130,12 @@ The source code can be cloned using either of the following: + main repository:: + + git clone git://dpdk.org/dpdk +- git clone http://dpdk.org/git/dpdk ++ git clone https://dpdk.org/git/dpdk + +-sub-repositories (`list <http://git.dpdk.org/next>`_):: ++sub-repositories (`list <https://git.dpdk.org/next>`_):: + + git clone git://dpdk.org/next/dpdk-next-* +- git clone http://dpdk.org/git/next/dpdk-next-* ++ git clone https://dpdk.org/git/next/dpdk-next-* + + Make your Changes + ----------------- +@@ -182,7 +182,7 @@ A good way of thinking about whether a patch should be split is to consider whet + applied without dependencies as a backport. + + It is better to keep the related documentation changes in the same patch +-file as the code, rather than one big documentation patch at then end of a ++file as the code, rather than one big documentation patch at the end of a + patchset. This makes it easier for future maintenance and development of the + code. + +@@ -320,7 +320,7 @@ Patch for Stable Releases + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + All fix patches to the master branch that are candidates for backporting +-should also be CCed to the `stable@dpdk.org <http://mails.dpdk.org/listinfo/stable>`_ ++should also be CCed to the `stable@dpdk.org <https://mails.dpdk.org/listinfo/stable>`_ + mailing list. + In the commit message body the Cc: stable@dpdk.org should be inserted as follows:: + +@@ -423,7 +423,7 @@ are loaded from the following files, in order of preference:: + ~/.config/dpdk/devel.config + /etc/dpdk/devel.config. + +-Once the environment variable the script can be run as follows:: ++Once the environment variable is set, the script can be run as follows:: + + devtools/checkpatches.sh ~/patch/ + +@@ -548,7 +548,7 @@ If the patch is in relation to a previous email thread you can add it to the sam + git send-email --to dev@dpdk.org --in-reply-to <1234-foo@bar.com> 000*.patch + + The Message ID can be found in the raw text of emails or at the top of each Patchwork patch, +-`for example <http://patches.dpdk.org/patch/7646/>`_. ++`for example <https://patches.dpdk.org/patch/7646/>`_. + Shallow threading (``--thread --no-chain-reply-to``) is preferred for a patch series. + + Once submitted your patches will appear on the mailing list and in Patchwork. +diff --git a/dpdk/doc/guides/contributing/stable.rst b/dpdk/doc/guides/contributing/stable.rst +index 4d38bb8606..021c762fc6 100644 +--- a/dpdk/doc/guides/contributing/stable.rst ++++ b/dpdk/doc/guides/contributing/stable.rst +@@ -51,7 +51,7 @@ agreement and a commitment from a maintainer. The current policy is that each + year's November (X.11) release will be maintained as an LTS for 2 years. + + After the X.11 release, an LTS branch will be created for it at +-http://git.dpdk.org/dpdk-stable where bugfixes will be backported to. ++https://git.dpdk.org/dpdk-stable where bugfixes will be backported to. + + A LTS release may align with the declaration of a new major ABI version, + please read the :doc:`abi_policy` for more information. +@@ -107,7 +107,7 @@ The Stable and LTS release are coordinated on the stable@dpdk.org mailing + list. + + All fix patches to the master branch that are candidates for backporting +-should also be CCed to the `stable@dpdk.org <http://mails.dpdk.org/listinfo/stable>`_ ++should also be CCed to the `stable@dpdk.org <https://mails.dpdk.org/listinfo/stable>`_ + mailing list. 
+ + +@@ -118,7 +118,7 @@ A Stable Release will be released by: + + * Tagging the release with YY.MM.n (year, month, number). + * Uploading a tarball of the release to dpdk.org. +-* Sending an announcement to the `announce@dpdk.org <http://mails.dpdk.org/listinfo/announce>`_ ++* Sending an announcement to the `announce@dpdk.org <https://mails.dpdk.org/listinfo/announce>`_ + list. + +-Stable releases are available on the `dpdk.org download page <http://core.dpdk.org/download/>`_. ++Stable releases are available on the `dpdk.org download page <https://core.dpdk.org/download/>`_. +diff --git a/dpdk/doc/guides/contributing/vulnerability.rst b/dpdk/doc/guides/contributing/vulnerability.rst +index 5484119d19..da00acd4f0 100644 +--- a/dpdk/doc/guides/contributing/vulnerability.rst ++++ b/dpdk/doc/guides/contributing/vulnerability.rst +@@ -36,11 +36,11 @@ Report + + Do not use Bugzilla (unsecured). + Instead, send GPG-encrypted emails +-to `security@dpdk.org <http://core.dpdk.org/security#contact>`_. ++to `security@dpdk.org <https://core.dpdk.org/security#contact>`_. + Anyone can post to this list. + In order to reduce the disclosure of a vulnerability in the early stages, + membership of this list is intentionally limited to a `small number of people +-<http://mails.dpdk.org/roster/security>`_. ++<https://mails.dpdk.org/roster/security>`_. + + It is additionally encouraged to GPG-sign one-on-one conversations + as part of the security process. +@@ -188,7 +188,7 @@ Downstream stakeholders are expected not to deploy or disclose patches + until the embargo is passed, otherwise they will be removed from the list. + + Downstream stakeholders (in `security-prerelease list +-<http://mails.dpdk.org/roster/security-prerelease>`_), are: ++<https://mails.dpdk.org/roster/security-prerelease>`_), are: + + * Operating system vendors known to package DPDK + * Major DPDK users, considered trustworthy by the technical board, who +diff --git a/dpdk/doc/guides/cryptodevs/aesni_gcm.rst b/dpdk/doc/guides/cryptodevs/aesni_gcm.rst +index 151aa30606..a8ea3206ba 100644 +--- a/dpdk/doc/guides/cryptodevs/aesni_gcm.rst ++++ b/dpdk/doc/guides/cryptodevs/aesni_gcm.rst +@@ -45,6 +45,19 @@ can be downloaded in `<https://github.com/01org/intel-ipsec-mb/archive/v0.53.zip + make + make install + ++The library requires NASM to be built. Depending on the library version, it might require a minimum NASM version (e.g. v0.53 requires at least NASM 2.13.03). ++ ++NASM is packaged for different OS. However, on some OS the version is too old, so a manual installation is required. ++In that case, NASM can be downloaded from ++`NASM website <https://www.nasm.us/pub/nasm/releasebuilds/?C=M;O=D>`_. ++Once it is downloaded, extract it and follow these steps: ++ ++.. code-block:: console ++ ++ ./configure ++ make ++ make install ++ + As a reference, the following table shows a mapping between the past DPDK versions + and the external crypto libraries supported by them: + +diff --git a/dpdk/doc/guides/cryptodevs/aesni_mb.rst b/dpdk/doc/guides/cryptodevs/aesni_mb.rst +index 5d8fb46efe..ca6c169858 100644 +--- a/dpdk/doc/guides/cryptodevs/aesni_mb.rst ++++ b/dpdk/doc/guides/cryptodevs/aesni_mb.rst +@@ -72,6 +72,19 @@ can be downloaded from `<https://github.com/01org/intel-ipsec-mb/archive/v0.53.z + make + make install + ++The library requires NASM to be built. Depending on the library version, it might require a minimum NASM version (e.g. v0.53 requires at least NASM 2.13.03). ++ ++NASM is packaged for different OS. 
However, on some OS the version is too old, so a manual installation is required. ++In that case, NASM can be downloaded from ++`NASM website <https://www.nasm.us/pub/nasm/releasebuilds/?C=M;O=D>`_. ++Once it is downloaded, extract it and follow these steps: ++ ++.. code-block:: console ++ ++ ./configure ++ make ++ make install ++ + As a reference, the following table shows a mapping between the past DPDK versions + and the Multi-Buffer library version supported by them: + +diff --git a/dpdk/doc/guides/cryptodevs/features/qat.ini b/dpdk/doc/guides/cryptodevs/features/qat.ini +index 6e350eb81f..a722419979 100644 +--- a/dpdk/doc/guides/cryptodevs/features/qat.ini ++++ b/dpdk/doc/guides/cryptodevs/features/qat.ini +@@ -44,10 +44,15 @@ ZUC EEA3 = Y + [Auth] + NULL = Y + MD5 HMAC = Y ++SHA1 = Y + SHA1 HMAC = Y ++SHA224 = Y + SHA224 HMAC = Y ++SHA256 = Y + SHA256 HMAC = Y ++SHA384 = Y + SHA384 HMAC = Y ++SHA512 = Y + SHA512 HMAC = Y + AES GMAC = Y + SNOW3G UIA2 = Y +diff --git a/dpdk/doc/guides/cryptodevs/qat.rst b/dpdk/doc/guides/cryptodevs/qat.rst +index 6197875fe3..bb04590619 100644 +--- a/dpdk/doc/guides/cryptodevs/qat.rst ++++ b/dpdk/doc/guides/cryptodevs/qat.rst +@@ -52,10 +52,15 @@ Cipher algorithms: + + Hash algorithms: + ++* ``RTE_CRYPTO_AUTH_SHA1`` + * ``RTE_CRYPTO_AUTH_SHA1_HMAC`` ++* ``RTE_CRYPTO_AUTH_SHA224`` + * ``RTE_CRYPTO_AUTH_SHA224_HMAC`` ++* ``RTE_CRYPTO_AUTH_SHA256`` + * ``RTE_CRYPTO_AUTH_SHA256_HMAC`` ++* ``RTE_CRYPTO_AUTH_SHA384`` + * ``RTE_CRYPTO_AUTH_SHA384_HMAC`` ++* ``RTE_CRYPTO_AUTH_SHA512`` + * ``RTE_CRYPTO_AUTH_SHA512_HMAC`` + * ``RTE_CRYPTO_AUTH_AES_XCBC_MAC`` + * ``RTE_CRYPTO_AUTH_SNOW3G_UIA2`` +diff --git a/dpdk/doc/guides/eventdevs/index.rst b/dpdk/doc/guides/eventdevs/index.rst +index 570905b813..bb66a5eacc 100644 +--- a/dpdk/doc/guides/eventdevs/index.rst ++++ b/dpdk/doc/guides/eventdevs/index.rst +@@ -5,7 +5,7 @@ Event Device Drivers + ==================== + + The following are a list of event device PMDs, which can be used from an +-application trough the eventdev API. ++application through the eventdev API. + + .. toctree:: + :maxdepth: 2 +diff --git a/dpdk/doc/guides/eventdevs/octeontx2.rst b/dpdk/doc/guides/eventdevs/octeontx2.rst +index fad84cf42d..d4b2515ce5 100644 +--- a/dpdk/doc/guides/eventdevs/octeontx2.rst ++++ b/dpdk/doc/guides/eventdevs/octeontx2.rst +@@ -66,7 +66,7 @@ Runtime Config Options + upper limit for in-flight events. + For example:: + +- --dev "0002:0e:00.0,xae_cnt=16384" ++ -w 0002:0e:00.0,xae_cnt=16384 + + - ``Force legacy mode`` + +@@ -74,7 +74,7 @@ Runtime Config Options + single workslot mode in SSO and disable the default dual workslot mode. + For example:: + +- --dev "0002:0e:00.0,single_ws=1" ++ -w 0002:0e:00.0,single_ws=1 + + - ``Event Group QoS support`` + +@@ -89,7 +89,7 @@ Runtime Config Options + default. + For example:: + +- --dev "0002:0e:00.0,qos=[1-50-50-50]" ++ -w 0002:0e:00.0,qos=[1-50-50-50] + + - ``Selftest`` + +@@ -98,7 +98,7 @@ Runtime Config Options + The tests are run once the vdev creation is successfully complete. + For example:: + +- --dev "0002:0e:00.0,selftest=1" ++ -w 0002:0e:00.0,selftest=1 + + - ``TIM disable NPA`` + +@@ -107,7 +107,7 @@ Runtime Config Options + parameter disables NPA and uses software mempool to manage chunks + For example:: + +- --dev "0002:0e:00.0,tim_disable_npa=1" ++ -w 0002:0e:00.0,tim_disable_npa=1 + + - ``TIM modify chunk slots`` + +@@ -118,7 +118,7 @@ Runtime Config Options + to SSO. The default value is 255 and the max value is 4095. 
+ For example:: + +- --dev "0002:0e:00.0,tim_chnk_slots=1023" ++ -w 0002:0e:00.0,tim_chnk_slots=1023 + + - ``TIM enable arm/cancel statistics`` + +@@ -126,7 +126,7 @@ Runtime Config Options + event timer adapter. + For example:: + +- --dev "0002:0e:00.0,tim_stats_ena=1" ++ -w 0002:0e:00.0,tim_stats_ena=1 + + - ``TIM limit max rings reserved`` + +@@ -136,7 +136,7 @@ Runtime Config Options + rings. + For example:: + +- --dev "0002:0e:00.0,tim_rings_lmt=5" ++ -w 0002:0e:00.0,tim_rings_lmt=5 + + - ``TIM ring control internal parameters`` + +@@ -146,7 +146,7 @@ Runtime Config Options + default values. + For Example:: + +- --dev "0002:0e:00.0,tim_ring_ctl=[2-1023-1-0]" ++ -w 0002:0e:00.0,tim_ring_ctl=[2-1023-1-0] + + Debugging Options + ~~~~~~~~~~~~~~~~~ +diff --git a/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst b/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst +index 29f16cc6c5..dce028bc62 100644 +--- a/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst ++++ b/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst +@@ -62,7 +62,7 @@ environmental variables should be set as below: + .. note:: + + To install a copy of the DPDK compiled using gcc, please download the +- official DPDK package from http://core.dpdk.org/download/ and install manually using ++ official DPDK package from https://core.dpdk.org/download/ and install manually using + the instructions given in the next chapter, :ref:`building_from_source` + + An example application can therefore be copied to a user's home directory and +diff --git a/dpdk/doc/guides/linux_gsg/eal_args.include.rst b/dpdk/doc/guides/linux_gsg/eal_args.include.rst +index ed8b0e35b0..7b2f6b1d43 100644 +--- a/dpdk/doc/guides/linux_gsg/eal_args.include.rst ++++ b/dpdk/doc/guides/linux_gsg/eal_args.include.rst +@@ -132,7 +132,7 @@ Debugging options + + Specify log level for a specific component. For example:: + +- --log-level eal:8 ++ --log-level lib.eal:debug + + Can be specified multiple times. + +diff --git a/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst b/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst +index c554c2159c..1dabbce244 100644 +--- a/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst ++++ b/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst +@@ -64,7 +64,7 @@ This aligns with the previous output which showed that each channel has one memo + Network Interface Card Requirements + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-Use a `DPDK supported <http://core.dpdk.org/supported/>`_ high end NIC such as the Intel XL710 40GbE. ++Use a `DPDK supported <https://core.dpdk.org/supported/>`_ high end NIC such as the Intel XL710 40GbE. + + Make sure each NIC has been flashed the latest version of NVM/firmware. 
+ +diff --git a/dpdk/doc/guides/meson.build b/dpdk/doc/guides/meson.build +index 7931ef3bb5..80c21d1682 100644 +--- a/dpdk/doc/guides/meson.build ++++ b/dpdk/doc/guides/meson.build +@@ -3,26 +3,28 @@ + + sphinx = find_program('sphinx-build', required: get_option('enable_docs')) + +-if sphinx.found() +- htmldir = join_paths('share', 'doc', 'dpdk') +- html_guides_build = custom_target('html_guides_build', +- input: meson.current_source_dir(), +- output: 'guides', +- command: [sphinx, '-b', 'html', +- '-d', meson.current_build_dir() + '/.doctrees', +- '@INPUT@', meson.current_build_dir() + '/guides'], +- build_by_default: get_option('enable_docs'), +- install: get_option('enable_docs'), +- install_dir: htmldir) ++if not sphinx.found() ++ subdir_done() ++endif + +- doc_targets += html_guides_build +- doc_target_names += 'HTML_Guides' ++htmldir = join_paths('share', 'doc', 'dpdk') ++html_guides = custom_target('html_guides', ++ input: meson.current_source_dir(), ++ output: 'guides', ++ command: [sphinx, '-b', 'html', ++ '-d', meson.current_build_dir() + '/.doctrees', ++ '@INPUT@', meson.current_build_dir() + '/guides'], ++ build_by_default: get_option('enable_docs'), ++ install: get_option('enable_docs'), ++ install_dir: htmldir) + +- # sphinx leaves a .buildinfo in the target directory, which we don't +- # want to install. Note that sh -c has to be used, otherwise the +- # env var does not get expanded if calling rm/install directly. +- meson.add_install_script('sh', '-c', +- 'rm -f $MESON_INSTALL_DESTDIR_PREFIX/share/doc/dpdk/guides/.buildinfo') +- meson.add_install_script('sh', '-c', +- 'install -D -m0644 $MESON_SOURCE_ROOT/doc/guides/custom.css $MESON_INSTALL_DESTDIR_PREFIX/share/doc/dpdk/guides/_static/css/custom.css') +-endif ++doc_targets += html_guides ++doc_target_names += 'HTML_Guides' ++ ++# sphinx leaves a .buildinfo in the target directory, which we don't ++# want to install. Note that sh -c has to be used, otherwise the ++# env var does not get expanded if calling rm/install directly. ++meson.add_install_script('sh', '-c', ++ 'rm -f $MESON_INSTALL_DESTDIR_PREFIX/share/doc/dpdk/guides/.buildinfo') ++meson.add_install_script('sh', '-c', ++ 'install -D -m0644 $MESON_SOURCE_ROOT/doc/guides/custom.css $MESON_INSTALL_DESTDIR_PREFIX/share/doc/dpdk/guides/_static/css/custom.css') +diff --git a/dpdk/doc/guides/nics/enic.rst b/dpdk/doc/guides/nics/enic.rst +index 65e536d422..24d2b5713a 100644 +--- a/dpdk/doc/guides/nics/enic.rst ++++ b/dpdk/doc/guides/nics/enic.rst +@@ -14,7 +14,7 @@ How to obtain ENIC PMD integrated DPDK + -------------------------------------- + + ENIC PMD support is integrated into the DPDK suite. dpdk-<version>.tar.gz +-should be downloaded from http://core.dpdk.org/download/ ++should be downloaded from https://core.dpdk.org/download/ + + + Configuration information +diff --git a/dpdk/doc/guides/nics/fail_safe.rst b/dpdk/doc/guides/nics/fail_safe.rst +index 6c02d7ef6d..60bbf40f7f 100644 +--- a/dpdk/doc/guides/nics/fail_safe.rst ++++ b/dpdk/doc/guides/nics/fail_safe.rst +@@ -49,7 +49,7 @@ The Fail-safe PMD can be used like most other DPDK virtual devices, by passing a + ``--vdev`` parameter to the EAL when starting the application. The device name + must start with the *net_failsafe* prefix, followed by numbers or letters. This + name must be unique for each device. Each fail-safe instance must have at least one +-sub-device, up to ``RTE_MAX_ETHPORTS-1``. ++sub-device, and at most two. 
+ + A sub-device can be any legal DPDK device, including possibly another fail-safe + instance. +diff --git a/dpdk/doc/guides/nics/features/hns3.ini b/dpdk/doc/guides/nics/features/hns3.ini +index 6df789ed10..cd5c08a9d7 100644 +--- a/dpdk/doc/guides/nics/features/hns3.ini ++++ b/dpdk/doc/guides/nics/features/hns3.ini +@@ -5,6 +5,7 @@ + ; + [Features] + Link status = Y ++Rx interrupt = Y + MTU update = Y + Jumbo frame = Y + Promiscuous mode = Y +diff --git a/dpdk/doc/guides/nics/features/hns3_vf.ini b/dpdk/doc/guides/nics/features/hns3_vf.ini +index 41497c4c2d..fd00ac3e22 100644 +--- a/dpdk/doc/guides/nics/features/hns3_vf.ini ++++ b/dpdk/doc/guides/nics/features/hns3_vf.ini +@@ -5,6 +5,7 @@ + ; + [Features] + Link status = Y ++Rx interrupt = Y + MTU update = Y + Jumbo frame = Y + Unicast MAC filter = Y +diff --git a/dpdk/doc/guides/nics/features/i40e.ini b/dpdk/doc/guides/nics/features/i40e.ini +index e5ae6ded08..c2717cdc47 100644 +--- a/dpdk/doc/guides/nics/features/i40e.ini ++++ b/dpdk/doc/guides/nics/features/i40e.ini +@@ -18,7 +18,6 @@ TSO = Y + Promiscuous mode = Y + Allmulticast mode = Y + Unicast MAC filter = Y +-Multicast MAC filter = Y + RSS hash = Y + RSS key update = Y + RSS reta update = Y +diff --git a/dpdk/doc/guides/nics/features/iavf.ini b/dpdk/doc/guides/nics/features/iavf.ini +index 80143059e4..f08392a9cf 100644 +--- a/dpdk/doc/guides/nics/features/iavf.ini ++++ b/dpdk/doc/guides/nics/features/iavf.ini +@@ -15,7 +15,6 @@ TSO = Y + Promiscuous mode = Y + Allmulticast mode = Y + Unicast MAC filter = Y +-Multicast MAC filter = Y + RSS hash = Y + RSS key update = Y + RSS reta update = Y +diff --git a/dpdk/doc/guides/nics/features/ice.ini b/dpdk/doc/guides/nics/features/ice.ini +index 65923f0bc0..949d09f423 100644 +--- a/dpdk/doc/guides/nics/features/ice.ini ++++ b/dpdk/doc/guides/nics/features/ice.ini +@@ -18,7 +18,6 @@ TSO = Y + Promiscuous mode = Y + Allmulticast mode = Y + Unicast MAC filter = Y +-Multicast MAC filter = Y + RSS hash = Y + RSS key update = Y + RSS reta update = Y +diff --git a/dpdk/doc/guides/nics/features/igb.ini b/dpdk/doc/guides/nics/features/igb.ini +index 0351f8495d..167c0cabe8 100644 +--- a/dpdk/doc/guides/nics/features/igb.ini ++++ b/dpdk/doc/guides/nics/features/igb.ini +@@ -15,6 +15,7 @@ TSO = Y + Promiscuous mode = Y + Allmulticast mode = Y + Unicast MAC filter = Y ++Multicast MAC filter = Y + RSS hash = Y + RSS key update = Y + RSS reta update = Y +diff --git a/dpdk/doc/guides/nics/features/ixgbe.ini b/dpdk/doc/guides/nics/features/ixgbe.ini +index c412d7af1a..1c7a2a5240 100644 +--- a/dpdk/doc/guides/nics/features/ixgbe.ini ++++ b/dpdk/doc/guides/nics/features/ixgbe.ini +@@ -17,6 +17,7 @@ TSO = Y + Promiscuous mode = Y + Allmulticast mode = Y + Unicast MAC filter = Y ++Multicast MAC filter = Y + RSS hash = Y + RSS key update = Y + RSS reta update = Y +diff --git a/dpdk/doc/guides/nics/features/mlx5.ini b/dpdk/doc/guides/nics/features/mlx5.ini +index b0a2f8e5f7..30a4d80ead 100644 +--- a/dpdk/doc/guides/nics/features/mlx5.ini ++++ b/dpdk/doc/guides/nics/features/mlx5.ini +@@ -41,6 +41,7 @@ Basic stats = Y + Extended stats = Y + Stats per queue = Y + FW version = Y ++Module EEPROM dump = Y + Multiprocess aware = Y + Other kdrv = Y + ARMv8 = Y +diff --git a/dpdk/doc/guides/nics/hns3.rst b/dpdk/doc/guides/nics/hns3.rst +index 505488b6ca..8d19f48515 100644 +--- a/dpdk/doc/guides/nics/hns3.rst ++++ b/dpdk/doc/guides/nics/hns3.rst +@@ -22,6 +22,7 @@ Features of the HNS3 PMD are: + - Port hardware statistics + - Jumbo frames + - Link state information ++- 
Interrupt mode for RX + - VLAN stripping + - NUMA support + +diff --git a/dpdk/doc/guides/nics/i40e.rst b/dpdk/doc/guides/nics/i40e.rst +index 38acf5906d..61d72c2b10 100644 +--- a/dpdk/doc/guides/nics/i40e.rst ++++ b/dpdk/doc/guides/nics/i40e.rst +@@ -69,7 +69,9 @@ to chapter Tested Platforms/Tested NICs in release notes. + +--------------+-----------------------+------------------+ + | DPDK version | Kernel driver version | Firmware version | + +==============+=======================+==================+ +- | 19.08 | 2.9.21 | 7.00 | ++ | 19.11 | 2.9.21 | 7.00 | ++ +--------------+-----------------------+------------------+ ++ | 19.08 | 2.8.43 | 7.00 | + +--------------+-----------------------+------------------+ + | 19.05 | 2.7.29 | 6.80 | + +--------------+-----------------------+------------------+ +@@ -665,6 +667,15 @@ Use 16 Bytes RX Descriptor Size + As i40e PMD supports both 16 and 32 bytes RX descriptor sizes, and 16 bytes size can provide helps to high performance of small packets. + Configuration of ``CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC`` in config files can be changed to use 16 bytes size RX descriptors. + ++Input set requirement of each pctype for FDIR ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Each PCTYPE can only have one specific FDIR input set at one time. ++For example, if creating 2 rte_flow rules with different input set for one PCTYPE, ++it will fail and return the info "Conflict with the first rule's input set", ++which means the current rule's input set conflicts with the first rule's. ++Remove the first rule if want to change the input set of the PCTYPE. ++ + Example of getting best performance with l3fwd example + ------------------------------------------------------ + +diff --git a/dpdk/doc/guides/nics/ice.rst b/dpdk/doc/guides/nics/ice.rst +index 9b90b389ec..58eb023983 100644 +--- a/dpdk/doc/guides/nics/ice.rst ++++ b/dpdk/doc/guides/nics/ice.rst +@@ -54,10 +54,6 @@ Please note that enabling debugging options may affect system performance. + + Toggle display of generic debugging messages. + +-- ``CONFIG_RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC`` (default ``y``) +- +- Toggle bulk allocation for RX. +- + - ``CONFIG_RTE_LIBRTE_ICE_16BYTE_RX_DESC`` (default ``n``) + + Toggle to use a 16-byte RX descriptor, by default the RX descriptor is 32 byte. +diff --git a/dpdk/doc/guides/nics/mlx4.rst b/dpdk/doc/guides/nics/mlx4.rst +index d0e8a8b2ff..1f1e2f6c77 100644 +--- a/dpdk/doc/guides/nics/mlx4.rst ++++ b/dpdk/doc/guides/nics/mlx4.rst +@@ -92,6 +92,10 @@ These options can be modified in the ``.config`` file. + adds additional run-time checks and debugging messages at the cost of + lower performance. + ++This option is available in meson: ++ ++- ``ibverbs_link`` can be ``static``, ``shared``, or ``dlopen``. ++ + Environment variables + ~~~~~~~~~~~~~~~~~~~~~ + +@@ -294,11 +298,6 @@ Installing Mellanox OFED + + 5. Continue with :ref:`section 2 of the Quick Start Guide <QSG_2>`. + +-Supported NICs +--------------- +- +-* Mellanox(R) ConnectX(R)-3 Pro 40G MCX354A-FCC_Ax (2*40G) +- + .. _qsg: + + Quick Start Guide +diff --git a/dpdk/doc/guides/nics/mlx5.rst b/dpdk/doc/guides/nics/mlx5.rst +index 18573cf6a0..75f58e6027 100644 +--- a/dpdk/doc/guides/nics/mlx5.rst ++++ b/dpdk/doc/guides/nics/mlx5.rst +@@ -2,12 +2,14 @@ + Copyright 2015 6WIND S.A. + Copyright 2015 Mellanox Technologies, Ltd + ++.. 
include:: <isonum.txt> ++ + MLX5 poll mode driver + ===================== + + The MLX5 poll mode driver library (**librte_pmd_mlx5**) provides support + for **Mellanox ConnectX-4**, **Mellanox ConnectX-4 Lx** , **Mellanox +-ConnectX-5**, **Mellanox ConnectX-6**, **Mellanox ConnectX-6DX** and ++ConnectX-5**, **Mellanox ConnectX-6**, **Mellanox ConnectX-6 Dx** and + **Mellanox BlueField** families of 10/25/40/50/100/200 Gb/s adapters + as well as their virtual functions (VF) in SR-IOV context. + +@@ -107,22 +109,37 @@ Limitations + process. If the external memory is registered by primary process but has + different virtual address in secondary process, unexpected error may happen. + +-- Flow pattern without any specific vlan will match for vlan packets as well: ++- When using Verbs flow engine (``dv_flow_en`` = 0), flow pattern without any ++ specific VLAN will match for VLAN packets as well: + + When VLAN spec is not specified in the pattern, the matching rule will be created with VLAN as a wild card. + Meaning, the flow rule:: + + flow create 0 ingress pattern eth / vlan vid is 3 / ipv4 / end ... + +- Will only match vlan packets with vid=3. and the flow rules:: ++ Will only match vlan packets with vid=3. and the flow rule:: ++ ++ flow create 0 ingress pattern eth / ipv4 / end ... ++ ++ Will match any ipv4 packet (VLAN included). ++ ++- When using DV flow engine (``dv_flow_en`` = 1), flow pattern without VLAN item ++ will match untagged packets only. ++ The flow rule:: + + flow create 0 ingress pattern eth / ipv4 / end ... + +- Or:: ++ Will match untagged packets only. ++ The flow rule:: + + flow create 0 ingress pattern eth / vlan / ipv4 / end ... + +- Will match any ipv4 packet (VLAN included). ++ Will match tagged packets only, with any VLAN ID value. ++ The flow rule:: ++ ++ flow create 0 ingress pattern eth / vlan vid is 3 / ipv4 / end ... ++ ++ Will only match tagged packets with VLAN ID 3. + + - VLAN pop offload command: + +@@ -270,6 +287,10 @@ These options can be modified in the ``.config`` file. + 64. Default armv8a configuration of make build and meson build set it to 128 + then brings performance degradation. + ++This option is available in meson: ++ ++- ``ibverbs_link`` can be ``static``, ``shared``, or ``dlopen``. ++ + Environment variables + ~~~~~~~~~~~~~~~~~~~~~ + +@@ -315,9 +336,9 @@ Run-time configuration + + Supported on: + +- - x86_64 with ConnectX-4, ConnectX-4 LX, ConnectX-5, ConnectX-6, ConnectX-6 DX ++ - x86_64 with ConnectX-4, ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx + and BlueField. +- - POWER9 and ARMv8 with ConnectX-4 LX, ConnectX-5, ConnectX-6, ConnectX-6 DX ++ - POWER9 and ARMv8 with ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx + and BlueField. + + - ``rxq_cqe_pad_en`` parameter [int] +@@ -348,17 +369,16 @@ Run-time configuration + + Supported on: + +- - x86_64 with ConnectX-4, ConnectX-4 LX, ConnectX-5, ConnectX-6, ConnectX-6 DX ++ - x86_64 with ConnectX-4, ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx + and BlueField. +- - POWER8 and ARMv8 with ConnectX-4 LX, ConnectX-5, ConnectX-6, ConnectX-6 DX ++ - POWER8 and ARMv8 with ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx + and BlueField. + + - ``mprq_en`` parameter [int] + + A nonzero value enables configuring Multi-Packet Rx queues. Rx queue is + configured as Multi-Packet RQ if the total number of Rx queues is +- ``rxqs_min_mprq`` or more and Rx scatter isn't configured. Disabled by +- default. ++ ``rxqs_min_mprq`` or more. Disabled by default. 
+ + Multi-Packet Rx Queue (MPRQ a.k.a Striding RQ) can further save PCIe bandwidth + by posting a single large buffer for multiple packets. Instead of posting a +@@ -383,6 +403,20 @@ Run-time configuration + + The size of Rx queue should be bigger than the number of strides. + ++- ``mprq_log_stride_size`` parameter [int] ++ ++ Log 2 of the size of a stride for Multi-Packet Rx queue. Configuring a smaller ++ stride size can save some memory and reduce probability of a depletion of all ++ available strides due to unreleased packets by an application. If configured ++ value is not in the range of device capability, the default value will be set ++ with a warning message. The default value is 11 which is 2048 bytes per a ++ stride, valid only if ``mprq_en`` is set. With ``mprq_log_stride_size`` set ++ it is possible for a pcaket to span across multiple strides. This mode allows ++ support of jumbo frames (9K) with MPRQ. The memcopy of some packets (or part ++ of a packet if Rx scatter is configured) may be required in case there is no ++ space left for a head room at the end of a stride which incurs some ++ performance penalty. ++ + - ``mprq_max_memcpy_len`` parameter [int] + + The maximum length of packet to memcpy in case of Multi-Packet Rx queue. Rx +@@ -453,14 +487,14 @@ Run-time configuration + If ``txq_inline_min`` key is not present, the value may be queried by the + driver from the NIC via DevX if this feature is available. If there is no DevX + enabled/supported the value 18 (supposing L2 header including VLAN) is set +- for ConnectX-4 and ConnectX-4LX, and 0 is set by default for ConnectX-5 ++ for ConnectX-4 and ConnectX-4 Lx, and 0 is set by default for ConnectX-5 + and newer NICs. If packet is shorter the ``txq_inline_min`` value, the entire + packet is inlined. + + For ConnectX-4 NIC, driver does not allow specifying value below 18 + (minimal L2 header, including VLAN), error will be raised. + +- For ConnectX-4LX NIC, it is allowed to specify values below 18, but ++ For ConnectX-4 Lx NIC, it is allowed to specify values below 18, but + it is not recommended and may prevent NIC from sending packets over + some configurations. + +@@ -543,7 +577,7 @@ Run-time configuration + - ``txq_mpw_en`` parameter [int] + + A nonzero value enables Enhanced Multi-Packet Write (eMPW) for ConnectX-5, +- ConnectX-6, ConnectX-6 DX and BlueField. eMPW allows the TX burst function to pack ++ ConnectX-6, ConnectX-6 Dx and BlueField. eMPW allows the TX burst function to pack + up multiple packets in a single descriptor session in order to save PCI bandwidth + and improve performance at the cost of a slightly higher CPU usage. When + ``txq_inline_mpw`` is set along with ``txq_mpw_en``, TX burst function copies +@@ -559,16 +593,17 @@ Run-time configuration + The rdma core library can map doorbell register in two ways, depending on the + environment variable "MLX5_SHUT_UP_BF": + +- - As regular cached memory, if the variable is either missing or set to zero. ++ - As regular cached memory (usually with write combining attribute), if the ++ variable is either missing or set to zero. + - As non-cached memory, if the variable is present and set to not "0" value. + + The type of mapping may slightly affect the Tx performance, the optimal choice + is strongly relied on the host architecture and should be deduced practically. 
+ + If ``tx_db_nc`` is set to zero, the doorbell is forced to be mapped to regular +- memory, the PMD will perform the extra write memory barrier after writing to +- doorbell, it might increase the needed CPU clocks per packet to send, but +- latency might be improved. ++ memory (with write combining), the PMD will perform the extra write memory barrier ++ after writing to doorbell, it might increase the needed CPU clocks per packet ++ to send, but latency might be improved. + + If ``tx_db_nc`` is set to one, the doorbell is forced to be mapped to non + cached memory, the PMD will not perform the extra write memory barrier +@@ -589,7 +624,7 @@ Run-time configuration + + - ``tx_vec_en`` parameter [int] + +- A nonzero value enables Tx vector on ConnectX-5, ConnectX-6, ConnectX-6 DX ++ A nonzero value enables Tx vector on ConnectX-5, ConnectX-6, ConnectX-6 Dx + and BlueField NICs if the number of global Tx queues on the port is less than + ``txqs_max_vec``. The parameter is deprecated and ignored. + +@@ -886,7 +921,7 @@ Mellanox OFED/EN + - ConnectX-5: **16.21.1000** and above. + - ConnectX-5 Ex: **16.21.1000** and above. + - ConnectX-6: **20.99.5374** and above. +- - ConnectX-6 DX: **22.27.0090** and above. ++ - ConnectX-6 Dx: **22.27.0090** and above. + - BlueField: **18.25.1010** and above. + + While these libraries and kernel modules are available on OpenFabrics +@@ -911,28 +946,43 @@ required from that distribution. + Supported NICs + -------------- + +-* Mellanox(R) ConnectX(R)-4 10G MCX4111A-XCAT (1x10G) +-* Mellanox(R) ConnectX(R)-4 10G MCX4121A-XCAT (2x10G) +-* Mellanox(R) ConnectX(R)-4 25G MCX4111A-ACAT (1x25G) +-* Mellanox(R) ConnectX(R)-4 25G MCX4121A-ACAT (2x25G) +-* Mellanox(R) ConnectX(R)-4 40G MCX4131A-BCAT (1x40G) +-* Mellanox(R) ConnectX(R)-4 40G MCX413A-BCAT (1x40G) +-* Mellanox(R) ConnectX(R)-4 40G MCX415A-BCAT (1x40G) +-* Mellanox(R) ConnectX(R)-4 50G MCX4131A-GCAT (1x50G) +-* Mellanox(R) ConnectX(R)-4 50G MCX413A-GCAT (1x50G) +-* Mellanox(R) ConnectX(R)-4 50G MCX414A-BCAT (2x50G) +-* Mellanox(R) ConnectX(R)-4 50G MCX415A-GCAT (2x50G) +-* Mellanox(R) ConnectX(R)-4 50G MCX416A-BCAT (2x50G) +-* Mellanox(R) ConnectX(R)-4 50G MCX416A-GCAT (2x50G) +-* Mellanox(R) ConnectX(R)-4 50G MCX415A-CCAT (1x100G) +-* Mellanox(R) ConnectX(R)-4 100G MCX416A-CCAT (2x100G) +-* Mellanox(R) ConnectX(R)-4 Lx 10G MCX4121A-XCAT (2x10G) +-* Mellanox(R) ConnectX(R)-4 Lx 25G MCX4121A-ACAT (2x25G) +-* Mellanox(R) ConnectX(R)-5 100G MCX556A-ECAT (2x100G) +-* Mellanox(R) ConnectX(R)-5 Ex EN 100G MCX516A-CDAT (2x100G) +-* Mellanox(R) ConnectX(R)-6 200G MCX654106A-HCAT (4x200G) +-* Mellanox(R) ConnectX(R)-6DX EN 100G MCX623106AN-CDAT (2*100g) +-* Mellanox(R) ConnectX(R)-6DX EN 200G MCX623105AN-VDAT (1*200g) ++The following Mellanox device families are supported by the same mlx5 driver: ++ ++ - ConnectX-4 ++ - ConnectX-4 Lx ++ - ConnectX-5 ++ - ConnectX-5 Ex ++ - ConnectX-6 ++ - ConnectX-6 Dx ++ - BlueField ++ ++Below are detailed device names: ++ ++* Mellanox\ |reg| ConnectX\ |reg|-4 10G MCX4111A-XCAT (1x10G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 10G MCX412A-XCAT (2x10G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 25G MCX4111A-ACAT (1x25G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 25G MCX412A-ACAT (2x25G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 40G MCX413A-BCAT (1x40G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 40G MCX4131A-BCAT (1x40G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 40G MCX415A-BCAT (1x40G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX413A-GCAT (1x50G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX4131A-GCAT 
(1x50G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX414A-BCAT (2x50G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX415A-GCAT (1x50G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX416A-BCAT (2x50G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX416A-GCAT (2x50G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 50G MCX415A-CCAT (1x100G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 100G MCX416A-CCAT (2x100G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 Lx 10G MCX4111A-XCAT (1x10G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 Lx 10G MCX4121A-XCAT (2x10G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 Lx 25G MCX4111A-ACAT (1x25G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 Lx 25G MCX4121A-ACAT (2x25G) ++* Mellanox\ |reg| ConnectX\ |reg|-4 Lx 40G MCX4131A-BCAT (1x40G) ++* Mellanox\ |reg| ConnectX\ |reg|-5 100G MCX556A-ECAT (2x100G) ++* Mellanox\ |reg| ConnectX\ |reg|-5 Ex EN 100G MCX516A-CDAT (2x100G) ++* Mellanox\ |reg| ConnectX\ |reg|-6 200G MCX654106A-HCAT (2x200G) ++* Mellanox\ |reg| ConnectX\ |reg|-6 Dx EN 100G MCX623106AN-CDAT (2x100G) ++* Mellanox\ |reg| ConnectX\ |reg|-6 Dx EN 200G MCX623105AN-VDAT (1x200G) + + Quick Start Guide on OFED/EN + ---------------------------- +@@ -1195,6 +1245,19 @@ Supported hardware offloads + | | | ConnectX-5 | | ConnectX-5 | + +-----------------------+-----------------+-----------------+ + ++Notes for metadata ++------------------ ++ ++MARK and META items are interrelated with datapath - they might move from/to ++the applications in mbuf fields. Hence, zero value for these items has the ++special meaning - it means "no metadata are provided", not zero values are ++treated by applications and PMD as valid ones. ++ ++Moreover in the flow engine domain the value zero is acceptable to match and ++set, and we should allow to specify zero values as rte_flow parameters for the ++META and MARK items and actions. In the same time zero mask has no meaning and ++should be rejected on validation stage. 
++ + Notes for testpmd + ----------------- + +diff --git a/dpdk/doc/guides/prog_guide/cryptodev_lib.rst b/dpdk/doc/guides/prog_guide/cryptodev_lib.rst +index ac16437740..c839379885 100644 +--- a/dpdk/doc/guides/prog_guide/cryptodev_lib.rst ++++ b/dpdk/doc/guides/prog_guide/cryptodev_lib.rst +@@ -1097,4 +1097,4 @@ Asymmetric Crypto Device API + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + The cryptodev Library API is described in the +-`DPDK API Reference <http://doc.dpdk.org/api/>`_ ++`DPDK API Reference <https://doc.dpdk.org/api/>`_ +diff --git a/dpdk/doc/guides/prog_guide/img/ring-mp-enqueue3.svg b/dpdk/doc/guides/prog_guide/img/ring-mp-enqueue3.svg +index da483b031e..83ef7dba13 100644 +--- a/dpdk/doc/guides/prog_guide/img/ring-mp-enqueue3.svg ++++ b/dpdk/doc/guides/prog_guide/img/ring-mp-enqueue3.svg +@@ -16,7 +16,7 @@ + height="403.06647" + id="svg3388" + version="1.1" +- inkscape:version="0.48.4 r9939" ++ inkscape:version="0.92.4 (f8dce91, 2019-08-02)" + sodipodi:docname="ring-mp-enqueue3.svg"> + <defs + id="defs3390"> +@@ -359,15 +359,15 @@ + inkscape:pageshadow="2" + inkscape:zoom="1.4" + inkscape:cx="201.35119" +- inkscape:cy="221.79811" ++ inkscape:cy="107.5124" + inkscape:document-units="px" + inkscape:current-layer="layer1" + showgrid="false" +- inkscape:window-width="958" +- inkscape:window-height="1002" +- inkscape:window-x="223" +- inkscape:window-y="22" +- inkscape:window-maximized="0" ++ inkscape:window-width="1313" ++ inkscape:window-height="713" ++ inkscape:window-x="53" ++ inkscape:window-y="27" ++ inkscape:window-maximized="1" + inkscape:snap-grids="false" + inkscape:snap-to-guides="true" + showguides="false" +@@ -382,8 +382,10 @@ + visible="true" + enabled="true" + snapvisiblegridlinesonly="true" +- originx="-162.97143px" +- originy="-370.03525px" /> ++ originx="-162.97143" ++ originy="-370.03525" ++ spacingx="1" ++ spacingy="1" /> + </sodipodi:namedview> + <metadata + id="metadata3393"> +@@ -393,7 +395,7 @@ + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> +- <dc:title /> ++ <dc:title></dc:title> + </cc:Work> + </rdf:RDF> + </metadata> +@@ -490,37 +492,37 @@ + </g> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="313.90488" + y="495.49646" +- id="text4269" +- sodipodi:linespacing="125%"><tspan ++ id="text4269"><tspan + sodipodi:role="line" + id="tspan4271" + x="313.90488" +- y="495.49646">obj1</tspan></text> ++ y="495.49646" ++ style="font-size:14px;line-height:1.25">obj1</tspan></text> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + 
x="368.95203" + y="495.49646" +- id="text4269-4" +- sodipodi:linespacing="125%"><tspan ++ id="text4269-4"><tspan + sodipodi:role="line" + id="tspan4271-5" + x="368.95203" +- y="495.49646">obj2</tspan></text> ++ y="495.49646" ++ style="font-size:14px;line-height:1.25">obj2</tspan></text> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="422.99518" + y="495.49646" +- id="text4269-5" +- sodipodi:linespacing="125%"><tspan ++ id="text4269-5"><tspan + sodipodi:role="line" + id="tspan4271-4" + x="422.99518" +- y="495.49646">obj3</tspan></text> ++ y="495.49646" ++ style="font-size:14px;line-height:1.25">obj3</tspan></text> + <path + style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Lend);font-family:Arial;-inkscape-font-specification:Arial" + d="m 323.57143,578.07647 0,-42.14286" +@@ -533,48 +535,48 @@ + inkscape:connector-curvature="0" /> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="289.85715" + y="589.505" +- id="text4787" +- sodipodi:linespacing="125%"><tspan ++ id="text4787"><tspan + sodipodi:role="line" + id="tspan4789" + x="289.85715" +- y="589.505">cons_head</tspan></text> ++ y="589.505" ++ style="font-size:14px;line-height:1.25">cons_head</tspan></text> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="293.45334" + y="603.41034" +- id="text4787-3" +- sodipodi:linespacing="125%"><tspan ++ id="text4787-3"><tspan + sodipodi:role="line" + id="tspan4789-0" + x="293.45334" +- y="603.41034">cons_tail</tspan></text> ++ y="603.41034" ++ style="font-size:14px;line-height:1.25">cons_tail</tspan></text> + <text + xml:space="preserve" +- 
style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" +- x="527.01239" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" ++ x="567.01239" + y="587.9577" +- id="text4787-7" +- sodipodi:linespacing="125%"><tspan ++ id="text4787-7"><tspan + sodipodi:role="line" + id="tspan4789-8" +- x="527.01239" +- y="587.9577">prod_head</tspan></text> ++ x="567.01239" ++ y="587.9577" ++ style="font-size:14px;line-height:1.25">prod_head</tspan></text> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="460.7514" + y="602.57739" +- id="text4787-3-6" +- sodipodi:linespacing="125%"><tspan ++ id="text4787-3-6"><tspan + sodipodi:role="line" + id="tspan4789-0-8" + x="460.7514" +- y="602.57739">prod_tail</tspan></text> ++ y="602.57739" ++ style="font-size:14px;line-height:1.25">prod_tail</tspan></text> + <rect + style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 1;stroke-dashoffset:0;font-family:Arial;-inkscape-font-specification:Arial" + id="rect4889" +@@ -586,19 +588,20 @@ + ry="11.631636" /> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="174.28571" + y="328.93362" +- id="text4891" +- sodipodi:linespacing="125%"><tspan ++ id="text4891"><tspan + sodipodi:role="line" + id="tspan4893" + x="174.28571" +- y="328.93362">local variables</tspan><tspan ++ y="328.93362" ++ style="font-size:14px;line-height:1.25">local variables</tspan><tspan + sodipodi:role="line" + x="174.28571" + y="346.43362" +- id="tspan4150">core 2</tspan></text> ++ id="tspan4150" ++ style="font-size:14px;line-height:1.25">core 2</tspan></text> + <rect + style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 1;stroke-dashoffset:0;font-family:Arial;-inkscape-font-specification:Arial" + id="rect4889-8" +@@ -610,15 +613,15 @@ + ry="11.631636" /> 
+ <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="170.89287" +- y="682.09021" +- id="text4891-4" +- sodipodi:linespacing="125%"><tspan ++ y="664.09021" ++ id="text4891-4"><tspan + sodipodi:role="line" + id="tspan4893-3" + x="170.89287" +- y="682.09021">structure state</tspan></text> ++ y="664.09021" ++ style="font-size:14px;line-height:1.25">structure state</tspan></text> + <path + style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Lend);font-family:Arial;-inkscape-font-specification:Arial" + d="m 325.25296,407.43361 0,42.14286" +@@ -631,37 +634,37 @@ + inkscape:connector-curvature="0" /> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="296.992" + y="401.48123" +- id="text4787-3-64" +- sodipodi:linespacing="125%"><tspan ++ id="text4787-3-64"><tspan + sodipodi:role="line" + id="tspan4789-0-9" + x="296.992" +- y="401.48123">cons_tail</tspan></text> ++ y="401.48123" ++ style="font-size:14px;line-height:1.25">cons_tail</tspan></text> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="440.26532" + y="401.48123" +- id="text4787-7-5" +- sodipodi:linespacing="125%"><tspan ++ id="text4787-7-5"><tspan + sodipodi:role="line" + id="tspan4789-8-0" + x="440.26532" +- y="401.48123">prod_head</tspan></text> ++ y="401.48123" ++ style="font-size:14px;line-height:1.25">prod_head</tspan></text> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ 
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="522.43298" + y="401.48123" +- id="text4787-3-6-4" +- sodipodi:linespacing="125%"><tspan ++ id="text4787-3-6-4"><tspan + sodipodi:role="line" + id="tspan4789-0-8-8" + x="522.43298" +- y="401.48123">prod_next</tspan></text> ++ y="401.48123" ++ style="font-size:14px;line-height:1.25">prod_next</tspan></text> + <path + style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Lend);font-family:Arial;-inkscape-font-specification:Arial" + d="m 537.14285,407.43361 0,42.14286" +@@ -678,19 +681,20 @@ + ry="11.631636" /> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="174.65646" + y="398.23306" +- id="text4891-3" +- sodipodi:linespacing="125%"><tspan ++ id="text4891-3"><tspan + sodipodi:role="line" + id="tspan4893-1" + x="174.65646" +- y="398.23306">local variables</tspan><tspan ++ y="398.23306" ++ style="font-size:14px;line-height:1.25">local variables</tspan><tspan + sodipodi:role="line" + x="174.65646" + y="415.73306" +- id="tspan4152">core 1</tspan></text> ++ id="tspan4152" ++ style="font-size:14px;line-height:1.25">core 1</tspan></text> + <path + style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Lend);font-family:Arial;-inkscape-font-specification:Arial" + d="m 326.73097,334.53006 0,42.14286" +@@ -703,37 +707,37 @@ + inkscape:connector-curvature="0" /> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="298.47" + y="328.57767" +- id="text4787-3-64-5" +- sodipodi:linespacing="125%"><tspan ++ id="text4787-3-64-5"><tspan + sodipodi:role="line" + id="tspan4789-0-9-0" + x="298.47" +- y="328.57767">cons_tail</tspan></text> ++ y="328.57767" ++ style="font-size:14px;line-height:1.25">cons_tail</tspan></text> + <text + xml:space="preserve" +- 
style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="489.02905" + y="328.57767" +- id="text4787-7-5-3" +- sodipodi:linespacing="125%"><tspan ++ id="text4787-7-5-3"><tspan + sodipodi:role="line" + id="tspan4789-8-0-6" + x="489.02905" +- y="328.57767">prod_head</tspan></text> ++ y="328.57767" ++ style="font-size:14px;line-height:1.25">prod_head</tspan></text> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="571.19672" + y="328.57767" +- id="text4787-3-6-4-1" +- sodipodi:linespacing="125%"><tspan ++ id="text4787-3-6-4-1"><tspan + sodipodi:role="line" + id="tspan4789-0-8-8-0" + x="571.19672" +- y="328.57767">prod_next</tspan></text> ++ y="328.57767" ++ style="font-size:14px;line-height:1.25">prod_next</tspan></text> + <path + style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Lend);font-family:Arial;-inkscape-font-specification:Arial" + d="m 587.90657,334.53006 0,42.14286" +@@ -741,45 +745,46 @@ + inkscape:connector-curvature="0" /> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="447.85715" + y="289.505" +- id="text3320" +- sodipodi:linespacing="125%"><tspan ++ id="text3320"><tspan + sodipodi:role="line" + id="tspan3322" + x="447.85715" +- y="289.505">compare and swap succeeds</tspan><tspan ++ y="289.505" ++ style="font-size:14px;line-height:1.25">compare and swap succeeds</tspan><tspan + sodipodi:role="line" + x="447.85715" + y="307.005" +- id="tspan3324">on core 2</tspan></text> ++ id="tspan3324" ++ style="font-size:14px;line-height:1.25">on core 2</tspan></text> + <path +- 
style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Lend);font-family:Arial;-inkscape-font-specification:Arial" +- d="m 542.85715,575.57647 0,-42.14286" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:14px;line-height:125%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Lend)" ++ d="M 602.85715,575.57647 V 533.43361" + id="path4309-4-0" + inkscape:connector-curvature="0" /> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="477.22983" + y="495.49646" +- id="text4269-5-5" +- sodipodi:linespacing="125%"><tspan ++ id="text4269-5-5"><tspan + sodipodi:role="line" + id="tspan4271-4-5" + x="477.22983" +- y="495.49646">obj4</tspan></text> ++ y="495.49646" ++ style="font-size:14px;line-height:1.25">obj4</tspan></text> + <text + xml:space="preserve" +- style="font-size:14px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Arial;-inkscape-font-specification:Arial" ++ style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;line-height:0%;font-family:Arial;-inkscape-font-specification:Arial;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none" + x="531.27301" + y="496.00156" +- id="text4269-5-7" +- sodipodi:linespacing="125%"><tspan ++ id="text4269-5-7"><tspan + sodipodi:role="line" + id="tspan4271-4-6" + x="531.27301" +- y="496.00156">obj5</tspan></text> ++ y="496.00156" ++ style="font-size:14px;line-height:1.25">obj5</tspan></text> + </g> + </svg> +diff --git a/dpdk/doc/guides/prog_guide/lto.rst b/dpdk/doc/guides/prog_guide/lto.rst +index 43f4c63379..277a6f1090 100644 +--- a/dpdk/doc/guides/prog_guide/lto.rst ++++ b/dpdk/doc/guides/prog_guide/lto.rst +@@ -31,7 +31,7 @@ the whole DPDK by setting: + + .. code-block:: console + +- CONFIG_ENABLE_LTO=y ++ CONFIG_RTE_ENABLE_LTO=y + + in config file. + +diff --git a/dpdk/doc/guides/prog_guide/rcu_lib.rst b/dpdk/doc/guides/prog_guide/rcu_lib.rst +index 8d0dfcf291..9b0bf138f6 100644 +--- a/dpdk/doc/guides/prog_guide/rcu_lib.rst ++++ b/dpdk/doc/guides/prog_guide/rcu_lib.rst +@@ -61,7 +61,7 @@ wait till thread 2 enters quiescent state as well. + + However, the writer does not need to wait for reader thread 3 to enter + quiescent state. Reader thread 3 was not accessing D1 when the delete +-operation happened. So, reader thread 1 will not have a reference to the ++operation happened. So, reader thread 3 will not have a reference to the + deleted entry. 
+ + It can be noted that, the critical sections for D2 is a quiescent state +diff --git a/dpdk/doc/guides/rawdevs/ntb.rst b/dpdk/doc/guides/rawdevs/ntb.rst +index 58472135f5..aa7d809649 100644 +--- a/dpdk/doc/guides/rawdevs/ntb.rst ++++ b/dpdk/doc/guides/rawdevs/ntb.rst +@@ -52,11 +52,11 @@ NTB PMD needs kernel PCI driver to support write combining (WC) to get + better performance. The difference will be more than 10 times. + To enable WC, there are 2 ways. + +-- Insert igb_uio with ``wc_active=1`` flag if use igb_uio driver. ++- Insert igb_uio with ``wc_activate=1`` flag if use igb_uio driver. + + .. code-block:: console + +- insmod igb_uio.ko wc_active=1 ++ insmod igb_uio.ko wc_activate=1 + + - Enable WC for NTB device's Bar 2 and Bar 4 (Mapped memory) manually. + The reference is https://www.kernel.org/doc/html/latest/x86/mtrr.html +diff --git a/dpdk/doc/guides/rel_notes/release_18_08.rst b/dpdk/doc/guides/rel_notes/release_18_08.rst +index 8a09dee95c..4ae388c331 100644 +--- a/dpdk/doc/guides/rel_notes/release_18_08.rst ++++ b/dpdk/doc/guides/rel_notes/release_18_08.rst +@@ -546,4 +546,4 @@ Tested Platforms + * Mellanox MLNX_OFED 4.2-1.4.21.0 + + * DPDK application running on ARM cores inside SmartNIC +- * Bluefield representors support planned for next release. ++ * BlueField representors support planned for next release. +diff --git a/dpdk/doc/guides/rel_notes/release_19_02.rst b/dpdk/doc/guides/rel_notes/release_19_02.rst +index ace1534eff..87dfbf5c7d 100644 +--- a/dpdk/doc/guides/rel_notes/release_19_02.rst ++++ b/dpdk/doc/guides/rel_notes/release_19_02.rst +@@ -109,7 +109,7 @@ New Features + ``CONFIG_RTE_IBVERBS_LINK_DLOPEN`` for make and ``ibverbs_link`` for meson. + * Added static linkage of ``mlx`` dependency. + * Improved stability of E-Switch flow driver. +- * Added new make build configuration to set the cacheline size for Bluefield ++ * Added new make build configuration to set the cacheline size for BlueField + correctly - ``arm64-bluefield-linux-gcc``. + + * **Updated the enic driver.** +diff --git a/dpdk/doc/guides/rel_notes/release_19_11.rst b/dpdk/doc/guides/rel_notes/release_19_11.rst +index 84aa03a1f2..6d7a084a1b 100644 +--- a/dpdk/doc/guides/rel_notes/release_19_11.rst ++++ b/dpdk/doc/guides/rel_notes/release_19_11.rst +@@ -206,7 +206,7 @@ New Features + * Added support for VLAN set VID offload command. + * Added support for matching on packets withe Geneve tunnel header. + * Added hairpin support. +- * Added ConnectX6-DX support. ++ * Added ConnectX-6 Dx support. + * Flow engine selected based on RDMA Core library version. + DV flow engine selected if version is rdma-core-24.0 or higher. + Verbs flow engine selected otherwise. +@@ -474,9 +474,8 @@ API Changes + + * event: The function ``rte_event_eth_tx_adapter_enqueue`` takes an additional + input as ``flags``. Flag ``RTE_EVENT_ETH_TX_ADAPTER_ENQUEUE_SAME_DEST`` which +- has been introduced in this release is used when used when all the packets +- enqueued in the Tx adapter are destined for the same Ethernet port ans Tx +- queue. ++ has been introduced in this release is used when all the packets enqueued in ++ the Tx adapter are destined for the same Ethernet port and Tx queue. 
+ + * sched: The pipe nodes configuration parameters such as number of pipes, + pipe queue sizes, pipe profiles, etc., are moved from port level structure +@@ -918,3 +917,1001 @@ Tested Platforms + * OFED: + + * MLNX_OFED 4.7-1.0.0.2 ++ ++19.11.1 Release Notes ++--------------------- ++ ++19.11.1 Fixes ++~~~~~~~~~~~~~ ++ ++* acl: fix 32-bit match for range field ++* app/eventdev: fix pipeline test with meson build ++* app/pdump: fix build with clang ++* app/testpmd: add port check before manual detach ++* app/testpmd: call cleanup on exit ++* app/testpmd: fix device mcast list error handling ++* app/testpmd: fix GENEVE flow item ++* app/testpmd: fix hot-unplug detaching ++* app/testpmd: fix identifier size for port attach ++* app/testpmd: fix initial value when setting PFC ++* app/testpmd: fix RFC addresses for Tx only ++* app/testpmd: fix txonly flow generation entropy ++* app/testpmd: fix uninitialized members of MPLS ++* app/testpmd: fix uninitialized members when setting PFC ++* app/testpmd: rename function for detaching by devargs ++* app/testpmd: update Rx offload after setting MTU ++* app/test: remove meson dependency on file in /sys ++* bpf: fix headers install with meson ++* build: explicitly enable sse4 for meson ++* build: fix libm detection in meson ++* build: remove unneeded function versioning ++* bus/fslmc: remove conflicting memory barrier macro ++* cfgfile: fix symbols map ++* ci: fix Travis config warnings ++* ci: use meson 0.47.1 ++* common/cpt: check cipher and auth keys are set ++* common/cpt: fix component for empty IOV buffer ++* crypto/armv8: fix clang build ++* crypto/ccp: fix queue alignment ++* crypto/dpaa_sec: fix IOVA conversions ++* crypto/octeontx2: add kmod dependency info ++* devtools: add fixes flag to commit listing ++* devtools: fix debug build test ++* doc: add module EEPROM dump to mlx5 features ++* doc: clarify memory write combining in mlx5 guide ++* doc: fix build with python 3.8 ++* doc: fix devargs in OCTEON TX2 event device guide ++* doc: fix igb_uio parameter in ntb guide ++* doc: fix multi-producer enqueue figure in ring guide ++* doc: fix naming of Mellanox devices ++* doc: fix quiescent state description in RCU guide ++* doc: fix typos in 19.11 release notes ++* doc: fix warning with meson ++* doc: reduce indentation in meson build file ++* doc: reduce whitespace in meson build file ++* doc: update recommended versions for i40e ++* drivers/crypto: fix session-less mode ++* eal/linux: fix build error on RHEL 7.6 ++* eal/linux: fix build when VFIO is disabled ++* eal/linux: fix uninitialized data valgrind warning ++* eal/windows: fix cpuset macro name ++* ethdev: fix callback unregister with wildcard argument list ++* ethdev: fix flow API doxygen comment ++* ethdev: fix secondary process memory overwrite ++* ethdev: fix switching domain allocation ++* ethdev: fix VLAN offloads set if no driver callback ++* event/dpaa2: set number of order sequences ++* event/dsw: avoid credit leak on oversized enqueue bursts ++* event/dsw: flush buffers immediately on zero-sized enqueue ++* event/octeontx2: fix device name in device info ++* examples/ethtool: fix unchecked return value ++* examples/fips_validation: fix AES-GCM cipher length parsing ++* examples/fips_validation: fix cipher length for AES-GCM ++* examples/fips_validation: fix string token for CT length ++* examples/ioat: fix failure check for ioat dequeue ++* examples/ioat: fix invalid link status check ++* examples/ioat: fix unchecked return value ++* examples/ipsec-secgw: extend inline session 
to non AES-GCM ++* examples/ipsec-secgw: fix crash on unsupported algo ++* examples/l2fwd-event: fix core allocation in poll mode ++* examples/l2fwd-event: fix error checking ++* examples/l2fwd-event: fix ethdev RSS setup ++* examples/l2fwd-event: fix event device config ++* examples/l3fwd-power: fix a typo ++* examples/l3fwd-power: fix interrupt disable ++* examples/ntb: fix mempool ops setting ++* examples/power: fix ack for enable/disable turbo ++* examples/tep_term: remove redundant info get ++* examples/vhost_blk: check unused value on init ++* examples/vhost_blk: fix check of device path ++* fib: fix possible integer overflow ++* fix Mellanox copyright and SPDX tag ++* hash: fix lock-free flag doxygen ++* hash: fix meson headers packaging ++* kni: fix build with Linux 5.6 ++* kni: fix meson warning about console keyword ++* kni: fix not contiguous FIFO ++* kni: rename variable with namespace prefix ++* latency: fix calculation for multi-thread ++* lib: fix unnecessary double negation ++* maintainers: resign from flow API maintenance ++* maintainers: update for failsafe and PCI library ++* mem: fix munmap in error unwind ++* mempool: fix anonymous populate ++* mempool: fix populate with small virtual chunks ++* mempool: fix slow allocation of large pools ++* mempool/octeontx: fix error handling in initialization ++* mk: avoid combining -r and -export-dynamic linker options ++* net/af_xdp: fix fill queue addresses ++* net/af_xdp: fix maximum MTU ++* net/af_xdp: fix redundant check for wakeup need ++* net/af_xdp: fix umem frame size and headroom ++* net/bnx2x: fix reset of scan FP flag ++* net/bnx2x: fix to sync fastpath Rx queue access ++* net/bnx2x: fix VLAN stripped flag ++* net/bnx2x: support secondary process ++* net/bnxt: add a field for FW capabilities ++* net/bnxt: allow group ID 0 for RSS action ++* net/bnxt: do not log error if stats queried before start ++* net/bnxt: fix alloc filter to use a common routine ++* net/bnxt: fix buffer allocation reattempt ++* net/bnxt: fix bumping of L2 filter reference count ++* net/bnxt: fix crash in port stop while handling events ++* net/bnxt: fix default timeout for getting FW version ++* net/bnxt: fix enable/disable VLAN filtering ++* net/bnxt: fix flow creation ++* net/bnxt: fix flow flush to sync with flow destroy ++* net/bnxt: fix IOVA mapping ++* net/bnxt: fix link during port toggle ++* net/bnxt: fix MAC address setting when port is stopped ++* net/bnxt: fix max rings calculation ++* net/bnxt: fix non matching flow hitting filter rule ++* net/bnxt: fix overwriting error message ++* net/bnxt: fix port stop on error recovery failure ++* net/bnxt: fix probe in FreeBSD ++* net/bnxt: fix race condition when port is stopped ++* net/bnxt: fix recovery alarm race condition in port close ++* net/bnxt: fix request for hot reset support ++* net/bnxt: fix return code handling in VLAN config ++* net/bnxt: fix reusing L2 filter ++* net/bnxt: fix Tx queue profile selection ++* net/bnxt: fix unnecessary delay in port stop ++* net/bnxt: fix VLAN strip ++* net/bnxt: fix VLAN strip flags in SSE Rx ++* net/bnxt: handle HW filter setting when port is stopped ++* net/bnxt: remove a redundant variable ++* net/bnxt: remove redundant if statement ++* net/bnxt: remove redundant macro ++* net/bnxt: remove unnecessary memset ++* net/bnxt: remove unnecessary structure variable ++* net/bnxt: restore MAC filters during reset recovery ++* net/bnxt: restore VLAN filters during reset recovery ++* net/bnxt: use macro for PCI log format ++* net/cxgbe: announce Tx 
multi-segments offload ++* net/dpaa: fix Rx offload flags on jumbo MTU set ++* net/failsafe: fix reported hash key size in device info ++* net/fm10k: fix descriptor VLAN field filling in Tx ++* net/fm10k: fix non-x86 build ++* net/hns3: fix crash when closing port ++* net/hns3: fix dumping VF register information ++* net/hns3: fix link status on failed query ++* net/hns3: fix ring vector related mailbox command format ++* net/hns3: fix Rx queue search with broadcast packet ++* net/hns3: fix triggering reset procedure in slave process ++* net/i40e/base: add new link speed constants ++* net/i40e/base: fix buffer address ++* net/i40e/base: fix display of FEC settings ++* net/i40e/base: fix error message ++* net/i40e/base: fix missing link modes ++* net/i40e/base: fix retrying logic ++* net/i40e/base: fix Tx descriptors number ++* net/i40e: fix port close in FreeBSD ++* net/i40e: fix Tx when TSO is enabled ++* net/i40e: fix unchecked Tx cleanup error ++* net/i40e: set fixed flag for exact link speed ++* net/iavf: add TSO offload use basic path ++* net/iavf/base: fix adminq return ++* net/iavf/base: fix command buffer memory leak ++* net/iavf: fix Rx total stats ++* net/iavf: fix virtual channel return ++* net/ice: add outer IPv4 matching for GTP-U flow ++* net/ice/base: fix loop limit ++* net/ice/base: increase PF reset wait timeout ++* net/ice: disable TSO offload in vector path ++* net/ice: fix flow director flag ++* net/ice: fix flow director GTP-U pattern ++* net/ice: fix flow director passthru ++* net/ice: fix flow FDIR/switch memory leak ++* net/ice: fix GTP-U rule conflict ++* net/ice: fix packet type table ++* net/ice: fix queue MSI-X interrupt binding ++* net/ice: fix Tx when TSO is enabled ++* net/ice: fix unchecked Tx cleanup error ++* net/ice: fix VSI context ++* net/ice: use ethernet copy API to do MAC assignment ++* net/ipn3ke: fix line side statistics register read ++* net/ipn3ke: fix meson build ++* net/ixgbe: check for illegal Tx packets ++* net/ixgbe: fix blocking system events ++* net/ixgbe: fix flow control mode setting ++* net/ixgbe: fix link status ++* net/ixgbe: fix link up in FreeBSD ++* net/ixgbe: remove dead code ++* net/ixgbe: remove duplicate function declaration ++* net/ixgbe: set fixed flag for exact link speed ++* net/mlx5: add free on completion queue ++* net/mlx5: allow push VLAN without VID ++* net/mlx5: block pop VLAN action on Tx ++* net/mlx5: block push VLAN action on Rx ++* net/mlx5: clean up redundant assignment ++* net/mlx5: engage free on completion queue ++* net/mlx5: fix bit mask to validate push VLAN ++* net/mlx5: fix blocker for push VLAN on Rx ++* net/mlx5: fix build with clang 3.4.2 ++* net/mlx5: fix check for VLAN actions ++* net/mlx5: fix crash when meter action conf is null ++* net/mlx5: fix crash when setting hairpin queues ++* net/mlx5: fix dirty array of actions ++* net/mlx5: fix doorbell register offset type ++* net/mlx5: fix encap/decap validation ++* net/mlx5: fix flow match on GRE key ++* net/mlx5: fix GENEVE tunnel flow validation ++* net/mlx5: fix hairpin queue capacity ++* net/mlx5: fix ICMPv6 header rewrite actions ++* net/mlx5: fix ICMPv6 header rewrite action validation ++* net/mlx5: fix inline packet size for ConnectX-4 Lx ++* net/mlx5: fix item flag on GENEVE item validation ++* net/mlx5: fix L3 VXLAN RSS expansion ++* net/mlx5: fix layer flags missing in metadata ++* net/mlx5: fix layer type in header modify action ++* net/mlx5: fix layer validation with decapsulation ++* net/mlx5: fix legacy multi-packet write session ++* 
net/mlx5: fix masks of encap and decap actions ++* net/mlx5: fix matcher field usage for metadata entities ++* net/mlx5: fix match information in meter ++* net/mlx5: fix matching for ICMP fragments ++* net/mlx5: fix match on ethertype and CVLAN tag ++* net/mlx5: fix memory regions release deadlock ++* net/mlx5: fix metadata item endianness conversion ++* net/mlx5: fix metadata split with encap action ++* net/mlx5: fix meter header modify before decap ++* net/mlx5: fix meter suffix flow ++* net/mlx5: fix modify actions support limitation ++* net/mlx5: fix multiple flow table hash list ++* net/mlx5: fix pop VLAN action validation ++* net/mlx5: fix register usage in meter ++* net/mlx5: fix running without Rx queue ++* net/mlx5: fix setting of port ID for egress rules ++* net/mlx5: fix setting of Rx hash fields ++* net/mlx5: fix shared metadata matcher field setup ++* net/mlx5: fix tunnel flow priority ++* net/mlx5: fix Tx burst routines set ++* net/mlx5: fix VLAN actions in meter ++* net/mlx5: fix VLAN ID action offset ++* net/mlx5: fix VLAN match for DV mode ++* net/mlx5: fix VLAN VID action validation ++* net/mlx5: fix VXLAN-GPE item translation ++* net/mlx5: fix zero out UDP checksum in encap data ++* net/mlx5: make FDB default rule optional ++* net/mlx5: move Tx complete request routine ++* net/mlx5: optimize Rx hash fields conversion ++* net/mlx5: support maximum flow id allocation ++* net/mlx5: unify validation of drop action ++* net/mlx5: update description of validation functions ++* net/mlx5: update Tx error handling routine ++* net/mlx: add static ibverbs linkage with meson ++* net/mlx: fix build with clang 9 ++* net/mlx: fix overlinking with meson and glue dlopen ++* net/mlx: rename meson variable for dlopen option ++* net/mlx: workaround static linkage with meson ++* net/netvsc: disable before changing RSS parameters ++* net/netvsc: fix crash in secondary process ++* net/netvsc: fix RSS offload flag ++* net/netvsc: initialize link state ++* net/octeontx2: fix flow control initial state ++* net/octeontx2: fix getting supported packet types ++* net/octeontx2: fix PTP ++* net/octeontx2: fix PTP and HIGIG2 coexistence ++* net/octeontx2: fix Tx flow control for HIGIG ++* net/octeontx2: fix VF configuration ++* net/octeontx: fix memory leak of MAC address table ++* net/qede/base: fix number of ports per engine ++* net/qede: do not stop vport if not started ++* net/qede: fix VF reload ++* net/sfc: fix log format specifiers ++* net/tap: fix memory leak when unregister intr handler ++* net/vhost: allocate interface name from heap ++* net/vhost: check creation failure ++* net/vhost: delay driver setup ++* net/vhost: fix probing in secondary process ++* net/vhost: fix setup error path ++* net/vhost: prevent multiple setups on reconfiguration ++* net/virtio-user: check file descriptor before closing ++* net/virtio-user: check tap offload setting failure ++* net/virtio-user: do not close tap when disabling queue pairs ++* net/virtio-user: do not reset virtqueues for split ring ++* net/virtio-user: fix packed ring server mode ++* raw/ntb: fix write memory barrier ++* service: don't walk out of bounds when checking services ++* test/common: fix log2 check ++* test/compress: replace test vector ++* test/crypto: fix missing operation status check ++* test/event: fix OCTEON TX2 event device name ++* test/event: fix unintended vdev creation ++* test: fix build without ring PMD ++* test/ipsec: fix a typo in function name ++* usertools: fix syntax warning in python 3.8 ++* usertools: fix telemetry 
client with python 3 ++* vfio: fix mapping failures in ppc64le ++* vhost: catch overflow causing mmap of size 0 ++* vhost: check message header size read ++* vhost/crypto: fix fetch size ++* vhost: do not treat empty socket message as error ++* vhost: fix crash on port deletion ++* vhost: fix deadlock on port deletion ++* vhost: fix inflight resubmit check ++* vhost: fix packed virtqueue ready condition ++* vhost: fix socket initial value ++* vhost: flush shadow Tx if no more packets ++* vhost: protect log address translation in IOTLB update ++ ++19.11.1 Validation ++~~~~~~~~~~~~~~~~~~ ++ ++* Red Hat(R) Testing ++ ++ * Platform ++ ++ * RHEL 8 ++ * Kernel 4.18 ++ * Qemu 4.2 ++ * X540-AT2 NIC(ixgbe, 10G) ++ ++ * Functionality ++ ++ * Guest with device assignment(PF) throughput testing(1G hugepage size) ++ * Guest with device assignment(PF) throughput testing(2M hugepage size) ++ * Guest with device assignment(VF) throughput testing ++ * PVP (host dpdk testpmd as vswitch) 1Q: throughput testing ++ * PVP vhost-user 2Q throughput testing ++ * PVP vhost-user 1Q - cross numa node throughput testing ++ * Guest with vhost-user 2 queues throughput testing ++ * vhost-user reconnect with dpdk-client, qemu-server: qemu reconnect ++ * PVP 1Q live migration testing ++ * PVP 1Q cross numa node live migration testing ++ * Guest with ovs+dpdk+vhost-user 1Q live migration testing ++ * Guest with ovs+dpdk+vhost-user 1Q live migration testing (2M) ++ * Guest with ovs+dpdk+vhost-user 2Q live migration testing ++ ++* Intel(R) Testing ++ ++ * Basic Intel(R) NIC(ixgbe, i40e and ice) testing ++ * PF (i40e) ++ * PF (ixgbe) ++ * PF (ice) ++ * VF ++ * Compile Testing ++ * Intel NIC single core/NIC performance ++ ++ * Basic cryptodev and virtio testing ++ ++ * cryptodev ++ * vhost/virtio basic loopback, PVP and performance test ++ ++* Mellanox(R) Testing ++ ++ * Basic functionality with testpmd ++ ++ * Tx/Rx ++ * xstats ++ * Timestamps ++ * Link status ++ * RTE flow and flow_director ++ * RSS ++ * VLAN stripping and insertion ++ * Checksum/TSO ++ * ptype ++ * l3fwd-power example application ++ * Multi-process example applications ++ ++ * ConnectX-5 ++ ++ * RHEL 7.4 ++ * Kernel 3.10.0-693.el7.x86_64 ++ * Driver MLNX_OFED_LINUX-5.0-1.0.0.0 ++ * fw 16.27.1016 ++ ++ * ConnectX-4 Lx ++ ++ * RHEL 7.4 ++ * Kernel 3.10.0-693.el7.x86_64 ++ * Driver MLNX_OFED_LINUX-5.0-1.0.0.0 ++ * fw 14.27.1016 ++ ++* Broadcom(R) Testing ++ ++ * Functionality ++ ++ * Tx/Rx ++ * Link status ++ * RSS ++ * Checksum/TSO ++ * VLAN filtering ++ * statistics ++ * MTU ++ ++ * Platform ++ ++ * BCM57414 NetXtreme-E 10Gb/25Gb Ethernet Controller, Firmware: 216.1.169.0 ++ * BCM57508 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb/200Gb Ethernet, Firmware : 216.0.314.0 ++ ++* IBM(R) Testing ++ ++ * Functionality ++ ++ * Basic PF on Mellanox ++ * Single port stability test using l3fwd (16 cpus) and TRex, tested 64 ++ and 1500 byte packets at a 0.0% drop rate for 4 hours each ++ * Performance: no degradation compared to 19.11.0 ++ ++ * Platform ++ ++ * Ubuntu 18.04.4 LTS ++ * Kernel 4.15.0-88-generic ++ * IBM Power9 Model 8335-101 CPU: 2.3 (pvr 004e 1203) ++ * Mellanox Technologies MT28800 Family [ConnectX-5 Ex], firmware version: 16.26.4012, MLNX_OFED_LINUX-4.7-3.2.9.1 ++ ++19.11.2 Release Notes ++--------------------- ++ ++19.11.2 Fixes ++~~~~~~~~~~~~~ ++ ++* 2cf9c470eb vhost: check log mmap offset and size overflow (CVE-2020-10722) ++* 8e9652b0b6 vhost: fix translated address not checked (CVE-2020-10723) ++* 95e1f29c26 vhost/crypto: validate keys lengths 
(CVE-2020-10724) ++* 963b6eea05 vhost: fix potential memory space leak (CVE-2020-10725) ++* c9c630a117 vhost: fix potential fd leak (CVE-2020-10726) ++* cd0ea71bb6 vhost: fix vring index check (CVE-2020-10726) ++ ++19.11.2 Validation ++~~~~~~~~~~~~~~~~~~ ++ ++* Red Hat(R) Testing ++ ++ * Platform ++ ++ * RHEL 8.3 ++ * Kernel 4.18 ++ * Qemu 4.2 ++ * X540-AT2 NIC(ixgbe, 10G) ++ ++ * Functionality ++ ++ * PVP (host dpdk testpmd as vswitch) 1Q: throughput testing ++ * PVP vhost-user 2Q throughput testing ++ * PVP vhost-user 1Q - cross numa node throughput testing ++ * PVP 1Q live migration testing ++ * PVP 1Q cross numa node live migration testing ++ ++* Intel(R) Testing ++ ++ * Virtio features ++ ++ * vhost/virtio loopback test with virtio user as server mode ++ * loopback multi queues ++ * loopback multi paths port restart ++ * vhost/virtio pvp multi-paths performance ++ * pvp multi-queues and port restart ++ * vhost dequeue zero copy ++ * pvp share lib ++ * pvp vhost user reconnect ++ * pvp test with 4k pages ++ * pvp test with 2M hugepages ++ * pvp virtio bonding ++ * pvp test with diff qemu version ++ * vhost enqueue interrupt ++ * vhost event idx interrupt ++ * vhost virtio pmd interrupt ++ * vhost virtio user interrupt ++ * virtio event idx interrupt ++ * virtio user for container networking ++ * virtio user as exceptional path ++ * vhost xstats ++ * virtio-pmd multi-process ++ * vm2vm virtio pmd ++ * vm2vm virtio-net iperf ++ * vm2vm virtio-user ++ * vhost user live migration ++ ++19.11.3 Release Notes ++--------------------- ++ ++19.11.3 Fixes ++~~~~~~~~~~~~~ ++ ++* app/crypto-perf: fix display of sample test vector ++* app/eventdev: check Tx adapter service ID ++* app: fix usage help of options separated by dashes ++* app/pipeline: fix build with gcc 10 ++* app: remove extra new line after link duplex ++* app/testpmd: add parsing for QinQ VLAN headers ++* app/testpmd: fix DCB set ++* app/testpmd: fix memory failure handling for i40e DDP ++* app/testpmd: fix PPPoE flow command ++* app/testpmd: fix statistics after reset ++* baseband/turbo_sw: fix exposed LLR decimals assumption ++* bbdev: fix doxygen comments ++* build: disable gcc 10 zero-length-bounds warning ++* build: fix linker warnings with clang on Windows ++* build: support MinGW-w64 with Meson ++* buildtools: get static mlx dependencies for meson ++* bus/fslmc: fix dereferencing null pointer ++* bus/fslmc: fix size of qman fq descriptor ++* bus/pci: fix devargs on probing again ++* bus/pci: fix UIO resource access from secondary process ++* bus/vmbus: fix comment spelling ++* ci: fix telemetry dependency in Travis ++* common/iavf: update copyright ++* common/mlx5: fix build with -fno-common ++* common/mlx5: fix build with rdma-core 21 ++* common/mlx5: fix netlink buffer allocation from stack ++* common/mlx5: fix umem buffer alignment ++* common/octeontx: fix gcc 9.1 ABI break ++* common/qat: fix GEN3 marketing name ++* contigmem: cleanup properly when load fails ++* crypto/caam_jr: fix check of file descriptors ++* crypto/caam_jr: fix IRQ functions return type ++* crypto/ccp: fix fd leak on probe failure ++* cryptodev: add asymmetric session-less feature name ++* cryptodev: fix missing device id range checking ++* cryptodev: fix SHA-1 digest enum comment ++* crypto/kasumi: fix extern declaration ++* crypto/nitrox: fix CSR register address generation ++* crypto/nitrox: fix oversized device name ++* crypto/octeontx2: fix build with gcc 10 ++* crypto/openssl: fix out-of-place encryption ++* crypto/qat: fix cipher descriptor for 
ZUC and SNOW ++* crypto/qat: support plain SHA1..SHA512 hashes ++* devtools: fix symbol map change check ++* doc: add i40e limitation for flow director ++* doc: add NASM installation steps ++* doc: fix API index ++* doc: fix build issue in ABI guide ++* doc: fix build with doxygen 1.8.18 ++* doc: fix default symbol binding in ABI guide ++* doc: fix log level example in Linux guide ++* doc: fix LTO config option ++* doc: fix matrix CSS for recent sphinx ++* doc: fix multicast filter feature announcement ++* doc: fix number of failsafe sub-devices ++* doc: fix reference in ABI guide ++* doc: fix sphinx compatibility ++* doc: fix typo in contributors guide ++* doc: fix typo in contributors guide ++* doc: fix typos in ABI policy ++* doc: prefer https when pointing to dpdk.org ++* drivers: add crypto as dependency for event drivers ++* drivers/crypto: disable gcc 10 no-common errors ++* drivers/crypto: fix build with make 4.3 ++* drivers/crypto: fix log type variables for -fno-common ++* drivers: fix log type variables for -fno-common ++* eal/arm64: fix precise TSC ++* eal: fix C++17 compilation ++* eal: fix comments spelling ++* eal: fix log message print for regex ++* eal: fix PRNG init with HPET enabled ++* eal: fix typo in endian conversion macros ++* eal/freebsd: fix queuing duplicate alarm callbacks ++* eal/ppc: fix bool type after altivec include ++* eal/ppc: fix build with gcc 9.3 ++* eal/x86: ignore gcc 10 stringop-overflow warnings ++* ethdev: fix build when vtune profiling is on ++* ethdev: fix spelling ++* eventdev: fix probe and remove for secondary process ++* event/dsw: avoid reusing previously recorded events ++* event/dsw: fix enqueue burst return value ++* event/dsw: remove redundant control ring poll ++* event/dsw: remove unnecessary read barrier ++* event/octeontx2: fix build for O1 optimization ++* event/octeontx2: fix queue removal from Rx adapter ++* examples/eventdev: fix build with gcc 10 ++* examples/eventdev: fix crash on exit ++* examples/fips_validation: fix parsing of algorithms ++* examples/ip_pipeline: remove check of null response ++* examples/ipsec-gw: fix gcc 10 maybe-uninitialized warning ++* examples/kni: fix crash during MTU set ++* examples/kni: fix MTU change to setup Tx queue ++* examples/l2fwd-keepalive: fix mbuf pool size ++* examples/qos_sched: fix build with gcc 10 ++* examples: remove extra new line after link duplex ++* examples/vhost_blk: fix build with gcc 10 ++* examples/vmdq: fix output of pools/queues ++* examples/vmdq: fix RSS configuration ++* examples/vm_power: drop Unix path limit redefinition ++* examples/vm_power: fix build with -fno-common ++* fib: fix headers for C++ support ++* fix same typo in multiple places ++* fix various typos found by Lintian ++* ipsec: check SAD lookup error ++* ipsec: fix build dependency on hash lib ++* kvargs: fix buffer overflow when parsing list ++* kvargs: fix invalid token parsing on FreeBSD ++* kvargs: fix strcmp helper documentation ++* log: fix level picked with globbing on type register ++* lpm6: fix comments spelling ++* lpm6: fix size of tbl8 group ++* mem: fix overflow on allocation ++* mem: mark pages as not accessed when freeing memory ++* mem: mark pages as not accessed when reserving VA ++* mempool/dpaa2: install missing header with meson ++* mempool/octeontx2: fix build for gcc O1 optimization ++* mempool: remove inline functions from export list ++* mem: preallocate VA space in no-huge mode ++* mk: fix static linkage of mlx dependency ++* net/avp: fix gcc 10 maybe-uninitialized warning ++* 
net/bnxt: do not use PMD log type ++* net/bnxt: fix error log for command timeout ++* net/bnxt: fix FW version query ++* net/bnxt: fix HWRM command during FW reset ++* net/bnxt: fix max ring count ++* net/bnxt: fix memory leak during queue restart ++* net/bnxt: fix number of TQM ring ++* net/bnxt: fix port start failure handling ++* net/bnxt: fix possible stack smashing ++* net/bnxt: fix Rx ring producer index ++* net/bnxt: fix storing MAC address twice ++* net/bnxt: fix TQM ring context memory size ++* net/bnxt: fix using RSS config struct ++* net/bnxt: fix VLAN add when port is stopped ++* net/bnxt: fix VNIC Rx queue count on VNIC free ++* net/bnxt: use true/false for bool types ++* net/dpaa2: fix 10G port negotiation ++* net/dpaa2: fix congestion ID for multiple traffic classes ++* net/dpaa: use dynamic log type ++* net/e1000: fix port hotplug for multi-process ++* net/ena/base: fix documentation of functions ++* net/ena/base: fix indentation in CQ polling ++* net/ena/base: fix indentation of multiple defines ++* net/ena/base: fix testing for supported hash function ++* net/ena/base: make allocation macros thread-safe ++* net/ena/base: prevent allocation of zero sized memory ++* net/ena: fix build for O1 optimization ++* net/ena: set IO ring size to valid value ++* net/enetc: fix Rx lock-up ++* net/enic: fix flow action reordering ++* net/failsafe: fix fd leak ++* net/hinic: allocate IO memory with socket id ++* net/hinic/base: fix PF firmware hot-active problem ++* net/hinic/base: fix port start during FW hot update ++* net/hinic: fix LRO ++* net/hinic: fix queues resource free ++* net/hinic: fix repeating cable log and length check ++* net/hinic: fix snprintf length of cable info ++* net/hinic: fix TSO ++* net/hinic: fix Tx mbuf length while copying ++* net/hns3: add free threshold in Rx ++* net/hns3: add RSS hash offload to capabilities ++* net/hns3: clear residual flow rules on init ++* net/hns3: fix configuring illegal VLAN PVID ++* net/hns3: fix configuring RSS hash when rules are flushed ++* net/hns3: fix crash when flushing RSS flow rules with FLR ++* net/hns3: fix default error code of command interface ++* net/hns3: fix default VLAN filter configuration for PF ++* net/hns3: fix mailbox opcode data type ++* net/hns3: fix MSI-X interrupt during initialization ++* net/hns3: fix packets offload features flags in Rx ++* net/hns3: fix promiscuous mode for PF ++* net/hns3: fix return value of setting VLAN offload ++* net/hns3: fix return value when clearing statistics ++* net/hns3: fix RSS indirection table configuration ++* net/hns3: fix RSS key length ++* net/hns3: fix Rx interrupt after reset ++* net/hns3: fix status after repeated resets ++* net/hns3: fix Tx interrupt when enabling Rx interrupt ++* net/hns3: fix VLAN filter when setting promisucous mode ++* net/hns3: fix VLAN PVID when configuring device ++* net/hns3: reduce judgements of free Tx ring space ++* net/hns3: remove one IO barrier in Rx ++* net/hns3: remove unnecessary assignments in Tx ++* net/hns3: replace memory barrier with data dependency order ++* net/hns3: support different numbers of Rx and Tx queues ++* net/hns3: support Rx interrupt ++* net/i40e/base: update copyright ++* net/i40e: fix flow director enabling ++* net/i40e: fix flow director for ARP packets ++* net/i40e: fix flow director initialisation ++* net/i40e: fix flush of flow director filter ++* net/i40e: fix queue region in RSS flow ++* net/i40e: fix queue related exception handling ++* net/i40e: fix setting L2TAG ++* net/i40e: fix wild pointer ++* 
net/i40e: fix X722 performance ++* net/i40e: relax barrier in Tx ++* net/i40e: relax barrier in Tx for NEON ++* net/iavf: fix link speed ++* net/iavf: fix setting L2TAG ++* net/iavf: fix stats query error code ++* net/ice: add action number check for switch ++* net/ice/base: check memory pointer before copying ++* net/ice/base: fix binary order for GTPU filter ++* net/ice/base: fix MAC write command ++* net/ice/base: fix uninitialized stack variables ++* net/ice/base: minor fixes ++* net/ice/base: read PSM clock frequency from register ++* net/ice/base: remove unused code in switch rule ++* net/ice/base: update copyright ++* net/ice: change default tunnel type ++* net/ice: fix crash in switch filter ++* net/ice: fix hash flow crash ++* net/ice: fix input set of VLAN item ++* net/ice: fix RSS advanced rule ++* net/ice: fix RSS for GTPU ++* net/ice: fix setting L2TAG ++* net/ice: fix variable initialization ++* net/ice: remove bulk alloc option ++* net/ice: remove unnecessary variable ++* net/ice: support mark only action for flow director ++* net/ipn3ke: use control thread to check link status ++* net/ixgbe/base: update copyright ++* net/ixgbe: check driver type in MACsec API ++* net/ixgbe: fix link state timing on fiber ports ++* net/ixgbe: fix link status after port reset ++* net/ixgbe: fix link status inconsistencies ++* net/ixgbe: fix link status synchronization on BSD ++* net/ixgbe: fix resource leak after thread exits normally ++* net/ixgbe: fix statistics in flow control mode ++* net/memif: fix init when already connected ++* net/memif: fix resource leak ++* net/mlx4: fix build with -fno-common ++* net/mlx4: fix drop queue error handling ++* net/mlx5: add device parameter for MPRQ stride size ++* net/mlx5: add multi-segment packets in MPRQ mode ++* net/mlx5: enable MPRQ multi-stride operations ++* net/mlx5: fix actions validation on root table ++* net/mlx5: fix assert in doorbell lookup ++* net/mlx5: fix assert in dynamic metadata handling ++* net/mlx5: fix assert in modify converting ++* net/mlx5: fix build with separate glue lib for dlopen ++* net/mlx5: fix call to modify action without init item ++* net/mlx5: fix counter container usage ++* net/mlx5: fix crash when releasing meter table ++* net/mlx5: fix CVLAN tag set in IP item translation ++* net/mlx5: fix doorbell bitmap management offsets ++* net/mlx5: fix gcc 10 enum-conversion warning ++* net/mlx5: fix header modify action validation ++* net/mlx5: fix imissed counter overflow ++* net/mlx5: fix jump table leak ++* net/mlx5: fix mask used for IPv6 item validation ++* net/mlx5: fix matching for UDP tunnels with Verbs ++* net/mlx5: fix match on empty VLAN item in DV mode ++* net/mlx5: fix metadata for compressed Rx CQEs ++* net/mlx5: fix meter color register consideration ++* net/mlx5: fix meter suffix table leak ++* net/mlx5: fix packet length assert in MPRQ ++* net/mlx5: fix push VLAN action to use item info ++* net/mlx5: fix RSS enablement ++* net/mlx5: fix RSS key copy to TIR context ++* net/mlx5: fix Tx queue release debug log timing ++* net/mlx5: fix validation of push VLAN without full mask ++* net/mlx5: fix validation of VXLAN/VXLAN-GPE specs ++* net/mlx5: fix VLAN flow action with wildcard VLAN item ++* net/mlx5: fix VLAN ID check ++* net/mlx5: fix VLAN PCP item calculation ++* net/mlx5: fix zero metadata action ++* net/mlx5: fix zero value validation for metadata ++* net/mlx5: improve logging of MPRQ selection ++* net/mlx5: reduce Tx completion index memory loads ++* net/mlx5: set dynamic flow metadata in Rx queues ++* 
net/mlx5: update VLAN and encap actions validation ++* net/mlx5: use open/read/close for ib stats query ++* net/mvneta: do not use PMD log type ++* net/mvpp2: fix build with gcc 10 ++* net/netvsc: avoid possible live lock ++* net/netvsc: do not configure RSS if disabled ++* net/netvsc: do RSS across Rx queue only ++* net/netvsc: fix comment spelling ++* net/netvsc: fix memory free on device close ++* net/netvsc: handle Rx packets during multi-channel setup ++* net/netvsc: handle Tx completions based on burst size ++* net/netvsc: propagate descriptor limits from VF ++* net/netvsc: remove process event optimization ++* net/netvsc: split send buffers from Tx descriptors ++* net/nfp: fix dangling pointer on probe failure ++* net/nfp: fix log format specifiers ++* net/null: fix secondary burst function selection ++* net/null: remove redundant check ++* net/octeontx2: disable unnecessary error interrupts ++* net/octeontx2: enable error and RAS interrupt in configure ++* net/octeontx2: fix buffer size assignment ++* net/octeontx2: fix device configuration sequence ++* net/octeontx2: fix link information for loopback port ++* net/octeontx: fix dangling pointer on init failure ++* net/octeontx: fix meson build for disabled drivers ++* net/pfe: do not use PMD log type ++* net/pfe: fix double free of MAC address ++* net/qede: fix link state configuration ++* net/qede: fix port reconfiguration ++* net/ring: fix device pointer on allocation ++* net/sfc/base: fix build when EVB is enabled ++* net/sfc/base: fix manual filter delete in EF10 ++* net/sfc/base: handle manual and auto filter clashes in EF10 ++* net/sfc/base: reduce filter priorities to implemented only ++* net/sfc/base: refactor filter lookup loop in EF10 ++* net/sfc/base: reject automatic filter creation by users ++* net/sfc/base: use simpler EF10 family conditional check ++* net/sfc/base: use simpler EF10 family run-time checks ++* net/sfc: fix initialization error path ++* net/sfc: fix promiscuous and allmulticast toggles errors ++* net/sfc: fix reported promiscuous/multicast mode ++* net/sfc: fix Rx queue start failure path ++* net/sfc: set priority of created filters to manual ++* net/softnic: fix memory leak for thread ++* net/softnic: fix resource leak for pipeline ++* net/tap: do not use PMD log type ++* net/tap: fix check for mbuf number of segment ++* net/tap: fix crash in flow destroy ++* net/tap: fix fd leak on creation failure ++* net/tap: fix file close on remove ++* net/tap: fix mbuf and mem leak during queue release ++* net/tap: fix mbuf double free when writev fails ++* net/tap: fix queues fd check before close ++* net/tap: fix unexpected link handler ++* net/tap: remove unused assert ++* net/thunderx: use dynamic log type ++* net/vhost: fix potential memory leak on close ++* net/virtio: do not use PMD log type ++* net/virtio: fix crash when device reconnecting ++* net/virtio: fix outdated comment ++* net/virtio: fix unexpected event after reconnect ++* net/virtio-user: fix devargs parsing ++* net/vmxnet3: fix RSS setting on v4 ++* net/vmxnet3: handle bad host framing ++* pci: accept 32-bit domain numbers ++* pci: fix build on FreeBSD ++* pci: fix build on ppc ++* pci: reject negative values in PCI id ++* pci: remove unneeded includes in public header file ++* remove references to private PCI probe function ++* Revert "common/qat: fix GEN3 marketing name" ++* Revert "net/bnxt: fix number of TQM ring" ++* Revert "net/bnxt: fix TQM ring context memory size" ++* security: fix crash at accessing non-implemented ops ++* security: 
fix return types in documentation ++* security: fix session counter ++* security: fix verification of parameters ++* service: fix crash on exit ++* service: fix identification of service running on other lcore ++* service: fix race condition for MT unsafe service ++* service: remove rte prefix from static functions ++* telemetry: fix port stats retrieval ++* test/crypto: fix flag check ++* test/crypto: fix statistics case ++* test: fix build with gcc 10 ++* test/flow_classify: enable multi-sockets system ++* test/ipsec: fix crash in session destroy ++* test/kvargs: fix invalid cases check ++* test/kvargs: fix to consider empty elements as valid ++* test: load drivers when required ++* test: remove redundant macro ++* test: skip some subtests in no-huge mode ++* timer: protect initialization with lock ++* usertools: check for pci.ids in /usr/share/misc ++* vfio: fix race condition with sysfs ++* vfio: fix use after free with multiprocess ++* vhost/crypto: add missing user protocol flag ++* vhost: fix packed ring zero-copy ++* vhost: fix peer close check ++* vhost: fix shadowed descriptors not flushed ++* vhost: fix shadow update ++* vhost: fix zero-copy server mode ++* vhost: handle mbuf allocation failure ++* vhost: make IOTLB cache name unique among processes ++* vhost: prevent zero-copy with incompatible client mode ++* vhost: remove unused variable ++ ++19.11.3 Validation ++~~~~~~~~~~~~~~~~~~ ++ ++* Intel(R) Testing ++ ++ * Basic Intel(R) NIC(ixgbe, i40e and ice) testing ++ * PF (i40e) ++ * PF (ixgbe) ++ * PF (ice) ++ * VF (i40e) ++ * VF (ixgbe) ++ * VF (ice) ++ * Compile Testing ++ * Intel NIC single core/NIC performance ++ ++ * Basic cryptodev and virtio testing ++ ++ * vhost/virtio basic loopback, PVP and performance test ++ * cryptodev Function/Performance ++ ++* Mellanox(R) Testing ++ ++ * Basic functionality with testpmd ++ ++ * Tx/Rx ++ * xstats ++ * Timestamps ++ * Link status ++ * RTE flow and flow_director ++ * RSS ++ * VLAN stripping and insertion ++ * Checksum/TSO ++ * ptype ++ * l3fwd-power example application ++ * Multi-process example applications ++ ++ * ConnectX-5 ++ ++ * RHEL 7.4 ++ * Driver MLNX_OFED_LINUX-5.0-2.1.8.0 ++ * fw 16.27.2008 ++ ++ * ConnectX-4 Lx ++ ++ * RHEL 7.4 ++ * Driver MLNX_OFED_LINUX-5.0-2.1.8.0 ++ * fw 14.27.1016 ++ ++* Red Hat(R) Testing ++ ++ * Platform ++ ++ * RHEL 8 ++ * Kernel 4.18 ++ * Qemu 4.2 ++ * X540-AT2 NIC(ixgbe, 10G) ++ ++ * Functionality ++ ++ * Guest with device assignment(PF) throughput testing(1G hugepage size) ++ * Guest with device assignment(PF) throughput testing(2M hugepage size) ++ * Guest with device assignment(VF) throughput testing ++ * PVP (host dpdk testpmd as vswitch) 1Q: throughput testing ++ * PVP vhost-user 2Q throughput testing ++ * PVP vhost-user 1Q - cross numa node throughput testing ++ * Guest with vhost-user 2 queues throughput testing ++ * vhost-user reconnect with dpdk-client, qemu-server: qemu reconnect ++ * PVP 1Q live migration testing ++ * PVP 1Q cross numa node live migration testing ++ * Guest with ovs+dpdk+vhost-user 1Q live migration testing ++ * Guest with ovs+dpdk+vhost-user 1Q live migration testing (2M) ++ * Guest with ovs+dpdk+vhost-user 2Q live migration testing ++ * Allocate memory from the NUMA node which Virtio device locates ++ ++ ++* Intel(R) Testing with Open vSwitch ++ ++ * OVS testing with OVS branches master and 2.13 with VSPERF ++ ++ * Tested NICs ++ ++ * i40e (X710) ++ * ixgbe (82599ES) ++ * ice ++ * vhost ++ ++ * Functionality ++ ++ * P2P ++ * PVP ++ * PVPV ++ * PVVP ++ * Multiqueue 
RSS ++ * Vhost reconnect ++ * Jumbo frames 1500, 6000, 9702 ++ ++ ++* Microsoft(R) Testing ++ ++ * Platform ++ ++ * Azure ++ * Ubuntu 16.04-LTS ++ * Ubuntu 18.04-DAILY-LTS ++ * RHEL 7-RAW ++ * RHEL 7.5 ++ * CentOS 7.5 ++ * SLES-15-sp1 gen1 ++ * Mellanox(R) ConnectX-4 ++ * LISAv2 test framework ++ ++ * Functionality ++ ++ * VERIFY-DPDK-COMPLIANCE - verifies kernel is supported and that the build is successful ++ * VERIFY-DPDK-BUILD-AND-TESTPMD-TEST - verifies using testpmd that packets can be sent from a VM to another VM ++ * VERIFY-SRIOV-FAILSAFE-FOR-DPDK - disables/enables Accelerated Networking for the NICs under test and makes sure DPDK works in both scenarios ++ * VERIFY-DPDK-FAILSAFE-DURING-TRAFFIC - disables/enables Accelerated Networking for the NICs while generating traffic using testpmd ++ * PERF-DPDK-FWD-PPS-DS15 - verifies DPDK forwarding performance using testpmd on 2, 4, 8 cores, rx and io mode on size Standard_DS15_v2 ++ * PERF-DPDK-SINGLE-CORE-PPS-DS4 - verifies DPDK performance using testpmd on 1 core, rx and io mode on size Standard_DS4_v2 ++ * PERF-DPDK-SINGLE-CORE-PPS-DS15 - verifies DPDK performance using testpmd on 1 core, rx and io mode on size Standard_DS15_v2 ++ * PERF-DPDK-MULTICORE-PPS-DS15 - verifies DPDK performance using testpmd on 2, 4, 8 cores, rx and io mode on size Standard_DS15_v2 ++ * PERF-DPDK-MULTICORE-PPS-F32 - verifies DPDK performance using testpmd on 2, 4, 8, 16 cores, rx and io mode on size Standard_F32s_v2 ++ * DPDK-RING-LATENCY - verifies DPDK CPU latency using dpdk-ring-ping ++ * VERIFY-DPDK-PRIMARY-SECONDARY-PROCESSES - verifies primary / secondary processes support for DPDK. Runs only on RHEL and Ubuntu distros with Linux kernel >= 4.20 ++ * VERIFY-DPDK-OVS - builds OVS with DPDK support and tests if the OVS DPDK ports can be created. Runs only on Ubuntu distro. +diff --git a/dpdk/doc/guides/sample_app_ug/l2_forward_event.rst b/dpdk/doc/guides/sample_app_ug/l2_forward_event.rst +index 8c519c3046..c5fad93d00 100644 +--- a/dpdk/doc/guides/sample_app_ug/l2_forward_event.rst ++++ b/dpdk/doc/guides/sample_app_ug/l2_forward_event.rst +@@ -202,9 +202,6 @@ chapters that related to the Poll Mode and Event mode Driver in the + + .. code-block:: c + +- if (rte_pci_probe() < 0) +- rte_panic("Cannot probe PCI\n"); +- + /* reset l2fwd_dst_ports */ + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) +@@ -234,11 +231,6 @@ chapters that related to the Poll Mode and Event mode Driver in the + rte_eth_dev_info_get((uint8_t) portid, &dev_info); + } + +-Observe that: +- +-* rte_pci_probe() parses the devices on the PCI bus and initializes recognized +- devices. +- + The next step is to configure the RX and TX queues. For each port, there is only + one RX queue (only one lcore is able to poll a given port). The number of TX + queues depends on the number of available lcores. The rte_eth_dev_configure() +diff --git a/dpdk/doc/guides/sample_app_ug/l2_forward_real_virtual.rst b/dpdk/doc/guides/sample_app_ug/l2_forward_real_virtual.rst +index 39d6b0067a..671d0c7c19 100644 +--- a/dpdk/doc/guides/sample_app_ug/l2_forward_real_virtual.rst ++++ b/dpdk/doc/guides/sample_app_ug/l2_forward_real_virtual.rst +@@ -194,9 +194,6 @@ in the *DPDK Programmer's Guide* - Rel 1.4 EAR and the *DPDK API Reference*. + + .. 
code-block:: c + +- if (rte_pci_probe() < 0) +- rte_exit(EXIT_FAILURE, "Cannot probe PCI\n"); +- + /* reset l2fwd_dst_ports */ + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) +@@ -226,12 +223,6 @@ in the *DPDK Programmer's Guide* - Rel 1.4 EAR and the *DPDK API Reference*. + rte_eth_dev_info_get((uint8_t) portid, &dev_info); + } + +-Observe that: +- +-* rte_igb_pmd_init_all() simultaneously registers the driver as a PCI driver and as an Ethernet* Poll Mode Driver. +- +-* rte_pci_probe() parses the devices on the PCI bus and initializes recognized devices. +- + The next step is to configure the RX and TX queues. + For each port, there is only one RX queue (only one lcore is able to poll a given port). + The number of TX queues depends on the number of available lcores. +diff --git a/dpdk/doc/guides/sample_app_ug/link_status_intr.rst b/dpdk/doc/guides/sample_app_ug/link_status_intr.rst +index 5283be8b7c..04c40f2854 100644 +--- a/dpdk/doc/guides/sample_app_ug/link_status_intr.rst ++++ b/dpdk/doc/guides/sample_app_ug/link_status_intr.rst +@@ -88,9 +88,6 @@ To fully understand this code, it is recommended to study the chapters that rela + + .. code-block:: c + +- if (rte_pci_probe() < 0) +- rte_exit(EXIT_FAILURE, "Cannot probe PCI\n"); +- + /* + * Each logical core is assigned a dedicated TX queue on each port. + */ +@@ -115,10 +112,6 @@ To fully understand this code, it is recommended to study the chapters that rela + rte_eth_dev_info_get((uint8_t) portid, &dev_info); + } + +-Observe that: +- +-* rte_pci_probe() parses the devices on the PCI bus and initializes recognized devices. +- + The next step is to configure the RX and TX queues. + For each port, there is only one RX queue (only one lcore is able to poll a given port). + The number of TX queues depends on the number of available lcores. +diff --git a/dpdk/doc/guides/sample_app_ug/multi_process.rst b/dpdk/doc/guides/sample_app_ug/multi_process.rst +index 9c374da6f7..f2a79a6397 100644 +--- a/dpdk/doc/guides/sample_app_ug/multi_process.rst ++++ b/dpdk/doc/guides/sample_app_ug/multi_process.rst +@@ -209,7 +209,7 @@ How the Application Works + ^^^^^^^^^^^^^^^^^^^^^^^^^ + + The initialization calls in both the primary and secondary instances are the same for the most part, +-calling the rte_eal_init(), 1 G and 10 G driver initialization and then rte_pci_probe() functions. ++calling the rte_eal_init(), 1 G and 10 G driver initialization and then probing devices. + Thereafter, the initialization done depends on whether the process is configured as a primary or secondary instance. 
+ + In the primary instance, a memory pool is created for the packet mbufs and the network ports to be used are initialized - +diff --git a/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst +index 73ef0b41d3..78bdf60fe6 100644 +--- a/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst ++++ b/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst +@@ -237,7 +237,7 @@ Display the RSS hash functions and RSS hash key of a port:: + clear port + ~~~~~~~~~~ + +-Clear the port statistics for a given port or for all ports:: ++Clear the port statistics and forward engine statistics for a given port or for all ports:: + + testpmd> clear port (info|stats|xstats|fdir|stat_qmap) (port_id|all) + +diff --git a/dpdk/doc/guides/windows_gsg/build_dpdk.rst b/dpdk/doc/guides/windows_gsg/build_dpdk.rst +index 6711e07e21..a0e51dfcb3 100644 +--- a/dpdk/doc/guides/windows_gsg/build_dpdk.rst ++++ b/dpdk/doc/guides/windows_gsg/build_dpdk.rst +@@ -7,15 +7,22 @@ Compiling the DPDK Target from Source + System Requirements + ------------------- + +-The DPDK and its applications require the Clang-LLVM C compiler +-and Microsoft MSVC linker. ++Building the DPDK and its applications requires one of the following ++environments: ++ ++* The Clang-LLVM C compiler and Microsoft MSVC linker. ++* The MinGW-w64 toolchain (either native or cross). ++ + The Meson Build system is used to prepare the sources for compilation + with the Ninja backend. + The installation of these tools is covered in this section. + + ++Option 1. Clang-LLVM C Compiler and Microsoft MSVC Linker ++--------------------------------------------------------- ++ + Install the Compiler +--------------------- ++~~~~~~~~~~~~~~~~~~~~ + + Download and install the clang compiler from + `LLVM website <http://releases.llvm.org/download.html>`_. +@@ -25,7 +32,7 @@ For example, Clang-LLVM direct download link:: + + + Install the Linker +------------------- ++~~~~~~~~~~~~~~~~~~ + + Download and install the Build Tools for Visual Studio to link and build the + files on windows, +@@ -34,6 +41,18 @@ When installing build tools, select the "Visual C++ build tools" option + and ensure the Windows SDK is selected. + + ++Option 2. MinGW-w64 Toolchain ++----------------------------- ++ ++Obtain the latest version from ++`MinGW-w64 website <http://mingw-w64.org/doku.php/download>`_. ++On Windows, install to a folder without spaces in its name, like ``C:\MinGW``. ++This path is assumed for the rest of this guide. ++ ++Version 4.0.4 for Ubuntu 16.04 cannot be used due to a ++`MinGW-w64 bug <https://sourceforge.net/p/mingw-w64/bugs/562/>`_. ++ ++ + Install the Build System + ------------------------ + +@@ -43,6 +62,8 @@ A good option to choose is the MSI installer for both meson and ninja together:: + + http://mesonbuild.com/Getting-meson.html#installing-meson-and-ninja-with-the-msi-installer%22 + ++Recommended version is either Meson 0.47.1 (baseline) or the latest release. ++ + Install the Backend + ------------------- + +@@ -56,23 +77,30 @@ Build the code + The build environment is setup to build the EAL and the helloworld example by + default. + +-Using the ninja backend +-~~~~~~~~~~~~~~~~~~~~~~~~ ++Option 1. Native Build on Windows ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-Specifying the compiler might be required to complete the meson command. ++When using Clang-LLVM, specifying the compiler might be required to complete ++the meson command: + + .. 
code-block:: console + + set CC=clang + ++When using MinGW-w64, it is sufficient to have toolchain executables in PATH: ++ ++.. code-block:: console ++ ++ set PATH=C:\MinGW\mingw64\bin;%PATH% ++ + To compile the examples, the flag ``-Dexamples`` is required. + + .. code-block:: console + + cd C:\Users\me\dpdk + meson -Dexamples=helloworld build +- cd build +- ninja ++ ninja -C build ++ + + Run the helloworld example + ========================== +@@ -87,3 +115,8 @@ Navigate to the examples in the build directory and run `dpdk-helloworld.exe`. + hello from core 3 + hello from core 0 + hello from core 2 ++ ++Note for MinGW-w64: applications are linked to ``libwinpthread-1.dll`` ++by default. To run the example, either add toolchain executables directory ++to the PATH or copy the library to the working directory. ++Alternatively, static linking may be used (mind the LGPLv2.1 license). +diff --git a/dpdk/drivers/Makefile b/dpdk/drivers/Makefile +index 7d5da5d9f5..cfc24b2d0b 100644 +--- a/dpdk/drivers/Makefile ++++ b/dpdk/drivers/Makefile +@@ -19,7 +19,7 @@ DEPDIRS-common/qat := bus mempool + DIRS-$(CONFIG_RTE_LIBRTE_COMPRESSDEV) += compress + DEPDIRS-compress := bus mempool + DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += event +-DEPDIRS-event := common bus mempool net ++DEPDIRS-event := common bus mempool net crypto + DIRS-$(CONFIG_RTE_LIBRTE_RAWDEV) += raw + DEPDIRS-raw := common bus mempool net event + +diff --git a/dpdk/drivers/baseband/turbo_sw/bbdev_turbo_software.c b/dpdk/drivers/baseband/turbo_sw/bbdev_turbo_software.c +index f2fe7a2194..e6d9501240 100644 +--- a/dpdk/drivers/baseband/turbo_sw/bbdev_turbo_software.c ++++ b/dpdk/drivers/baseband/turbo_sw/bbdev_turbo_software.c +@@ -218,7 +218,7 @@ info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info) + RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | + RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE, + .llr_size = 8, +- .llr_decimals = 2, ++ .llr_decimals = 4, + .harq_memory_size = 0, + .num_buffers_src = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, +diff --git a/dpdk/drivers/bus/dpaa/rte_dpaa_bus.h b/dpdk/drivers/bus/dpaa/rte_dpaa_bus.h +index 9bf2cd9d68..373aca9785 100644 +--- a/dpdk/drivers/bus/dpaa/rte_dpaa_bus.h ++++ b/dpdk/drivers/bus/dpaa/rte_dpaa_bus.h +@@ -132,7 +132,23 @@ static inline void *rte_dpaa_mem_ptov(phys_addr_t paddr) + } + + /* If not, Fallback to full memseg list searching */ +- return rte_mem_iova2virt(paddr); ++ va = rte_mem_iova2virt(paddr); ++ ++ dpaax_iova_table_update(paddr, va, RTE_CACHE_LINE_SIZE); ++ ++ return va; ++} ++ ++static inline rte_iova_t ++rte_dpaa_mem_vtop(void *vaddr) ++{ ++ const struct rte_memseg *ms; ++ ++ ms = rte_mem_virt2memseg(vaddr, NULL); ++ if (ms) ++ return ms->iova + RTE_PTR_DIFF(vaddr, ms->addr); ++ ++ return (size_t)NULL; + } + + /** +diff --git a/dpdk/drivers/bus/fslmc/mc/fsl_mc_sys.h b/dpdk/drivers/bus/fslmc/mc/fsl_mc_sys.h +index d0c7b39f8d..a310c5697e 100644 +--- a/dpdk/drivers/bus/fslmc/mc/fsl_mc_sys.h ++++ b/dpdk/drivers/bus/fslmc/mc/fsl_mc_sys.h +@@ -32,11 +32,10 @@ struct fsl_mc_io { + #include <sys/uio.h> + #include <linux/byteorder/little_endian.h> + +-#ifndef dmb +-#define dmb() {__asm__ __volatile__("" : : : "memory"); } +-#endif +-#define __iormb() dmb() +-#define __iowmb() dmb() ++#include <rte_atomic.h> ++ ++#define __iormb() rte_io_rmb() ++#define __iowmb() rte_io_wmb() + #define __arch_getq(a) (*(volatile uint64_t *)(a)) + #define __arch_putq(v, a) (*(volatile uint64_t *)(a) = (v)) + #define __arch_putq32(v, a) (*(volatile uint32_t *)(a) = (v)) +diff --git 
a/dpdk/drivers/bus/fslmc/qbman/qbman_debug.c b/dpdk/drivers/bus/fslmc/qbman/qbman_debug.c +index 0bb2ce880f..34374ae4b6 100644 +--- a/dpdk/drivers/bus/fslmc/qbman/qbman_debug.c ++++ b/dpdk/drivers/bus/fslmc/qbman/qbman_debug.c +@@ -20,26 +20,27 @@ struct qbman_fq_query_desc { + uint8_t verb; + uint8_t reserved[3]; + uint32_t fqid; +- uint8_t reserved2[57]; ++ uint8_t reserved2[56]; + }; + + int qbman_fq_query_state(struct qbman_swp *s, uint32_t fqid, + struct qbman_fq_query_np_rslt *r) + { + struct qbman_fq_query_desc *p; ++ struct qbman_fq_query_np_rslt *var; + + p = (struct qbman_fq_query_desc *)qbman_swp_mc_start(s); + if (!p) + return -EBUSY; + + p->fqid = fqid; +- *r = *(struct qbman_fq_query_np_rslt *)qbman_swp_mc_complete(s, p, +- QBMAN_FQ_QUERY_NP); +- if (!r) { ++ var = qbman_swp_mc_complete(s, p, QBMAN_FQ_QUERY_NP); ++ if (!var) { + pr_err("qbman: Query FQID %d NP fields failed, no response\n", + fqid); + return -EIO; + } ++ *r = *var; + + /* Decode the outcome */ + QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != QBMAN_FQ_QUERY_NP); +diff --git a/dpdk/drivers/bus/ifpga/ifpga_bus.c b/dpdk/drivers/bus/ifpga/ifpga_bus.c +index dfd6b1fba9..addbc3e86b 100644 +--- a/dpdk/drivers/bus/ifpga/ifpga_bus.c ++++ b/dpdk/drivers/bus/ifpga/ifpga_bus.c +@@ -24,6 +24,7 @@ + #include <rte_kvargs.h> + #include <rte_alarm.h> + #include <rte_string_fns.h> ++#include <rte_debug.h> + + #include "rte_rawdev.h" + #include "rte_rawdev_pmd.h" +diff --git a/dpdk/drivers/bus/ifpga/rte_bus_ifpga.h b/dpdk/drivers/bus/ifpga/rte_bus_ifpga.h +index 88a6289642..a6eeaaf568 100644 +--- a/dpdk/drivers/bus/ifpga/rte_bus_ifpga.h ++++ b/dpdk/drivers/bus/ifpga/rte_bus_ifpga.h +@@ -17,6 +17,7 @@ extern "C" { + + #include <rte_bus.h> + #include <rte_pci.h> ++#include <rte_interrupts.h> + #include <rte_spinlock.h> + + /** Name of Intel FPGA Bus */ +diff --git a/dpdk/drivers/bus/pci/linux/pci.c b/dpdk/drivers/bus/pci/linux/pci.c +index 740a2cdad4..71b0a3053d 100644 +--- a/dpdk/drivers/bus/pci/linux/pci.c ++++ b/dpdk/drivers/bus/pci/linux/pci.c +@@ -377,6 +377,11 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) + */ + RTE_LOG(ERR, EAL, "Unexpected device scan at %s!\n", + filename); ++ else if (dev2->device.devargs != ++ dev->device.devargs) { ++ rte_devargs_remove(dev2->device.devargs); ++ pci_name_set(dev2); ++ } + } + free(dev); + } +diff --git a/dpdk/drivers/bus/pci/linux/pci_vfio.c b/dpdk/drivers/bus/pci/linux/pci_vfio.c +index 64cd84a689..ba60e7ce99 100644 +--- a/dpdk/drivers/bus/pci/linux/pci_vfio.c ++++ b/dpdk/drivers/bus/pci/linux/pci_vfio.c +@@ -149,6 +149,38 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) + return 0; + } + ++/* enable PCI bus memory space */ ++static int ++pci_vfio_enable_bus_memory(int dev_fd) ++{ ++ uint16_t cmd; ++ int ret; ++ ++ ret = pread64(dev_fd, &cmd, sizeof(cmd), ++ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + ++ PCI_COMMAND); ++ ++ if (ret != sizeof(cmd)) { ++ RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); ++ return -1; ++ } ++ ++ if (cmd & PCI_COMMAND_MEMORY) ++ return 0; ++ ++ cmd |= PCI_COMMAND_MEMORY; ++ ret = pwrite64(dev_fd, &cmd, sizeof(cmd), ++ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + ++ PCI_COMMAND); ++ ++ if (ret != sizeof(cmd)) { ++ RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + /* set PCI bus mastering */ + static int + pci_vfio_set_bus_master(int dev_fd, bool op) +@@ -427,6 +459,11 @@ pci_rte_vfio_setup_device(struct 
rte_pci_device *dev, int vfio_dev_fd) + return -1; + } + ++ if (pci_vfio_enable_bus_memory(vfio_dev_fd)) { ++ RTE_LOG(ERR, EAL, "Cannot enable bus memory!\n"); ++ return -1; ++ } ++ + /* set bus mastering for the device */ + if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { + RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); +diff --git a/dpdk/drivers/bus/pci/pci_common.c b/dpdk/drivers/bus/pci/pci_common.c +index 3f55420769..ab73c009ac 100644 +--- a/dpdk/drivers/bus/pci/pci_common.c ++++ b/dpdk/drivers/bus/pci/pci_common.c +@@ -288,8 +288,8 @@ pci_probe_all_drivers(struct rte_pci_device *dev) + * all registered drivers that have a matching entry in its id_table + * for discovered devices. + */ +-int +-rte_pci_probe(void) ++static int ++pci_probe(void) + { + struct rte_pci_device *dev = NULL; + size_t probed = 0, failed = 0; +@@ -675,7 +675,7 @@ rte_pci_get_iommu_class(void) + struct rte_pci_bus rte_pci_bus = { + .bus = { + .scan = rte_pci_scan, +- .probe = rte_pci_probe, ++ .probe = pci_probe, + .find_device = pci_find_device, + .plug = pci_plug, + .unplug = pci_unplug, +diff --git a/dpdk/drivers/bus/pci/pci_common_uio.c b/dpdk/drivers/bus/pci/pci_common_uio.c +index 7ea73dbc5b..f4dca9da91 100644 +--- a/dpdk/drivers/bus/pci/pci_common_uio.c ++++ b/dpdk/drivers/bus/pci/pci_common_uio.c +@@ -70,6 +70,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev) + } + return -1; + } ++ dev->mem_resource[i].addr = mapaddr; + } + return 0; + } +diff --git a/dpdk/drivers/bus/pci/private.h b/dpdk/drivers/bus/pci/private.h +index a205d4d9f0..af1c7ae5fe 100644 +--- a/dpdk/drivers/bus/pci/private.h ++++ b/dpdk/drivers/bus/pci/private.h +@@ -17,16 +17,6 @@ struct rte_pci_device; + + extern struct rte_pci_bus rte_pci_bus; + +-/** +- * Probe the PCI bus +- * +- * @return +- * - 0 on success. +- * - !0 on error. +- */ +-int +-rte_pci_probe(void); +- + /** + * Scan the content of the PCI bus, and the devices in the devices + * list +diff --git a/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c b/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c +index 10e50c9b5a..5451bfd150 100644 +--- a/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c ++++ b/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c +@@ -165,7 +165,7 @@ vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev, int idx, + dev->resource[idx].addr = mapaddr; + vmbus_map_addr = RTE_PTR_ADD(mapaddr, size); + +- /* Record result of sucessful mapping for use by secondary */ ++ /* Record result of successful mapping for use by secondary */ + maps[idx].addr = mapaddr; + maps[idx].size = size; + +diff --git a/dpdk/drivers/bus/vmbus/vmbus_common.c b/dpdk/drivers/bus/vmbus/vmbus_common.c +index 48a219f735..3adef01c95 100644 +--- a/dpdk/drivers/bus/vmbus/vmbus_common.c ++++ b/dpdk/drivers/bus/vmbus/vmbus_common.c +@@ -131,7 +131,7 @@ vmbus_probe_one_driver(struct rte_vmbus_driver *dr, + } + + /* +- * IF device class GUID mathces, call the probe function of ++ * If device class GUID matches, call the probe function of + * registere drivers for the vmbus device. + * Return -1 if initialization failed, + * and 1 if no driver found for this device. 
+diff --git a/dpdk/drivers/common/cpt/cpt_ucode.h b/dpdk/drivers/common/cpt/cpt_ucode.h +index d5a0135d73..9dc0fc652a 100644 +--- a/dpdk/drivers/common/cpt/cpt_ucode.h ++++ b/dpdk/drivers/common/cpt/cpt_ucode.h +@@ -298,7 +298,7 @@ cpt_fc_ciph_set_key(void *ctx, cipher_type_t type, const uint8_t *key, + cpt_fc_ciph_set_key_kasumi_f8_cbc(cpt_ctx, key, key_len); + goto success; + default: +- break; ++ return -1; + } + + /* Only for FC_GEN case */ +@@ -377,7 +377,7 @@ fill_sg_comp_from_iov(sg_comp_t *list, + { + int32_t j; + uint32_t extra_len = extra_buf ? extra_buf->size : 0; +- uint32_t size = *psize - extra_len; ++ uint32_t size = *psize; + buf_ptr_t *bufs; + + bufs = from->bufs; +@@ -386,9 +386,6 @@ fill_sg_comp_from_iov(sg_comp_t *list, + uint32_t e_len; + sg_comp_t *to = &list[i >> 2]; + +- if (!bufs[j].size) +- continue; +- + if (unlikely(from_offset)) { + if (from_offset >= bufs[j].size) { + from_offset -= bufs[j].size; +@@ -420,18 +417,19 @@ fill_sg_comp_from_iov(sg_comp_t *list, + to->u.s.len[i % 4] = rte_cpu_to_be_16(e_len); + } + ++ extra_len = RTE_MIN(extra_len, size); + /* Insert extra data ptr */ + if (extra_len) { + i++; + to = &list[i >> 2]; + to->u.s.len[i % 4] = +- rte_cpu_to_be_16(extra_buf->size); ++ rte_cpu_to_be_16(extra_len); + to->ptr[i % 4] = + rte_cpu_to_be_64(extra_buf->dma_addr); +- +- /* size already decremented by extra len */ ++ size -= extra_len; + } + ++ next_len = RTE_MIN(next_len, size); + /* insert the rest of the data */ + if (next_len) { + i++; +@@ -2620,10 +2618,13 @@ fill_sess_aead(struct rte_crypto_sym_xform *xform, + sess->iv_length = aead_form->iv.length; + sess->aad_length = aead_form->aad_length; + +- cpt_fc_ciph_set_key(ctx, enc_type, aead_form->key.data, +- aead_form->key.length, NULL); ++ if (unlikely(cpt_fc_ciph_set_key(ctx, enc_type, aead_form->key.data, ++ aead_form->key.length, NULL))) ++ return -1; + +- cpt_fc_auth_set_key(ctx, auth_type, NULL, 0, aead_form->digest_length); ++ if (unlikely(cpt_fc_auth_set_key(ctx, auth_type, NULL, 0, ++ aead_form->digest_length))) ++ return -1; + + return 0; + } +@@ -2723,8 +2724,9 @@ fill_sess_cipher(struct rte_crypto_sym_xform *xform, + sess->iv_length = c_form->iv.length; + sess->is_null = is_null; + +- cpt_fc_ciph_set_key(SESS_PRIV(sess), enc_type, c_form->key.data, +- c_form->key.length, NULL); ++ if (unlikely(cpt_fc_ciph_set_key(SESS_PRIV(sess), enc_type, ++ c_form->key.data, c_form->key.length, NULL))) ++ return -1; + + return 0; + } +@@ -2823,8 +2825,10 @@ fill_sess_auth(struct rte_crypto_sym_xform *xform, + sess->auth_iv_offset = a_form->iv.offset; + sess->auth_iv_length = a_form->iv.length; + } +- cpt_fc_auth_set_key(SESS_PRIV(sess), auth_type, a_form->key.data, +- a_form->key.length, a_form->digest_length); ++ if (unlikely(cpt_fc_auth_set_key(SESS_PRIV(sess), auth_type, ++ a_form->key.data, a_form->key.length, ++ a_form->digest_length))) ++ return -1; + + return 0; + } +@@ -2867,9 +2871,13 @@ fill_sess_gmac(struct rte_crypto_sym_xform *xform, + sess->iv_length = a_form->iv.length; + sess->mac_len = a_form->digest_length; + +- cpt_fc_ciph_set_key(ctx, enc_type, a_form->key.data, +- a_form->key.length, NULL); +- cpt_fc_auth_set_key(ctx, auth_type, NULL, 0, a_form->digest_length); ++ if (unlikely(cpt_fc_ciph_set_key(ctx, enc_type, a_form->key.data, ++ a_form->key.length, NULL))) ++ return -1; ++ ++ if (unlikely(cpt_fc_auth_set_key(ctx, auth_type, NULL, 0, ++ a_form->digest_length))) ++ return -1; + + return 0; + } +diff --git a/dpdk/drivers/common/octeontx/octeontx_mbox.c 
b/dpdk/drivers/common/octeontx/octeontx_mbox.c +index 2fd2531072..effe0b267e 100644 +--- a/dpdk/drivers/common/octeontx/octeontx_mbox.c ++++ b/dpdk/drivers/common/octeontx/octeontx_mbox.c +@@ -279,7 +279,7 @@ octeontx_start_domain(void) + } + + static int +-octeontx_check_mbox_version(struct mbox_intf_ver app_intf_ver, ++octeontx_check_mbox_version(struct mbox_intf_ver *app_intf_ver, + struct mbox_intf_ver *intf_ver) + { + struct mbox_intf_ver kernel_intf_ver = {0}; +@@ -290,8 +290,9 @@ octeontx_check_mbox_version(struct mbox_intf_ver app_intf_ver, + hdr.coproc = NO_COPROC; + hdr.msg = RM_INTERFACE_VERSION; + +- result = octeontx_mbox_send(&hdr, &app_intf_ver, sizeof(app_intf_ver), +- &kernel_intf_ver, sizeof(kernel_intf_ver)); ++ result = octeontx_mbox_send(&hdr, app_intf_ver, ++ sizeof(struct mbox_intf_ver), ++ &kernel_intf_ver, sizeof(kernel_intf_ver)); + if (result != sizeof(kernel_intf_ver)) { + mbox_log_err("Could not send interface version. Err=%d. FuncErr=%d\n", + result, hdr.res_code); +@@ -301,9 +302,9 @@ octeontx_check_mbox_version(struct mbox_intf_ver app_intf_ver, + if (intf_ver) + *intf_ver = kernel_intf_ver; + +- if (app_intf_ver.platform != kernel_intf_ver.platform || +- app_intf_ver.major != kernel_intf_ver.major || +- app_intf_ver.minor != kernel_intf_ver.minor) ++ if (app_intf_ver->platform != kernel_intf_ver.platform || ++ app_intf_ver->major != kernel_intf_ver.major || ++ app_intf_ver->minor != kernel_intf_ver.minor) + result = -EINVAL; + + return result; +@@ -312,7 +313,7 @@ octeontx_check_mbox_version(struct mbox_intf_ver app_intf_ver, + int + octeontx_mbox_init(void) + { +- const struct mbox_intf_ver MBOX_INTERFACE_VERSION = { ++ struct mbox_intf_ver MBOX_INTERFACE_VERSION = { + .platform = 0x01, + .major = 0x01, + .minor = 0x03 +@@ -330,7 +331,7 @@ octeontx_mbox_init(void) + return ret; + } + +- ret = octeontx_check_mbox_version(MBOX_INTERFACE_VERSION, ++ ret = octeontx_check_mbox_version(&MBOX_INTERFACE_VERSION, + &rm_intf_ver); + if (ret < 0) { + mbox_log_err("MBOX version: Kernel(%d.%d.%d) != DPDK(%d.%d.%d)", +diff --git a/dpdk/drivers/common/octeontx2/hw/otx2_npc.h b/dpdk/drivers/common/octeontx2/hw/otx2_npc.h +index a0536e0aed..600084ff31 100644 +--- a/dpdk/drivers/common/octeontx2/hw/otx2_npc.h ++++ b/dpdk/drivers/common/octeontx2/hw/otx2_npc.h +@@ -201,7 +201,8 @@ enum npc_kpu_lb_ltype { + }; + + enum npc_kpu_lc_ltype { +- NPC_LT_LC_IP = 1, ++ NPC_LT_LC_PTP = 1, ++ NPC_LT_LC_IP, + NPC_LT_LC_IP_OPT, + NPC_LT_LC_IP6, + NPC_LT_LC_IP6_EXT, +@@ -209,11 +210,10 @@ enum npc_kpu_lc_ltype { + NPC_LT_LC_RARP, + NPC_LT_LC_MPLS, + NPC_LT_LC_NSH, +- NPC_LT_LC_PTP, + NPC_LT_LC_FCOE, + }; + +-/* Don't modify Ltypes upto SCTP, otherwise it will ++/* Don't modify Ltypes up to SCTP, otherwise it will + * effect flow tag calculation and thus RSS. + */ + enum npc_kpu_ld_ltype { +@@ -260,7 +260,7 @@ enum npc_kpu_lg_ltype { + NPC_LT_LG_TU_ETHER_IN_NSH, + }; + +-/* Don't modify Ltypes upto SCTP, otherwise it will ++/* Don't modify Ltypes up to SCTP, otherwise it will + * effect flow tag calculation and thus RSS. 
+ */ + enum npc_kpu_lh_ltype { +diff --git a/dpdk/drivers/compress/octeontx/otx_zip_pmd.c b/dpdk/drivers/compress/octeontx/otx_zip_pmd.c +index 9e00c86630..bff8ef035e 100644 +--- a/dpdk/drivers/compress/octeontx/otx_zip_pmd.c ++++ b/dpdk/drivers/compress/octeontx/otx_zip_pmd.c +@@ -406,7 +406,7 @@ zip_pmd_qp_setup(struct rte_compressdev *dev, uint16_t qp_id, + + qp->name = name; + +- /* Create completion queue upto max_inflight_ops */ ++ /* Create completion queue up to max_inflight_ops */ + qp->processed_pkts = zip_pmd_qp_create_processed_pkts_ring(qp, + max_inflight_ops, socket_id); + if (qp->processed_pkts == NULL) +diff --git a/dpdk/drivers/compress/zlib/zlib_pmd.c b/dpdk/drivers/compress/zlib/zlib_pmd.c +index 19f9200c22..e39be2ed86 100644 +--- a/dpdk/drivers/compress/zlib/zlib_pmd.c ++++ b/dpdk/drivers/compress/zlib/zlib_pmd.c +@@ -7,6 +7,8 @@ + + #include "zlib_pmd_private.h" + ++int zlib_logtype_driver; ++ + /** Compute next mbuf in the list, assign data buffer and length, + * returns 0 if mbuf is NULL + */ +diff --git a/dpdk/drivers/compress/zlib/zlib_pmd_private.h b/dpdk/drivers/compress/zlib/zlib_pmd_private.h +index bda49759dc..e36c5dc615 100644 +--- a/dpdk/drivers/compress/zlib/zlib_pmd_private.h ++++ b/dpdk/drivers/compress/zlib/zlib_pmd_private.h +@@ -14,7 +14,7 @@ + + #define DEF_MEM_LEVEL 8 + +-int zlib_logtype_driver; ++extern int zlib_logtype_driver; + #define ZLIB_PMD_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, zlib_logtype_driver, "%s(): "fmt "\n", \ + __func__, ##args) +diff --git a/dpdk/drivers/crypto/aesni_gcm/Makefile b/dpdk/drivers/crypto/aesni_gcm/Makefile +index d8190a2ff4..b443167d51 100644 +--- a/dpdk/drivers/crypto/aesni_gcm/Makefile ++++ b/dpdk/drivers/crypto/aesni_gcm/Makefile +@@ -20,7 +20,8 @@ LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring + LDLIBS += -lrte_cryptodev + LDLIBS += -lrte_bus_vdev + +-IMB_HDR = $(shell echo '\#include <intel-ipsec-mb.h>' | \ ++H := \# ++IMB_HDR = $(shell echo '$Hinclude <intel-ipsec-mb.h>' | \ + $(CC) -E $(EXTRA_CFLAGS) - | grep 'intel-ipsec-mb.h' | \ + head -n1 | cut -d'"' -f2) + +diff --git a/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c b/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c +index 1a03be31dc..c9c9eb8ca9 100644 +--- a/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c ++++ b/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c +@@ -13,6 +13,8 @@ + + #include "aesni_gcm_pmd_private.h" + ++int aesni_gcm_logtype_driver; ++ + static uint8_t cryptodev_driver_id; + + /** Parse crypto xform chain and set private session parameters */ +diff --git a/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd_private.h b/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd_private.h +index 2039adb533..7347c4769f 100644 +--- a/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd_private.h ++++ b/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd_private.h +@@ -20,7 +20,7 @@ + /**< AES-NI GCM PMD device name */ + + /** AES-NI GCM PMD LOGTYPE DRIVER */ +-int aesni_gcm_logtype_driver; ++extern int aesni_gcm_logtype_driver; + #define AESNI_GCM_LOG(level, fmt, ...) 
\ + rte_log(RTE_LOG_ ## level, aesni_gcm_logtype_driver, \ + "%s() line %u: "fmt "\n", __func__, __LINE__, \ +diff --git a/dpdk/drivers/crypto/aesni_mb/Makefile b/dpdk/drivers/crypto/aesni_mb/Makefile +index f1530e74c4..aa2e428106 100644 +--- a/dpdk/drivers/crypto/aesni_mb/Makefile ++++ b/dpdk/drivers/crypto/aesni_mb/Makefile +@@ -20,7 +20,8 @@ LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring + LDLIBS += -lrte_cryptodev + LDLIBS += -lrte_bus_vdev + +-IMB_HDR = $(shell echo '\#include <intel-ipsec-mb.h>' | \ ++H := \# ++IMB_HDR = $(shell echo '$Hinclude <intel-ipsec-mb.h>' | \ + $(CC) -E $(EXTRA_CFLAGS) - | grep 'intel-ipsec-mb.h' | \ + head -n1 | cut -d'"' -f2) + +diff --git a/dpdk/drivers/crypto/aesni_mb/aesni_mb_pmd_private.h b/dpdk/drivers/crypto/aesni_mb/aesni_mb_pmd_private.h +index 3456693c25..b3cb2f1cf9 100644 +--- a/dpdk/drivers/crypto/aesni_mb/aesni_mb_pmd_private.h ++++ b/dpdk/drivers/crypto/aesni_mb/aesni_mb_pmd_private.h +@@ -19,7 +19,7 @@ enum aesni_mb_vector_mode { + /**< AES-NI Multi buffer PMD device name */ + + /** AESNI_MB PMD LOGTYPE DRIVER */ +-int aesni_mb_logtype_driver; ++extern int aesni_mb_logtype_driver; + + #define AESNI_MB_LOG(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, aesni_mb_logtype_driver, \ +diff --git a/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c b/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c +index 97d9f81766..40feae3c62 100644 +--- a/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c ++++ b/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c +@@ -14,6 +14,8 @@ + + #include "aesni_mb_pmd_private.h" + ++int aesni_mb_logtype_driver; ++ + #define AES_CCM_DIGEST_MIN_LEN 4 + #define AES_CCM_DIGEST_MAX_LEN 16 + #define HMAC_MAX_BLOCK_SIZE 128 +@@ -729,10 +731,10 @@ get_session(struct aesni_mb_qp *qp, struct rte_crypto_op *op) + op->sym->session, + cryptodev_driver_id); + } else { +- void *_sess = NULL; ++ void *_sess = rte_cryptodev_sym_session_create(qp->sess_mp); + void *_sess_private_data = NULL; + +- if (rte_mempool_get(qp->sess_mp, (void **)&_sess)) ++ if (_sess == NULL) + return NULL; + + if (rte_mempool_get(qp->sess_mp_priv, +diff --git a/dpdk/drivers/crypto/armv8/rte_armv8_pmd.c b/dpdk/drivers/crypto/armv8/rte_armv8_pmd.c +index 7dc83e69e1..fd3093315a 100644 +--- a/dpdk/drivers/crypto/armv8/rte_armv8_pmd.c ++++ b/dpdk/drivers/crypto/armv8/rte_armv8_pmd.c +@@ -84,12 +84,12 @@ crypto_op_ca_encrypt = { + + static const crypto_func_tbl_t + crypto_op_ca_decrypt = { +- NULL ++ { {NULL} } + }; + + static const crypto_func_tbl_t + crypto_op_ac_encrypt = { +- NULL ++ { {NULL} } + }; + + static const crypto_func_tbl_t +@@ -369,7 +369,16 @@ armv8_crypto_set_session_chained_parameters(struct armv8_crypto_session *sess, + /* Select cipher key */ + sess->cipher.key.length = cipher_xform->cipher.key.length; + /* Set cipher direction */ +- cop = sess->cipher.direction; ++ switch (sess->cipher.direction) { ++ case RTE_CRYPTO_CIPHER_OP_ENCRYPT: ++ cop = ARMV8_CRYPTO_CIPHER_OP_ENCRYPT; ++ break; ++ case RTE_CRYPTO_CIPHER_OP_DECRYPT: ++ cop = ARMV8_CRYPTO_CIPHER_OP_DECRYPT; ++ break; ++ default: ++ return -ENOTSUP; ++ } + /* Set cipher algorithm */ + calg = cipher_xform->cipher.algo; + +diff --git a/dpdk/drivers/crypto/caam_jr/Makefile b/dpdk/drivers/crypto/caam_jr/Makefile +index 1b1f25a2a2..5b27b84c09 100644 +--- a/dpdk/drivers/crypto/caam_jr/Makefile ++++ b/dpdk/drivers/crypto/caam_jr/Makefile +@@ -16,6 +16,13 @@ CFLAGS += -D _GNU_SOURCE + CFLAGS += -O3 + CFLAGS += $(WERROR_FLAGS) + ++# FIXME: temporary solution for Bugzilla 469 ++ifeq 
($(CONFIG_RTE_TOOLCHAIN_GCC),y) ++ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1) ++CFLAGS += -fcommon ++endif ++endif ++ + CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa/include + CFLAGS += -I$(RTE_SDK)/drivers/common/dpaax + CFLAGS += -I$(RTE_SDK)/drivers/common/dpaax/caamflib/ +diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr.c b/dpdk/drivers/crypto/caam_jr/caam_jr.c +index 8aaa3d45f6..86aa9a103e 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr.c ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr.c +@@ -2084,7 +2084,7 @@ static struct rte_security_ops caam_jr_security_ops = { + static void + close_job_ring(struct sec_job_ring_t *job_ring) + { +- if (job_ring->irq_fd) { ++ if (job_ring->irq_fd != -1) { + /* Producer index is frozen. If consumer index is not equal + * with producer index, then we have descs to flush. + */ +@@ -2093,7 +2093,7 @@ close_job_ring(struct sec_job_ring_t *job_ring) + + /* free the uio job ring */ + free_job_ring(job_ring->irq_fd); +- job_ring->irq_fd = 0; ++ job_ring->irq_fd = -1; + caam_jr_dma_free(job_ring->input_ring); + caam_jr_dma_free(job_ring->output_ring); + g_job_rings_no--; +@@ -2197,7 +2197,7 @@ caam_jr_dev_uninit(struct rte_cryptodev *dev) + * + */ + static void * +-init_job_ring(void *reg_base_addr, uint32_t irq_id) ++init_job_ring(void *reg_base_addr, int irq_id) + { + struct sec_job_ring_t *job_ring = NULL; + int i, ret = 0; +@@ -2207,7 +2207,7 @@ init_job_ring(void *reg_base_addr, uint32_t irq_id) + int irq_coalescing_count = 0; + + for (i = 0; i < MAX_SEC_JOB_RINGS; i++) { +- if (g_job_rings[i].irq_fd == 0) { ++ if (g_job_rings[i].irq_fd == -1) { + job_ring = &g_job_rings[i]; + g_job_rings_no++; + break; +@@ -2460,6 +2460,15 @@ cryptodev_caam_jr_remove(struct rte_vdev_device *vdev) + return rte_cryptodev_pmd_destroy(cryptodev); + } + ++static void ++sec_job_rings_init(void) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_SEC_JOB_RINGS; i++) ++ g_job_rings[i].irq_fd = -1; ++} ++ + static struct rte_vdev_driver cryptodev_caam_jr_drv = { + .probe = cryptodev_caam_jr_probe, + .remove = cryptodev_caam_jr_remove +@@ -2474,6 +2483,12 @@ RTE_PMD_REGISTER_PARAM_STRING(CRYPTODEV_NAME_CAAM_JR_PMD, + RTE_PMD_REGISTER_CRYPTO_DRIVER(caam_jr_crypto_drv, cryptodev_caam_jr_drv.driver, + cryptodev_driver_id); + ++RTE_INIT(caam_jr_init) ++{ ++ sec_uio_job_rings_init(); ++ sec_job_rings_init(); ++} ++ + RTE_INIT(caam_jr_init_log) + { + caam_jr_logtype = rte_log_register("pmd.crypto.caam"); +diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h b/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h +index 5f58a585d7..bbe8bc3f90 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h +@@ -360,7 +360,7 @@ struct sec_job_ring_t { + * bitwise operations. 
+ */ + +- uint32_t irq_fd; /* The file descriptor used for polling from ++ int irq_fd; /* The file descriptor used for polling from + * user space for interrupts notifications + */ + uint32_t jr_mode; /* Model used by SEC Driver to receive +diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h b/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h +index 98cd4438aa..552d6b9b1b 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h +@@ -216,7 +216,7 @@ calc_chksum(void *buffer, int len) + } + struct uio_job_ring { + uint32_t jr_id; +- uint32_t uio_fd; ++ int uio_fd; + void *register_base_addr; + int map_size; + int uio_minor_number; +@@ -224,8 +224,9 @@ struct uio_job_ring { + + int sec_cleanup(void); + int sec_configure(void); ++void sec_uio_job_rings_init(void); + struct uio_job_ring *config_job_ring(void); +-void free_job_ring(uint32_t uio_fd); ++void free_job_ring(int uio_fd); + + /* For Dma memory allocation of specified length and alignment */ + static inline void * +@@ -279,7 +280,7 @@ static inline rte_iova_t caam_jr_dma_vtop(void *ptr) + * @retval 0 for success + * @retval -1 value for error + */ +-uint32_t caam_jr_enable_irqs(uint32_t uio_fd); ++int caam_jr_enable_irqs(int uio_fd); + + /** @brief Request to SEC kernel driver to disable interrupts for descriptor + * finished processing +@@ -292,6 +293,6 @@ uint32_t caam_jr_enable_irqs(uint32_t uio_fd); + * @retval -1 value for error + * + */ +-uint32_t caam_jr_disable_irqs(uint32_t uio_fd); ++int caam_jr_disable_irqs(int uio_fd); + + #endif +diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c b/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c +index b1bb44ca42..e4ee102344 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c +@@ -145,7 +145,7 @@ file_read_first_line(const char root[], const char subdir[], + "%s/%s/%s", root, subdir, filename); + + fd = open(absolute_file_name, O_RDONLY); +- SEC_ASSERT(fd > 0, fd, "Error opening file %s", ++ SEC_ASSERT(fd >= 0, fd, "Error opening file %s", + absolute_file_name); + + /* read UIO device name from first line in file */ +@@ -179,7 +179,7 @@ file_read_first_line(const char root[], const char subdir[], + * kernel driver as well. No special return values are used. 
+ */ + static int +-sec_uio_send_command(uint32_t uio_fd, int32_t uio_command) ++sec_uio_send_command(int uio_fd, int32_t uio_command) + { + int ret; + +@@ -201,8 +201,8 @@ sec_uio_send_command(uint32_t uio_fd, int32_t uio_command) + * @retval 0 for success + * @retval -1 value for error + */ +-uint32_t +-caam_jr_enable_irqs(uint32_t uio_fd) ++int ++caam_jr_enable_irqs(int uio_fd) + { + int ret; + +@@ -232,8 +232,8 @@ caam_jr_enable_irqs(uint32_t uio_fd) + * @retval -1 value for error + * + */ +-uint32_t +-caam_jr_disable_irqs(uint32_t uio_fd) ++int ++caam_jr_disable_irqs(int uio_fd) + { + int ret; + +@@ -322,12 +322,12 @@ uio_map_registers(int uio_device_fd, int uio_device_id, + } + + void +-free_job_ring(uint32_t uio_fd) ++free_job_ring(int uio_fd) + { + struct uio_job_ring *job_ring = NULL; + int i; + +- if (!uio_fd) ++ if (uio_fd == -1) + return; + + for (i = 0; i < MAX_SEC_JOB_RINGS; i++) { +@@ -347,7 +347,7 @@ free_job_ring(uint32_t uio_fd) + job_ring->jr_id, job_ring->uio_fd); + close(job_ring->uio_fd); + g_uio_jr_num--; +- job_ring->uio_fd = 0; ++ job_ring->uio_fd = -1; + if (job_ring->register_base_addr == NULL) + return; + +@@ -370,7 +370,7 @@ uio_job_ring *config_job_ring(void) + int i; + + for (i = 0; i < MAX_SEC_JOB_RINGS; i++) { +- if (g_uio_job_ring[i].uio_fd == 0) { ++ if (g_uio_job_ring[i].uio_fd == -1) { + job_ring = &g_uio_job_ring[i]; + g_uio_jr_num++; + break; +@@ -389,7 +389,7 @@ uio_job_ring *config_job_ring(void) + + /* Open device file */ + job_ring->uio_fd = open(uio_device_file_name, O_RDWR); +- SEC_ASSERT(job_ring->uio_fd > 0, NULL, ++ SEC_ASSERT(job_ring->uio_fd >= 0, NULL, + "Failed to open UIO device file for job ring %d", + job_ring->jr_id); + +@@ -488,12 +488,22 @@ sec_cleanup(void) + /* I need to close the fd after shutdown UIO commands need to be + * sent using the fd + */ +- if (job_ring->uio_fd != 0) { ++ if (job_ring->uio_fd != -1) { + CAAM_JR_INFO( + "Closed device file for job ring %d , fd = %d", + job_ring->jr_id, job_ring->uio_fd); + close(job_ring->uio_fd); ++ job_ring->uio_fd = -1; + } + } + return 0; + } ++ ++void ++sec_uio_job_rings_init(void) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_SEC_JOB_RINGS; i++) ++ g_uio_job_ring[i].uio_fd = -1; ++} +diff --git a/dpdk/drivers/crypto/caam_jr/meson.build b/dpdk/drivers/crypto/caam_jr/meson.build +index 50132aebef..0dbfa2ee9c 100644 +--- a/dpdk/drivers/crypto/caam_jr/meson.build ++++ b/dpdk/drivers/crypto/caam_jr/meson.build +@@ -14,6 +14,11 @@ sources = files('caam_jr_capabilities.c', + + allow_experimental_apis = true + ++# FIXME: temporary solution for Bugzilla 469 ++if (toolchain == 'gcc' and cc.version().version_compare('>=10.0.0')) ++ cflags += '-fcommon' ++endif ++ + includes += include_directories('../../bus/dpaa/include/') + includes += include_directories('../../common/dpaax/') + includes += include_directories('../../common/dpaax/caamflib/') +diff --git a/dpdk/drivers/crypto/ccp/ccp_dev.c b/dpdk/drivers/crypto/ccp/ccp_dev.c +index 80fe6a4533..7d98b2eb25 100644 +--- a/dpdk/drivers/crypto/ccp/ccp_dev.c ++++ b/dpdk/drivers/crypto/ccp/ccp_dev.c +@@ -760,7 +760,7 @@ ccp_probe_device(const char *dirname, uint16_t domain, + return 0; + fail: + CCP_LOG_ERR("CCP Device probe failed"); +- if (uio_fd > 0) ++ if (uio_fd >= 0) + close(uio_fd); + if (ccp_dev) + rte_free(ccp_dev); +diff --git a/dpdk/drivers/crypto/ccp/ccp_dev.h b/dpdk/drivers/crypto/ccp/ccp_dev.h +index f4ad9eafd5..37e04218ce 100644 +--- a/dpdk/drivers/crypto/ccp/ccp_dev.h ++++ b/dpdk/drivers/crypto/ccp/ccp_dev.h +@@ -220,7 +220,7 @@ struct 
ccp_queue { + /**< lsb assigned for sha ctx */ + uint32_t sb_hmac; + /**< lsb assigned for hmac ctx */ +-} ____cacheline_aligned; ++} __rte_cache_aligned; + + /** + * A structure describing a CCP device. +diff --git a/dpdk/drivers/crypto/dpaa2_sec/Makefile b/dpdk/drivers/crypto/dpaa2_sec/Makefile +index 96b9c78435..183e9412ae 100644 +--- a/dpdk/drivers/crypto/dpaa2_sec/Makefile ++++ b/dpdk/drivers/crypto/dpaa2_sec/Makefile +@@ -20,6 +20,13 @@ CFLAGS += -Wno-implicit-fallthrough + endif + endif + ++# FIXME: temporary solution for Bugzilla 469 ++ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) ++ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1) ++CFLAGS += -fcommon ++endif ++endif ++ + CFLAGS += -I$(RTE_SDK)/drivers/common/dpaax + CFLAGS += -I$(RTE_SDK)/drivers/common/dpaax/caamflib + CFLAGS += -I$(RTE_SDK)/drivers/crypto/dpaa2_sec/ +diff --git a/dpdk/drivers/crypto/dpaa2_sec/meson.build b/dpdk/drivers/crypto/dpaa2_sec/meson.build +index ab9c8c8bf9..3f1dfd67da 100644 +--- a/dpdk/drivers/crypto/dpaa2_sec/meson.build ++++ b/dpdk/drivers/crypto/dpaa2_sec/meson.build +@@ -12,4 +12,9 @@ sources = files('dpaa2_sec_dpseci.c', + + allow_experimental_apis = true + ++# FIXME: temporary solution for Bugzilla 469 ++if (toolchain == 'gcc' and cc.version().version_compare('>=10.0.0')) ++ cflags += '-fcommon' ++endif ++ + includes += include_directories('mc', '../../common/dpaax', '../../common/dpaax/caamflib') +diff --git a/dpdk/drivers/crypto/dpaa_sec/Makefile b/dpdk/drivers/crypto/dpaa_sec/Makefile +index fbfd775855..b5a97b9f6e 100644 +--- a/dpdk/drivers/crypto/dpaa_sec/Makefile ++++ b/dpdk/drivers/crypto/dpaa_sec/Makefile +@@ -14,6 +14,13 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API + CFLAGS += -O3 + CFLAGS += $(WERROR_FLAGS) + ++# FIXME: temporary solution for Bugzilla 469 ++ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) ++ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1) ++CFLAGS += -fcommon ++endif ++endif ++ + CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa + CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa/include + CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa/base/qbman +diff --git a/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.c b/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.c +index e0b307cecd..df684d2654 100644 +--- a/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.c ++++ b/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.c +@@ -94,31 +94,6 @@ dpaa_sec_alloc_ctx(dpaa_sec_session *ses, int sg_count) + return ctx; + } + +-static inline rte_iova_t +-dpaa_mem_vtop(void *vaddr) +-{ +- const struct rte_memseg *ms; +- +- ms = rte_mem_virt2memseg(vaddr, NULL); +- if (ms) { +- dpaax_iova_table_update(ms->iova, ms->addr, ms->len); +- return ms->iova + RTE_PTR_DIFF(vaddr, ms->addr); +- } +- return (size_t)NULL; +-} +- +-static inline void * +-dpaa_mem_ptov(rte_iova_t paddr) +-{ +- void *va; +- +- va = (void *)dpaax_iova_table_get_va(paddr); +- if (likely(va)) +- return va; +- +- return rte_mem_iova2virt(paddr); +-} +- + static void + ern_sec_fq_handler(struct qman_portal *qm __rte_unused, + struct qman_fq *fq, +@@ -183,7 +158,7 @@ dqrr_out_fq_cb_rx(struct qman_portal *qm __always_unused, + * sg[0] is for output + * sg[1] for input + */ +- job = dpaa_mem_ptov(qm_fd_addr_get64(fd)); ++ job = rte_dpaa_mem_ptov(qm_fd_addr_get64(fd)); + + ctx = container_of(job, struct dpaa_sec_op_ctx, job); + ctx->fd_status = fd->status; +@@ -304,12 +279,12 @@ dpaa_sec_prep_pdcp_cdb(dpaa_sec_session *ses) + + if (!(cdb->sh_desc[2] & 1) && cipherdata.keylen) { + cipherdata.key = +- (size_t)dpaa_mem_vtop((void *)(size_t)cipherdata.key); ++ (size_t)rte_dpaa_mem_vtop((void *)(size_t)cipherdata.key); 
+ cipherdata.key_type = RTA_DATA_PTR; + } + if (!(cdb->sh_desc[2] & (1 << 1)) && authdata.keylen) { + authdata.key = +- (size_t)dpaa_mem_vtop((void *)(size_t)authdata.key); ++ (size_t)rte_dpaa_mem_vtop((void *)(size_t)authdata.key); + authdata.key_type = RTA_DATA_PTR; + } + +@@ -405,14 +380,14 @@ dpaa_sec_prep_ipsec_cdb(dpaa_sec_session *ses) + if (cdb->sh_desc[2] & 1) + cipherdata.key_type = RTA_DATA_IMM; + else { +- cipherdata.key = (size_t)dpaa_mem_vtop( ++ cipherdata.key = (size_t)rte_dpaa_mem_vtop( + (void *)(size_t)cipherdata.key); + cipherdata.key_type = RTA_DATA_PTR; + } + if (cdb->sh_desc[2] & (1<<1)) + authdata.key_type = RTA_DATA_IMM; + else { +- authdata.key = (size_t)dpaa_mem_vtop( ++ authdata.key = (size_t)rte_dpaa_mem_vtop( + (void *)(size_t)authdata.key); + authdata.key_type = RTA_DATA_PTR; + } +@@ -591,14 +566,14 @@ dpaa_sec_prep_cdb(dpaa_sec_session *ses) + if (cdb->sh_desc[2] & 1) + alginfo_c.key_type = RTA_DATA_IMM; + else { +- alginfo_c.key = (size_t)dpaa_mem_vtop( ++ alginfo_c.key = (size_t)rte_dpaa_mem_vtop( + (void *)(size_t)alginfo_c.key); + alginfo_c.key_type = RTA_DATA_PTR; + } + if (cdb->sh_desc[2] & (1<<1)) + alginfo_a.key_type = RTA_DATA_IMM; + else { +- alginfo_a.key = (size_t)dpaa_mem_vtop( ++ alginfo_a.key = (size_t)rte_dpaa_mem_vtop( + (void *)(size_t)alginfo_a.key); + alginfo_a.key_type = RTA_DATA_PTR; + } +@@ -674,7 +649,7 @@ dpaa_sec_deq(struct dpaa_sec_qp *qp, struct rte_crypto_op **ops, int nb_ops) + * sg[0] is for output + * sg[1] for input + */ +- job = dpaa_mem_ptov(qm_fd_addr_get64(fd)); ++ job = rte_dpaa_mem_ptov(qm_fd_addr_get64(fd)); + + ctx = container_of(job, struct dpaa_sec_op_ctx, job); + ctx->fd_status = fd->status; +@@ -768,7 +743,7 @@ build_auth_only_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + in_sg->extension = 1; + in_sg->final = 1; + in_sg->length = data_len; +- qm_sg_entry_set64(in_sg, dpaa_mem_vtop(&cf->sg[2])); ++ qm_sg_entry_set64(in_sg, rte_dpaa_mem_vtop(&cf->sg[2])); + + /* 1st seg */ + sg = in_sg + 1; +@@ -788,7 +763,7 @@ build_auth_only_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + } else { + sg->length = ses->iv.length; + } +- qm_sg_entry_set64(sg, dpaa_mem_vtop(iv_ptr)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(iv_ptr)); + in_sg->length += sg->length; + cpu_to_hw_sg(sg); + sg++; +@@ -821,7 +796,7 @@ build_auth_only_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + sg++; + rte_memcpy(old_digest, sym->auth.digest.data, + ses->digest_length); +- start_addr = dpaa_mem_vtop(old_digest); ++ start_addr = rte_dpaa_mem_vtop(old_digest); + qm_sg_entry_set64(sg, start_addr); + sg->length = ses->digest_length; + in_sg->length += ses->digest_length; +@@ -888,7 +863,7 @@ build_auth_only(struct rte_crypto_op *op, dpaa_sec_session *ses) + in_sg->extension = 1; + in_sg->final = 1; + in_sg->length = data_len; +- qm_sg_entry_set64(in_sg, dpaa_mem_vtop(&cf->sg[2])); ++ qm_sg_entry_set64(in_sg, rte_dpaa_mem_vtop(&cf->sg[2])); + sg = &cf->sg[2]; + + if (ses->iv.length) { +@@ -906,7 +881,7 @@ build_auth_only(struct rte_crypto_op *op, dpaa_sec_session *ses) + } else { + sg->length = ses->iv.length; + } +- qm_sg_entry_set64(sg, dpaa_mem_vtop(iv_ptr)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(iv_ptr)); + in_sg->length += sg->length; + cpu_to_hw_sg(sg); + sg++; +@@ -923,7 +898,7 @@ build_auth_only(struct rte_crypto_op *op, dpaa_sec_session *ses) + rte_memcpy(old_digest, sym->auth.digest.data, + ses->digest_length); + /* let's check digest by hw */ +- start_addr = dpaa_mem_vtop(old_digest); ++ start_addr = 
rte_dpaa_mem_vtop(old_digest); + sg++; + qm_sg_entry_set64(sg, start_addr); + sg->length = ses->digest_length; +@@ -987,7 +962,7 @@ build_cipher_only_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + out_sg = &cf->sg[0]; + out_sg->extension = 1; + out_sg->length = data_len; +- qm_sg_entry_set64(out_sg, dpaa_mem_vtop(&cf->sg[2])); ++ qm_sg_entry_set64(out_sg, rte_dpaa_mem_vtop(&cf->sg[2])); + cpu_to_hw_sg(out_sg); + + /* 1st seg */ +@@ -1016,11 +991,11 @@ build_cipher_only_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + in_sg->length = data_len + ses->iv.length; + + sg++; +- qm_sg_entry_set64(in_sg, dpaa_mem_vtop(sg)); ++ qm_sg_entry_set64(in_sg, rte_dpaa_mem_vtop(sg)); + cpu_to_hw_sg(in_sg); + + /* IV */ +- qm_sg_entry_set64(sg, dpaa_mem_vtop(IV_ptr)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(IV_ptr)); + sg->length = ses->iv.length; + cpu_to_hw_sg(sg); + +@@ -1098,11 +1073,11 @@ build_cipher_only(struct rte_crypto_op *op, dpaa_sec_session *ses) + sg->extension = 1; + sg->final = 1; + sg->length = data_len + ses->iv.length; +- qm_sg_entry_set64(sg, dpaa_mem_vtop(&cf->sg[2])); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(&cf->sg[2])); + cpu_to_hw_sg(sg); + + sg = &cf->sg[2]; +- qm_sg_entry_set64(sg, dpaa_mem_vtop(IV_ptr)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(IV_ptr)); + sg->length = ses->iv.length; + cpu_to_hw_sg(sg); + +@@ -1163,7 +1138,7 @@ build_cipher_auth_gcm_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + + /* output sg entries */ + sg = &cf->sg[2]; +- qm_sg_entry_set64(out_sg, dpaa_mem_vtop(sg)); ++ qm_sg_entry_set64(out_sg, rte_dpaa_mem_vtop(sg)); + cpu_to_hw_sg(out_sg); + + /* 1st seg */ +@@ -1206,18 +1181,18 @@ build_cipher_auth_gcm_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + + /* input sg entries */ + sg++; +- qm_sg_entry_set64(in_sg, dpaa_mem_vtop(sg)); ++ qm_sg_entry_set64(in_sg, rte_dpaa_mem_vtop(sg)); + cpu_to_hw_sg(in_sg); + + /* 1st seg IV */ +- qm_sg_entry_set64(sg, dpaa_mem_vtop(IV_ptr)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(IV_ptr)); + sg->length = ses->iv.length; + cpu_to_hw_sg(sg); + + /* 2nd seg auth only */ + if (ses->auth_only_len) { + sg++; +- qm_sg_entry_set64(sg, dpaa_mem_vtop(sym->aead.aad.data)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(sym->aead.aad.data)); + sg->length = ses->auth_only_len; + cpu_to_hw_sg(sg); + } +@@ -1243,7 +1218,7 @@ build_cipher_auth_gcm_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + sg++; + memcpy(ctx->digest, sym->aead.digest.data, + ses->digest_length); +- qm_sg_entry_set64(sg, dpaa_mem_vtop(ctx->digest)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(ctx->digest)); + sg->length = ses->digest_length; + } + sg->final = 1; +@@ -1281,9 +1256,9 @@ build_cipher_auth_gcm(struct rte_crypto_op *op, dpaa_sec_session *ses) + /* input */ + rte_prefetch0(cf->sg); + sg = &cf->sg[2]; +- qm_sg_entry_set64(&cf->sg[1], dpaa_mem_vtop(sg)); ++ qm_sg_entry_set64(&cf->sg[1], rte_dpaa_mem_vtop(sg)); + if (is_encode(ses)) { +- qm_sg_entry_set64(sg, dpaa_mem_vtop(IV_ptr)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(IV_ptr)); + sg->length = ses->iv.length; + length += sg->length; + cpu_to_hw_sg(sg); +@@ -1291,7 +1266,7 @@ build_cipher_auth_gcm(struct rte_crypto_op *op, dpaa_sec_session *ses) + sg++; + if (ses->auth_only_len) { + qm_sg_entry_set64(sg, +- dpaa_mem_vtop(sym->aead.aad.data)); ++ rte_dpaa_mem_vtop(sym->aead.aad.data)); + sg->length = ses->auth_only_len; + length += sg->length; + cpu_to_hw_sg(sg); +@@ -1303,7 +1278,7 @@ build_cipher_auth_gcm(struct rte_crypto_op *op, dpaa_sec_session *ses) + sg->final = 1; 
+ cpu_to_hw_sg(sg); + } else { +- qm_sg_entry_set64(sg, dpaa_mem_vtop(IV_ptr)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(IV_ptr)); + sg->length = ses->iv.length; + length += sg->length; + cpu_to_hw_sg(sg); +@@ -1311,7 +1286,7 @@ build_cipher_auth_gcm(struct rte_crypto_op *op, dpaa_sec_session *ses) + sg++; + if (ses->auth_only_len) { + qm_sg_entry_set64(sg, +- dpaa_mem_vtop(sym->aead.aad.data)); ++ rte_dpaa_mem_vtop(sym->aead.aad.data)); + sg->length = ses->auth_only_len; + length += sg->length; + cpu_to_hw_sg(sg); +@@ -1326,7 +1301,7 @@ build_cipher_auth_gcm(struct rte_crypto_op *op, dpaa_sec_session *ses) + ses->digest_length); + sg++; + +- qm_sg_entry_set64(sg, dpaa_mem_vtop(ctx->digest)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(ctx->digest)); + sg->length = ses->digest_length; + length += sg->length; + sg->final = 1; +@@ -1340,7 +1315,7 @@ build_cipher_auth_gcm(struct rte_crypto_op *op, dpaa_sec_session *ses) + + /* output */ + sg++; +- qm_sg_entry_set64(&cf->sg[0], dpaa_mem_vtop(sg)); ++ qm_sg_entry_set64(&cf->sg[0], rte_dpaa_mem_vtop(sg)); + qm_sg_entry_set64(sg, + dst_start_addr + sym->aead.data.offset); + sg->length = sym->aead.data.length; +@@ -1409,7 +1384,7 @@ build_cipher_auth_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + + /* output sg entries */ + sg = &cf->sg[2]; +- qm_sg_entry_set64(out_sg, dpaa_mem_vtop(sg)); ++ qm_sg_entry_set64(out_sg, rte_dpaa_mem_vtop(sg)); + cpu_to_hw_sg(out_sg); + + /* 1st seg */ +@@ -1451,11 +1426,11 @@ build_cipher_auth_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + + /* input sg entries */ + sg++; +- qm_sg_entry_set64(in_sg, dpaa_mem_vtop(sg)); ++ qm_sg_entry_set64(in_sg, rte_dpaa_mem_vtop(sg)); + cpu_to_hw_sg(in_sg); + + /* 1st seg IV */ +- qm_sg_entry_set64(sg, dpaa_mem_vtop(IV_ptr)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(IV_ptr)); + sg->length = ses->iv.length; + cpu_to_hw_sg(sg); + +@@ -1481,7 +1456,7 @@ build_cipher_auth_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + sg++; + memcpy(ctx->digest, sym->auth.digest.data, + ses->digest_length); +- qm_sg_entry_set64(sg, dpaa_mem_vtop(ctx->digest)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(ctx->digest)); + sg->length = ses->digest_length; + } + sg->final = 1; +@@ -1518,9 +1493,9 @@ build_cipher_auth(struct rte_crypto_op *op, dpaa_sec_session *ses) + /* input */ + rte_prefetch0(cf->sg); + sg = &cf->sg[2]; +- qm_sg_entry_set64(&cf->sg[1], dpaa_mem_vtop(sg)); ++ qm_sg_entry_set64(&cf->sg[1], rte_dpaa_mem_vtop(sg)); + if (is_encode(ses)) { +- qm_sg_entry_set64(sg, dpaa_mem_vtop(IV_ptr)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(IV_ptr)); + sg->length = ses->iv.length; + length += sg->length; + cpu_to_hw_sg(sg); +@@ -1532,7 +1507,7 @@ build_cipher_auth(struct rte_crypto_op *op, dpaa_sec_session *ses) + sg->final = 1; + cpu_to_hw_sg(sg); + } else { +- qm_sg_entry_set64(sg, dpaa_mem_vtop(IV_ptr)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(IV_ptr)); + sg->length = ses->iv.length; + length += sg->length; + cpu_to_hw_sg(sg); +@@ -1548,7 +1523,7 @@ build_cipher_auth(struct rte_crypto_op *op, dpaa_sec_session *ses) + ses->digest_length); + sg++; + +- qm_sg_entry_set64(sg, dpaa_mem_vtop(ctx->digest)); ++ qm_sg_entry_set64(sg, rte_dpaa_mem_vtop(ctx->digest)); + sg->length = ses->digest_length; + length += sg->length; + sg->final = 1; +@@ -1562,7 +1537,7 @@ build_cipher_auth(struct rte_crypto_op *op, dpaa_sec_session *ses) + + /* output */ + sg++; +- qm_sg_entry_set64(&cf->sg[0], dpaa_mem_vtop(sg)); ++ qm_sg_entry_set64(&cf->sg[0], rte_dpaa_mem_vtop(sg)); + 
qm_sg_entry_set64(sg, dst_start_addr + sym->cipher.data.offset); + sg->length = sym->cipher.data.length; + length = sg->length; +@@ -1656,7 +1631,7 @@ build_proto_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + /* output */ + out_sg = &cf->sg[0]; + out_sg->extension = 1; +- qm_sg_entry_set64(out_sg, dpaa_mem_vtop(&cf->sg[2])); ++ qm_sg_entry_set64(out_sg, rte_dpaa_mem_vtop(&cf->sg[2])); + + /* 1st seg */ + sg = &cf->sg[2]; +@@ -1689,7 +1664,7 @@ build_proto_sg(struct rte_crypto_op *op, dpaa_sec_session *ses) + in_len = mbuf->data_len; + + sg++; +- qm_sg_entry_set64(in_sg, dpaa_mem_vtop(sg)); ++ qm_sg_entry_set64(in_sg, rte_dpaa_mem_vtop(sg)); + + /* 1st seg */ + qm_sg_entry_set64(sg, rte_pktmbuf_mtophys(mbuf)); +@@ -1884,7 +1859,7 @@ dpaa_sec_enqueue_burst(void *qp, struct rte_crypto_op **ops, + inq[loop] = ses->inq[rte_lcore_id() % MAX_DPAA_CORES]; + fd->opaque_addr = 0; + fd->cmd = 0; +- qm_fd_addr_set64(fd, dpaa_mem_vtop(cf->sg)); ++ qm_fd_addr_set64(fd, rte_dpaa_mem_vtop(cf->sg)); + fd->_format1 = qm_fd_compound; + fd->length29 = 2 * sizeof(struct qm_sg_entry); + +@@ -2349,7 +2324,7 @@ dpaa_sec_attach_sess_q(struct dpaa_sec_qp *qp, dpaa_sec_session *sess) + } + } + ret = dpaa_sec_init_rx(sess->inq[rte_lcore_id() % MAX_DPAA_CORES], +- dpaa_mem_vtop(&sess->cdb), ++ rte_dpaa_mem_vtop(&sess->cdb), + qman_fq_fqid(&qp->outq)); + if (ret) + DPAA_SEC_ERR("Unable to init sec queue"); +@@ -3149,7 +3124,7 @@ dpaa_sec_process_parallel_event(void *event, + * sg[0] is for output + * sg[1] for input + */ +- job = dpaa_mem_ptov(qm_fd_addr_get64(fd)); ++ job = rte_dpaa_mem_ptov(qm_fd_addr_get64(fd)); + + ctx = container_of(job, struct dpaa_sec_op_ctx, job); + ctx->fd_status = fd->status; +@@ -3204,7 +3179,7 @@ dpaa_sec_process_atomic_event(void *event, + * sg[0] is for output + * sg[1] for input + */ +- job = dpaa_mem_ptov(qm_fd_addr_get64(fd)); ++ job = rte_dpaa_mem_ptov(qm_fd_addr_get64(fd)); + + ctx = container_of(job, struct dpaa_sec_op_ctx, job); + ctx->fd_status = fd->status; +diff --git a/dpdk/drivers/crypto/dpaa_sec/meson.build b/dpdk/drivers/crypto/dpaa_sec/meson.build +index 9f17d3a43e..e819f9cf1b 100644 +--- a/dpdk/drivers/crypto/dpaa_sec/meson.build ++++ b/dpdk/drivers/crypto/dpaa_sec/meson.build +@@ -6,11 +6,16 @@ if not is_linux + reason = 'only supported on linux' + endif + +-deps += ['bus_dpaa', 'security'] ++deps += ['bus_dpaa', 'mempool_dpaa', 'security'] + sources = files('dpaa_sec.c') + + allow_experimental_apis = true + ++# FIXME: temporary solution for Bugzilla 469 ++if (toolchain == 'gcc' and cc.version().version_compare('>=10.0.0')) ++ cflags += '-fcommon' ++endif ++ + includes += include_directories('../../bus/dpaa/include') + includes += include_directories('../../common/dpaax') + includes += include_directories('../../common/dpaax/caamflib/') +diff --git a/dpdk/drivers/crypto/kasumi/kasumi_pmd_private.h b/dpdk/drivers/crypto/kasumi/kasumi_pmd_private.h +index 7ac19c5735..f0b83f2272 100644 +--- a/dpdk/drivers/crypto/kasumi/kasumi_pmd_private.h ++++ b/dpdk/drivers/crypto/kasumi/kasumi_pmd_private.h +@@ -11,7 +11,7 @@ + /**< KASUMI PMD device name */ + + /** KASUMI PMD LOGTYPE DRIVER */ +-int kasumi_logtype_driver; ++extern int kasumi_logtype_driver; + + #define KASUMI_LOG(level, fmt, ...) 
\ + rte_log(RTE_LOG_ ## level, kasumi_logtype_driver, \ +@@ -72,6 +72,6 @@ kasumi_set_session_parameters(struct kasumi_session *sess, + + + /** device specific operations function pointer structure */ +-struct rte_cryptodev_ops *rte_kasumi_pmd_ops; ++extern struct rte_cryptodev_ops *rte_kasumi_pmd_ops; + + #endif /* _KASUMI_PMD_PRIVATE_H_ */ +diff --git a/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd.c b/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd.c +index d0583ef073..c87dca5f48 100644 +--- a/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd.c ++++ b/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd.c +@@ -17,6 +17,7 @@ + #define KASUMI_MAX_BURST 4 + #define BYTE_LEN 8 + ++int kasumi_logtype_driver; + static uint8_t cryptodev_driver_id; + + /** Get xform chain order. */ +diff --git a/dpdk/drivers/crypto/mvsam/mrvl_pmd_private.h b/dpdk/drivers/crypto/mvsam/mrvl_pmd_private.h +index 09702b9e3e..e575330ef5 100644 +--- a/dpdk/drivers/crypto/mvsam/mrvl_pmd_private.h ++++ b/dpdk/drivers/crypto/mvsam/mrvl_pmd_private.h +@@ -13,7 +13,7 @@ + /**< Marvell PMD device name */ + + /** MRVL PMD LOGTYPE DRIVER */ +-int mrvl_logtype_driver; ++extern int mrvl_logtype_driver; + + #define MRVL_LOG(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, mrvl_logtype_driver, \ +diff --git a/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c b/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c +index 3c0fe216f0..63782ce974 100644 +--- a/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c ++++ b/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c +@@ -19,6 +19,7 @@ + #define MRVL_PMD_MAX_NB_SESS_ARG ("max_nb_sessions") + #define MRVL_PMD_DEFAULT_MAX_NB_SESSIONS 2048 + ++int mrvl_logtype_driver; + static uint8_t cryptodev_driver_id; + + struct mrvl_pmd_init_params { +diff --git a/dpdk/drivers/crypto/nitrox/nitrox_csr.h b/dpdk/drivers/crypto/nitrox/nitrox_csr.h +index 8cd92e38be..de7a3c6713 100644 +--- a/dpdk/drivers/crypto/nitrox/nitrox_csr.h ++++ b/dpdk/drivers/crypto/nitrox/nitrox_csr.h +@@ -12,18 +12,18 @@ + #define NITROX_CSR_ADDR(bar_addr, offset) (bar_addr + (offset)) + + /* NPS packet registers */ +-#define NPS_PKT_IN_INSTR_CTLX(_i) (0x10060 + ((_i) * 0x40000)) +-#define NPS_PKT_IN_INSTR_BADDRX(_i) (0x10068 + ((_i) * 0x40000)) +-#define NPS_PKT_IN_INSTR_RSIZEX(_i) (0x10070 + ((_i) * 0x40000)) +-#define NPS_PKT_IN_DONE_CNTSX(_i) (0x10080 + ((_i) * 0x40000)) +-#define NPS_PKT_IN_INSTR_BAOFF_DBELLX(_i) (0x10078 + ((_i) * 0x40000)) +-#define NPS_PKT_IN_INT_LEVELSX(_i) (0x10088 + ((_i) * 0x40000)) +-#define NPS_PKT_SLC_CTLX(_i) (0x10000 + ((_i) * 0x40000)) +-#define NPS_PKT_SLC_CNTSX(_i) (0x10008 + ((_i) * 0x40000)) +-#define NPS_PKT_SLC_INT_LEVELSX(_i) (0x10010 + ((_i) * 0x40000)) ++#define NPS_PKT_IN_INSTR_CTLX(_i) (0x10060UL + ((_i) * 0x40000UL)) ++#define NPS_PKT_IN_INSTR_BADDRX(_i) (0x10068UL + ((_i) * 0x40000UL)) ++#define NPS_PKT_IN_INSTR_RSIZEX(_i) (0x10070UL + ((_i) * 0x40000UL)) ++#define NPS_PKT_IN_DONE_CNTSX(_i) (0x10080UL + ((_i) * 0x40000UL)) ++#define NPS_PKT_IN_INSTR_BAOFF_DBELLX(_i) (0x10078UL + ((_i) * 0x40000UL)) ++#define NPS_PKT_IN_INT_LEVELSX(_i) (0x10088UL + ((_i) * 0x40000UL)) ++#define NPS_PKT_SLC_CTLX(_i) (0x10000UL + ((_i) * 0x40000UL)) ++#define NPS_PKT_SLC_CNTSX(_i) (0x10008UL + ((_i) * 0x40000UL)) ++#define NPS_PKT_SLC_INT_LEVELSX(_i) (0x10010UL + ((_i) * 0x40000UL)) + + /* AQM Virtual Function Registers */ +-#define AQMQ_QSZX(_i) (0x20008 + ((_i)*0x40000)) ++#define AQMQ_QSZX(_i) (0x20008UL + ((_i) * 0x40000UL)) + + static inline uint64_t + nitrox_read_csr(uint8_t *bar_addr, uint64_t offset) +diff --git a/dpdk/drivers/crypto/nitrox/nitrox_sym.c 
b/dpdk/drivers/crypto/nitrox/nitrox_sym.c +index 56410c44d7..d1b32fec92 100644 +--- a/dpdk/drivers/crypto/nitrox/nitrox_sym.c ++++ b/dpdk/drivers/crypto/nitrox/nitrox_sym.c +@@ -683,7 +683,8 @@ nitrox_sym_pmd_create(struct nitrox_device *ndev) + struct rte_cryptodev *cdev; + + rte_pci_device_name(&ndev->pdev->addr, name, sizeof(name)); +- snprintf(name + strlen(name), RTE_CRYPTODEV_NAME_MAX_LEN, "_n5sym"); ++ snprintf(name + strlen(name), RTE_CRYPTODEV_NAME_MAX_LEN - strlen(name), ++ "_n5sym"); + ndev->rte_sym_dev.driver = &nitrox_rte_sym_drv; + ndev->rte_sym_dev.numa_node = ndev->pdev->device.numa_node; + ndev->rte_sym_dev.devargs = NULL; +diff --git a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev.c b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev.c +index 7fd216bb39..417eda6de6 100644 +--- a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev.c ++++ b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev.c +@@ -24,6 +24,8 @@ + + int otx2_cpt_logtype; + ++uint8_t otx2_cryptodev_driver_id; ++ + static struct rte_pci_id pci_id_cpt_table[] = { + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, +@@ -145,6 +147,7 @@ static struct cryptodev_driver otx2_cryptodev_drv; + RTE_INIT(otx2_cpt_init_log); + RTE_PMD_REGISTER_PCI(CRYPTODEV_NAME_OCTEONTX2_PMD, otx2_cryptodev_pmd); + RTE_PMD_REGISTER_PCI_TABLE(CRYPTODEV_NAME_OCTEONTX2_PMD, pci_id_cpt_table); ++RTE_PMD_REGISTER_KMOD_DEP(CRYPTODEV_NAME_OCTEONTX2_PMD, "vfio-pci"); + RTE_PMD_REGISTER_CRYPTO_DRIVER(otx2_cryptodev_drv, otx2_cryptodev_pmd.driver, + otx2_cryptodev_driver_id); + +diff --git a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev.h b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev.h +index 8e0ebc292a..c0aa661b3b 100644 +--- a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev.h ++++ b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev.h +@@ -38,6 +38,6 @@ extern int otx2_cpt_logtype; + /* + * Crypto device driver ID + */ +-uint8_t otx2_cryptodev_driver_id; ++extern uint8_t otx2_cryptodev_driver_id; + + #endif /* _OTX2_CRYPTODEV_H_ */ +diff --git a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_ops.h b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_ops.h +index a2724f7227..f83e36b486 100644 +--- a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_ops.h ++++ b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_ops.h +@@ -16,6 +16,6 @@ enum otx2_cpt_egrp { + OTX2_CPT_EGRP_AE = 2 + }; + +-struct rte_cryptodev_ops otx2_cpt_ops; ++extern struct rte_cryptodev_ops otx2_cpt_ops; + + #endif /* _OTX2_CRYPTODEV_OPS_H_ */ +diff --git a/dpdk/drivers/crypto/openssl/openssl_pmd_private.h b/dpdk/drivers/crypto/openssl/openssl_pmd_private.h +index 43ac3813df..b2054b3754 100644 +--- a/dpdk/drivers/crypto/openssl/openssl_pmd_private.h ++++ b/dpdk/drivers/crypto/openssl/openssl_pmd_private.h +@@ -16,7 +16,7 @@ + /**< Open SSL Crypto PMD device name */ + + /** OPENSSL PMD LOGTYPE DRIVER */ +-int openssl_logtype_driver; ++extern int openssl_logtype_driver; + #define OPENSSL_LOG(level, fmt, ...) 
\ + rte_log(RTE_LOG_ ## level, openssl_logtype_driver, \ + "%s() line %u: " fmt "\n", __func__, __LINE__, \ +diff --git a/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c b/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c +index 91f028308c..c294f60b7d 100644 +--- a/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c ++++ b/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c +@@ -18,6 +18,7 @@ + + #define DES_BLOCK_SIZE 8 + ++int openssl_logtype_driver; + static uint8_t cryptodev_driver_id; + + #if (OPENSSL_VERSION_NUMBER < 0x10100000L) +@@ -762,10 +763,10 @@ get_session(struct openssl_qp *qp, struct rte_crypto_op *op) + return NULL; + + /* provide internal session */ +- void *_sess = NULL; ++ void *_sess = rte_cryptodev_sym_session_create(qp->sess_mp); + void *_sess_private_data = NULL; + +- if (rte_mempool_get(qp->sess_mp, (void **)&_sess)) ++ if (_sess == NULL) + return NULL; + + if (rte_mempool_get(qp->sess_mp_priv, +@@ -2037,6 +2038,26 @@ process_asym_op(struct openssl_qp *qp, struct rte_crypto_op *op, + return retval; + } + ++static void ++copy_plaintext(struct rte_mbuf *m_src, struct rte_mbuf *m_dst, ++ struct rte_crypto_op *op) ++{ ++ uint8_t *p_src, *p_dst; ++ ++ p_src = rte_pktmbuf_mtod(m_src, uint8_t *); ++ p_dst = rte_pktmbuf_mtod(m_dst, uint8_t *); ++ ++ /** ++ * Copy the content between cipher offset and auth offset ++ * for generating correct digest. ++ */ ++ if (op->sym->cipher.data.offset > op->sym->auth.data.offset) ++ memcpy(p_dst + op->sym->auth.data.offset, ++ p_src + op->sym->auth.data.offset, ++ op->sym->cipher.data.offset - ++ op->sym->auth.data.offset); ++} ++ + /** Process crypto operation for mbuf */ + static int + process_op(struct openssl_qp *qp, struct rte_crypto_op *op, +@@ -2059,6 +2080,9 @@ process_op(struct openssl_qp *qp, struct rte_crypto_op *op, + break; + case OPENSSL_CHAIN_CIPHER_AUTH: + process_openssl_cipher_op(op, sess, msrc, mdst); ++ /* OOP */ ++ if (msrc != mdst) ++ copy_plaintext(msrc, mdst, op); + process_openssl_auth_op(qp, op, sess, mdst, mdst); + break; + case OPENSSL_CHAIN_AUTH_CIPHER: +diff --git a/dpdk/drivers/crypto/qat/qat_sym_capabilities.h b/dpdk/drivers/crypto/qat/qat_sym_capabilities.h +index 028a56c568..dbeea43408 100644 +--- a/dpdk/drivers/crypto/qat/qat_sym_capabilities.h ++++ b/dpdk/drivers/crypto/qat/qat_sym_capabilities.h +@@ -6,6 +6,111 @@ + #define _QAT_SYM_CAPABILITIES_H_ + + #define QAT_BASE_GEN1_SYM_CAPABILITIES \ ++ { /* SHA1 */ \ ++ .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \ ++ {.sym = { \ ++ .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH, \ ++ {.auth = { \ ++ .algo = RTE_CRYPTO_AUTH_SHA1, \ ++ .block_size = 64, \ ++ .key_size = { \ ++ .min = 0, \ ++ .max = 0, \ ++ .increment = 0 \ ++ }, \ ++ .digest_size = { \ ++ .min = 1, \ ++ .max = 20, \ ++ .increment = 1 \ ++ }, \ ++ .iv_size = { 0 } \ ++ }, } \ ++ }, } \ ++ }, \ ++ { /* SHA224 */ \ ++ .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \ ++ {.sym = { \ ++ .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH, \ ++ {.auth = { \ ++ .algo = RTE_CRYPTO_AUTH_SHA224, \ ++ .block_size = 64, \ ++ .key_size = { \ ++ .min = 0, \ ++ .max = 0, \ ++ .increment = 0 \ ++ }, \ ++ .digest_size = { \ ++ .min = 1, \ ++ .max = 28, \ ++ .increment = 1 \ ++ }, \ ++ .iv_size = { 0 } \ ++ }, } \ ++ }, } \ ++ }, \ ++ { /* SHA256 */ \ ++ .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \ ++ {.sym = { \ ++ .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH, \ ++ {.auth = { \ ++ .algo = RTE_CRYPTO_AUTH_SHA256, \ ++ .block_size = 64, \ ++ .key_size = { \ ++ .min = 0, \ ++ .max = 0, \ ++ .increment = 0 \ ++ }, \ ++ .digest_size = { \ ++ .min = 1, \ ++ .max = 32, \ ++ 
.increment = 1 \ ++ }, \ ++ .iv_size = { 0 } \ ++ }, } \ ++ }, } \ ++ }, \ ++ { /* SHA384 */ \ ++ .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \ ++ {.sym = { \ ++ .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH, \ ++ {.auth = { \ ++ .algo = RTE_CRYPTO_AUTH_SHA384, \ ++ .block_size = 128, \ ++ .key_size = { \ ++ .min = 0, \ ++ .max = 0, \ ++ .increment = 0 \ ++ }, \ ++ .digest_size = { \ ++ .min = 1, \ ++ .max = 48, \ ++ .increment = 1 \ ++ }, \ ++ .iv_size = { 0 } \ ++ }, } \ ++ }, } \ ++ }, \ ++ { /* SHA512 */ \ ++ .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \ ++ {.sym = { \ ++ .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH, \ ++ {.auth = { \ ++ .algo = RTE_CRYPTO_AUTH_SHA512, \ ++ .block_size = 128, \ ++ .key_size = { \ ++ .min = 0, \ ++ .max = 0, \ ++ .increment = 0 \ ++ }, \ ++ .digest_size = { \ ++ .min = 1, \ ++ .max = 64, \ ++ .increment = 1 \ ++ }, \ ++ .iv_size = { 0 } \ ++ }, } \ ++ }, } \ ++ }, \ + { /* SHA1 HMAC */ \ + .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \ + {.sym = { \ +diff --git a/dpdk/drivers/crypto/qat/qat_sym_session.c b/dpdk/drivers/crypto/qat/qat_sym_session.c +index 72290ba480..695ba7e178 100644 +--- a/dpdk/drivers/crypto/qat/qat_sym_session.c ++++ b/dpdk/drivers/crypto/qat/qat_sym_session.c +@@ -19,6 +19,41 @@ + #include "qat_sym_session.h" + #include "qat_sym_pmd.h" + ++/* SHA1 - 20 bytes - Initialiser state can be found in FIPS stds 180-2 */ ++static const uint8_t sha1InitialState[] = { ++ 0x67, 0x45, 0x23, 0x01, 0xef, 0xcd, 0xab, 0x89, 0x98, 0xba, ++ 0xdc, 0xfe, 0x10, 0x32, 0x54, 0x76, 0xc3, 0xd2, 0xe1, 0xf0}; ++ ++/* SHA 224 - 32 bytes - Initialiser state can be found in FIPS stds 180-2 */ ++static const uint8_t sha224InitialState[] = { ++ 0xc1, 0x05, 0x9e, 0xd8, 0x36, 0x7c, 0xd5, 0x07, 0x30, 0x70, 0xdd, ++ 0x17, 0xf7, 0x0e, 0x59, 0x39, 0xff, 0xc0, 0x0b, 0x31, 0x68, 0x58, ++ 0x15, 0x11, 0x64, 0xf9, 0x8f, 0xa7, 0xbe, 0xfa, 0x4f, 0xa4}; ++ ++/* SHA 256 - 32 bytes - Initialiser state can be found in FIPS stds 180-2 */ ++static const uint8_t sha256InitialState[] = { ++ 0x6a, 0x09, 0xe6, 0x67, 0xbb, 0x67, 0xae, 0x85, 0x3c, 0x6e, 0xf3, ++ 0x72, 0xa5, 0x4f, 0xf5, 0x3a, 0x51, 0x0e, 0x52, 0x7f, 0x9b, 0x05, ++ 0x68, 0x8c, 0x1f, 0x83, 0xd9, 0xab, 0x5b, 0xe0, 0xcd, 0x19}; ++ ++/* SHA 384 - 64 bytes - Initialiser state can be found in FIPS stds 180-2 */ ++static const uint8_t sha384InitialState[] = { ++ 0xcb, 0xbb, 0x9d, 0x5d, 0xc1, 0x05, 0x9e, 0xd8, 0x62, 0x9a, 0x29, ++ 0x2a, 0x36, 0x7c, 0xd5, 0x07, 0x91, 0x59, 0x01, 0x5a, 0x30, 0x70, ++ 0xdd, 0x17, 0x15, 0x2f, 0xec, 0xd8, 0xf7, 0x0e, 0x59, 0x39, 0x67, ++ 0x33, 0x26, 0x67, 0xff, 0xc0, 0x0b, 0x31, 0x8e, 0xb4, 0x4a, 0x87, ++ 0x68, 0x58, 0x15, 0x11, 0xdb, 0x0c, 0x2e, 0x0d, 0x64, 0xf9, 0x8f, ++ 0xa7, 0x47, 0xb5, 0x48, 0x1d, 0xbe, 0xfa, 0x4f, 0xa4}; ++ ++/* SHA 512 - 64 bytes - Initialiser state can be found in FIPS stds 180-2 */ ++static const uint8_t sha512InitialState[] = { ++ 0x6a, 0x09, 0xe6, 0x67, 0xf3, 0xbc, 0xc9, 0x08, 0xbb, 0x67, 0xae, ++ 0x85, 0x84, 0xca, 0xa7, 0x3b, 0x3c, 0x6e, 0xf3, 0x72, 0xfe, 0x94, ++ 0xf8, 0x2b, 0xa5, 0x4f, 0xf5, 0x3a, 0x5f, 0x1d, 0x36, 0xf1, 0x51, ++ 0x0e, 0x52, 0x7f, 0xad, 0xe6, 0x82, 0xd1, 0x9b, 0x05, 0x68, 0x8c, ++ 0x2b, 0x3e, 0x6c, 0x1f, 0x1f, 0x83, 0xd9, 0xab, 0xfb, 0x41, 0xbd, ++ 0x6b, 0x5b, 0xe0, 0xcd, 0x19, 0x13, 0x7e, 0x21, 0x79}; ++ + /** Frees a context previously created + * Depends on openssl libcrypto + */ +@@ -580,8 +615,29 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev, + const uint8_t *key_data = auth_xform->key.data; + uint8_t key_length = auth_xform->key.length; + session->aes_cmac = 0; ++ 
session->auth_mode = ICP_QAT_HW_AUTH_MODE1; + + switch (auth_xform->algo) { ++ case RTE_CRYPTO_AUTH_SHA1: ++ session->qat_hash_alg = ICP_QAT_HW_AUTH_ALGO_SHA1; ++ session->auth_mode = ICP_QAT_HW_AUTH_MODE0; ++ break; ++ case RTE_CRYPTO_AUTH_SHA224: ++ session->qat_hash_alg = ICP_QAT_HW_AUTH_ALGO_SHA224; ++ session->auth_mode = ICP_QAT_HW_AUTH_MODE0; ++ break; ++ case RTE_CRYPTO_AUTH_SHA256: ++ session->qat_hash_alg = ICP_QAT_HW_AUTH_ALGO_SHA256; ++ session->auth_mode = ICP_QAT_HW_AUTH_MODE0; ++ break; ++ case RTE_CRYPTO_AUTH_SHA384: ++ session->qat_hash_alg = ICP_QAT_HW_AUTH_ALGO_SHA384; ++ session->auth_mode = ICP_QAT_HW_AUTH_MODE0; ++ break; ++ case RTE_CRYPTO_AUTH_SHA512: ++ session->qat_hash_alg = ICP_QAT_HW_AUTH_ALGO_SHA512; ++ session->auth_mode = ICP_QAT_HW_AUTH_MODE0; ++ break; + case RTE_CRYPTO_AUTH_SHA1_HMAC: + session->qat_hash_alg = ICP_QAT_HW_AUTH_ALGO_SHA1; + break; +@@ -635,11 +691,6 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev, + } + session->qat_hash_alg = ICP_QAT_HW_AUTH_ALGO_ZUC_3G_128_EIA3; + break; +- case RTE_CRYPTO_AUTH_SHA1: +- case RTE_CRYPTO_AUTH_SHA256: +- case RTE_CRYPTO_AUTH_SHA512: +- case RTE_CRYPTO_AUTH_SHA224: +- case RTE_CRYPTO_AUTH_SHA384: + case RTE_CRYPTO_AUTH_MD5: + case RTE_CRYPTO_AUTH_AES_CBC_MAC: + QAT_LOG(ERR, "Crypto: Unsupported hash alg %u", +@@ -727,6 +778,8 @@ qat_sym_session_configure_aead(struct rte_cryptodev *dev, + session->cipher_iv.offset = xform->aead.iv.offset; + session->cipher_iv.length = xform->aead.iv.length; + ++ session->auth_mode = ICP_QAT_HW_AUTH_MODE1; ++ + switch (aead_xform->algo) { + case RTE_CRYPTO_AEAD_AES_GCM: + if (qat_sym_validate_aes_key(aead_xform->key.length, +@@ -1524,7 +1577,7 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + (struct icp_qat_fw_la_auth_req_params *) + ((char *)&req_tmpl->serv_specif_rqpars + + ICP_QAT_FW_HASH_REQUEST_PARAMETERS_OFFSET); +- uint16_t state1_size = 0, state2_size = 0; ++ uint16_t state1_size = 0, state2_size = 0, cd_extra_size = 0; + uint16_t hash_offset, cd_size; + uint32_t *aad_len = NULL; + uint32_t wordIndex = 0; +@@ -1574,10 +1627,11 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + hash = (struct icp_qat_hw_auth_setup *)cdesc->cd_cur_ptr; + hash->auth_config.reserved = 0; + hash->auth_config.config = +- ICP_QAT_HW_AUTH_CONFIG_BUILD(ICP_QAT_HW_AUTH_MODE1, ++ ICP_QAT_HW_AUTH_CONFIG_BUILD(cdesc->auth_mode, + cdesc->qat_hash_alg, digestsize); + +- if (cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_SNOW_3G_UIA2 ++ if (cdesc->auth_mode == ICP_QAT_HW_AUTH_MODE0 ++ || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_SNOW_3G_UIA2 + || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_KASUMI_F9 + || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_ZUC_3G_128_EIA3 + || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC +@@ -1600,6 +1654,15 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + */ + switch (cdesc->qat_hash_alg) { + case ICP_QAT_HW_AUTH_ALGO_SHA1: ++ if (cdesc->auth_mode == ICP_QAT_HW_AUTH_MODE0) { ++ /* Plain SHA-1 */ ++ rte_memcpy(cdesc->cd_cur_ptr, sha1InitialState, ++ sizeof(sha1InitialState)); ++ state1_size = qat_hash_get_state1_size( ++ cdesc->qat_hash_alg); ++ break; ++ } ++ /* SHA-1 HMAC */ + if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA1, authkey, + authkeylen, cdesc->cd_cur_ptr, &state1_size, + cdesc->aes_cmac)) { +@@ -1609,6 +1672,15 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + state2_size = RTE_ALIGN_CEIL(ICP_QAT_HW_SHA1_STATE2_SZ, 8); + break; + case 
ICP_QAT_HW_AUTH_ALGO_SHA224: ++ if (cdesc->auth_mode == ICP_QAT_HW_AUTH_MODE0) { ++ /* Plain SHA-224 */ ++ rte_memcpy(cdesc->cd_cur_ptr, sha224InitialState, ++ sizeof(sha224InitialState)); ++ state1_size = qat_hash_get_state1_size( ++ cdesc->qat_hash_alg); ++ break; ++ } ++ /* SHA-224 HMAC */ + if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA224, authkey, + authkeylen, cdesc->cd_cur_ptr, &state1_size, + cdesc->aes_cmac)) { +@@ -1618,6 +1690,15 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + state2_size = ICP_QAT_HW_SHA224_STATE2_SZ; + break; + case ICP_QAT_HW_AUTH_ALGO_SHA256: ++ if (cdesc->auth_mode == ICP_QAT_HW_AUTH_MODE0) { ++ /* Plain SHA-256 */ ++ rte_memcpy(cdesc->cd_cur_ptr, sha256InitialState, ++ sizeof(sha256InitialState)); ++ state1_size = qat_hash_get_state1_size( ++ cdesc->qat_hash_alg); ++ break; ++ } ++ /* SHA-256 HMAC */ + if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA256, authkey, + authkeylen, cdesc->cd_cur_ptr, &state1_size, + cdesc->aes_cmac)) { +@@ -1627,6 +1708,15 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + state2_size = ICP_QAT_HW_SHA256_STATE2_SZ; + break; + case ICP_QAT_HW_AUTH_ALGO_SHA384: ++ if (cdesc->auth_mode == ICP_QAT_HW_AUTH_MODE0) { ++ /* Plain SHA-384 */ ++ rte_memcpy(cdesc->cd_cur_ptr, sha384InitialState, ++ sizeof(sha384InitialState)); ++ state1_size = qat_hash_get_state1_size( ++ cdesc->qat_hash_alg); ++ break; ++ } ++ /* SHA-384 HMAC */ + if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA384, authkey, + authkeylen, cdesc->cd_cur_ptr, &state1_size, + cdesc->aes_cmac)) { +@@ -1636,6 +1726,15 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + state2_size = ICP_QAT_HW_SHA384_STATE2_SZ; + break; + case ICP_QAT_HW_AUTH_ALGO_SHA512: ++ if (cdesc->auth_mode == ICP_QAT_HW_AUTH_MODE0) { ++ /* Plain SHA-512 */ ++ rte_memcpy(cdesc->cd_cur_ptr, sha512InitialState, ++ sizeof(sha512InitialState)); ++ state1_size = qat_hash_get_state1_size( ++ cdesc->qat_hash_alg); ++ break; ++ } ++ /* SHA-512 HMAC */ + if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA512, authkey, + authkeylen, cdesc->cd_cur_ptr, &state1_size, + cdesc->aes_cmac)) { +@@ -1700,7 +1799,7 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + memcpy(cipherconfig->key, authkey, authkeylen); + memset(cipherconfig->key + authkeylen, + 0, ICP_QAT_HW_SNOW_3G_UEA2_IV_SZ); +- cdesc->cd_cur_ptr += sizeof(struct icp_qat_hw_cipher_config) + ++ cd_extra_size += sizeof(struct icp_qat_hw_cipher_config) + + authkeylen + ICP_QAT_HW_SNOW_3G_UEA2_IV_SZ; + auth_param->hash_state_sz = ICP_QAT_HW_SNOW_3G_UEA2_IV_SZ >> 3; + break; +@@ -1716,8 +1815,7 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + + ICP_QAT_HW_ZUC_3G_EEA3_IV_SZ); + + memcpy(cdesc->cd_cur_ptr + state1_size, authkey, authkeylen); +- cdesc->cd_cur_ptr += state1_size + state2_size +- + ICP_QAT_HW_ZUC_3G_EEA3_IV_SZ; ++ cd_extra_size += ICP_QAT_HW_ZUC_3G_EEA3_IV_SZ; + auth_param->hash_state_sz = ICP_QAT_HW_ZUC_3G_EEA3_IV_SZ >> 3; + cdesc->min_qat_dev_gen = QAT_GEN2; + +@@ -1803,7 +1901,7 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + RTE_ALIGN_CEIL(hash_cd_ctrl->inner_state1_sz, 8)) + >> 3); + +- cdesc->cd_cur_ptr += state1_size + state2_size; ++ cdesc->cd_cur_ptr += state1_size + state2_size + cd_extra_size; + cd_size = cdesc->cd_cur_ptr-(uint8_t *)&cdesc->cd; + + cd_pars->u.s.content_desc_addr = cdesc->cd_paddr; +diff --git a/dpdk/drivers/crypto/qat/qat_sym_session.h 
b/dpdk/drivers/crypto/qat/qat_sym_session.h +index 98985d6867..bcab5b2b60 100644 +--- a/dpdk/drivers/crypto/qat/qat_sym_session.h ++++ b/dpdk/drivers/crypto/qat/qat_sym_session.h +@@ -62,6 +62,7 @@ struct qat_sym_session { + enum icp_qat_hw_cipher_mode qat_mode; + enum icp_qat_hw_auth_algo qat_hash_alg; + enum icp_qat_hw_auth_op auth_op; ++ enum icp_qat_hw_auth_mode auth_mode; + void *bpi_ctx; + struct qat_sym_cd cd; + uint8_t *cd_cur_ptr; +diff --git a/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd.c b/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd.c +index 9d07e1ab2c..2b1b90b011 100644 +--- a/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd.c ++++ b/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd.c +@@ -16,6 +16,7 @@ + #define SNOW3G_MAX_BURST 8 + #define BYTE_LEN 8 + ++int snow3g_logtype_driver; + static uint8_t cryptodev_driver_id; + + /** Get xform chain order. */ +diff --git a/dpdk/drivers/crypto/snow3g/snow3g_pmd_private.h b/dpdk/drivers/crypto/snow3g/snow3g_pmd_private.h +index 1fe05eb567..1070800960 100644 +--- a/dpdk/drivers/crypto/snow3g/snow3g_pmd_private.h ++++ b/dpdk/drivers/crypto/snow3g/snow3g_pmd_private.h +@@ -11,7 +11,7 @@ + /**< SNOW 3G PMD device name */ + + /** SNOW 3G PMD LOGTYPE DRIVER */ +-int snow3g_logtype_driver; ++extern int snow3g_logtype_driver; + + #define SNOW3G_LOG(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, snow3g_logtype_driver, \ +diff --git a/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c b/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c +index 8e214cd50e..265aabaebf 100644 +--- a/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c ++++ b/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c +@@ -14,6 +14,7 @@ + #define ZUC_MAX_BURST 4 + #define BYTE_LEN 8 + ++int zuc_logtype_driver; + static uint8_t cryptodev_driver_id; + + /** Get xform chain order. */ +diff --git a/dpdk/drivers/crypto/zuc/zuc_pmd_private.h b/dpdk/drivers/crypto/zuc/zuc_pmd_private.h +index 428efd4bb5..dc492b1710 100644 +--- a/dpdk/drivers/crypto/zuc/zuc_pmd_private.h ++++ b/dpdk/drivers/crypto/zuc/zuc_pmd_private.h +@@ -8,10 +8,10 @@ + #include <sso_zuc.h> + + #define CRYPTODEV_NAME_ZUC_PMD crypto_zuc +-/**< KASUMI PMD device name */ ++/**< ZUC PMD device name */ + + /** ZUC PMD LOGTYPE DRIVER */ +-int zuc_logtype_driver; ++extern int zuc_logtype_driver; + #define ZUC_LOG(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, zuc_logtype_driver, \ + "%s()... 
line %u: " fmt "\n", __func__, __LINE__, \ +diff --git a/dpdk/drivers/event/dpaa2/dpaa2_eventdev.c b/dpdk/drivers/event/dpaa2/dpaa2_eventdev.c +index d71361666c..2be6e12f66 100644 +--- a/dpdk/drivers/event/dpaa2/dpaa2_eventdev.c ++++ b/dpdk/drivers/event/dpaa2/dpaa2_eventdev.c +@@ -391,7 +391,7 @@ dpaa2_eventdev_info_get(struct rte_eventdev *dev, + dev_info->max_event_priority_levels = + DPAA2_EVENT_MAX_EVENT_PRIORITY_LEVELS; + dev_info->max_event_ports = rte_fslmc_get_device_count(DPAA2_IO); +- /* we only support dpio upto number of cores*/ ++ /* we only support dpio up to number of cores */ + if (dev_info->max_event_ports > rte_lcore_count()) + dev_info->max_event_ports = rte_lcore_count(); + dev_info->max_event_port_dequeue_depth = +@@ -479,6 +479,8 @@ dpaa2_eventdev_queue_def_conf(struct rte_eventdev *dev, uint8_t queue_id, + RTE_SET_USED(queue_id); + + queue_conf->nb_atomic_flows = DPAA2_EVENT_QUEUE_ATOMIC_FLOWS; ++ queue_conf->nb_atomic_order_sequences = ++ DPAA2_EVENT_QUEUE_ORDER_SEQUENCES; + queue_conf->schedule_type = RTE_SCHED_TYPE_PARALLEL; + queue_conf->priority = RTE_EVENT_DEV_PRIORITY_NORMAL; + } +diff --git a/dpdk/drivers/event/dsw/dsw_event.c b/dpdk/drivers/event/dsw/dsw_event.c +index 61a66fabf3..0df9209e4f 100644 +--- a/dpdk/drivers/event/dsw/dsw_event.c ++++ b/dpdk/drivers/event/dsw/dsw_event.c +@@ -658,6 +658,9 @@ dsw_port_consider_migration(struct dsw_evdev *dsw, + if (dsw->num_ports == 1) + return; + ++ if (seen_events_len < DSW_MAX_EVENTS_RECORDED) ++ return; ++ + DSW_LOG_DP_PORT(DEBUG, source_port->id, "Considering migration.\n"); + + /* Randomize interval to avoid having all threads considering +@@ -930,11 +933,6 @@ dsw_port_ctl_process(struct dsw_evdev *dsw, struct dsw_port *port) + { + struct dsw_ctl_msg msg; + +- /* So any table loads happens before the ring dequeue, in the +- * case of a 'paus' message. 
+- */ +- rte_smp_rmb(); +- + if (dsw_port_ctl_dequeue(port, &msg) == 0) { + switch (msg.type) { + case DSW_CTL_PAUS_REQ: +@@ -1018,12 +1016,12 @@ dsw_event_enqueue(void *port, const struct rte_event *ev) + } + + static __rte_always_inline uint16_t +-dsw_event_enqueue_burst_generic(void *port, const struct rte_event events[], ++dsw_event_enqueue_burst_generic(struct dsw_port *source_port, ++ const struct rte_event events[], + uint16_t events_len, bool op_types_known, + uint16_t num_new, uint16_t num_release, + uint16_t num_non_release) + { +- struct dsw_port *source_port = port; + struct dsw_evdev *dsw = source_port->dsw; + bool enough_credits; + uint16_t i; +@@ -1047,12 +1045,10 @@ dsw_event_enqueue_burst_generic(void *port, const struct rte_event events[], + */ + if (unlikely(events_len == 0)) { + dsw_port_note_op(source_port, DSW_MAX_PORT_OPS_PER_BG_TASK); ++ dsw_port_flush_out_buffers(dsw, source_port); + return 0; + } + +- if (unlikely(events_len > source_port->enqueue_depth)) +- events_len = source_port->enqueue_depth; +- + dsw_port_note_op(source_port, events_len); + + if (!op_types_known) +@@ -1101,31 +1097,48 @@ dsw_event_enqueue_burst_generic(void *port, const struct rte_event events[], + DSW_LOG_DP_PORT(DEBUG, source_port->id, "%d non-release events " + "accepted.\n", num_non_release); + +- return num_non_release; ++ return (num_non_release + num_release); + } + + uint16_t + dsw_event_enqueue_burst(void *port, const struct rte_event events[], + uint16_t events_len) + { +- return dsw_event_enqueue_burst_generic(port, events, events_len, false, +- 0, 0, 0); ++ struct dsw_port *source_port = port; ++ ++ if (unlikely(events_len > source_port->enqueue_depth)) ++ events_len = source_port->enqueue_depth; ++ ++ return dsw_event_enqueue_burst_generic(source_port, events, ++ events_len, false, 0, 0, 0); + } + + uint16_t + dsw_event_enqueue_new_burst(void *port, const struct rte_event events[], + uint16_t events_len) + { +- return dsw_event_enqueue_burst_generic(port, events, events_len, true, +- events_len, 0, events_len); ++ struct dsw_port *source_port = port; ++ ++ if (unlikely(events_len > source_port->enqueue_depth)) ++ events_len = source_port->enqueue_depth; ++ ++ return dsw_event_enqueue_burst_generic(source_port, events, ++ events_len, true, events_len, ++ 0, events_len); + } + + uint16_t + dsw_event_enqueue_forward_burst(void *port, const struct rte_event events[], + uint16_t events_len) + { +- return dsw_event_enqueue_burst_generic(port, events, events_len, true, +- 0, 0, events_len); ++ struct dsw_port *source_port = port; ++ ++ if (unlikely(events_len > source_port->enqueue_depth)) ++ events_len = source_port->enqueue_depth; ++ ++ return dsw_event_enqueue_burst_generic(source_port, events, ++ events_len, true, 0, 0, ++ events_len); + } + + uint16_t +@@ -1179,11 +1192,6 @@ static uint16_t + dsw_port_dequeue_burst(struct dsw_port *port, struct rte_event *events, + uint16_t num) + { +- struct dsw_port *source_port = port; +- struct dsw_evdev *dsw = source_port->dsw; +- +- dsw_port_ctl_process(dsw, source_port); +- + if (unlikely(port->in_buffer_len > 0)) { + uint16_t dequeued = RTE_MIN(num, port->in_buffer_len); + +diff --git a/dpdk/drivers/event/octeontx2/otx2_evdev.h b/dpdk/drivers/event/octeontx2/otx2_evdev.h +index 231a12a52b..ef523dc9da 100644 +--- a/dpdk/drivers/event/octeontx2/otx2_evdev.h ++++ b/dpdk/drivers/event/octeontx2/otx2_evdev.h +@@ -16,7 +16,7 @@ + #include "otx2_mempool.h" + #include "otx2_tim_evdev.h" + +-#define EVENTDEV_NAME_OCTEONTX2_PMD otx2_eventdev 
++#define EVENTDEV_NAME_OCTEONTX2_PMD event_octeontx2 + + #define sso_func_trace otx2_sso_dbg + +diff --git a/dpdk/drivers/event/octeontx2/otx2_evdev_adptr.c b/dpdk/drivers/event/octeontx2/otx2_evdev_adptr.c +index 233cba2aa3..8bdcfa3ea5 100644 +--- a/dpdk/drivers/event/octeontx2/otx2_evdev_adptr.c ++++ b/dpdk/drivers/event/octeontx2/otx2_evdev_adptr.c +@@ -133,7 +133,7 @@ sso_rxq_disable(struct otx2_eth_dev *dev, uint16_t qid) + aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox); + aq->qidx = qid; + aq->ctype = NIX_AQ_CTYPE_CQ; +- aq->op = NIX_AQ_INSTOP_INIT; ++ aq->op = NIX_AQ_INSTOP_WRITE; + + aq->cq.ena = 1; + aq->cq.caching = 1; +@@ -144,7 +144,7 @@ sso_rxq_disable(struct otx2_eth_dev *dev, uint16_t qid) + + rc = otx2_mbox_process(mbox); + if (rc < 0) { +- otx2_err("Failed to init cq context"); ++ otx2_err("Failed to enable cq context"); + goto fail; + } + +diff --git a/dpdk/drivers/event/octeontx2/otx2_evdev_stats.h b/dpdk/drivers/event/octeontx2/otx2_evdev_stats.h +index 9d7c694ee6..74fcec8a07 100644 +--- a/dpdk/drivers/event/octeontx2/otx2_evdev_stats.h ++++ b/dpdk/drivers/event/octeontx2/otx2_evdev_stats.h +@@ -67,7 +67,7 @@ otx2_sso_xstats_get(const struct rte_eventdev *event_dev, + + switch (mode) { + case RTE_EVENT_DEV_XSTATS_DEVICE: +- break; ++ return 0; + case RTE_EVENT_DEV_XSTATS_PORT: + if (queue_port_id >= (signed int)dev->nb_event_ports) + goto invalid_value; +diff --git a/dpdk/drivers/mempool/dpaa2/meson.build b/dpdk/drivers/mempool/dpaa2/meson.build +index d79fc31644..a4fe684fc4 100644 +--- a/dpdk/drivers/mempool/dpaa2/meson.build ++++ b/dpdk/drivers/mempool/dpaa2/meson.build +@@ -9,5 +9,7 @@ endif + deps += ['bus_fslmc'] + sources = files('dpaa2_hw_mempool.c') + ++install_headers('rte_dpaa2_mempool.h') ++ + # depends on fslmc bus which uses experimental API + allow_experimental_apis = true +diff --git a/dpdk/drivers/mempool/octeontx/octeontx_fpavf.c b/dpdk/drivers/mempool/octeontx/octeontx_fpavf.c +index c97267db3c..63f8fb3b50 100644 +--- a/dpdk/drivers/mempool/octeontx/octeontx_fpavf.c ++++ b/dpdk/drivers/mempool/octeontx/octeontx_fpavf.c +@@ -305,10 +305,8 @@ octeontx_fpapf_pool_destroy(unsigned int gpool_index) + int ret = -1; + + fpa = octeontx_get_fpavf(gpool_index); +- if (fpa == NULL) { +- ret = -EINVAL; +- goto err; +- } ++ if (fpa == NULL) ++ return -EINVAL; + + hdr.coproc = FPA_COPROC; + hdr.msg = FPA_CONFIGSET; +diff --git a/dpdk/drivers/mempool/octeontx2/otx2_mempool_ops.c b/dpdk/drivers/mempool/octeontx2/otx2_mempool_ops.c +index ea4b1c45d2..18bdb0b4c4 100644 +--- a/dpdk/drivers/mempool/octeontx2/otx2_mempool_ops.c ++++ b/dpdk/drivers/mempool/octeontx2/otx2_mempool_ops.c +@@ -637,10 +637,10 @@ static int + otx2_npa_alloc(struct rte_mempool *mp) + { + uint32_t block_size, block_count; ++ uint64_t aura_handle = 0; + struct otx2_npa_lf *lf; + struct npa_aura_s aura; + struct npa_pool_s pool; +- uint64_t aura_handle; + int rc; + + lf = otx2_npa_lf_obj_get(); +diff --git a/dpdk/drivers/meson.build b/dpdk/drivers/meson.build +index 72eec46088..32d68aa1d0 100644 +--- a/dpdk/drivers/meson.build ++++ b/dpdk/drivers/meson.build +@@ -9,8 +9,8 @@ endif + dpdk_driver_classes = ['common', + 'bus', + 'mempool', # depends on common and bus. +- 'raw', # depends on common and bus. +- 'net', # depends on common, bus, mempool and raw. ++ 'net', # depends on common, bus, mempool ++ 'raw', # depends on common, bus and net. + 'crypto', # depends on common, bus and mempool (net in future). + 'compress', # depends on common, bus, mempool. 
+ 'event', # depends on common, bus, mempool and net. +diff --git a/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c b/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c +index 2b1245ee4f..d75281c10d 100644 +--- a/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -34,6 +34,7 @@ + #include <rte_log.h> + #include <rte_memory.h> + #include <rte_memzone.h> ++#include <rte_mempool.h> + #include <rte_mbuf.h> + #include <rte_malloc.h> + #include <rte_ring.h> +@@ -58,13 +59,6 @@ static int af_xdp_logtype; + + #define ETH_AF_XDP_FRAME_SIZE 2048 + #define ETH_AF_XDP_NUM_BUFFERS 4096 +-#ifdef XDP_UMEM_UNALIGNED_CHUNK_FLAG +-#define ETH_AF_XDP_MBUF_OVERHEAD 128 /* sizeof(struct rte_mbuf) */ +-#define ETH_AF_XDP_DATA_HEADROOM \ +- (ETH_AF_XDP_MBUF_OVERHEAD + RTE_PKTMBUF_HEADROOM) +-#else +-#define ETH_AF_XDP_DATA_HEADROOM 0 +-#endif + #define ETH_AF_XDP_DFLT_NUM_DESCS XSK_RING_CONS__DEFAULT_NUM_DESCS + #define ETH_AF_XDP_DFLT_START_QUEUE_IDX 0 + #define ETH_AF_XDP_DFLT_QUEUE_COUNT 1 +@@ -171,7 +165,8 @@ reserve_fill_queue_zc(struct xsk_umem_info *umem, uint16_t reserve_size, + uint64_t addr; + + fq_addr = xsk_ring_prod__fill_addr(fq, idx++); +- addr = (uint64_t)bufs[i] - (uint64_t)umem->buffer; ++ addr = (uint64_t)bufs[i] - (uint64_t)umem->buffer - ++ umem->mb_pool->header_size; + *fq_addr = addr; + } + +@@ -270,8 +265,11 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + addr = xsk_umem__extract_addr(addr); + + bufs[i] = (struct rte_mbuf *) +- xsk_umem__get_data(umem->buffer, addr); +- bufs[i]->data_off = offset - sizeof(struct rte_mbuf); ++ xsk_umem__get_data(umem->buffer, addr + ++ umem->mb_pool->header_size); ++ bufs[i]->data_off = offset - sizeof(struct rte_mbuf) - ++ rte_pktmbuf_priv_size(umem->mb_pool) - ++ umem->mb_pool->header_size; + + rte_pktmbuf_pkt_len(bufs[i]) = len; + rte_pktmbuf_data_len(bufs[i]) = len; +@@ -384,7 +382,8 @@ pull_umem_cq(struct xsk_umem_info *umem, int size) + #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) + addr = xsk_umem__extract_addr(addr); + rte_pktmbuf_free((struct rte_mbuf *) +- xsk_umem__get_data(umem->buffer, addr)); ++ xsk_umem__get_data(umem->buffer, ++ addr + umem->mb_pool->header_size)); + #else + rte_ring_enqueue(umem->buf_ring, (void *)addr); + #endif +@@ -442,9 +441,11 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + } + desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx); + desc->len = mbuf->pkt_len; +- addr = (uint64_t)mbuf - (uint64_t)umem->buffer; ++ addr = (uint64_t)mbuf - (uint64_t)umem->buffer - ++ umem->mb_pool->header_size; + offset = rte_pktmbuf_mtod(mbuf, uint64_t) - +- (uint64_t)mbuf; ++ (uint64_t)mbuf + ++ umem->mb_pool->header_size; + offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT; + desc->addr = addr | offset; + count++; +@@ -465,9 +466,11 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx); + desc->len = mbuf->pkt_len; + +- addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer; ++ addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer - ++ umem->mb_pool->header_size; + offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) - +- (uint64_t)local_mbuf; ++ (uint64_t)local_mbuf + ++ umem->mb_pool->header_size; + pkt = xsk_umem__get_data(umem->buffer, addr + offset); + offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT; + desc->addr = addr | offset; +@@ -480,10 +483,7 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + tx_bytes += mbuf->pkt_len; + } + +-#if defined(XDP_USE_NEED_WAKEUP) +- if 
(xsk_ring_prod__needs_wakeup(&txq->tx)) +-#endif +- kick_tx(txq); ++ kick_tx(txq); + + out: + xsk_ring_prod__submit(&txq->tx, count); +@@ -595,7 +595,14 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + dev_info->max_tx_queues = internals->queue_cnt; + + dev_info->min_mtu = RTE_ETHER_MIN_MTU; +- dev_info->max_mtu = ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_DATA_HEADROOM; ++#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) ++ dev_info->max_mtu = getpagesize() - ++ sizeof(struct rte_mempool_objhdr) - ++ sizeof(struct rte_mbuf) - ++ RTE_PKTMBUF_HEADROOM - XDP_PACKET_HEADROOM; ++#else ++ dev_info->max_mtu = ETH_AF_XDP_FRAME_SIZE - XDP_PACKET_HEADROOM; ++#endif + + dev_info->default_rxportconf.nb_queues = 1; + dev_info->default_txportconf.nb_queues = 1; +@@ -758,11 +765,13 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals __rte_unused, + void *base_addr = NULL; + struct rte_mempool *mb_pool = rxq->mb_pool; + +- usr_config.frame_size = rte_pktmbuf_data_room_size(mb_pool) + +- ETH_AF_XDP_MBUF_OVERHEAD + +- mb_pool->private_data_size; +- usr_config.frame_headroom = ETH_AF_XDP_DATA_HEADROOM + +- mb_pool->private_data_size; ++ usr_config.frame_size = rte_mempool_calc_obj_size(mb_pool->elt_size, ++ mb_pool->flags, ++ NULL); ++ usr_config.frame_headroom = mb_pool->header_size + ++ sizeof(struct rte_mbuf) + ++ rte_pktmbuf_priv_size(mb_pool) + ++ RTE_PKTMBUF_HEADROOM; + + umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, rte_socket_id()); + if (umem == NULL) { +@@ -795,7 +804,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + .fill_size = ETH_AF_XDP_DFLT_NUM_DESCS, + .comp_size = ETH_AF_XDP_DFLT_NUM_DESCS, + .frame_size = ETH_AF_XDP_FRAME_SIZE, +- .frame_headroom = ETH_AF_XDP_DATA_HEADROOM }; ++ .frame_headroom = 0 }; + char ring_name[RTE_RING_NAMESIZE]; + char mz_name[RTE_MEMZONE_NAMESIZE]; + int ret; +@@ -820,8 +829,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + + for (i = 0; i < ETH_AF_XDP_NUM_BUFFERS; i++) + rte_ring_enqueue(umem->buf_ring, +- (void *)(i * ETH_AF_XDP_FRAME_SIZE + +- ETH_AF_XDP_DATA_HEADROOM)); ++ (void *)(i * ETH_AF_XDP_FRAME_SIZE)); + + snprintf(mz_name, sizeof(mz_name), "af_xdp_umem_%s_%u", + internals->if_name, rxq->xsk_queue_idx); +@@ -930,7 +938,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, + /* Now get the space available for data in the mbuf */ + buf_size = rte_pktmbuf_data_room_size(mb_pool) - + RTE_PKTMBUF_HEADROOM; +- data_size = ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_DATA_HEADROOM; ++ data_size = ETH_AF_XDP_FRAME_SIZE; + + if (data_size > buf_size) { + AF_XDP_LOG(ERR, "%s: %d bytes will not fit in mbuf (%d bytes)\n", +diff --git a/dpdk/drivers/net/avp/avp_ethdev.c b/dpdk/drivers/net/avp/avp_ethdev.c +index cd747b6beb..1abe96ce50 100644 +--- a/dpdk/drivers/net/avp/avp_ethdev.c ++++ b/dpdk/drivers/net/avp/avp_ethdev.c +@@ -1694,7 +1694,7 @@ avp_xmit_scattered_pkts(void *tx_queue, + uint16_t nb_pkts) + { + struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST * +- RTE_AVP_MAX_MBUF_SEGMENTS)]; ++ RTE_AVP_MAX_MBUF_SEGMENTS)] = {}; + struct avp_queue *txq = (struct avp_queue *)tx_queue; + struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST]; + struct avp_dev *avp = txq->avp; +diff --git a/dpdk/drivers/net/bnx2x/bnx2x.c b/dpdk/drivers/net/bnx2x/bnx2x.c +index ed31335ac5..0b4030e2b9 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x.c ++++ b/dpdk/drivers/net/bnx2x/bnx2x.c +@@ -1167,6 +1167,10 @@ static int bnx2x_has_rx_work(struct bnx2x_fastpath *fp) + if (unlikely((rx_cq_cons_sb & MAX_RCQ_ENTRIES(rxq)) == + 
MAX_RCQ_ENTRIES(rxq))) + rx_cq_cons_sb++; ++ ++ PMD_RX_LOG(DEBUG, "hw CQ cons = %d, sw CQ cons = %d", ++ rx_cq_cons_sb, rxq->rx_cq_head); ++ + return rxq->rx_cq_head != rx_cq_cons_sb; + } + +@@ -1249,9 +1253,12 @@ static uint8_t bnx2x_rxeof(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp) + uint16_t bd_cons, bd_prod, bd_prod_fw, comp_ring_cons; + uint16_t hw_cq_cons, sw_cq_cons, sw_cq_prod; + ++ rte_spinlock_lock(&(fp)->rx_mtx); ++ + rxq = sc->rx_queues[fp->index]; + if (!rxq) { + PMD_RX_LOG(ERR, "RX queue %d is NULL", fp->index); ++ rte_spinlock_unlock(&(fp)->rx_mtx); + return 0; + } + +@@ -1321,9 +1328,14 @@ static uint8_t bnx2x_rxeof(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp) + rxq->rx_cq_head = sw_cq_cons; + rxq->rx_cq_tail = sw_cq_prod; + ++ PMD_RX_LOG(DEBUG, "BD prod = %d, sw CQ prod = %d", ++ bd_prod_fw, sw_cq_prod); ++ + /* Update producers */ + bnx2x_update_rx_prod(sc, fp, bd_prod_fw, sw_cq_prod); + ++ rte_spinlock_unlock(&(fp)->rx_mtx); ++ + return sw_cq_cons != hw_cq_cons; + } + +@@ -4577,10 +4589,10 @@ static void bnx2x_handle_fp_tq(struct bnx2x_fastpath *fp) + bnx2x_handle_fp_tq(fp); + return; + } ++ /* We have completed slow path completion, clear the flag */ ++ rte_atomic32_set(&sc->scan_fp, 0); + } + +- /* Assuming we have completed slow path completion, clear the flag */ +- rte_atomic32_set(&sc->scan_fp, 0); + bnx2x_ack_sb(sc, fp->igu_sb_id, USTORM_ID, + le16toh(fp->fp_hc_idx), IGU_INT_ENABLE, 1); + } +diff --git a/dpdk/drivers/net/bnx2x/bnx2x.h b/dpdk/drivers/net/bnx2x/bnx2x.h +index 3383c76759..1dbc98197d 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x.h ++++ b/dpdk/drivers/net/bnx2x/bnx2x.h +@@ -360,6 +360,9 @@ struct bnx2x_fastpath { + /* pointer back to parent structure */ + struct bnx2x_softc *sc; + ++ /* Used to synchronize fastpath Rx access */ ++ rte_spinlock_t rx_mtx; ++ + /* status block */ + struct bnx2x_dma sb_dma; + union bnx2x_host_hc_status_block status_block; +diff --git a/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c b/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c +index 20b045ff87..7864b5b80a 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c ++++ b/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c +@@ -598,6 +598,11 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf) + + eth_dev->dev_ops = is_vf ? 
&bnx2xvf_eth_dev_ops : &bnx2x_eth_dev_ops; + ++ if (rte_eal_process_type() != RTE_PROC_PRIMARY) { ++ PMD_DRV_LOG(ERR, sc, "Skipping device init from secondary process"); ++ return 0; ++ } ++ + rte_eth_copy_pci_info(eth_dev, pci_dev); + + sc->pcie_bus = pci_dev->addr.bus; +diff --git a/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c b/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c +index ae97dfee36..e201b68db8 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c ++++ b/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c +@@ -346,6 +346,8 @@ bnx2x_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + uint16_t len, pad; + struct rte_mbuf *rx_mb = NULL; + ++ rte_spinlock_lock(&(fp)->rx_mtx); ++ + hw_cq_cons = le16toh(*fp->rx_cq_cons_sb); + if ((hw_cq_cons & USABLE_RCQ_ENTRIES_PER_PAGE) == + USABLE_RCQ_ENTRIES_PER_PAGE) { +@@ -357,8 +359,10 @@ bnx2x_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + sw_cq_cons = rxq->rx_cq_head; + sw_cq_prod = rxq->rx_cq_tail; + +- if (sw_cq_cons == hw_cq_cons) ++ if (sw_cq_cons == hw_cq_cons) { ++ rte_spinlock_unlock(&(fp)->rx_mtx); + return 0; ++ } + + while (nb_rx < nb_pkts && sw_cq_cons != hw_cq_cons) { + +@@ -414,7 +418,7 @@ bnx2x_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + */ + if (cqe_fp->pars_flags.flags & PARSING_FLAGS_VLAN) { + rx_mb->vlan_tci = cqe_fp->vlan_tag; +- rx_mb->ol_flags |= PKT_RX_VLAN; ++ rx_mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; + } + + rx_pkts[nb_rx] = rx_mb; +@@ -439,6 +443,8 @@ bnx2x_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + + bnx2x_upd_rx_prod_fast(sc, fp, bd_prod, sw_cq_prod); + ++ rte_spinlock_unlock(&(fp)->rx_mtx); ++ + return nb_rx; + } + +diff --git a/dpdk/drivers/net/bnxt/bnxt.h b/dpdk/drivers/net/bnxt/bnxt.h +index e259c8239d..404d58a037 100644 +--- a/dpdk/drivers/net/bnxt/bnxt.h ++++ b/dpdk/drivers/net/bnxt/bnxt.h +@@ -117,6 +117,14 @@ + #define BNXT_NUM_ASYNC_CPR(bp) 1 + #endif + ++/* In FreeBSD OS, nic_uio driver does not support interrupts */ ++#ifdef RTE_EXEC_ENV_FREEBSD ++#ifdef BNXT_NUM_ASYNC_CPR ++#undef BNXT_NUM_ASYNC_CPR ++#endif ++#define BNXT_NUM_ASYNC_CPR(bp) 0 ++#endif ++ + #define BNXT_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET + #define BNXT_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET + +@@ -231,9 +239,10 @@ struct bnxt_pf_info { + uint8_t evb_mode; + }; + +-/* Max wait time is 10 * 100ms = 1s */ +-#define BNXT_LINK_WAIT_CNT 10 +-#define BNXT_LINK_WAIT_INTERVAL 100 ++/* Max wait time for link up is 10s and link down is 500ms */ ++#define BNXT_LINK_UP_WAIT_CNT 200 ++#define BNXT_LINK_DOWN_WAIT_CNT 10 ++#define BNXT_LINK_WAIT_INTERVAL 50 + struct bnxt_link_info { + uint32_t phy_flags; + uint8_t mac_type; +@@ -461,6 +470,11 @@ struct bnxt_error_recovery_info { + uint32_t last_reset_counter; + }; + ++/* Frequency for the FUNC_DRV_IF_CHANGE retry in milliseconds */ ++#define BNXT_IF_CHANGE_RETRY_INTERVAL 50 ++/* Maximum retry count for FUNC_DRV_IF_CHANGE */ ++#define BNXT_IF_CHANGE_RETRY_COUNT 40 ++ + /* address space location of register */ + #define BNXT_FW_STATUS_REG_TYPE_MASK 3 + /* register is located in PCIe config space */ +@@ -485,7 +499,6 @@ struct bnxt { + void *bar0; + + struct rte_eth_dev *eth_dev; +- struct rte_eth_rss_conf rss_conf; + struct rte_pci_device *pdev; + void *doorbell_base; + +@@ -507,19 +520,17 @@ struct bnxt { + #define BNXT_FLAG_STINGRAY BIT(14) + #define BNXT_FLAG_FW_RESET BIT(15) + #define BNXT_FLAG_FATAL_ERROR BIT(16) +-#define BNXT_FLAG_FW_CAP_IF_CHANGE BIT(17) +-#define BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE BIT(18) +-#define 
BNXT_FLAG_FW_CAP_ERROR_RECOVERY BIT(19) +-#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED BIT(20) +-#define BNXT_FLAG_FW_CAP_ERR_RECOVER_RELOAD BIT(21) +-#define BNXT_FLAG_EXT_STATS_SUPPORTED BIT(22) +-#define BNXT_FLAG_NEW_RM BIT(23) +-#define BNXT_FLAG_INIT_DONE BIT(24) +-#define BNXT_FLAG_FW_CAP_ONE_STEP_TX_TS BIT(25) +-#define BNXT_FLAG_ADV_FLOW_MGMT BIT(26) ++#define BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE BIT(17) ++#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED BIT(18) ++#define BNXT_FLAG_EXT_STATS_SUPPORTED BIT(19) ++#define BNXT_FLAG_NEW_RM BIT(20) ++#define BNXT_FLAG_FW_CAP_ONE_STEP_TX_TS BIT(22) ++#define BNXT_FLAG_ADV_FLOW_MGMT BIT(23) ++#define BNXT_FLAG_NPAR_PF BIT(24) ++#define BNXT_FLAG_DFLT_MAC_SET BIT(26) + #define BNXT_PF(bp) (!((bp)->flags & BNXT_FLAG_VF)) + #define BNXT_VF(bp) ((bp)->flags & BNXT_FLAG_VF) +-#define BNXT_NPAR(bp) ((bp)->port_partition_type) ++#define BNXT_NPAR(bp) ((bp)->flags & BNXT_FLAG_NPAR_PF) + #define BNXT_MH(bp) ((bp)->flags & BNXT_FLAG_MULTI_HOST) + #define BNXT_SINGLE_PF(bp) (BNXT_PF(bp) && !BNXT_NPAR(bp) && !BNXT_MH(bp)) + #define BNXT_USE_CHIMP_MB 0 //For non-CFA commands, everything uses Chimp. +@@ -529,6 +540,13 @@ struct bnxt { + #define BNXT_STINGRAY(bp) ((bp)->flags & BNXT_FLAG_STINGRAY) + #define BNXT_HAS_NQ(bp) BNXT_CHIP_THOR(bp) + #define BNXT_HAS_RING_GRPS(bp) (!BNXT_CHIP_THOR(bp)) ++#define BNXT_HAS_DFLT_MAC_SET(bp) ((bp)->flags & BNXT_FLAG_DFLT_MAC_SET) ++ ++ uint32_t fw_cap; ++#define BNXT_FW_CAP_HOT_RESET BIT(0) ++#define BNXT_FW_CAP_IF_CHANGE BIT(1) ++#define BNXT_FW_CAP_ERROR_RECOVERY BIT(2) ++#define BNXT_FW_CAP_ERR_RECOVER_RELOAD BIT(3) + + uint32_t flow_flags; + #define BNXT_FLOW_FLAG_L2_HDR_SRC_FILTER_EN BIT(0) +@@ -588,8 +606,10 @@ struct bnxt { + uint16_t max_resp_len; + uint16_t hwrm_max_ext_req_len; + +- /* default command timeout value of 50ms */ +-#define HWRM_CMD_TIMEOUT 50000 ++ /* default command timeout value of 500ms */ ++#define DFLT_HWRM_CMD_TIMEOUT 500000 ++ /* short command timeout value of 50ms */ ++#define SHORT_HWRM_CMD_TIMEOUT 50000 + /* default HWRM request timeout value */ + uint32_t hwrm_cmd_timeout; + +@@ -603,18 +623,24 @@ struct bnxt { + uint8_t max_q; + + uint16_t fw_fid; +- uint8_t dflt_mac_addr[RTE_ETHER_ADDR_LEN]; + uint16_t max_rsscos_ctx; + uint16_t max_cp_rings; + uint16_t max_tx_rings; + uint16_t max_rx_rings; + #define MAX_STINGRAY_RINGS 128U +-#define BNXT_MAX_RINGS(bp) \ +- (BNXT_STINGRAY(bp) ? RTE_MIN(RTE_MIN(bp->max_rx_rings, \ ++/* For sake of symmetry, max Tx rings == max Rx rings, one stat ctx for each */ ++#define BNXT_MAX_RX_RINGS(bp) \ ++ (BNXT_STINGRAY(bp) ? 
RTE_MIN(RTE_MIN(bp->max_rx_rings / 2U, \ + MAX_STINGRAY_RINGS), \ +- bp->max_stat_ctx) : \ +- RTE_MIN(bp->max_rx_rings, bp->max_stat_ctx)) ++ bp->max_stat_ctx / 2U) : \ ++ RTE_MIN(bp->max_rx_rings / 2U, \ ++ bp->max_stat_ctx / 2U)) ++#define BNXT_MAX_TX_RINGS(bp) \ ++ (RTE_MIN((bp)->max_tx_rings, BNXT_MAX_RX_RINGS(bp))) + ++#define BNXT_MAX_RINGS(bp) \ ++ (RTE_MIN((((bp)->max_cp_rings - BNXT_NUM_ASYNC_CPR(bp)) / 2U), \ ++ BNXT_MAX_TX_RINGS(bp))) + uint16_t max_nq_rings; + uint16_t max_l2_ctx; + uint16_t max_rx_em_flows; +@@ -628,8 +654,6 @@ struct bnxt { + #define BNXT_OUTER_TPID_BD_SHFT 16 + uint32_t outer_tpid_bd; + struct bnxt_pf_info pf; +- uint8_t port_partition_type; +- uint8_t dev_stopped; + uint8_t vxlan_port_cnt; + uint8_t geneve_port_cnt; + uint16_t vxlan_port; +@@ -653,7 +677,8 @@ struct bnxt { + }; + + int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu); +-int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete); ++int bnxt_link_update(struct rte_eth_dev *eth_dev, int wait_to_complete, ++ bool exp_link_status); + int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg); + int is_bnxt_in_error(struct bnxt *bp); + uint16_t bnxt_rss_ctxts(const struct bnxt *bp); +@@ -671,11 +696,23 @@ extern const struct rte_flow_ops bnxt_flow_ops; + #define bnxt_release_flow_lock(bp) \ + pthread_mutex_unlock(&(bp)->flow_lock) + ++#define BNXT_VALID_VNIC_OR_RET(bp, vnic_id) do { \ ++ if ((vnic_id) >= (bp)->max_vnics) { \ ++ rte_flow_error_set(error, \ ++ EINVAL, \ ++ RTE_FLOW_ERROR_TYPE_ATTR_GROUP, \ ++ NULL, \ ++ "Group id is invalid!"); \ ++ rc = -rte_errno; \ ++ goto ret; \ ++ } \ ++} while (0) ++ + extern int bnxt_logtype_driver; + #define PMD_DRV_LOG_RAW(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, bnxt_logtype_driver, "%s(): " fmt, \ + __func__, ## args) + + #define PMD_DRV_LOG(level, fmt, args...) \ +- PMD_DRV_LOG_RAW(level, fmt, ## args) ++ PMD_DRV_LOG_RAW(level, fmt, ## args) + #endif +diff --git a/dpdk/drivers/net/bnxt/bnxt_cpr.c b/dpdk/drivers/net/bnxt/bnxt_cpr.c +index e6f30fecbf..c0e492e6c2 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_cpr.c ++++ b/dpdk/drivers/net/bnxt/bnxt_cpr.c +@@ -21,7 +21,7 @@ void bnxt_wait_for_device_shutdown(struct bnxt *bp) + * the SHUTDOWN bit in health register + */ + if (!(bp->recovery_info && +- (bp->flags & BNXT_FLAG_FW_CAP_ERR_RECOVER_RELOAD))) ++ (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD))) + return; + + /* Driver has to wait for fw_reset_max_msecs or shutdown bit which comes +@@ -63,7 +63,7 @@ void bnxt_handle_async_event(struct bnxt *bp, + case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE: + case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE: + /* FALLTHROUGH */ +- bnxt_link_update_op(bp->eth_dev, 0); ++ bnxt_link_update(bp->eth_dev, 0, ETH_LINK_UP); + break; + case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD: + PMD_DRV_LOG(INFO, "Async event: PF driver unloaded\n"); +@@ -76,6 +76,12 @@ void bnxt_handle_async_event(struct bnxt *bp, + PMD_DRV_LOG(INFO, "Port conn async event\n"); + break; + case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: ++ /* Ignore reset notify async events when stopping the port */ ++ if (!bp->eth_dev->data->dev_started) { ++ bp->flags |= BNXT_FLAG_FATAL_ERROR; ++ return; ++ } ++ + event_data = rte_le_to_cpu_32(async_cmp->event_data1); + /* timestamp_lo/hi values are in units of 100ms */ + bp->fw_reset_max_msecs = async_cmp->timestamp_hi ? 
+diff --git a/dpdk/drivers/net/bnxt/bnxt_ethdev.c b/dpdk/drivers/net/bnxt/bnxt_ethdev.c +index 41848f36f8..f1bd575356 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_ethdev.c ++++ b/dpdk/drivers/net/bnxt/bnxt_ethdev.c +@@ -132,6 +132,8 @@ static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev); + static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev); + static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev); + static void bnxt_cancel_fw_health_check(struct bnxt *bp); ++static int bnxt_restore_vlan_filters(struct bnxt *bp); ++static void bnxt_dev_recover(void *arg); + + int is_bnxt_in_error(struct bnxt *bp) + { +@@ -228,14 +230,97 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool reconfig) + return rc; + } + +-static int bnxt_init_chip(struct bnxt *bp) ++static int bnxt_setup_one_vnic(struct bnxt *bp, uint16_t vnic_id) + { ++ struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf; ++ struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; ++ uint64_t rx_offloads = dev_conf->rxmode.offloads; + struct bnxt_rx_queue *rxq; ++ unsigned int j; ++ int rc; ++ ++ rc = bnxt_vnic_grp_alloc(bp, vnic); ++ if (rc) ++ goto err_out; ++ ++ PMD_DRV_LOG(DEBUG, "vnic[%d] = %p vnic->fw_grp_ids = %p\n", ++ vnic_id, vnic, vnic->fw_grp_ids); ++ ++ rc = bnxt_hwrm_vnic_alloc(bp, vnic); ++ if (rc) ++ goto err_out; ++ ++ /* Alloc RSS context only if RSS mode is enabled */ ++ if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS) { ++ int j, nr_ctxs = bnxt_rss_ctxts(bp); ++ ++ rc = 0; ++ for (j = 0; j < nr_ctxs; j++) { ++ rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, j); ++ if (rc) ++ break; ++ } ++ if (rc) { ++ PMD_DRV_LOG(ERR, ++ "HWRM vnic %d ctx %d alloc failure rc: %x\n", ++ vnic_id, j, rc); ++ goto err_out; ++ } ++ vnic->num_lb_ctxts = nr_ctxs; ++ } ++ ++ /* ++ * Firmware sets pf pair in default vnic cfg. If the VLAN strip ++ * setting is not available at this time, it will not be ++ * configured correctly in the CFA. 
++ */ ++ if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP) ++ vnic->vlan_strip = true; ++ else ++ vnic->vlan_strip = false; ++ ++ rc = bnxt_hwrm_vnic_cfg(bp, vnic); ++ if (rc) ++ goto err_out; ++ ++ rc = bnxt_set_hwrm_vnic_filters(bp, vnic); ++ if (rc) ++ goto err_out; ++ ++ for (j = 0; j < bp->rx_num_qs_per_vnic; j++) { ++ rxq = bp->eth_dev->data->rx_queues[j]; ++ ++ PMD_DRV_LOG(DEBUG, ++ "rxq[%d]->vnic=%p vnic->fw_grp_ids=%p\n", ++ j, rxq->vnic, rxq->vnic->fw_grp_ids); ++ ++ if (BNXT_HAS_RING_GRPS(bp) && rxq->rx_deferred_start) ++ rxq->vnic->fw_grp_ids[j] = INVALID_HW_RING_ID; ++ } ++ ++ rc = bnxt_vnic_rss_configure(bp, vnic); ++ if (rc) ++ goto err_out; ++ ++ bnxt_hwrm_vnic_plcmode_cfg(bp, vnic); ++ ++ if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) ++ bnxt_hwrm_vnic_tpa_cfg(bp, vnic, 1); ++ else ++ bnxt_hwrm_vnic_tpa_cfg(bp, vnic, 0); ++ ++ return 0; ++err_out: ++ PMD_DRV_LOG(ERR, "HWRM vnic %d cfg failure rc: %x\n", ++ vnic_id, rc); ++ return rc; ++} ++ ++static int bnxt_init_chip(struct bnxt *bp) ++{ + struct rte_eth_link new; + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(bp->eth_dev); +- struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf; + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; +- uint64_t rx_offloads = dev_conf->rxmode.offloads; + uint32_t intr_vector = 0; + uint32_t queue_id, base = BNXT_MISC_VEC_ID; + uint32_t vec = BNXT_MISC_VEC_ID; +@@ -303,93 +388,11 @@ static int bnxt_init_chip(struct bnxt *bp) + + /* VNIC configuration */ + for (i = 0; i < bp->nr_vnics; i++) { +- struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf; +- struct bnxt_vnic_info *vnic = &bp->vnic_info[i]; +- +- rc = bnxt_vnic_grp_alloc(bp, vnic); ++ rc = bnxt_setup_one_vnic(bp, i); + if (rc) + goto err_out; +- +- PMD_DRV_LOG(DEBUG, "vnic[%d] = %p vnic->fw_grp_ids = %p\n", +- i, vnic, vnic->fw_grp_ids); +- +- rc = bnxt_hwrm_vnic_alloc(bp, vnic); +- if (rc) { +- PMD_DRV_LOG(ERR, "HWRM vnic %d alloc failure rc: %x\n", +- i, rc); +- goto err_out; +- } +- +- /* Alloc RSS context only if RSS mode is enabled */ +- if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS) { +- int j, nr_ctxs = bnxt_rss_ctxts(bp); +- +- rc = 0; +- for (j = 0; j < nr_ctxs; j++) { +- rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, j); +- if (rc) +- break; +- } +- if (rc) { +- PMD_DRV_LOG(ERR, +- "HWRM vnic %d ctx %d alloc failure rc: %x\n", +- i, j, rc); +- goto err_out; +- } +- vnic->num_lb_ctxts = nr_ctxs; +- } +- +- /* +- * Firmware sets pf pair in default vnic cfg. If the VLAN strip +- * setting is not available at this time, it will not be +- * configured correctly in the CFA. 
+- */ +- if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP) +- vnic->vlan_strip = true; +- else +- vnic->vlan_strip = false; +- +- rc = bnxt_hwrm_vnic_cfg(bp, vnic); +- if (rc) { +- PMD_DRV_LOG(ERR, "HWRM vnic %d cfg failure rc: %x\n", +- i, rc); +- goto err_out; +- } +- +- rc = bnxt_set_hwrm_vnic_filters(bp, vnic); +- if (rc) { +- PMD_DRV_LOG(ERR, +- "HWRM vnic %d filter failure rc: %x\n", +- i, rc); +- goto err_out; +- } +- +- for (j = 0; j < bp->rx_num_qs_per_vnic; j++) { +- rxq = bp->eth_dev->data->rx_queues[j]; +- +- PMD_DRV_LOG(DEBUG, +- "rxq[%d]->vnic=%p vnic->fw_grp_ids=%p\n", +- j, rxq->vnic, rxq->vnic->fw_grp_ids); +- +- if (BNXT_HAS_RING_GRPS(bp) && rxq->rx_deferred_start) +- rxq->vnic->fw_grp_ids[j] = INVALID_HW_RING_ID; +- } +- +- rc = bnxt_vnic_rss_configure(bp, vnic); +- if (rc) { +- PMD_DRV_LOG(ERR, +- "HWRM vnic set RSS failure rc: %x\n", rc); +- goto err_out; +- } +- +- bnxt_hwrm_vnic_plcmode_cfg(bp, vnic); +- +- if (bp->eth_dev->data->dev_conf.rxmode.offloads & +- DEV_RX_OFFLOAD_TCP_LRO) +- bnxt_hwrm_vnic_tpa_cfg(bp, vnic, 1); +- else +- bnxt_hwrm_vnic_tpa_cfg(bp, vnic, 0); + } ++ + rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, &bp->vnic_info[0], 0, NULL); + if (rc) { + PMD_DRV_LOG(ERR, +@@ -439,8 +442,11 @@ static int bnxt_init_chip(struct bnxt *bp) + + /* enable uio/vfio intr/eventfd mapping */ + rc = rte_intr_enable(intr_handle); ++#ifndef RTE_EXEC_ENV_FREEBSD ++ /* In FreeBSD OS, nic_uio driver does not support interrupts */ + if (rc) + goto err_free; ++#endif + + rc = bnxt_get_hwrm_link_config(bp, &new); + if (rc) { +@@ -827,7 +833,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) + struct bnxt *bp = eth_dev->data->dev_private; + uint64_t rx_offloads = eth_dev->data->dev_conf.rxmode.offloads; + int vlan_mask = 0; +- int rc; ++ int rc, retry_cnt = BNXT_IF_CHANGE_RETRY_COUNT; + + if (!eth_dev->data->nb_tx_queues || !eth_dev->data->nb_rx_queues) { + PMD_DRV_LOG(ERR, "Queues are not configured yet!\n"); +@@ -840,14 +846,23 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) + bp->rx_cp_nr_rings, RTE_ETHDEV_QUEUE_STAT_CNTRS); + } + +- rc = bnxt_hwrm_if_change(bp, 1); +- if (!rc) { +- if (bp->flags & BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE) { +- rc = bnxt_handle_if_change_status(bp); +- if (rc) +- return rc; +- } ++ do { ++ rc = bnxt_hwrm_if_change(bp, true); ++ if (rc == 0 || rc != -EAGAIN) ++ break; ++ ++ rte_delay_ms(BNXT_IF_CHANGE_RETRY_INTERVAL); ++ } while (retry_cnt--); ++ ++ if (rc) ++ return rc; ++ ++ if (bp->flags & BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE) { ++ rc = bnxt_handle_if_change_status(bp); ++ if (rc) ++ return rc; + } ++ + bnxt_enable_int(bp); + + rc = bnxt_init_chip(bp); +@@ -855,8 +870,9 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) + goto error; + + eth_dev->data->scattered_rx = bnxt_scattered_rx(eth_dev); ++ eth_dev->data->dev_started = 1; + +- bnxt_link_update_op(eth_dev, 1); ++ bnxt_link_update(eth_dev, 1, ETH_LINK_UP); + + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) + vlan_mask |= ETH_VLAN_FILTER_MASK; +@@ -869,19 +885,17 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) + eth_dev->rx_pkt_burst = bnxt_receive_function(eth_dev); + eth_dev->tx_pkt_burst = bnxt_transmit_function(eth_dev); + +- bp->flags |= BNXT_FLAG_INIT_DONE; +- eth_dev->data->dev_started = 1; +- bp->dev_stopped = 0; + pthread_mutex_lock(&bp->def_cp_lock); + bnxt_schedule_fw_health_check(bp); + pthread_mutex_unlock(&bp->def_cp_lock); + return 0; + + error: +- bnxt_hwrm_if_change(bp, 0); + bnxt_shutdown_nic(bp); + bnxt_free_tx_mbufs(bp); + 
bnxt_free_rx_mbufs(bp); ++ bnxt_hwrm_if_change(bp, false); ++ eth_dev->data->dev_started = 0; + return rc; + } + +@@ -929,18 +943,14 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) + + bnxt_cancel_fw_health_check(bp); + +- bp->flags &= ~BNXT_FLAG_INIT_DONE; +- if (bp->eth_dev->data->dev_started) { +- /* TBD: STOP HW queues DMA */ +- eth_dev->data->dev_link.link_status = 0; +- } + bnxt_dev_set_link_down_op(eth_dev); + + /* Wait for link to be reset and the async notification to process. +- * During reset recovery, there is no need to wait ++ * During reset recovery, there is no need to wait and ++ * VF/NPAR functions do not have privilege to change PHY config. + */ +- if (!is_bnxt_in_error(bp)) +- rte_delay_ms(BNXT_LINK_WAIT_INTERVAL * 2); ++ if (!is_bnxt_in_error(bp) && BNXT_SINGLE_PF(bp)) ++ bnxt_link_update(eth_dev, 1, ETH_LINK_DOWN); + + /* Clean queue intr-vector mapping */ + rte_intr_efd_disable(intr_handle); +@@ -955,8 +965,7 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) + /* Process any remaining notifications in default completion queue */ + bnxt_int_handler(eth_dev); + bnxt_shutdown_nic(bp); +- bnxt_hwrm_if_change(bp, 0); +- bp->dev_stopped = 1; ++ bnxt_hwrm_if_change(bp, false); + bp->rx_cosq_cnt = 0; + } + +@@ -964,7 +973,11 @@ static void bnxt_dev_close_op(struct rte_eth_dev *eth_dev) + { + struct bnxt *bp = eth_dev->data->dev_private; + +- if (bp->dev_stopped == 0) ++ /* cancel the recovery handler before remove dev */ ++ rte_eal_alarm_cancel(bnxt_dev_reset_and_resume, (void *)bp); ++ rte_eal_alarm_cancel(bnxt_dev_recover, (void *)bp); ++ ++ if (eth_dev->data->dev_started) + bnxt_dev_stop_op(eth_dev); + + if (eth_dev->data->mac_addrs != NULL) { +@@ -1084,17 +1097,23 @@ static int bnxt_mac_addr_add_op(struct rte_eth_dev *eth_dev, + return -EINVAL; + } + ++ /* Filter settings will get applied when port is started */ ++ if (!eth_dev->data->dev_started) ++ return 0; ++ + rc = bnxt_add_mac_filter(bp, vnic, mac_addr, index, pool); + + return rc; + } + +-int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete) ++int bnxt_link_update(struct rte_eth_dev *eth_dev, int wait_to_complete, ++ bool exp_link_status) + { + int rc = 0; + struct bnxt *bp = eth_dev->data->dev_private; + struct rte_eth_link new; +- unsigned int cnt = BNXT_LINK_WAIT_CNT; ++ int cnt = exp_link_status ? 
BNXT_LINK_UP_WAIT_CNT : ++ BNXT_LINK_DOWN_WAIT_CNT; + + rc = is_bnxt_in_error(bp); + if (rc) +@@ -1112,7 +1131,7 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete) + goto out; + } + +- if (!wait_to_complete || new.link_status) ++ if (!wait_to_complete || new.link_status == exp_link_status) + break; + + rte_delay_ms(BNXT_LINK_WAIT_INTERVAL); +@@ -1134,6 +1153,12 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete) + return rc; + } + ++static int bnxt_link_update_op(struct rte_eth_dev *eth_dev, ++ int wait_to_complete) ++{ ++ return bnxt_link_update(eth_dev, wait_to_complete, ETH_LINK_UP); ++} ++ + static int bnxt_promiscuous_enable_op(struct rte_eth_dev *eth_dev) + { + struct bnxt *bp = eth_dev->data->dev_private; +@@ -1145,6 +1170,10 @@ static int bnxt_promiscuous_enable_op(struct rte_eth_dev *eth_dev) + if (rc) + return rc; + ++ /* Filter settings will get applied when port is started */ ++ if (!eth_dev->data->dev_started) ++ return 0; ++ + if (bp->vnic_info == NULL) + return 0; + +@@ -1170,6 +1199,10 @@ static int bnxt_promiscuous_disable_op(struct rte_eth_dev *eth_dev) + if (rc) + return rc; + ++ /* Filter settings will get applied when port is started */ ++ if (!eth_dev->data->dev_started) ++ return 0; ++ + if (bp->vnic_info == NULL) + return 0; + +@@ -1195,6 +1228,10 @@ static int bnxt_allmulticast_enable_op(struct rte_eth_dev *eth_dev) + if (rc) + return rc; + ++ /* Filter settings will get applied when port is started */ ++ if (!eth_dev->data->dev_started) ++ return 0; ++ + if (bp->vnic_info == NULL) + return 0; + +@@ -1220,6 +1257,10 @@ static int bnxt_allmulticast_disable_op(struct rte_eth_dev *eth_dev) + if (rc) + return rc; + ++ /* Filter settings will get applied when port is started */ ++ if (!eth_dev->data->dev_started) ++ return 0; ++ + if (bp->vnic_info == NULL) + return 0; + +@@ -1399,7 +1440,9 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev, + } + + bp->flags |= BNXT_FLAG_UPDATE_HASH; +- memcpy(&bp->rss_conf, rss_conf, sizeof(*rss_conf)); ++ memcpy(ð_dev->data->dev_conf.rx_adv_conf.rss_conf, ++ rss_conf, ++ sizeof(*rss_conf)); + + /* Update the default RSS VNIC(s) */ + vnic = &bp->vnic_info[0]; +@@ -1476,7 +1519,7 @@ static int bnxt_rss_hash_conf_get_op(struct rte_eth_dev *eth_dev, + } + if (hash_types) { + PMD_DRV_LOG(ERR, +- "Unknwon RSS config from firmware (%08x), RSS disabled", ++ "Unknown RSS config from firmware (%08x), RSS disabled", + vnic->hash_type); + return -ENOTSUP; + } +@@ -1808,6 +1851,11 @@ static int bnxt_vlan_filter_set_op(struct rte_eth_dev *eth_dev, + if (rc) + return rc; + ++ if (!eth_dev->data->dev_started) { ++ PMD_DRV_LOG(ERR, "port must be started before setting vlan\n"); ++ return -EINVAL; ++ } ++ + /* These operations apply to ALL existing MAC/VLAN filters */ + if (on) + return bnxt_add_vlan_filter(bp, vlan_id); +@@ -1841,18 +1889,12 @@ static int bnxt_del_dflt_mac_filter(struct bnxt *bp, + } + + static int +-bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask) ++bnxt_config_vlan_hw_filter(struct bnxt *bp, uint64_t rx_offloads) + { +- struct bnxt *bp = dev->data->dev_private; +- uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; + struct bnxt_vnic_info *vnic; + unsigned int i; + int rc; + +- rc = is_bnxt_in_error(bp); +- if (rc) +- return rc; +- + vnic = BNXT_GET_DEFAULT_VNIC(bp); + if (!(rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)) { + /* Remove any VLAN filters programmed */ +@@ -1876,18 +1918,102 @@ bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask) 
+ PMD_DRV_LOG(DEBUG, "VLAN Filtering: %d\n", + !!(rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)); + ++ return 0; ++} ++ ++static int bnxt_free_one_vnic(struct bnxt *bp, uint16_t vnic_id) ++{ ++ struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; ++ unsigned int i; ++ int rc; ++ ++ /* Destroy vnic filters and vnic */ ++ if (bp->eth_dev->data->dev_conf.rxmode.offloads & ++ DEV_RX_OFFLOAD_VLAN_FILTER) { ++ for (i = 0; i < RTE_ETHER_MAX_VLAN_ID; i++) ++ bnxt_del_vlan_filter(bp, i); ++ } ++ bnxt_del_dflt_mac_filter(bp, vnic); ++ ++ rc = bnxt_hwrm_vnic_free(bp, vnic); ++ if (rc) ++ return rc; ++ ++ rte_free(vnic->fw_grp_ids); ++ vnic->fw_grp_ids = NULL; ++ ++ vnic->rx_queue_cnt = 0; ++ ++ return 0; ++} ++ ++static int ++bnxt_config_vlan_hw_stripping(struct bnxt *bp, uint64_t rx_offloads) ++{ ++ struct bnxt_vnic_info *vnic = BNXT_GET_DEFAULT_VNIC(bp); ++ int rc; ++ ++ /* Destroy, recreate and reconfigure the default vnic */ ++ rc = bnxt_free_one_vnic(bp, 0); ++ if (rc) ++ return rc; ++ ++ /* default vnic 0 */ ++ rc = bnxt_setup_one_vnic(bp, 0); ++ if (rc) ++ return rc; ++ ++ if (bp->eth_dev->data->dev_conf.rxmode.offloads & ++ DEV_RX_OFFLOAD_VLAN_FILTER) { ++ rc = bnxt_add_vlan_filter(bp, 0); ++ if (rc) ++ return rc; ++ rc = bnxt_restore_vlan_filters(bp); ++ if (rc) ++ return rc; ++ } else { ++ rc = bnxt_add_mac_filter(bp, vnic, NULL, 0, 0); ++ if (rc) ++ return rc; ++ } ++ ++ rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, vnic, 0, NULL); ++ if (rc) ++ return rc; ++ ++ PMD_DRV_LOG(DEBUG, "VLAN Strip Offload: %d\n", ++ !!(rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)); ++ ++ return rc; ++} ++ ++static int ++bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask) ++{ ++ uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; ++ struct bnxt *bp = dev->data->dev_private; ++ int rc; ++ ++ rc = is_bnxt_in_error(bp); ++ if (rc) ++ return rc; ++ ++ /* Filter settings will get applied when port is started */ ++ if (!dev->data->dev_started) ++ return 0; ++ ++ if (mask & ETH_VLAN_FILTER_MASK) { ++ /* Enable or disable VLAN filtering */ ++ rc = bnxt_config_vlan_hw_filter(bp, rx_offloads); ++ if (rc) ++ return rc; ++ } ++ + if (mask & ETH_VLAN_STRIP_MASK) { + /* Enable or disable VLAN stripping */ +- for (i = 0; i < bp->nr_vnics; i++) { +- struct bnxt_vnic_info *vnic = &bp->vnic_info[i]; +- if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP) +- vnic->vlan_strip = true; +- else +- vnic->vlan_strip = false; +- bnxt_hwrm_vnic_cfg(bp, vnic); +- } +- PMD_DRV_LOG(DEBUG, "VLAN Strip Offload: %d\n", +- !!(rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)); ++ rc = bnxt_config_vlan_hw_stripping(bp, rx_offloads); ++ if (rc) ++ return rc; + } + + if (mask & ETH_VLAN_EXTEND_MASK) { +@@ -1965,7 +2091,6 @@ bnxt_set_default_mac_addr_op(struct rte_eth_dev *dev, + struct bnxt *bp = dev->data->dev_private; + /* Default Filter is tied to VNIC 0 */ + struct bnxt_vnic_info *vnic = &bp->vnic_info[0]; +- struct bnxt_filter_info *filter; + int rc; + + rc = is_bnxt_in_error(bp); +@@ -1978,32 +2103,27 @@ bnxt_set_default_mac_addr_op(struct rte_eth_dev *dev, + if (rte_is_zero_ether_addr(addr)) + return -EINVAL; + +- STAILQ_FOREACH(filter, &vnic->filter, next) { +- /* Default Filter is at Index 0 */ +- if (filter->mac_index != 0) +- continue; ++ /* Filter settings will get applied when port is started */ ++ if (!dev->data->dev_started) ++ return 0; + +- memcpy(filter->l2_addr, addr, RTE_ETHER_ADDR_LEN); +- memset(filter->l2_addr_mask, 0xff, RTE_ETHER_ADDR_LEN); +- filter->flags |= HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_RX | +- 
HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_OUTERMOST; +- filter->enables |= +- HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR | +- HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR_MASK; ++ /* Check if the requested MAC is already added */ ++ if (memcmp(addr, bp->mac_addr, RTE_ETHER_ADDR_LEN) == 0) ++ return 0; + +- rc = bnxt_hwrm_set_l2_filter(bp, vnic->fw_vnic_id, filter); +- if (rc) { +- memcpy(filter->l2_addr, bp->mac_addr, +- RTE_ETHER_ADDR_LEN); +- return rc; +- } ++ /* Destroy filter and re-create it */ ++ bnxt_del_dflt_mac_filter(bp, vnic); + +- memcpy(bp->mac_addr, addr, RTE_ETHER_ADDR_LEN); +- PMD_DRV_LOG(DEBUG, "Set MAC addr\n"); +- return 0; ++ memcpy(bp->mac_addr, addr, RTE_ETHER_ADDR_LEN); ++ if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_VLAN_FILTER) { ++ /* This filter will allow only untagged packets */ ++ rc = bnxt_add_vlan_filter(bp, 0); ++ } else { ++ rc = bnxt_add_mac_filter(bp, vnic, addr, 0, 0); + } + +- return 0; ++ PMD_DRV_LOG(DEBUG, "Set MAC addr\n"); ++ return rc; + } + + static int +@@ -2053,10 +2173,11 @@ bnxt_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) + uint8_t fw_major = (bp->fw_ver >> 24) & 0xff; + uint8_t fw_minor = (bp->fw_ver >> 16) & 0xff; + uint8_t fw_updt = (bp->fw_ver >> 8) & 0xff; ++ uint8_t fw_rsvd = bp->fw_ver & 0xff; + int ret; + +- ret = snprintf(fw_version, fw_size, "%d.%d.%d", +- fw_major, fw_minor, fw_updt); ++ ret = snprintf(fw_version, fw_size, "%d.%d.%d.%d", ++ fw_major, fw_minor, fw_updt, fw_rsvd); + + ret += 1; /* add the size of '\0' */ + if (fw_size < (uint32_t)ret) +@@ -3500,9 +3621,9 @@ bnxt_get_eeprom_length_op(struct rte_eth_dev *dev) + if (rc) + return rc; + +- PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x\n", +- bp->pdev->addr.domain, bp->pdev->addr.bus, +- bp->pdev->addr.devid, bp->pdev->addr.function); ++ PMD_DRV_LOG(INFO, PCI_PRI_FMT "\n", ++ bp->pdev->addr.domain, bp->pdev->addr.bus, ++ bp->pdev->addr.devid, bp->pdev->addr.function); + + rc = bnxt_hwrm_nvm_get_dir_info(bp, &dir_entries, &entry_length); + if (rc != 0) +@@ -3524,10 +3645,10 @@ bnxt_get_eeprom_op(struct rte_eth_dev *dev, + if (rc) + return rc; + +- PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x in_eeprom->offset = %d " +- "len = %d\n", bp->pdev->addr.domain, +- bp->pdev->addr.bus, bp->pdev->addr.devid, +- bp->pdev->addr.function, in_eeprom->offset, in_eeprom->length); ++ PMD_DRV_LOG(INFO, PCI_PRI_FMT " in_eeprom->offset = %d len = %d\n", ++ bp->pdev->addr.domain, bp->pdev->addr.bus, ++ bp->pdev->addr.devid, bp->pdev->addr.function, ++ in_eeprom->offset, in_eeprom->length); + + if (in_eeprom->offset == 0) /* special offset value to get directory */ + return bnxt_get_nvram_directory(bp, in_eeprom->length, +@@ -3600,10 +3721,10 @@ bnxt_set_eeprom_op(struct rte_eth_dev *dev, + if (rc) + return rc; + +- PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x in_eeprom->offset = %d " +- "len = %d\n", bp->pdev->addr.domain, +- bp->pdev->addr.bus, bp->pdev->addr.devid, +- bp->pdev->addr.function, in_eeprom->offset, in_eeprom->length); ++ PMD_DRV_LOG(INFO, PCI_PRI_FMT " in_eeprom->offset = %d len = %d\n", ++ bp->pdev->addr.domain, bp->pdev->addr.bus, ++ bp->pdev->addr.devid, bp->pdev->addr.function, ++ in_eeprom->offset, in_eeprom->length); + + if (!BNXT_PF(bp)) { + PMD_DRV_LOG(ERR, "NVM write not supported from a VF\n"); +@@ -3790,22 +3911,98 @@ static void bnxt_dev_cleanup(struct bnxt *bp) + { + bnxt_set_hwrm_link_config(bp, false); + bp->link_info.link_up = 0; +- if (bp->dev_stopped == 0) ++ if (bp->eth_dev->data->dev_started) + bnxt_dev_stop_op(bp->eth_dev); + + 
bnxt_uninit_resources(bp, true); + } + ++static int bnxt_restore_vlan_filters(struct bnxt *bp) ++{ ++ struct rte_eth_dev *dev = bp->eth_dev; ++ struct rte_vlan_filter_conf *vfc; ++ int vidx, vbit, rc; ++ uint16_t vlan_id; ++ ++ for (vlan_id = 1; vlan_id <= RTE_ETHER_MAX_VLAN_ID; vlan_id++) { ++ vfc = &dev->data->vlan_filter_conf; ++ vidx = vlan_id / 64; ++ vbit = vlan_id % 64; ++ ++ /* Each bit corresponds to a VLAN id */ ++ if (vfc->ids[vidx] & (UINT64_C(1) << vbit)) { ++ rc = bnxt_add_vlan_filter(bp, vlan_id); ++ if (rc) ++ return rc; ++ } ++ } ++ ++ return 0; ++} ++ ++static int bnxt_restore_mac_filters(struct bnxt *bp) ++{ ++ struct rte_eth_dev *dev = bp->eth_dev; ++ struct rte_eth_dev_info dev_info; ++ struct rte_ether_addr *addr; ++ uint64_t pool_mask; ++ uint32_t pool = 0; ++ uint16_t i; ++ int rc; ++ ++ if (BNXT_VF(bp) & !BNXT_VF_IS_TRUSTED(bp)) ++ return 0; ++ ++ rc = bnxt_dev_info_get_op(dev, &dev_info); ++ if (rc) ++ return rc; ++ ++ /* replay MAC address configuration */ ++ for (i = 1; i < dev_info.max_mac_addrs; i++) { ++ addr = &dev->data->mac_addrs[i]; ++ ++ /* skip zero address */ ++ if (rte_is_zero_ether_addr(addr)) ++ continue; ++ ++ pool = 0; ++ pool_mask = dev->data->mac_pool_sel[i]; ++ ++ do { ++ if (pool_mask & 1ULL) { ++ rc = bnxt_mac_addr_add_op(dev, addr, i, pool); ++ if (rc) ++ return rc; ++ } ++ pool_mask >>= 1; ++ pool++; ++ } while (pool_mask); ++ } ++ ++ return 0; ++} ++ + static int bnxt_restore_filters(struct bnxt *bp) + { + struct rte_eth_dev *dev = bp->eth_dev; + int ret = 0; + +- if (dev->data->all_multicast) ++ if (dev->data->all_multicast) { + ret = bnxt_allmulticast_enable_op(dev); +- if (dev->data->promiscuous) ++ if (ret) ++ return ret; ++ } ++ if (dev->data->promiscuous) { + ret = bnxt_promiscuous_enable_op(dev); ++ if (ret) ++ return ret; ++ } + ++ ret = bnxt_restore_mac_filters(bp); ++ if (ret) ++ return ret; ++ ++ ret = bnxt_restore_vlan_filters(bp); + /* TODO restore other filters as well */ + return ret; + } +@@ -3820,7 +4017,7 @@ static void bnxt_dev_recover(void *arg) + bp->flags &= ~BNXT_FLAG_FATAL_ERROR; + + do { +- rc = bnxt_hwrm_ver_get(bp); ++ rc = bnxt_hwrm_ver_get(bp, SHORT_HWRM_CMD_TIMEOUT); + if (rc == 0) + break; + rte_delay_ms(BNXT_FW_READY_WAIT_INTERVAL); +@@ -3844,15 +4041,17 @@ static void bnxt_dev_recover(void *arg) + rc = bnxt_dev_start_op(bp->eth_dev); + if (rc) { + PMD_DRV_LOG(ERR, "Failed to start port after reset\n"); +- goto err; ++ goto err_start; + } + + rc = bnxt_restore_filters(bp); + if (rc) +- goto err; ++ goto err_start; + + PMD_DRV_LOG(INFO, "Recovered from FW reset\n"); + return; ++err_start: ++ bnxt_dev_stop_op(bp->eth_dev); + err: + bp->flags |= BNXT_FLAG_FATAL_ERROR; + bnxt_uninit_resources(bp, false); +@@ -4138,18 +4337,6 @@ static int bnxt_alloc_ctx_mem_blk(__rte_unused struct bnxt *bp, + + memset(mz->addr, 0, mz->len); + mz_phys_addr = mz->iova; +- if ((unsigned long)mz->addr == mz_phys_addr) { +- PMD_DRV_LOG(DEBUG, +- "physical address same as virtual\n"); +- PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n"); +- mz_phys_addr = rte_mem_virt2iova(mz->addr); +- if (mz_phys_addr == RTE_BAD_IOVA) { +- PMD_DRV_LOG(ERR, +- "unable to map addr to phys memory\n"); +- return -ENOMEM; +- } +- } +- rte_mem_lock_page(((char *)mz->addr)); + + rmem->pg_tbl = mz->addr; + rmem->pg_tbl_map = mz_phys_addr; +@@ -4173,22 +4360,8 @@ static int bnxt_alloc_ctx_mem_blk(__rte_unused struct bnxt *bp, + + memset(mz->addr, 0, mz->len); + mz_phys_addr = mz->iova; +- if ((unsigned long)mz->addr == mz_phys_addr) { +- PMD_DRV_LOG(DEBUG, 
+- "Memzone physical address same as virtual.\n"); +- PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n"); +- for (sz = 0; sz < mem_size; sz += BNXT_PAGE_SIZE) +- rte_mem_lock_page(((char *)mz->addr) + sz); +- mz_phys_addr = rte_mem_virt2iova(mz->addr); +- if (mz_phys_addr == RTE_BAD_IOVA) { +- PMD_DRV_LOG(ERR, +- "unable to map addr to phys memory\n"); +- return -ENOMEM; +- } +- } + + for (sz = 0, i = 0; sz < mem_size; sz += BNXT_PAGE_SIZE, i++) { +- rte_mem_lock_page(((char *)mz->addr) + sz); + rmem->pg_arr[i] = ((char *)mz->addr) + sz; + rmem->dma_arr[i] = mz_phys_addr + sz; + +@@ -4365,18 +4538,6 @@ static int bnxt_alloc_stats_mem(struct bnxt *bp) + } + memset(mz->addr, 0, mz->len); + mz_phys_addr = mz->iova; +- if ((unsigned long)mz->addr == mz_phys_addr) { +- PMD_DRV_LOG(DEBUG, +- "Memzone physical address same as virtual.\n"); +- PMD_DRV_LOG(DEBUG, +- "Using rte_mem_virt2iova()\n"); +- mz_phys_addr = rte_mem_virt2iova(mz->addr); +- if (mz_phys_addr == RTE_BAD_IOVA) { +- PMD_DRV_LOG(ERR, +- "Can't map address to physical memory\n"); +- return -ENOMEM; +- } +- } + + bp->rx_mem_zone = (const void *)mz; + bp->hw_rx_port_stats = mz->addr; +@@ -4403,17 +4564,6 @@ static int bnxt_alloc_stats_mem(struct bnxt *bp) + } + memset(mz->addr, 0, mz->len); + mz_phys_addr = mz->iova; +- if ((unsigned long)mz->addr == mz_phys_addr) { +- PMD_DRV_LOG(DEBUG, +- "Memzone physical address same as virtual\n"); +- PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n"); +- mz_phys_addr = rte_mem_virt2iova(mz->addr); +- if (mz_phys_addr == RTE_BAD_IOVA) { +- PMD_DRV_LOG(ERR, +- "Can't map address to physical memory\n"); +- return -ENOMEM; +- } +- } + + bp->tx_mem_zone = (const void *)mz; + bp->hw_tx_port_stats = mz->addr; +@@ -4461,7 +4611,7 @@ static int bnxt_setup_mac_addr(struct rte_eth_dev *eth_dev) + return -ENOMEM; + } + +- if (bnxt_check_zero_bytes(bp->dflt_mac_addr, RTE_ETHER_ADDR_LEN)) { ++ if (!BNXT_HAS_DFLT_MAC_SET(bp)) { + if (BNXT_PF(bp)) + return -EINVAL; + +@@ -4474,14 +4624,11 @@ static int bnxt_setup_mac_addr(struct rte_eth_dev *eth_dev) + bp->mac_addr[3], bp->mac_addr[4], bp->mac_addr[5]); + + rc = bnxt_hwrm_set_mac(bp); +- if (!rc) +- memcpy(&bp->eth_dev->data->mac_addrs[0], bp->mac_addr, +- RTE_ETHER_ADDR_LEN); +- return rc; ++ if (rc) ++ return rc; + } + + /* Copy the permanent MAC from the FUNC_QCAPS response */ +- memcpy(bp->mac_addr, bp->dflt_mac_addr, RTE_ETHER_ADDR_LEN); + memcpy(ð_dev->data->mac_addrs[0], bp->mac_addr, RTE_ETHER_ADDR_LEN); + + return rc; +@@ -4492,7 +4639,7 @@ static int bnxt_restore_dflt_mac(struct bnxt *bp) + int rc = 0; + + /* MAC is already configured in FW */ +- if (!bnxt_check_zero_bytes(bp->dflt_mac_addr, RTE_ETHER_ADDR_LEN)) ++ if (BNXT_HAS_DFLT_MAC_SET(bp)) + return 0; + + /* Restore the old MAC configured */ +@@ -4546,7 +4693,9 @@ static int bnxt_init_fw(struct bnxt *bp) + uint16_t mtu; + int rc = 0; + +- rc = bnxt_hwrm_ver_get(bp); ++ bp->fw_cap = 0; ++ ++ rc = bnxt_hwrm_ver_get(bp, DFLT_HWRM_CMD_TIMEOUT); + if (rc) + return rc; + +@@ -4581,7 +4730,7 @@ static int bnxt_init_fw(struct bnxt *bp) + /* Get the adapter error recovery support info */ + rc = bnxt_hwrm_error_recovery_qcfg(bp); + if (rc) +- bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY; ++ bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY; + + bnxt_hwrm_port_led_qcaps(bp); + +@@ -4693,8 +4842,6 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev) + + bp = eth_dev->data->dev_private; + +- bp->dev_stopped = 1; +- + if (bnxt_vf_pciid(pci_dev->id.device_id)) + bp->flags |= BNXT_FLAG_VF; + +@@ -4796,7 +4943,7 @@ 
bnxt_dev_uninit(struct rte_eth_dev *eth_dev) + bp->rx_mem_zone = NULL; + } + +- if (bp->dev_stopped == 0) ++ if (eth_dev->data->dev_started) + bnxt_dev_close_op(eth_dev); + if (bp->pf.vf_info) + rte_free(bp->pf.vf_info); +diff --git a/dpdk/drivers/net/bnxt/bnxt_filter.c b/dpdk/drivers/net/bnxt/bnxt_filter.c +index da1a6c24a9..622a9bb417 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_filter.c ++++ b/dpdk/drivers/net/bnxt/bnxt_filter.c +@@ -26,22 +26,20 @@ struct bnxt_filter_info *bnxt_alloc_filter(struct bnxt *bp) + { + struct bnxt_filter_info *filter; + +- /* Find the 1st unused filter from the free_filter_list pool*/ +- filter = STAILQ_FIRST(&bp->free_filter_list); ++ filter = bnxt_get_unused_filter(bp); + if (!filter) { + PMD_DRV_LOG(ERR, "No more free filter resources\n"); + return NULL; + } +- STAILQ_REMOVE_HEAD(&bp->free_filter_list, next); + + filter->mac_index = INVALID_MAC_INDEX; + /* Default to L2 MAC Addr filter */ + filter->flags = HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_RX; + filter->enables = HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR | + HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR_MASK; +- memcpy(filter->l2_addr, bp->eth_dev->data->mac_addrs->addr_bytes, +- RTE_ETHER_ADDR_LEN); ++ memcpy(filter->l2_addr, bp->mac_addr, RTE_ETHER_ADDR_LEN); + memset(filter->l2_addr_mask, 0xff, RTE_ETHER_ADDR_LEN); ++ + return filter; + } + +diff --git a/dpdk/drivers/net/bnxt/bnxt_filter.h b/dpdk/drivers/net/bnxt/bnxt_filter.h +index 9db3e74877..fc40f112ba 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_filter.h ++++ b/dpdk/drivers/net/bnxt/bnxt_filter.h +@@ -77,6 +77,10 @@ struct bnxt_filter_info { + uint16_t ip_addr_type; + uint16_t ethertype; + uint32_t priority; ++ /* Backptr to vnic. As of now, used only by an L2 filter ++ * to remember which vnic it was created on ++ */ ++ struct bnxt_vnic_info *vnic; + }; + + struct bnxt_filter_info *bnxt_alloc_filter(struct bnxt *bp); +diff --git a/dpdk/drivers/net/bnxt/bnxt_flow.c b/dpdk/drivers/net/bnxt/bnxt_flow.c +index 76e9584da7..d901479ee1 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_flow.c ++++ b/dpdk/drivers/net/bnxt/bnxt_flow.c +@@ -746,10 +746,9 @@ bnxt_find_matching_l2_filter(struct bnxt *bp, struct bnxt_filter_info *nf) + { + struct bnxt_filter_info *mf, *f0; + struct bnxt_vnic_info *vnic0; +- struct rte_flow *flow; + int i; + +- vnic0 = &bp->vnic_info[0]; ++ vnic0 = BNXT_GET_DEFAULT_VNIC(bp); + f0 = STAILQ_FIRST(&vnic0->filter); + + /* This flow has same DST MAC as the port/l2 filter. 
*/ +@@ -762,8 +761,7 @@ bnxt_find_matching_l2_filter(struct bnxt *bp, struct bnxt_filter_info *nf) + if (vnic->fw_vnic_id == INVALID_VNIC_ID) + continue; + +- STAILQ_FOREACH(flow, &vnic->flow_list, next) { +- mf = flow->filter; ++ STAILQ_FOREACH(mf, &vnic->filter, next) { + + if (mf->matching_l2_fltr_ptr) + continue; +@@ -798,6 +796,8 @@ bnxt_create_l2_filter(struct bnxt *bp, struct bnxt_filter_info *nf, + if (filter1 == NULL) + return NULL; + ++ memcpy(filter1, nf, sizeof(*filter1)); ++ + filter1->flags = HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_XDP_DISABLE; + filter1->flags |= HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_RX; + if (nf->valid_flags & BNXT_FLOW_L2_SRC_VALID_FLAG || +@@ -867,7 +867,6 @@ bnxt_create_l2_filter(struct bnxt *bp, struct bnxt_filter_info *nf, + bnxt_free_filter(bp, filter1); + return NULL; + } +- filter1->l2_ref_cnt++; + return filter1; + } + +@@ -880,11 +879,14 @@ bnxt_get_l2_filter(struct bnxt *bp, struct bnxt_filter_info *nf, + l2_filter = bnxt_find_matching_l2_filter(bp, nf); + if (l2_filter) { + l2_filter->l2_ref_cnt++; +- nf->matching_l2_fltr_ptr = l2_filter; + } else { + l2_filter = bnxt_create_l2_filter(bp, nf, vnic); +- nf->matching_l2_fltr_ptr = NULL; ++ if (l2_filter) { ++ STAILQ_INSERT_TAIL(&vnic->filter, l2_filter, next); ++ l2_filter->vnic = vnic; ++ } + } ++ nf->matching_l2_fltr_ptr = l2_filter; + + return l2_filter; + } +@@ -1054,16 +1056,9 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev, + vnic_id = act_q->index; + } + ++ BNXT_VALID_VNIC_OR_RET(bp, vnic_id); ++ + vnic = &bp->vnic_info[vnic_id]; +- if (vnic == NULL) { +- rte_flow_error_set(error, +- EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, +- act, +- "No matching VNIC found."); +- rc = -rte_errno; +- goto ret; +- } + if (vnic->rx_queue_cnt) { + if (vnic->start_grp_id != act_q->index) { + PMD_DRV_LOG(ERR, +@@ -1126,7 +1121,16 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev, + PMD_DRV_LOG(DEBUG, + "Setting vnic ff_idx %d\n", vnic->ff_pool_idx); + filter->dst_id = vnic->fw_vnic_id; +- filter1 = bnxt_get_l2_filter(bp, filter, vnic); ++ ++ /* For ntuple filter, create the L2 filter with default VNIC. ++ * The user specified redirect queue will be set while creating ++ * the ntuple filter in hardware. ++ */ ++ vnic0 = BNXT_GET_DEFAULT_VNIC(bp); ++ if (use_ntuple) ++ filter1 = bnxt_get_l2_filter(bp, filter, vnic0); ++ else ++ filter1 = bnxt_get_l2_filter(bp, filter, vnic); + if (filter1 == NULL) { + rte_flow_error_set(error, + ENOSPC, +@@ -1252,28 +1256,10 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev, + rss = (const struct rte_flow_action_rss *)act->conf; + + vnic_id = attr->group; +- if (!vnic_id) { +- PMD_DRV_LOG(ERR, "Group id cannot be 0\n"); +- rte_flow_error_set(error, +- EINVAL, +- RTE_FLOW_ERROR_TYPE_ATTR, +- NULL, +- "Group id cannot be 0"); +- rc = -rte_errno; +- goto ret; +- } ++ ++ BNXT_VALID_VNIC_OR_RET(bp, vnic_id); + + vnic = &bp->vnic_info[vnic_id]; +- if (vnic == NULL) { +- rte_flow_error_set(error, +- EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, +- act, +- "No matching VNIC for RSS group."); +- rc = -rte_errno; +- goto ret; +- } +- PMD_DRV_LOG(DEBUG, "VNIC found\n"); + + /* Check if requested RSS config matches RSS config of VNIC + * only if it is not a fresh VNIC configuration. 
+@@ -1420,11 +1406,6 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev, + goto ret; + } + +- if (filter1 && !filter->matching_l2_fltr_ptr) { +- bnxt_free_filter(bp, filter1); +- filter1->fw_l2_filter_id = -1; +- } +- + done: + act = bnxt_flow_non_void_action(++act); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { +@@ -1448,7 +1429,7 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev, + if (rxq && !vnic->rx_queue_cnt) + rxq->vnic = &bp->vnic_info[0]; + } +- return rc; ++ return -rte_errno; + } + + static +@@ -1537,10 +1518,13 @@ bnxt_update_filter(struct bnxt *bp, struct bnxt_filter_info *old_filter, + * filter which points to the new destination queue and so we clear + * the previous L2 filter. For ntuple filters, we are going to reuse + * the old L2 filter and create new NTUPLE filter with this new +- * destination queue subsequently during bnxt_flow_create. ++ * destination queue subsequently during bnxt_flow_create. So we ++ * decrement the ref cnt of the L2 filter that would've been bumped ++ * up previously in bnxt_validate_and_parse_flow as the old n-tuple ++ * filter that was referencing it will be deleted now. + */ ++ bnxt_hwrm_clear_l2_filter(bp, old_filter); + if (new_filter->filter_type == HWRM_CFA_L2_FILTER) { +- bnxt_hwrm_clear_l2_filter(bp, old_filter); + bnxt_hwrm_set_l2_filter(bp, new_filter->dst_id, new_filter); + } else { + if (new_filter->filter_type == HWRM_CFA_EM_FILTER) +@@ -1663,7 +1647,9 @@ bnxt_flow_create(struct rte_eth_dev *dev, + + filter = bnxt_get_unused_filter(bp); + if (filter == NULL) { +- PMD_DRV_LOG(ERR, "Not enough resources for a new flow.\n"); ++ rte_flow_error_set(error, ENOSPC, ++ RTE_FLOW_ERROR_TYPE_HANDLE, NULL, ++ "Not enough resources for a new flow"); + goto free_flow; + } + +@@ -1765,7 +1751,7 @@ bnxt_flow_create(struct rte_eth_dev *dev, + rte_flow_error_set(error, 0, + RTE_FLOW_ERROR_TYPE_NONE, NULL, + "Flow with pattern exists, updating destination queue"); +- else ++ else if (!rte_errno) + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to create flow."); +@@ -1817,46 +1803,24 @@ static int bnxt_handle_tunnel_redirect_destroy(struct bnxt *bp, + } + + static int +-bnxt_flow_destroy(struct rte_eth_dev *dev, +- struct rte_flow *flow, +- struct rte_flow_error *error) ++_bnxt_flow_destroy(struct bnxt *bp, ++ struct rte_flow *flow, ++ struct rte_flow_error *error) + { +- struct bnxt *bp = dev->data->dev_private; + struct bnxt_filter_info *filter; + struct bnxt_vnic_info *vnic; + int ret = 0; + +- bnxt_acquire_flow_lock(bp); +- if (!flow) { +- rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_HANDLE, NULL, +- "Invalid flow: failed to destroy flow."); +- bnxt_release_flow_lock(bp); +- return -EINVAL; +- } +- + filter = flow->filter; + vnic = flow->vnic; + +- if (!filter) { +- rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_HANDLE, NULL, +- "Invalid flow: failed to destroy flow."); +- bnxt_release_flow_lock(bp); +- return -EINVAL; +- } +- + if (filter->filter_type == HWRM_CFA_TUNNEL_REDIRECT_FILTER && + filter->enables == filter->tunnel_type) { +- ret = bnxt_handle_tunnel_redirect_destroy(bp, +- filter, +- error); +- if (!ret) { ++ ret = bnxt_handle_tunnel_redirect_destroy(bp, filter, error); ++ if (!ret) + goto done; +- } else { +- bnxt_release_flow_lock(bp); ++ else + return ret; +- } + } + + ret = bnxt_match_filter(bp, filter); +@@ -1903,7 +1867,36 @@ bnxt_flow_destroy(struct rte_eth_dev *dev, + "Failed to destroy flow."); + } + ++ return ret; ++} ++ ++static int ++bnxt_flow_destroy(struct 
rte_eth_dev *dev, ++ struct rte_flow *flow, ++ struct rte_flow_error *error) ++{ ++ struct bnxt *bp = dev->data->dev_private; ++ int ret = 0; ++ ++ bnxt_acquire_flow_lock(bp); ++ if (!flow) { ++ rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_HANDLE, NULL, ++ "Invalid flow: failed to destroy flow."); ++ bnxt_release_flow_lock(bp); ++ return -EINVAL; ++ } ++ ++ if (!flow->filter) { ++ rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_HANDLE, NULL, ++ "Invalid flow: failed to destroy flow."); ++ bnxt_release_flow_lock(bp); ++ return -EINVAL; ++ } ++ ret = _bnxt_flow_destroy(bp, flow, error); + bnxt_release_flow_lock(bp); ++ + return ret; + } + +@@ -1911,7 +1904,6 @@ static int + bnxt_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error) + { + struct bnxt *bp = dev->data->dev_private; +- struct bnxt_filter_info *filter = NULL; + struct bnxt_vnic_info *vnic; + struct rte_flow *flow; + unsigned int i; +@@ -1925,66 +1917,17 @@ bnxt_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error) + + while (!STAILQ_EMPTY(&vnic->flow_list)) { + flow = STAILQ_FIRST(&vnic->flow_list); +- filter = flow->filter; +- +- if (filter->filter_type == +- HWRM_CFA_TUNNEL_REDIRECT_FILTER && +- filter->enables == filter->tunnel_type) { +- ret = +- bnxt_handle_tunnel_redirect_destroy(bp, +- filter, +- error); +- if (!ret) { +- goto done; +- } else { +- bnxt_release_flow_lock(bp); +- return ret; +- } +- } +- +- if (filter->filter_type == HWRM_CFA_EM_FILTER) +- ret = bnxt_hwrm_clear_em_filter(bp, filter); +- if (filter->filter_type == HWRM_CFA_NTUPLE_FILTER) +- ret = bnxt_hwrm_clear_ntuple_filter(bp, filter); +- else if (i) +- ret = bnxt_hwrm_clear_l2_filter(bp, filter); +- +- if (ret) { +- rte_flow_error_set +- (error, +- -ret, +- RTE_FLOW_ERROR_TYPE_HANDLE, +- NULL, +- "Failed to flush flow in HW."); +- bnxt_release_flow_lock(bp); +- return -rte_errno; +- } +-done: +- STAILQ_REMOVE(&vnic->flow_list, flow, +- rte_flow, next); +- +- STAILQ_REMOVE(&vnic->filter, +- filter, +- bnxt_filter_info, +- next); +- bnxt_free_filter(bp, filter); + +- rte_free(flow); ++ if (!flow->filter) ++ continue; + +- /* If this was the last flow associated with this vnic, +- * switch the queue back to RSS pool. 
+- */ +- if (STAILQ_EMPTY(&vnic->flow_list)) { +- rte_free(vnic->fw_grp_ids); +- if (vnic->rx_queue_cnt > 1) +- bnxt_hwrm_vnic_ctx_free(bp, vnic); +- bnxt_hwrm_vnic_free(bp, vnic); +- vnic->rx_queue_cnt = 0; +- } ++ ret = _bnxt_flow_destroy(bp, flow, error); ++ if (ret) ++ break; + } + } +- + bnxt_release_flow_lock(bp); ++ + return ret; + } + +diff --git a/dpdk/drivers/net/bnxt/bnxt_hwrm.c b/dpdk/drivers/net/bnxt/bnxt_hwrm.c +index 41730089b1..33f352fade 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_hwrm.c ++++ b/dpdk/drivers/net/bnxt/bnxt_hwrm.c +@@ -100,11 +100,7 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg, + if (bp->flags & BNXT_FLAG_FATAL_ERROR) + return 0; + +- /* For VER_GET command, set timeout as 50ms */ +- if (rte_cpu_to_le_16(req->req_type) == HWRM_VER_GET) +- timeout = HWRM_CMD_TIMEOUT; +- else +- timeout = bp->hwrm_cmd_timeout; ++ timeout = bp->hwrm_cmd_timeout; + + if (bp->flags & BNXT_FLAG_SHORT_CMD || + msg_len > bp->max_req_len) { +@@ -168,8 +164,9 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg, + rte_cpu_to_le_16(req->req_type) == HWRM_VER_GET) + return -ETIMEDOUT; + +- PMD_DRV_LOG(ERR, "Error(timeout) sending msg 0x%04x\n", +- req->req_type); ++ PMD_DRV_LOG(ERR, ++ "Error(timeout) sending msg 0x%04x, seq_id %d\n", ++ req->req_type, req->seq_id); + return -ETIMEDOUT; + } + return 0; +@@ -188,6 +185,10 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg, + */ + #define HWRM_PREP(req, type, kong) do { \ + rte_spinlock_lock(&bp->hwrm_lock); \ ++ if (bp->hwrm_cmd_resp_addr == NULL) { \ ++ rte_spinlock_unlock(&bp->hwrm_lock); \ ++ return -EACCES; \ ++ } \ + memset(bp->hwrm_cmd_resp_addr, 0, bp->max_resp_len); \ + req.req_type = rte_cpu_to_le_16(HWRM_##type); \ + req.cmpl_ring = rte_cpu_to_le_16(-1); \ +@@ -221,6 +222,8 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg, + rc = -EINVAL; \ + else if (rc == HWRM_ERR_CODE_CMD_NOT_SUPPORTED) \ + rc = -ENOTSUP; \ ++ else if (rc == HWRM_ERR_CODE_HOT_RESET_PROGRESS) \ ++ rc = -EAGAIN; \ + else if (rc > 0) \ + rc = -EIO; \ + return rc; \ +@@ -249,6 +252,8 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg, + rc = -EINVAL; \ + else if (rc == HWRM_ERR_CODE_CMD_NOT_SUPPORTED) \ + rc = -ENOTSUP; \ ++ else if (rc == HWRM_ERR_CODE_HOT_RESET_PROGRESS) \ ++ rc = -EAGAIN; \ + else if (rc > 0) \ + rc = -EIO; \ + return rc; \ +@@ -309,8 +314,8 @@ int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, + if (vlan_table) { + if (!(mask & HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_VLAN_NONVLAN)) + mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_VLANONLY; +- req.vlan_tag_tbl_addr = rte_cpu_to_le_64( +- rte_mem_virt2iova(vlan_table)); ++ req.vlan_tag_tbl_addr = ++ rte_cpu_to_le_64(rte_malloc_virt2iova(vlan_table)); + req.num_vlan_tags = rte_cpu_to_le_32((uint32_t)vlan_count); + } + req.mask = rte_cpu_to_le_32(mask); +@@ -351,7 +356,7 @@ int bnxt_hwrm_cfa_vlan_antispoof_cfg(struct bnxt *bp, uint16_t fid, + req.fid = rte_cpu_to_le_16(fid); + + req.vlan_tag_mask_tbl_addr = +- rte_cpu_to_le_64(rte_mem_virt2iova(vlan_table)); ++ rte_cpu_to_le_64(rte_malloc_virt2iova(vlan_table)); + req.num_vlan_entries = rte_cpu_to_le_32((uint32_t)vlan_count); + + rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB); +@@ -363,10 +368,11 @@ int bnxt_hwrm_cfa_vlan_antispoof_cfg(struct bnxt *bp, uint16_t fid, + } + + int bnxt_hwrm_clear_l2_filter(struct bnxt *bp, +- struct bnxt_filter_info *filter) ++ struct bnxt_filter_info *filter) + { + int rc = 0; + struct bnxt_filter_info *l2_filter = filter; ++ struct 
bnxt_vnic_info *vnic = NULL; + struct hwrm_cfa_l2_filter_free_input req = {.req_type = 0 }; + struct hwrm_cfa_l2_filter_free_output *resp = bp->hwrm_cmd_resp_addr; + +@@ -379,6 +385,9 @@ int bnxt_hwrm_clear_l2_filter(struct bnxt *bp, + PMD_DRV_LOG(DEBUG, "filter: %p l2_filter: %p ref_cnt: %d\n", + filter, l2_filter, l2_filter->l2_ref_cnt); + ++ if (l2_filter->l2_ref_cnt == 0) ++ return 0; ++ + if (l2_filter->l2_ref_cnt > 0) + l2_filter->l2_ref_cnt--; + +@@ -395,6 +404,14 @@ int bnxt_hwrm_clear_l2_filter(struct bnxt *bp, + HWRM_UNLOCK(); + + filter->fw_l2_filter_id = UINT64_MAX; ++ if (l2_filter->l2_ref_cnt == 0) { ++ vnic = l2_filter->vnic; ++ if (vnic) { ++ STAILQ_REMOVE(&vnic->filter, l2_filter, ++ bnxt_filter_info, next); ++ bnxt_free_filter(bp, l2_filter); ++ } ++ } + + return 0; + } +@@ -475,6 +492,8 @@ int bnxt_hwrm_set_l2_filter(struct bnxt *bp, + filter->fw_l2_filter_id = rte_le_to_cpu_64(resp->l2_filter_id); + HWRM_UNLOCK(); + ++ filter->l2_ref_cnt++; ++ + return rc; + } + +@@ -627,7 +646,12 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) + } + + bp->fw_fid = rte_le_to_cpu_32(resp->fid); +- memcpy(bp->dflt_mac_addr, &resp->mac_address, RTE_ETHER_ADDR_LEN); ++ if (!bnxt_check_zero_bytes(resp->mac_address, RTE_ETHER_ADDR_LEN)) { ++ bp->flags |= BNXT_FLAG_DFLT_MAC_SET; ++ memcpy(bp->mac_addr, &resp->mac_address, RTE_ETHER_ADDR_LEN); ++ } else { ++ bp->flags &= ~BNXT_FLAG_DFLT_MAC_SET; ++ } + bp->max_rsscos_ctx = rte_le_to_cpu_16(resp->max_rsscos_ctx); + bp->max_cp_rings = rte_le_to_cpu_16(resp->max_cmpl_rings); + bp->max_tx_rings = rte_le_to_cpu_16(resp->max_tx_rings); +@@ -661,16 +685,15 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) + bp->flags |= BNXT_FLAG_EXT_STATS_SUPPORTED; + + if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_ERROR_RECOVERY_CAPABLE) { +- bp->flags |= BNXT_FLAG_FW_CAP_ERROR_RECOVERY; ++ bp->fw_cap |= BNXT_FW_CAP_ERROR_RECOVERY; + PMD_DRV_LOG(DEBUG, "Adapter Error recovery SUPPORTED\n"); +- } else { +- bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY; + } + + if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_ERR_RECOVER_RELOAD) +- bp->flags |= BNXT_FLAG_FW_CAP_ERR_RECOVER_RELOAD; +- else +- bp->flags &= ~BNXT_FLAG_FW_CAP_ERR_RECOVER_RELOAD; ++ bp->fw_cap |= BNXT_FW_CAP_ERR_RECOVER_RELOAD; ++ ++ if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_HOT_RESET_CAPABLE) ++ bp->fw_cap |= BNXT_FW_CAP_HOT_RESET; + + HWRM_UNLOCK(); + +@@ -756,8 +779,9 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp) + if (bp->flags & BNXT_FLAG_REGISTERED) + return 0; + +- flags = HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_HOT_RESET_SUPPORT; +- if (bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY) ++ if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET) ++ flags = HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_HOT_RESET_SUPPORT; ++ if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) + flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_ERROR_RECOVERY_SUPPORT; + + /* PFs and trusted VFs should indicate the support of the +@@ -797,7 +821,7 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp) + ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE | + ASYNC_CMPL_EVENT_ID_LINK_SPEED_CHANGE | + ASYNC_CMPL_EVENT_ID_RESET_NOTIFY); +- if (bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY) ++ if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) + req.async_event_fwd[0] |= + rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_ERROR_RECOVERY); + req.async_event_fwd[1] |= +@@ -810,7 +834,7 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp) + + flags = rte_le_to_cpu_32(resp->flags); + if (flags & HWRM_FUNC_DRV_RGTR_OUTPUT_FLAGS_IF_CHANGE_SUPPORTED) +- bp->flags |= BNXT_FLAG_FW_CAP_IF_CHANGE; ++ bp->fw_cap |= 
BNXT_FW_CAP_IF_CHANGE; + + HWRM_UNLOCK(); + +@@ -934,7 +958,7 @@ int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp) + return rc; + } + +-int bnxt_hwrm_ver_get(struct bnxt *bp) ++int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout) + { + int rc = 0; + struct hwrm_ver_get_input req = {.req_type = 0 }; +@@ -945,6 +969,7 @@ int bnxt_hwrm_ver_get(struct bnxt *bp) + uint32_t dev_caps_cfg; + + bp->max_req_len = HWRM_MAX_REQ_LEN; ++ bp->hwrm_cmd_timeout = timeout; + HWRM_PREP(req, VER_GET, BNXT_USE_CHIMP_MB); + + req.hwrm_intf_maj = HWRM_VERSION_MAJOR; +@@ -979,7 +1004,7 @@ int bnxt_hwrm_ver_get(struct bnxt *bp) + /* convert timeout to usec */ + bp->hwrm_cmd_timeout *= 1000; + if (!bp->hwrm_cmd_timeout) +- bp->hwrm_cmd_timeout = HWRM_CMD_TIMEOUT; ++ bp->hwrm_cmd_timeout = DFLT_HWRM_CMD_TIMEOUT; + + if (resp->hwrm_intf_maj_8b != HWRM_VERSION_MAJOR) { + PMD_DRV_LOG(ERR, "Unsupported firmware API version\n"); +@@ -1011,9 +1036,8 @@ int bnxt_hwrm_ver_get(struct bnxt *bp) + rc = -ENOMEM; + goto error; + } +- rte_mem_lock_page(bp->hwrm_cmd_resp_addr); + bp->hwrm_cmd_resp_dma_addr = +- rte_mem_virt2iova(bp->hwrm_cmd_resp_addr); ++ rte_malloc_virt2iova(bp->hwrm_cmd_resp_addr); + if (bp->hwrm_cmd_resp_dma_addr == RTE_BAD_IOVA) { + PMD_DRV_LOG(ERR, + "Unable to map response buffer to physical memory.\n"); +@@ -1048,9 +1072,8 @@ int bnxt_hwrm_ver_get(struct bnxt *bp) + rc = -ENOMEM; + goto error; + } +- rte_mem_lock_page(bp->hwrm_short_cmd_req_addr); + bp->hwrm_short_cmd_req_dma_addr = +- rte_mem_virt2iova(bp->hwrm_short_cmd_req_addr); ++ rte_malloc_virt2iova(bp->hwrm_short_cmd_req_addr); + if (bp->hwrm_short_cmd_req_dma_addr == RTE_BAD_IOVA) { + rte_free(bp->hwrm_short_cmd_req_addr); + PMD_DRV_LOG(ERR, +@@ -1210,6 +1233,35 @@ static int bnxt_hwrm_port_phy_qcfg(struct bnxt *bp, + return rc; + } + ++static bool bnxt_find_lossy_profile(struct bnxt *bp) ++{ ++ int i = 0; ++ ++ for (i = BNXT_COS_QUEUE_COUNT - 1; i >= 0; i--) { ++ if (bp->tx_cos_queue[i].profile == ++ HWRM_QUEUE_SERVICE_PROFILE_LOSSY) { ++ bp->tx_cosq_id[0] = bp->tx_cos_queue[i].id; ++ return true; ++ } ++ } ++ return false; ++} ++ ++static void bnxt_find_first_valid_profile(struct bnxt *bp) ++{ ++ int i = 0; ++ ++ for (i = BNXT_COS_QUEUE_COUNT - 1; i >= 0; i--) { ++ if (bp->tx_cos_queue[i].profile != ++ HWRM_QUEUE_SERVICE_PROFILE_UNKNOWN && ++ bp->tx_cos_queue[i].id != ++ HWRM_QUEUE_SERVICE_PROFILE_UNKNOWN) { ++ bp->tx_cosq_id[0] = bp->tx_cos_queue[i].id; ++ break; ++ } ++ } ++} ++ + int bnxt_hwrm_queue_qportcfg(struct bnxt *bp) + { + int rc = 0; +@@ -1269,14 +1321,13 @@ int bnxt_hwrm_queue_qportcfg(struct bnxt *bp) + bp->tx_cos_queue[i].id; + } + } else { +- for (i = BNXT_COS_QUEUE_COUNT - 1; i >= 0; i--) { +- if (bp->tx_cos_queue[i].profile == +- HWRM_QUEUE_SERVICE_PROFILE_LOSSY) { +- bp->tx_cosq_id[0] = +- bp->tx_cos_queue[i].id; +- break; +- } +- } ++ /* When CoS classification is disabled, for normal NIC ++ * operations, ideally we should look to use LOSSY. 
++ * If not found, fallback to the first valid profile ++ */ ++ if (!bnxt_find_lossy_profile(bp)) ++ bnxt_find_first_valid_profile(bp); ++ + } + } + +@@ -2325,13 +2376,6 @@ void bnxt_free_hwrm_rx_ring(struct bnxt *bp, int queue_index) + if (BNXT_HAS_RING_GRPS(bp)) + bp->grp_info[queue_index].rx_fw_ring_id = + INVALID_HW_RING_ID; +- memset(rxr->rx_desc_ring, 0, +- rxr->rx_ring_struct->ring_size * +- sizeof(*rxr->rx_desc_ring)); +- memset(rxr->rx_buf_ring, 0, +- rxr->rx_ring_struct->ring_size * +- sizeof(*rxr->rx_buf_ring)); +- rxr->rx_prod = 0; + } + ring = rxr->ag_ring_struct; + if (ring->fw_ring_id != INVALID_HW_RING_ID) { +@@ -2339,11 +2383,6 @@ void bnxt_free_hwrm_rx_ring(struct bnxt *bp, int queue_index) + BNXT_CHIP_THOR(bp) ? + HWRM_RING_FREE_INPUT_RING_TYPE_RX_AGG : + HWRM_RING_FREE_INPUT_RING_TYPE_RX); +- ring->fw_ring_id = INVALID_HW_RING_ID; +- memset(rxr->ag_buf_ring, 0, +- rxr->ag_ring_struct->ring_size * +- sizeof(*rxr->ag_buf_ring)); +- rxr->ag_prod = 0; + if (BNXT_HAS_RING_GRPS(bp)) + bp->grp_info[queue_index].ag_fw_ring_id = + INVALID_HW_RING_ID; +@@ -2430,11 +2469,10 @@ int bnxt_alloc_hwrm_resources(struct bnxt *bp) + pdev->addr.bus, pdev->addr.devid, pdev->addr.function); + bp->max_resp_len = HWRM_MAX_RESP_LEN; + bp->hwrm_cmd_resp_addr = rte_malloc(type, bp->max_resp_len, 0); +- rte_mem_lock_page(bp->hwrm_cmd_resp_addr); + if (bp->hwrm_cmd_resp_addr == NULL) + return -ENOMEM; + bp->hwrm_cmd_resp_dma_addr = +- rte_mem_virt2iova(bp->hwrm_cmd_resp_addr); ++ rte_malloc_virt2iova(bp->hwrm_cmd_resp_addr); + if (bp->hwrm_cmd_resp_dma_addr == RTE_BAD_IOVA) { + PMD_DRV_LOG(ERR, + "unable to map response address to physical memory\n"); +@@ -2455,8 +2493,7 @@ int bnxt_clear_hwrm_vnic_filters(struct bnxt *bp, struct bnxt_vnic_info *vnic) + rc = bnxt_hwrm_clear_em_filter(bp, filter); + else if (filter->filter_type == HWRM_CFA_NTUPLE_FILTER) + rc = bnxt_hwrm_clear_ntuple_filter(bp, filter); +- else +- rc = bnxt_hwrm_clear_l2_filter(bp, filter); ++ rc = bnxt_hwrm_clear_l2_filter(bp, filter); + STAILQ_REMOVE(&vnic->filter, filter, bnxt_filter_info, next); + bnxt_free_filter(bp, filter); + } +@@ -2478,8 +2515,7 @@ bnxt_clear_hwrm_vnic_flows(struct bnxt *bp, struct bnxt_vnic_info *vnic) + rc = bnxt_hwrm_clear_em_filter(bp, filter); + else if (filter->filter_type == HWRM_CFA_NTUPLE_FILTER) + rc = bnxt_hwrm_clear_ntuple_filter(bp, filter); +- else +- rc = bnxt_hwrm_clear_l2_filter(bp, filter); ++ rc = bnxt_hwrm_clear_l2_filter(bp, filter); + + STAILQ_REMOVE(&vnic->flow_list, flow, rte_flow, next); + rte_free(flow); +@@ -2914,10 +2950,10 @@ int bnxt_hwrm_func_qcfg(struct bnxt *bp, uint16_t *mtu) + case HWRM_FUNC_QCFG_OUTPUT_PORT_PARTITION_TYPE_NPAR1_5: + case HWRM_FUNC_QCFG_OUTPUT_PORT_PARTITION_TYPE_NPAR2_0: + /* FALLTHROUGH */ +- bp->port_partition_type = resp->port_partition_type; ++ bp->flags |= BNXT_FLAG_NPAR_PF; + break; + default: +- bp->port_partition_type = 0; ++ bp->flags &= ~BNXT_FLAG_NPAR_PF; + break; + } + +@@ -3053,9 +3089,9 @@ static void add_random_mac_if_needed(struct bnxt *bp, + } + } + +-static void reserve_resources_from_vf(struct bnxt *bp, +- struct hwrm_func_cfg_input *cfg_req, +- int vf) ++static int reserve_resources_from_vf(struct bnxt *bp, ++ struct hwrm_func_cfg_input *cfg_req, ++ int vf) + { + struct hwrm_func_qcaps_input req = {0}; + struct hwrm_func_qcaps_output *resp = bp->hwrm_cmd_resp_addr; +@@ -3089,6 +3125,8 @@ static void reserve_resources_from_vf(struct bnxt *bp, + bp->max_ring_grps -= rte_le_to_cpu_16(resp->max_hw_ring_grps); + + HWRM_UNLOCK(); ++ ++ 
return 0; + } + + int bnxt_hwrm_func_qcfg_current_vf_vlan(struct bnxt *bp, int vf) +@@ -3382,7 +3420,7 @@ int bnxt_hwrm_func_buf_rgtr(struct bnxt *bp) + page_getenum(bp->pf.active_vfs * HWRM_MAX_REQ_LEN)); + req.req_buf_len = rte_cpu_to_le_16(HWRM_MAX_REQ_LEN); + req.req_buf_page_addr0 = +- rte_cpu_to_le_64(rte_mem_virt2iova(bp->pf.vf_req_buf)); ++ rte_cpu_to_le_64(rte_malloc_virt2iova(bp->pf.vf_req_buf)); + if (req.req_buf_page_addr0 == RTE_BAD_IOVA) { + PMD_DRV_LOG(ERR, + "unable to map buffer address to physical memory\n"); +@@ -3812,10 +3850,9 @@ int bnxt_get_nvram_directory(struct bnxt *bp, uint32_t len, uint8_t *data) + + buflen = dir_entries * entry_length; + buf = rte_malloc("nvm_dir", buflen, 0); +- rte_mem_lock_page(buf); + if (buf == NULL) + return -ENOMEM; +- dma_handle = rte_mem_virt2iova(buf); ++ dma_handle = rte_malloc_virt2iova(buf); + if (dma_handle == RTE_BAD_IOVA) { + PMD_DRV_LOG(ERR, + "unable to map response address to physical memory\n"); +@@ -3846,11 +3883,10 @@ int bnxt_hwrm_get_nvram_item(struct bnxt *bp, uint32_t index, + struct hwrm_nvm_read_output *resp = bp->hwrm_cmd_resp_addr; + + buf = rte_malloc("nvm_item", length, 0); +- rte_mem_lock_page(buf); + if (!buf) + return -ENOMEM; + +- dma_handle = rte_mem_virt2iova(buf); ++ dma_handle = rte_malloc_virt2iova(buf); + if (dma_handle == RTE_BAD_IOVA) { + PMD_DRV_LOG(ERR, + "unable to map response address to physical memory\n"); +@@ -3900,11 +3936,10 @@ int bnxt_hwrm_flash_nvram(struct bnxt *bp, uint16_t dir_type, + uint8_t *buf; + + buf = rte_malloc("nvm_write", data_len, 0); +- rte_mem_lock_page(buf); + if (!buf) + return -ENOMEM; + +- dma_handle = rte_mem_virt2iova(buf); ++ dma_handle = rte_malloc_virt2iova(buf); + if (dma_handle == RTE_BAD_IOVA) { + PMD_DRV_LOG(ERR, + "unable to map response address to physical memory\n"); +@@ -3967,7 +4002,7 @@ static int bnxt_hwrm_func_vf_vnic_query(struct bnxt *bp, uint16_t vf, + + req.vf_id = rte_cpu_to_le_16(bp->pf.first_vf_id + vf); + req.max_vnic_id_cnt = rte_cpu_to_le_32(bp->pf.total_vnics); +- req.vnic_id_tbl_addr = rte_cpu_to_le_64(rte_mem_virt2iova(vnic_ids)); ++ req.vnic_id_tbl_addr = rte_cpu_to_le_64(rte_malloc_virt2iova(vnic_ids)); + + if (req.vnic_id_tbl_addr == RTE_BAD_IOVA) { + HWRM_UNLOCK(); +@@ -4831,7 +4866,6 @@ int bnxt_hwrm_set_mac(struct bnxt *bp) + + HWRM_CHECK_RESULT(); + +- memcpy(bp->dflt_mac_addr, bp->mac_addr, RTE_ETHER_ADDR_LEN); + HWRM_UNLOCK(); + + return rc; +@@ -4844,7 +4878,7 @@ int bnxt_hwrm_if_change(struct bnxt *bp, bool up) + uint32_t flags; + int rc; + +- if (!(bp->flags & BNXT_FLAG_FW_CAP_IF_CHANGE)) ++ if (!(bp->fw_cap & BNXT_FW_CAP_IF_CHANGE)) + return 0; + + /* Do not issue FUNC_DRV_IF_CHANGE during reset recovery. 
+@@ -4887,7 +4921,7 @@ int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp) + int rc; + + /* Older FW does not have error recovery support */ +- if (!(bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY)) ++ if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)) + return 0; + + if (!info) { +diff --git a/dpdk/drivers/net/bnxt/bnxt_hwrm.h b/dpdk/drivers/net/bnxt/bnxt_hwrm.h +index abe5de9db6..2753720aef 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_hwrm.h ++++ b/dpdk/drivers/net/bnxt/bnxt_hwrm.h +@@ -35,6 +35,9 @@ struct bnxt_cp_ring_info; + #define HWRM_QUEUE_SERVICE_PROFILE_LOSSY \ + HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSY + ++#define HWRM_QUEUE_SERVICE_PROFILE_UNKNOWN \ ++ HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN ++ + #define HWRM_FUNC_RESOURCE_QCAPS_OUTPUT_VF_RESV_STRATEGY_MINIMAL_STATIC \ + HWRM_FUNC_RESOURCE_QCAPS_OUTPUT_VF_RESERVATION_STRATEGY_MINIMAL_STATIC + #define HWRM_FUNC_RESOURCE_QCAPS_OUTPUT_VF_RESV_STRATEGY_MAXIMAL \ +@@ -117,7 +120,7 @@ int bnxt_hwrm_stat_ctx_free(struct bnxt *bp, + int bnxt_hwrm_ctx_qstats(struct bnxt *bp, uint32_t cid, int idx, + struct rte_eth_stats *stats, uint8_t rx); + +-int bnxt_hwrm_ver_get(struct bnxt *bp); ++int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout); + + int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic); + int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic); +diff --git a/dpdk/drivers/net/bnxt/bnxt_irq.c b/dpdk/drivers/net/bnxt/bnxt_irq.c +index 846325ea96..40e1b0c980 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_irq.c ++++ b/dpdk/drivers/net/bnxt/bnxt_irq.c +@@ -181,5 +181,13 @@ int bnxt_request_int(struct bnxt *bp) + irq->requested = 1; + } + ++#ifdef RTE_EXEC_ENV_FREEBSD ++ /** ++ * In FreeBSD OS, nic_uio does not support interrupts and ++ * interrupt register callback will fail. ++ */ ++ rc = 0; ++#endif ++ + return rc; + } +diff --git a/dpdk/drivers/net/bnxt/bnxt_ring.c b/dpdk/drivers/net/bnxt/bnxt_ring.c +index ea46fa9bc0..1999cd7861 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_ring.c ++++ b/dpdk/drivers/net/bnxt/bnxt_ring.c +@@ -110,9 +110,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx, + uint64_t rx_offloads = bp->eth_dev->data->dev_conf.rxmode.offloads; + const struct rte_memzone *mz = NULL; + char mz_name[RTE_MEMZONE_NAMESIZE]; +- rte_iova_t mz_phys_addr_base; + rte_iova_t mz_phys_addr; +- int sz; + + int stats_len = (tx_ring_info || rx_ring_info) ? 
+ RTE_CACHE_LINE_ROUNDUP(sizeof(struct hwrm_stat_ctx_query_output) - +@@ -214,22 +212,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx, + return -ENOMEM; + } + memset(mz->addr, 0, mz->len); +- mz_phys_addr_base = mz->iova; + mz_phys_addr = mz->iova; +- if ((unsigned long)mz->addr == mz_phys_addr_base) { +- PMD_DRV_LOG(DEBUG, +- "Memzone physical address same as virtual.\n"); +- PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n"); +- for (sz = 0; sz < total_alloc_len; sz += getpagesize()) +- rte_mem_lock_page(((char *)mz->addr) + sz); +- mz_phys_addr_base = rte_mem_virt2iova(mz->addr); +- mz_phys_addr = rte_mem_virt2iova(mz->addr); +- if (mz_phys_addr == RTE_BAD_IOVA) { +- PMD_DRV_LOG(ERR, +- "unable to map ring address to physical memory\n"); +- return -ENOMEM; +- } +- } + + if (tx_ring_info) { + txq->mz = mz; +@@ -615,7 +598,7 @@ int bnxt_alloc_hwrm_rx_ring(struct bnxt *bp, int queue_index) + + if (rxq->rx_started) { + if (bnxt_init_one_rx_ring(rxq)) { +- RTE_LOG(ERR, PMD, ++ PMD_DRV_LOG(ERR, + "bnxt_init_one_rx_ring failed!\n"); + bnxt_rx_queue_release_op(rxq); + rc = -ENOMEM; +diff --git a/dpdk/drivers/net/bnxt/bnxt_rxq.c b/dpdk/drivers/net/bnxt/bnxt_rxq.c +index 457ebede0e..e42308a97f 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_rxq.c ++++ b/dpdk/drivers/net/bnxt/bnxt_rxq.c +@@ -168,10 +168,8 @@ int bnxt_mq_rx_configure(struct bnxt *bp) + if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) { + struct rte_eth_rss_conf *rss = &dev_conf->rx_adv_conf.rss_conf; + +- if (bp->flags & BNXT_FLAG_UPDATE_HASH) { +- rss = &bp->rss_conf; ++ if (bp->flags & BNXT_FLAG_UPDATE_HASH) + bp->flags &= ~BNXT_FLAG_UPDATE_HASH; +- } + + for (i = 0; i < bp->nr_vnics; i++) { + vnic = &bp->vnic_info[i]; +diff --git a/dpdk/drivers/net/bnxt/bnxt_rxr.c b/dpdk/drivers/net/bnxt/bnxt_rxr.c +index 3b713c2427..fa3b4a6be4 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_rxr.c ++++ b/dpdk/drivers/net/bnxt/bnxt_rxr.c +@@ -678,10 +678,11 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + + /* Attempt to alloc Rx buf in case of a previous allocation failure. */ + if (rc == -ENOMEM) { +- int i; ++ int i = RING_NEXT(rxr->rx_ring_struct, prod); ++ int cnt = nb_rx_pkts; + +- for (i = prod; i <= nb_rx_pkts; +- i = RING_NEXT(rxr->rx_ring_struct, i)) { ++ for (; cnt; ++ i = RING_NEXT(rxr->rx_ring_struct, i), cnt--) { + struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i]; + + /* Buffer already allocated for this index. 
*/ +@@ -852,11 +853,13 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq) + + prod = rxr->rx_prod; + for (i = 0; i < ring->ring_size; i++) { +- if (bnxt_alloc_rx_data(rxq, rxr, prod) != 0) { +- PMD_DRV_LOG(WARNING, +- "init'ed rx ring %d with %d/%d mbufs only\n", +- rxq->queue_id, i, ring->ring_size); +- break; ++ if (unlikely(!rxr->rx_buf_ring[i].mbuf)) { ++ if (bnxt_alloc_rx_data(rxq, rxr, prod) != 0) { ++ PMD_DRV_LOG(WARNING, ++ "init'ed rx ring %d with %d/%d mbufs only\n", ++ rxq->queue_id, i, ring->ring_size); ++ break; ++ } + } + rxr->rx_prod = prod; + prod = RING_NEXT(rxr->rx_ring_struct, prod); +@@ -868,11 +871,13 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq) + prod = rxr->ag_prod; + + for (i = 0; i < ring->ring_size; i++) { +- if (bnxt_alloc_ag_data(rxq, rxr, prod) != 0) { +- PMD_DRV_LOG(WARNING, +- "init'ed AG ring %d with %d/%d mbufs only\n", +- rxq->queue_id, i, ring->ring_size); +- break; ++ if (unlikely(!rxr->ag_buf_ring[i].mbuf)) { ++ if (bnxt_alloc_ag_data(rxq, rxr, prod) != 0) { ++ PMD_DRV_LOG(WARNING, ++ "init'ed AG ring %d with %d/%d mbufs only\n", ++ rxq->queue_id, i, ring->ring_size); ++ break; ++ } + } + rxr->ag_prod = prod; + prod = RING_NEXT(rxr->ag_ring_struct, prod); +@@ -883,11 +888,13 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq) + unsigned int max_aggs = BNXT_TPA_MAX_AGGS(rxq->bp); + + for (i = 0; i < max_aggs; i++) { +- rxr->tpa_info[i].mbuf = +- __bnxt_alloc_rx_data(rxq->mb_pool); +- if (!rxr->tpa_info[i].mbuf) { +- rte_atomic64_inc(&rxq->rx_mbuf_alloc_fail); +- return -ENOMEM; ++ if (unlikely(!rxr->tpa_info[i].mbuf)) { ++ rxr->tpa_info[i].mbuf = ++ __bnxt_alloc_rx_data(rxq->mb_pool); ++ if (!rxr->tpa_info[i].mbuf) { ++ rte_atomic64_inc(&rxq->rx_mbuf_alloc_fail); ++ return -ENOMEM; ++ } + } + } + } +diff --git a/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c +index 22d9f9e84a..8b4c396821 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c ++++ b/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c +@@ -233,8 +233,13 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + /* Return no more than RTE_BNXT_MAX_RX_BURST per call. */ + nb_pkts = RTE_MIN(nb_pkts, RTE_BNXT_MAX_RX_BURST); + +- /* Make nb_pkts an integer multiple of RTE_BNXT_DESCS_PER_LOOP */ ++ /* ++ * Make nb_pkts an integer multiple of RTE_BNXT_DESCS_PER_LOOP. 
++ * nb_pkts < RTE_BNXT_DESCS_PER_LOOP, just return no packet ++ */ + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_BNXT_DESCS_PER_LOOP); ++ if (!nb_pkts) ++ return 0; + + /* Handle RX burst request */ + while (1) { +@@ -289,7 +294,8 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + (RX_PKT_CMPL_METADATA_VID_MASK | + RX_PKT_CMPL_METADATA_DE | + RX_PKT_CMPL_METADATA_PRI_MASK); +- mbuf->ol_flags |= PKT_RX_VLAN; ++ mbuf->ol_flags |= ++ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; + } + + bnxt_parse_csum(mbuf, rxcmp1); +diff --git a/dpdk/drivers/net/bnxt/bnxt_stats.c b/dpdk/drivers/net/bnxt/bnxt_stats.c +index 14d355fd08..be5b514f49 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_stats.c ++++ b/dpdk/drivers/net/bnxt/bnxt_stats.c +@@ -389,11 +389,8 @@ int bnxt_stats_get_op(struct rte_eth_dev *eth_dev, + if (rc) + return rc; + +- memset(bnxt_stats, 0, sizeof(*bnxt_stats)); +- if (!(bp->flags & BNXT_FLAG_INIT_DONE)) { +- PMD_DRV_LOG(ERR, "Device Initialization not complete!\n"); ++ if (!eth_dev->data->dev_started) + return -EIO; +- } + + num_q_stats = RTE_MIN(bp->rx_cp_nr_rings, + (unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS); +@@ -437,7 +434,7 @@ int bnxt_stats_reset_op(struct rte_eth_dev *eth_dev) + if (ret) + return ret; + +- if (!(bp->flags & BNXT_FLAG_INIT_DONE)) { ++ if (!eth_dev->data->dev_started) { + PMD_DRV_LOG(ERR, "Device Initialization not complete!\n"); + return -EINVAL; + } +diff --git a/dpdk/drivers/net/bnxt/bnxt_vnic.c b/dpdk/drivers/net/bnxt/bnxt_vnic.c +index 104342e13b..bc054a8e0e 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_vnic.c ++++ b/dpdk/drivers/net/bnxt/bnxt_vnic.c +@@ -150,17 +150,6 @@ int bnxt_alloc_vnic_attributes(struct bnxt *bp) + return -ENOMEM; + } + mz_phys_addr = mz->iova; +- if ((unsigned long)mz->addr == mz_phys_addr) { +- PMD_DRV_LOG(DEBUG, +- "Memzone physical address same as virtual.\n"); +- PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n"); +- mz_phys_addr = rte_mem_virt2iova(mz->addr); +- if (mz_phys_addr == RTE_BAD_IOVA) { +- PMD_DRV_LOG(ERR, +- "unable to map to physical memory\n"); +- return -ENOMEM; +- } +- } + + for (i = 0; i < max_vnics; i++) { + vnic = &bp->vnic_info[i]; +diff --git a/dpdk/drivers/net/cxgbe/cxgbe.h b/dpdk/drivers/net/cxgbe/cxgbe.h +index 6c1f73ac4b..75a2e9931b 100644 +--- a/dpdk/drivers/net/cxgbe/cxgbe.h ++++ b/dpdk/drivers/net/cxgbe/cxgbe.h +@@ -40,7 +40,8 @@ + DEV_TX_OFFLOAD_IPV4_CKSUM | \ + DEV_TX_OFFLOAD_UDP_CKSUM | \ + DEV_TX_OFFLOAD_TCP_CKSUM | \ +- DEV_TX_OFFLOAD_TCP_TSO) ++ DEV_TX_OFFLOAD_TCP_TSO | \ ++ DEV_TX_OFFLOAD_MULTI_SEGS) + + #define CXGBE_RX_OFFLOADS (DEV_RX_OFFLOAD_VLAN_STRIP | \ + DEV_RX_OFFLOAD_IPV4_CKSUM | \ +diff --git a/dpdk/drivers/net/cxgbe/cxgbe_flow.c b/dpdk/drivers/net/cxgbe/cxgbe_flow.c +index 9070f4960d..2fb77b4abb 100644 +--- a/dpdk/drivers/net/cxgbe/cxgbe_flow.c ++++ b/dpdk/drivers/net/cxgbe/cxgbe_flow.c +@@ -230,7 +230,7 @@ ch_rte_parsetype_port(const void *dmask, const struct rte_flow_item *item, + if (val->index > 0x7) + return rte_flow_error_set(e, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, +- "port index upto 0x7 is supported"); ++ "port index up to 0x7 is supported"); + + CXGBE_FILL_FS(val->index, mask->index, iport); + +diff --git a/dpdk/drivers/net/dpaa/dpaa_ethdev.c b/dpdk/drivers/net/dpaa/dpaa_ethdev.c +index 26805f17b7..5f81968d80 100644 +--- a/dpdk/drivers/net/dpaa/dpaa_ethdev.c ++++ b/dpdk/drivers/net/dpaa/dpaa_ethdev.c +@@ -174,7 +174,7 @@ dpaa_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) + } + + if (frame_size > RTE_ETHER_MAX_LEN) +- dev->data->dev_conf.rxmode.offloads &= ++ 
dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_JUMBO_FRAME; + else + dev->data->dev_conf.rxmode.offloads &= +@@ -881,8 +881,8 @@ dpaa_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) + PMD_INIT_FUNC_TRACE(); + + if (qman_query_fq_frm_cnt(rxq, &frm_cnt) == 0) { +- RTE_LOG(DEBUG, PMD, "RX frame count for q(%d) is %u\n", +- rx_queue_id, frm_cnt); ++ DPAA_PMD_DEBUG("RX frame count for q(%d) is %u", ++ rx_queue_id, frm_cnt); + } + return frm_cnt; + } +@@ -996,8 +996,7 @@ dpaa_dev_add_mac_addr(struct rte_eth_dev *dev, + ret = fman_if_add_mac_addr(dpaa_intf->fif, addr->addr_bytes, index); + + if (ret) +- RTE_LOG(ERR, PMD, "error: Adding the MAC ADDR failed:" +- " err = %d", ret); ++ DPAA_PMD_ERR("Adding the MAC ADDR failed: err = %d", ret); + return 0; + } + +@@ -1023,7 +1022,7 @@ dpaa_dev_set_mac_addr(struct rte_eth_dev *dev, + + ret = fman_if_add_mac_addr(dpaa_intf->fif, addr->addr_bytes, 0); + if (ret) +- RTE_LOG(ERR, PMD, "error: Setting the MAC ADDR failed %d", ret); ++ DPAA_PMD_ERR("Setting the MAC ADDR failed %d", ret); + + return ret; + } +@@ -1306,6 +1305,7 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev) + struct fman_if *fman_intf; + struct fman_if_bpool *bp, *tmp_bp; + uint32_t cgrid[DPAA_MAX_NUM_PCD_QUEUES]; ++ char eth_buf[RTE_ETHER_ADDR_FMT_SIZE]; + + PMD_INIT_FUNC_TRACE(); + +@@ -1457,15 +1457,9 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev) + + /* copy the primary mac address */ + rte_ether_addr_copy(&fman_intf->mac_addr, ð_dev->data->mac_addrs[0]); ++ rte_ether_format_addr(eth_buf, sizeof(eth_buf), &fman_intf->mac_addr); + +- RTE_LOG(INFO, PMD, "net: dpaa: %s: %02x:%02x:%02x:%02x:%02x:%02x\n", +- dpaa_device->name, +- fman_intf->mac_addr.addr_bytes[0], +- fman_intf->mac_addr.addr_bytes[1], +- fman_intf->mac_addr.addr_bytes[2], +- fman_intf->mac_addr.addr_bytes[3], +- fman_intf->mac_addr.addr_bytes[4], +- fman_intf->mac_addr.addr_bytes[5]); ++ DPAA_PMD_INFO("net: dpaa: %s: %s", dpaa_device->name, eth_buf); + + /* Disable RX mode */ + fman_if_discard_rx_errors(fman_intf); +@@ -1578,8 +1572,7 @@ rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv __rte_unused, + + if (!is_global_init && (rte_eal_process_type() == RTE_PROC_PRIMARY)) { + if (access("/tmp/fmc.bin", F_OK) == -1) { +- RTE_LOG(INFO, PMD, +- "* FMC not configured.Enabling default mode\n"); ++ DPAA_PMD_INFO("* FMC not configured.Enabling default mode"); + default_q = 1; + } + +diff --git a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c +index 2cde55e7cc..19196cbb65 100644 +--- a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c ++++ b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c +@@ -553,9 +553,6 @@ dpaa2_eth_dev_configure(struct rte_eth_dev *dev) + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) + dpaa2_vlan_offload_set(dev, ETH_VLAN_FILTER_MASK); + +- /* update the current status */ +- dpaa2_dev_link_update(dev, 0); +- + return 0; + } + +@@ -663,7 +660,7 @@ dpaa2_dev_rx_queue_setup(struct rte_eth_dev *dev, + DPNI_CP_CONGESTION_GROUP, + DPNI_QUEUE_RX, + dpaa2_q->tc_index, +- flow_id, &taildrop); ++ dpaa2_q->cgid, &taildrop); + } else { + /*enabling per rx queue congestion control */ + taildrop.threshold = CONG_THRESHOLD_RX_BYTES_Q; +@@ -690,7 +687,7 @@ dpaa2_dev_rx_queue_setup(struct rte_eth_dev *dev, + ret = dpni_set_taildrop(dpni, CMD_PRI_LOW, priv->token, + DPNI_CP_CONGESTION_GROUP, DPNI_QUEUE_RX, + dpaa2_q->tc_index, +- flow_id, &taildrop); ++ dpaa2_q->cgid, &taildrop); + } else { + ret = dpni_set_taildrop(dpni, CMD_PRI_LOW, priv->token, + DPNI_CP_QUEUE, DPNI_QUEUE_RX, +@@ -1757,6 +1754,7 
@@ dpaa2_dev_set_link_up(struct rte_eth_dev *dev) + /* changing tx burst function to start enqueues */ + dev->tx_pkt_burst = dpaa2_dev_tx; + dev->data->dev_link.link_status = state.up; ++ dev->data->dev_link.link_speed = state.rate; + + if (state.up) + DPAA2_PMD_INFO("Port %d Link is Up", dev->data->port_id); +diff --git a/dpdk/drivers/net/dpaa2/dpaa2_flow.c b/dpdk/drivers/net/dpaa2/dpaa2_flow.c +index 2212650320..8aa65db305 100644 +--- a/dpdk/drivers/net/dpaa2/dpaa2_flow.c ++++ b/dpdk/drivers/net/dpaa2/dpaa2_flow.c +@@ -1850,13 +1850,13 @@ struct rte_flow *dpaa2_flow_create(struct rte_eth_dev *dev, + key_iova = (size_t)rte_malloc(NULL, 256, 64); + if (!key_iova) { + DPAA2_PMD_ERR( +- "Memory allocation failure for rule configration\n"); ++ "Memory allocation failure for rule configuration\n"); + goto mem_failure; + } + mask_iova = (size_t)rte_malloc(NULL, 256, 64); + if (!mask_iova) { + DPAA2_PMD_ERR( +- "Memory allocation failure for rule configration\n"); ++ "Memory allocation failure for rule configuration\n"); + goto mem_failure; + } + +diff --git a/dpdk/drivers/net/dpaa2/dpaa2_mux.c b/dpdk/drivers/net/dpaa2/dpaa2_mux.c +index 1910cc4184..af90adb828 100644 +--- a/dpdk/drivers/net/dpaa2/dpaa2_mux.c ++++ b/dpdk/drivers/net/dpaa2/dpaa2_mux.c +@@ -84,7 +84,7 @@ rte_pmd_dpaa2_mux_flow_create(uint32_t dpdmux_id, + (2 * DIST_PARAM_IOVA_SIZE), RTE_CACHE_LINE_SIZE); + if (!flow) { + DPAA2_PMD_ERR( +- "Memory allocation failure for rule configration\n"); ++ "Memory allocation failure for rule configuration\n"); + goto creation_error; + } + key_iova = (void *)((size_t)flow + sizeof(struct rte_flow)); +diff --git a/dpdk/drivers/net/e1000/em_ethdev.c b/dpdk/drivers/net/e1000/em_ethdev.c +index 080cbe2df5..fc47993c83 100644 +--- a/dpdk/drivers/net/e1000/em_ethdev.c ++++ b/dpdk/drivers/net/e1000/em_ethdev.c +@@ -321,7 +321,7 @@ eth_em_dev_uninit(struct rte_eth_dev *eth_dev) + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) +- return -EPERM; ++ return 0; + + eth_em_close(eth_dev); + +diff --git a/dpdk/drivers/net/e1000/igb_ethdev.c b/dpdk/drivers/net/e1000/igb_ethdev.c +index a3e30dbe5a..e061615b88 100644 +--- a/dpdk/drivers/net/e1000/igb_ethdev.c ++++ b/dpdk/drivers/net/e1000/igb_ethdev.c +@@ -922,7 +922,7 @@ eth_igb_dev_uninit(struct rte_eth_dev *eth_dev) + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) +- return -EPERM; ++ return 0; + + eth_igb_close(eth_dev); + +@@ -1043,7 +1043,7 @@ eth_igbvf_dev_uninit(struct rte_eth_dev *eth_dev) + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) +- return -EPERM; ++ return 0; + + igbvf_dev_close(eth_dev); + +diff --git a/dpdk/drivers/net/ena/base/ena_com.c b/dpdk/drivers/net/ena/base/ena_com.c +index 8b51660a45..147c039124 100644 +--- a/dpdk/drivers/net/ena/base/ena_com.c ++++ b/dpdk/drivers/net/ena/base/ena_com.c +@@ -525,11 +525,11 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c + timeout = ENA_GET_SYSTEM_TIMEOUT(admin_queue->completion_timeout); + + while (1) { +- ENA_SPINLOCK_LOCK(admin_queue->q_lock, flags); +- ena_com_handle_admin_completion(admin_queue); +- ENA_SPINLOCK_UNLOCK(admin_queue->q_lock, flags); ++ ENA_SPINLOCK_LOCK(admin_queue->q_lock, flags); ++ ena_com_handle_admin_completion(admin_queue); ++ ENA_SPINLOCK_UNLOCK(admin_queue->q_lock, flags); + +- if (comp_ctx->status != ENA_CMD_SUBMITTED) ++ if (comp_ctx->status != ENA_CMD_SUBMITTED) + break; + + if (ENA_TIME_EXPIRE(timeout)) { +@@ -2313,7 +2313,7 @@ int 
ena_com_set_hash_function(struct ena_com_dev *ena_dev) + if (unlikely(ret)) + return ret; + +- if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) { ++ if (get_resp.u.flow_hash_func.supported_func & BIT(rss->hash_func)) { + ena_trc_err("Func hash %d isn't supported by device, abort\n", + rss->hash_func); + return ENA_COM_UNSUPPORTED; +@@ -2356,12 +2356,14 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions func, + const u8 *key, u16 key_len, u32 init_val) + { +- struct ena_rss *rss = &ena_dev->rss; ++ struct ena_admin_feature_rss_flow_hash_control *hash_key; + struct ena_admin_get_feat_resp get_resp; +- struct ena_admin_feature_rss_flow_hash_control *hash_key = +- rss->hash_key; ++ enum ena_admin_hash_functions old_func; ++ struct ena_rss *rss = &ena_dev->rss; + int rc; + ++ hash_key = rss->hash_key; ++ + /* Make sure size is a mult of DWs */ + if (unlikely(key_len & 0x3)) + return ENA_COM_INVAL; +@@ -2373,7 +2375,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + if (unlikely(rc)) + return rc; + +- if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) { ++ if (!(BIT(func) & get_resp.u.flow_hash_func.supported_func)) { + ena_trc_err("Flow hash function %d isn't supported\n", func); + return ENA_COM_UNSUPPORTED; + } +@@ -2398,11 +2400,13 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + return ENA_COM_INVAL; + } + ++ old_func = rss->hash_func; ++ rss->hash_func = func; + rc = ena_com_set_hash_function(ena_dev); + + /* Restore the old function */ + if (unlikely(rc)) +- ena_com_get_hash_function(ena_dev, NULL, NULL); ++ rss->hash_func = old_func; + + return rc; + } +@@ -2424,7 +2428,11 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev, + if (unlikely(rc)) + return rc; + +- rss->hash_func = get_resp.u.flow_hash_func.selected_func; ++ /* ENA_FFS returns 1 in case the lsb is set */ ++ rss->hash_func = ENA_FFS(get_resp.u.flow_hash_func.selected_func); ++ if (rss->hash_func) ++ rss->hash_func--; ++ + if (func) + *func = rss->hash_func; + +diff --git a/dpdk/drivers/net/ena/base/ena_com.h b/dpdk/drivers/net/ena/base/ena_com.h +index ef42bd4f56..f1593345e8 100644 +--- a/dpdk/drivers/net/ena/base/ena_com.h ++++ b/dpdk/drivers/net/ena/base/ena_com.h +@@ -9,9 +9,9 @@ + #include "ena_plat.h" + #include "ena_includes.h" + +-#define ENA_MAX_NUM_IO_QUEUES 128U ++#define ENA_MAX_NUM_IO_QUEUES 128U + /* We need to queues for each IO (on for Tx and one for Rx) */ +-#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES)) ++#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES)) + + #define ENA_MAX_HANDLERS 256 + +@@ -55,9 +55,9 @@ + #define ENA_INTR_MODER_LEVEL_STRIDE 1 + #define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED 0xFFFFFF + +-#define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF ++#define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF + +-#define ENA_FEATURE_MAX_QUEUE_EXT_VER 1 ++#define ENA_FEATURE_MAX_QUEUE_EXT_VER 1 + + enum ena_intr_moder_level { + ENA_INTR_MODER_LOWEST = 0, +@@ -404,7 +404,7 @@ extern "C" { + */ + int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev); + +-/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism ++/* ena_com_set_mmio_read_mode - Enable/disable the indirect mmio reg read mechanism + * @ena_dev: ENA communication layer struct + * @readless_supported: readless mode (enable/disable) + */ +@@ -527,7 +527,7 @@ bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev); + /* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler + * @ena_dev: 
ENA communication layer struct + * +- * This method go over the admin completion queue and wake up all the pending ++ * This method goes over the admin completion queue and wakes up all the pending + * threads that wait on the commands wait event. + * + * @note: Should be called after MSI-X interrupt. +@@ -537,7 +537,7 @@ void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev); + /* ena_com_aenq_intr_handler - AENQ interrupt handler + * @ena_dev: ENA communication layer struct + * +- * This method go over the async event notification queue and call the proper ++ * This method goes over the async event notification queue and calls the proper + * aenq handler. + */ + void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data); +@@ -554,14 +554,14 @@ void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev); + /* ena_com_wait_for_abort_completion - Wait for admin commands abort. + * @ena_dev: ENA communication layer struct + * +- * This method wait until all the outstanding admin commands will be completed. ++ * This method waits until all the outstanding admin commands are completed. + */ + void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev); + + /* ena_com_validate_version - Validate the device parameters + * @ena_dev: ENA communication layer struct + * +- * This method validate the device parameters are the same as the saved ++ * This method verifies the device parameters are the same as the saved + * parameters in ena_dev. + * This method is useful after device reset, to validate the device mac address + * and the device offloads are the same as before the reset. +@@ -763,7 +763,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev); + * + * Retrieve the hash control from the device. + * +- * @note, If the caller called ena_com_fill_hash_ctrl but didn't flash ++ * @note: If the caller called ena_com_fill_hash_ctrl but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. +@@ -815,7 +815,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev); + * + * Retrieve the RSS indirection table from the device. + * +- * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flash ++ * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flush + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. +@@ -841,14 +841,14 @@ int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, + /* ena_com_delete_debug_area - Free the debug area resources. + * @ena_dev: ENA communication layer struct + * +- * Free the allocate debug area. ++ * Free the allocated debug area. + */ + void ena_com_delete_debug_area(struct ena_com_dev *ena_dev); + + /* ena_com_delete_host_info - Free the host info resources. + * @ena_dev: ENA communication layer struct + * +- * Free the allocate host info. ++ * Free the allocated host info. + */ + void ena_com_delete_host_info(struct ena_com_dev *ena_dev); + +@@ -889,9 +889,9 @@ int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, + * @cmd_completion: command completion return value. + * @cmd_comp_size: command completion size. + +- * Submit an admin command and then wait until the device will return a ++ * Submit an admin command and then wait until the device returns a + * completion. +- * The completion will be copyed into cmd_comp. ++ * The completion will be copied into cmd_comp. + * + * @return - 0 on success, negative value on failure. 
+ */ +@@ -1083,7 +1083,7 @@ static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev + * @intr_reg: interrupt register to update. + * @rx_delay_interval: Rx interval in usecs + * @tx_delay_interval: Tx interval in usecs +- * @unmask: unask enable/disable ++ * @unmask: unmask enable/disable + * + * Prepare interrupt update register with the supplied parameters. + */ +diff --git a/dpdk/drivers/net/ena/base/ena_plat_dpdk.h b/dpdk/drivers/net/ena/base/ena_plat_dpdk.h +index 9e1492cac4..da97efca46 100644 +--- a/dpdk/drivers/net/ena/base/ena_plat_dpdk.h ++++ b/dpdk/drivers/net/ena/base/ena_plat_dpdk.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright (c) 2015-2019 Amazon.com, Inc. or its affiliates. ++ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. + * All rights reserved. + */ + +@@ -178,18 +178,22 @@ do { \ + * Each rte_memzone should have unique name. + * To satisfy it, count number of allocations and add it to name. + */ +-extern uint32_t ena_alloc_cnt; ++extern rte_atomic32_t ena_alloc_cnt; + + #define ENA_MEM_ALLOC_COHERENT(dmadev, size, virt, phys, handle) \ + do { \ +- const struct rte_memzone *mz; \ +- char z_name[RTE_MEMZONE_NAMESIZE]; \ ++ const struct rte_memzone *mz = NULL; \ + ENA_TOUCH(dmadev); ENA_TOUCH(handle); \ +- snprintf(z_name, sizeof(z_name), \ +- "ena_alloc_%d", ena_alloc_cnt++); \ +- mz = rte_memzone_reserve(z_name, size, SOCKET_ID_ANY, \ +- RTE_MEMZONE_IOVA_CONTIG); \ +- handle = mz; \ ++ if (size > 0) { \ ++ char z_name[RTE_MEMZONE_NAMESIZE]; \ ++ snprintf(z_name, sizeof(z_name), \ ++ "ena_alloc_%d", \ ++ rte_atomic32_add_return(&ena_alloc_cnt, 1)); \ ++ mz = rte_memzone_reserve(z_name, size, \ ++ SOCKET_ID_ANY, \ ++ RTE_MEMZONE_IOVA_CONTIG); \ ++ handle = mz; \ ++ } \ + if (mz == NULL) { \ + virt = NULL; \ + phys = 0; \ +@@ -207,14 +211,17 @@ extern uint32_t ena_alloc_cnt; + #define ENA_MEM_ALLOC_COHERENT_NODE( \ + dmadev, size, virt, phys, mem_handle, node, dev_node) \ + do { \ +- const struct rte_memzone *mz; \ +- char z_name[RTE_MEMZONE_NAMESIZE]; \ ++ const struct rte_memzone *mz = NULL; \ + ENA_TOUCH(dmadev); ENA_TOUCH(dev_node); \ +- snprintf(z_name, sizeof(z_name), \ +- "ena_alloc_%d", ena_alloc_cnt++); \ +- mz = rte_memzone_reserve(z_name, size, node, \ ++ if (size > 0) { \ ++ char z_name[RTE_MEMZONE_NAMESIZE]; \ ++ snprintf(z_name, sizeof(z_name), \ ++ "ena_alloc_%d", \ ++ rte_atomic32_add_return(&ena_alloc_cnt, 1)); \ ++ mz = rte_memzone_reserve(z_name, size, node, \ + RTE_MEMZONE_IOVA_CONTIG); \ +- mem_handle = mz; \ ++ mem_handle = mz; \ ++ } \ + if (mz == NULL) { \ + virt = NULL; \ + phys = 0; \ +@@ -290,4 +297,6 @@ extern uint32_t ena_alloc_cnt; + + #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + ++#define ENA_FFS(x) ffs(x) ++ + #endif /* DPDK_ENA_COM_ENA_PLAT_DPDK_H_ */ +diff --git a/dpdk/drivers/net/ena/ena_ethdev.c b/dpdk/drivers/net/ena/ena_ethdev.c +index 8bbd80dfb3..e8753ce010 100644 +--- a/dpdk/drivers/net/ena/ena_ethdev.c ++++ b/dpdk/drivers/net/ena/ena_ethdev.c +@@ -89,7 +89,7 @@ struct ena_stats { + * Each rte_memzone should have unique name. + * To satisfy it, count number of allocation and add it to name. 
+ */ +-uint32_t ena_alloc_cnt; ++rte_atomic32_t ena_alloc_cnt; + + static const struct ena_stats ena_stats_global_strings[] = { + ENA_STAT_GLOBAL_ENTRY(wd_expired), +@@ -1079,16 +1079,15 @@ static int ena_create_io_queue(struct ena_ring *ring) + ena_qid = ENA_IO_TXQ_IDX(ring->id); + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; + ctx.mem_queue_type = ena_dev->tx_mem_queue_type; +- ctx.queue_size = adapter->tx_ring_size; + for (i = 0; i < ring->ring_size; i++) + ring->empty_tx_reqs[i] = i; + } else { + ena_qid = ENA_IO_RXQ_IDX(ring->id); + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; +- ctx.queue_size = adapter->rx_ring_size; + for (i = 0; i < ring->ring_size; i++) + ring->empty_rx_reqs[i] = i; + } ++ ctx.queue_size = ring->ring_size; + ctx.qid = ena_qid; + ctx.msix_vector = -1; /* interrupts not used */ + ctx.numa_node = ring->numa_socket_id; +@@ -1675,7 +1674,7 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) + int rc; + + static int adapters_found; +- bool wd_state; ++ bool wd_state = false; + + eth_dev->dev_ops = &ena_dev_ops; + eth_dev->rx_pkt_burst = ð_ena_recv_pkts; +diff --git a/dpdk/drivers/net/enetc/base/enetc_hw.h b/dpdk/drivers/net/enetc/base/enetc_hw.h +index 2fe7ccb5bb..00813284ee 100644 +--- a/dpdk/drivers/net/enetc/base/enetc_hw.h ++++ b/dpdk/drivers/net/enetc/base/enetc_hw.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright 2018-2019 NXP ++ * Copyright 2018-2020 NXP + */ + + #ifndef _ENETC_HW_H_ +@@ -86,6 +86,7 @@ enum enetc_bdr_type {TX, RX}; + #define ENETC_PSIPMAR1(n) (0x00104 + (n) * 0x20) + #define ENETC_PCAPR0 0x00900 + #define ENETC_PCAPR1 0x00904 ++#define ENETC_PM0_RX_FIFO 0x801C + #define ENETC_PM0_IF_MODE 0x8300 + #define ENETC_PM1_IF_MODE 0x9300 + #define ENETC_PMO_IFM_RG BIT(2) +diff --git a/dpdk/drivers/net/enetc/enetc_ethdev.c b/dpdk/drivers/net/enetc/enetc_ethdev.c +index 20b77c006c..eb637d0306 100644 +--- a/dpdk/drivers/net/enetc/enetc_ethdev.c ++++ b/dpdk/drivers/net/enetc/enetc_ethdev.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright 2018-2019 NXP ++ * Copyright 2018-2020 NXP + */ + + #include <stdbool.h> +@@ -147,6 +147,9 @@ enetc_hardware_init(struct enetc_eth_hw *hw) + hw->hw.port = (void *)((size_t)hw->hw.reg + ENETC_PORT_BASE); + hw->hw.global = (void *)((size_t)hw->hw.reg + ENETC_GLOBAL_BASE); + ++ /* WA for Rx lock-up HW erratum */ ++ enetc_port_wr(enetc_hw, ENETC_PM0_RX_FIFO, 1); ++ + /* Enabling Station Interface */ + enetc_wr(enetc_hw, ENETC_SIMR, ENETC_SIMR_EN); + +diff --git a/dpdk/drivers/net/enic/enic_fm_flow.c b/dpdk/drivers/net/enic/enic_fm_flow.c +index e3e3d737bc..e8d5927674 100644 +--- a/dpdk/drivers/net/enic/enic_fm_flow.c ++++ b/dpdk/drivers/net/enic/enic_fm_flow.c +@@ -869,46 +869,36 @@ enic_fm_append_action_op(struct enic_flowman *fm, + return 0; + } + +-/* Steer operations need to appear before other ops */ ++/* NIC requires that 1st steer appear before decap. ++ * Correct example: steer, decap, steer, steer, ... ++ */ + static void + enic_fm_reorder_action_op(struct enic_flowman *fm) + { +- struct fm_action_op *dst, *dst_head, *src, *src_head; ++ struct fm_action_op *op, *steer, *decap; ++ struct fm_action_op tmp_op; + + ENICPMD_FUNC_TRACE(); +- /* Move steer ops to the front. 
*/ +- src = fm->action.fma_action_ops; +- src_head = src; +- dst = fm->action_tmp.fma_action_ops; +- dst_head = dst; +- /* Copy steer ops to tmp */ +- while (src->fa_op != FMOP_END) { +- if (src->fa_op == FMOP_RQ_STEER) { +- ENICPMD_LOG(DEBUG, "move op: %ld -> dst %ld", +- (long)(src - src_head), +- (long)(dst - dst_head)); +- *dst = *src; +- dst++; +- } +- src++; +- } +- /* Then append non-steer ops */ +- src = src_head; +- while (src->fa_op != FMOP_END) { +- if (src->fa_op != FMOP_RQ_STEER) { +- ENICPMD_LOG(DEBUG, "move op: %ld -> dst %ld", +- (long)(src - src_head), +- (long)(dst - dst_head)); +- *dst = *src; +- dst++; +- } +- src++; ++ /* Find 1st steer and decap */ ++ op = fm->action.fma_action_ops; ++ steer = NULL; ++ decap = NULL; ++ while (op->fa_op != FMOP_END) { ++ if (!decap && op->fa_op == FMOP_DECAP_NOSTRIP) ++ decap = op; ++ else if (!steer && op->fa_op == FMOP_RQ_STEER) ++ steer = op; ++ op++; ++ } ++ /* If decap is before steer, swap */ ++ if (steer && decap && decap < steer) { ++ op = fm->action.fma_action_ops; ++ ENICPMD_LOG(DEBUG, "swap decap %ld <-> steer %ld", ++ (long)(decap - op), (long)(steer - op)); ++ tmp_op = *decap; ++ *decap = *steer; ++ *steer = tmp_op; + } +- /* Copy END */ +- *dst = *src; +- /* Finally replace the original action with the reordered one */ +- memcpy(fm->action.fma_action_ops, fm->action_tmp.fma_action_ops, +- sizeof(fm->action.fma_action_ops)); + } + + /* VXLAN decap is done via flowman compound action */ +@@ -1099,6 +1089,7 @@ enic_fm_copy_action(struct enic_flowman *fm, + PASSTHRU = 1 << 2, + COUNT = 1 << 3, + ENCAP = 1 << 4, ++ DECAP = 1 << 5, + }; + struct fm_tcam_match_entry *fmt; + struct fm_action_op fm_op; +@@ -1281,6 +1272,10 @@ enic_fm_copy_action(struct enic_flowman *fm, + break; + } + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: { ++ if (overlap & DECAP) ++ goto unsupported; ++ overlap |= DECAP; ++ + ret = enic_fm_copy_vxlan_decap(fm, fmt, actions, + error); + if (ret != 0) +diff --git a/dpdk/drivers/net/failsafe/failsafe.c b/dpdk/drivers/net/failsafe/failsafe.c +index 8af31d71b3..72362f35de 100644 +--- a/dpdk/drivers/net/failsafe/failsafe.c ++++ b/dpdk/drivers/net/failsafe/failsafe.c +@@ -190,6 +190,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev) + } + priv = PRIV(dev); + priv->data = dev->data; ++ priv->rxp = FS_RX_PROXY_INIT; + dev->dev_ops = &failsafe_ops; + dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0]; + dev->data->dev_link = eth_link; +diff --git a/dpdk/drivers/net/failsafe/failsafe_intr.c b/dpdk/drivers/net/failsafe/failsafe_intr.c +index 0f34c5bbac..bb5b089b31 100644 +--- a/dpdk/drivers/net/failsafe/failsafe_intr.c ++++ b/dpdk/drivers/net/failsafe/failsafe_intr.c +@@ -394,7 +394,7 @@ fs_rx_event_proxy_uninstall(struct fs_priv *priv) + free(priv->rxp.evec); + priv->rxp.evec = NULL; + } +- if (priv->rxp.efd > 0) { ++ if (priv->rxp.efd >= 0) { + close(priv->rxp.efd); + priv->rxp.efd = -1; + } +diff --git a/dpdk/drivers/net/failsafe/failsafe_ops.c b/dpdk/drivers/net/failsafe/failsafe_ops.c +index a87e49b97d..e1d08e46c8 100644 +--- a/dpdk/drivers/net/failsafe/failsafe_ops.c ++++ b/dpdk/drivers/net/failsafe/failsafe_ops.c +@@ -380,7 +380,7 @@ fs_rx_queue_release(void *queue) + rxq = queue; + dev = &rte_eth_devices[rxq->priv->data->port_id]; + fs_lock(dev, 0); +- if (rxq->event_fd > 0) ++ if (rxq->event_fd >= 0) + close(rxq->event_fd); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + if (ETH(sdev)->data->rx_queues != NULL && +@@ -1068,6 +1068,8 @@ fs_dev_merge_info(struct rte_eth_dev_info *info, + 
info->rx_queue_offload_capa &= sinfo->rx_queue_offload_capa; + info->tx_queue_offload_capa &= sinfo->tx_queue_offload_capa; + info->flow_type_rss_offloads &= sinfo->flow_type_rss_offloads; ++ info->hash_key_size = RTE_MIN(info->hash_key_size, ++ sinfo->hash_key_size); + } + + /** +@@ -1117,6 +1119,7 @@ fs_dev_infos_get(struct rte_eth_dev *dev, + infos->max_hash_mac_addrs = UINT32_MAX; + infos->max_vfs = UINT16_MAX; + infos->max_vmdq_pools = UINT16_MAX; ++ infos->hash_key_size = UINT8_MAX; + + /* + * Set of capabilities that can be verified upon +diff --git a/dpdk/drivers/net/failsafe/failsafe_private.h b/dpdk/drivers/net/failsafe/failsafe_private.h +index 8e9706aef0..651578a128 100644 +--- a/dpdk/drivers/net/failsafe/failsafe_private.h ++++ b/dpdk/drivers/net/failsafe/failsafe_private.h +@@ -58,6 +58,14 @@ struct rx_proxy { + enum rxp_service_state sstate; + }; + ++#define FS_RX_PROXY_INIT (struct rx_proxy){ \ ++ .efd = -1, \ ++ .evec = NULL, \ ++ .sid = 0, \ ++ .scid = 0, \ ++ .sstate = SS_NO_SERVICE, \ ++} ++ + struct rxq { + struct fs_priv *priv; + uint16_t qid; +diff --git a/dpdk/drivers/net/fm10k/Makefile b/dpdk/drivers/net/fm10k/Makefile +index 722bf1ee04..338de40f23 100644 +--- a/dpdk/drivers/net/fm10k/Makefile ++++ b/dpdk/drivers/net/fm10k/Makefile +@@ -73,6 +73,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_common.c + SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_mbx.c + SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_vf.c + SRCS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k_api.c ++ifeq ($(CONFIG_RTE_ARCH_X86), y) + SRCS-$(CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR) += fm10k_rxtx_vec.c ++endif + + include $(RTE_SDK)/mk/rte.lib.mk +diff --git a/dpdk/drivers/net/fm10k/fm10k_rxtx.c b/dpdk/drivers/net/fm10k/fm10k_rxtx.c +index 5c31121839..4accaa2cd6 100644 +--- a/dpdk/drivers/net/fm10k/fm10k_rxtx.c ++++ b/dpdk/drivers/net/fm10k/fm10k_rxtx.c +@@ -611,6 +611,8 @@ static inline void tx_xmit_pkt(struct fm10k_tx_queue *q, struct rte_mbuf *mb) + /* set vlan if requested */ + if (mb->ol_flags & PKT_TX_VLAN_PKT) + q->hw_ring[q->next_free].vlan = mb->vlan_tci; ++ else ++ q->hw_ring[q->next_free].vlan = 0; + + q->sw_ring[q->next_free] = mb; + q->hw_ring[q->next_free].buffer_addr = +diff --git a/dpdk/drivers/net/hinic/base/hinic_compat.h b/dpdk/drivers/net/hinic/base/hinic_compat.h +index e4a7f12d15..7bec03b9cb 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_compat.h ++++ b/dpdk/drivers/net/hinic/base/hinic_compat.h +@@ -150,22 +150,25 @@ static inline int hinic_test_and_set_bit(int nr, volatile unsigned long *addr) + } + + void *dma_zalloc_coherent(void *dev, size_t size, dma_addr_t *dma_handle, +- gfp_t flag); +-void *dma_zalloc_coherent_aligned(void *dev, size_t size, +- dma_addr_t *dma_handle, gfp_t flag); +-void *dma_zalloc_coherent_aligned256k(void *dev, size_t size, +- dma_addr_t *dma_handle, gfp_t flag); ++ unsigned int socket_id); ++ ++void *dma_zalloc_coherent_aligned(void *hwdev, size_t size, ++ dma_addr_t *dma_handle, unsigned int socket_id); ++ ++void *dma_zalloc_coherent_aligned256k(void *hwdev, size_t size, ++ dma_addr_t *dma_handle, unsigned int socket_id); ++ + void dma_free_coherent(void *dev, size_t size, void *virt, dma_addr_t phys); + + /* dma pool alloc and free */ + #define pci_pool dma_pool +-#define pci_pool_alloc(pool, flags, handle) dma_pool_alloc(pool, flags, handle) ++#define pci_pool_alloc(pool, handle) dma_pool_alloc(pool, handle) + #define pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr) + + struct dma_pool *dma_pool_create(const char *name, void *dev, size_t 
size, + size_t align, size_t boundary); + void dma_pool_destroy(struct dma_pool *pool); +-void *dma_pool_alloc(struct pci_pool *pool, int flags, dma_addr_t *dma_addr); ++void *dma_pool_alloc(struct pci_pool *pool, dma_addr_t *dma_addr); + void dma_pool_free(struct pci_pool *pool, void *vaddr, dma_addr_t dma); + + #define kzalloc(size, flag) rte_zmalloc(NULL, size, HINIC_MEM_ALLOC_ALIGN_MIN) +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_api_cmd.c b/dpdk/drivers/net/hinic/base/hinic_pmd_api_cmd.c +index dbffc2e3b0..b72edc0652 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_api_cmd.c ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_api_cmd.c +@@ -658,7 +658,7 @@ static int alloc_cmd_buf(struct hinic_api_cmd_chain *chain, + + cmd_vaddr_alloc = dma_zalloc_coherent(dev, (API_CMD_BUF_SIZE + + API_PAYLOAD_ALIGN_SIZE), +- &cmd_paddr, GFP_KERNEL); ++ &cmd_paddr, SOCKET_ID_ANY); + if (!cmd_vaddr_alloc) { + PMD_DRV_LOG(ERR, "Allocate API CMD dma memory failed"); + return -ENOMEM; +@@ -712,7 +712,7 @@ static int api_cmd_create_cell(struct hinic_api_cmd_chain *chain, + + node_vaddr_alloc = dma_zalloc_coherent(dev, (chain->cell_size + + API_CMD_NODE_ALIGN_SIZE), +- &node_paddr, GFP_KERNEL); ++ &node_paddr, SOCKET_ID_ANY); + if (!node_vaddr_alloc) { + PMD_DRV_LOG(ERR, "Allocate dma API CMD cell failed"); + return -ENOMEM; +@@ -889,8 +889,7 @@ static int api_chain_init(struct hinic_api_cmd_chain *chain, + + chain->wb_status = (struct hinic_api_cmd_status *) + dma_zalloc_coherent(dev, sizeof(*chain->wb_status), +- &chain->wb_status_paddr, +- GFP_KERNEL); ++ &chain->wb_status_paddr, SOCKET_ID_ANY); + if (!chain->wb_status) { + PMD_DRV_LOG(ERR, "Allocate DMA wb status failed"); + err = -ENOMEM; +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.c b/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.c +index eb8de24d6e..2e98b9c286 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.c ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.c +@@ -7,6 +7,7 @@ + #include "hinic_pmd_hwif.h" + #include "hinic_pmd_wq.h" + #include "hinic_pmd_mgmt.h" ++#include "hinic_pmd_mbox.h" + #include "hinic_pmd_cmdq.h" + + #define CMDQ_CMD_TIMEOUT 5000 /* millisecond */ +@@ -171,8 +172,7 @@ struct hinic_cmd_buf *hinic_alloc_cmd_buf(void *hwdev) + return NULL; + } + +- cmd_buf->buf = pci_pool_alloc(cmdqs->cmd_buf_pool, GFP_KERNEL, +- &cmd_buf->dma_addr); ++ cmd_buf->buf = pci_pool_alloc(cmdqs->cmd_buf_pool, &cmd_buf->dma_addr); + if (!cmd_buf->buf) { + PMD_DRV_LOG(ERR, "Allocate cmd from the pool failed"); + goto alloc_pci_buf_err; +@@ -440,11 +440,17 @@ static int hinic_set_cmdq_ctxts(struct hinic_hwdev *hwdev) + cmdq_ctxt, in_size, NULL, + NULL, 0); + if (err) { +- PMD_DRV_LOG(ERR, "Set cmdq ctxt failed"); ++ if (err == HINIC_MBOX_PF_BUSY_ACTIVE_FW || ++ err == HINIC_DEV_BUSY_ACTIVE_FW) { ++ cmdqs->status |= HINIC_CMDQ_SET_FAIL; ++ PMD_DRV_LOG(ERR, "PF or VF fw is hot active"); ++ } ++ PMD_DRV_LOG(ERR, "Set cmdq ctxt failed, err: %d", err); + return -EFAULT; + } + } + ++ cmdqs->status &= ~HINIC_CMDQ_SET_FAIL; + cmdqs->status |= HINIC_CMDQ_ENABLE; + + return 0; +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.h b/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.h +index da939e16fa..4ce0a4c5b8 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.h ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.h +@@ -170,6 +170,7 @@ struct hinic_cmdq_ctxt { + + enum hinic_cmdq_status { + HINIC_CMDQ_ENABLE = BIT(0), ++ HINIC_CMDQ_SET_FAIL = BIT(1) + }; + + enum hinic_cmdq_cmd_type { +diff --git 
a/dpdk/drivers/net/hinic/base/hinic_pmd_eqs.c b/dpdk/drivers/net/hinic/base/hinic_pmd_eqs.c +index abe0daee6c..79e1b20bca 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_eqs.c ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_eqs.c +@@ -240,7 +240,7 @@ static int alloc_eq_pages(struct hinic_eq *eq) + eq->virt_addr[pg_num] = + (u8 *)dma_zalloc_coherent_aligned(eq->hwdev, + eq->page_size, &eq->dma_addr[pg_num], +- GFP_KERNEL); ++ SOCKET_ID_ANY); + if (!eq->virt_addr[pg_num]) { + err = -ENOMEM; + goto dma_alloc_err; +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.c b/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.c +index 8b16897ade..fc11ecd7a4 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.c ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.c +@@ -112,9 +112,9 @@ void hinic_be32_to_cpu(void *data, u32 len) + } + } + +-static void * +-hinic_dma_mem_zalloc(struct hinic_hwdev *hwdev, size_t size, +- dma_addr_t *dma_handle, unsigned int flag, unsigned int align) ++static void *hinic_dma_mem_zalloc(struct hinic_hwdev *hwdev, size_t size, ++ dma_addr_t *dma_handle, unsigned int align, ++ unsigned int socket_id) + { + int rc, alloc_cnt; + const struct rte_memzone *mz; +@@ -129,8 +129,8 @@ hinic_dma_mem_zalloc(struct hinic_hwdev *hwdev, size_t size, + snprintf(z_name, sizeof(z_name), "%s_%d", + hwdev->pcidev_hdl->name, alloc_cnt); + +- mz = rte_memzone_reserve_aligned(z_name, size, SOCKET_ID_ANY, +- flag, align); ++ mz = rte_memzone_reserve_aligned(z_name, size, socket_id, ++ RTE_MEMZONE_IOVA_CONTIG, align); + if (!mz) { + PMD_DRV_LOG(ERR, "Alloc dma able memory failed, errno: %d, ma_name: %s, size: 0x%zx", + rte_errno, z_name, size); +@@ -209,25 +209,26 @@ hinic_dma_mem_free(struct hinic_hwdev *hwdev, size_t size, + (void)rte_memzone_free(mz); + } + +-void *dma_zalloc_coherent(void *hwdev, size_t size, +- dma_addr_t *dma_handle, gfp_t flag) ++void *dma_zalloc_coherent(void *hwdev, size_t size, dma_addr_t *dma_handle, ++ unsigned int socket_id) + { +- return hinic_dma_mem_zalloc(hwdev, size, dma_handle, flag, +- RTE_CACHE_LINE_SIZE); ++ return hinic_dma_mem_zalloc(hwdev, size, dma_handle, ++ RTE_CACHE_LINE_SIZE, socket_id); + } + + void *dma_zalloc_coherent_aligned(void *hwdev, size_t size, +- dma_addr_t *dma_handle, gfp_t flag) ++ dma_addr_t *dma_handle, unsigned int socket_id) + { +- return hinic_dma_mem_zalloc(hwdev, size, dma_handle, flag, +- HINIC_PAGE_SIZE); ++ return hinic_dma_mem_zalloc(hwdev, size, dma_handle, HINIC_PAGE_SIZE, ++ socket_id); + } + + void *dma_zalloc_coherent_aligned256k(void *hwdev, size_t size, +- dma_addr_t *dma_handle, gfp_t flag) ++ dma_addr_t *dma_handle, ++ unsigned int socket_id) + { +- return hinic_dma_mem_zalloc(hwdev, size, dma_handle, flag, +- HINIC_PAGE_SIZE * 64); ++ return hinic_dma_mem_zalloc(hwdev, size, dma_handle, ++ HINIC_PAGE_SIZE * 64, socket_id); + } + + void dma_free_coherent(void *hwdev, size_t size, void *virt, dma_addr_t phys) +@@ -304,12 +305,12 @@ void dma_pool_destroy(struct dma_pool *pool) + rte_free(pool); + } + +-void *dma_pool_alloc(struct pci_pool *pool, int flags, dma_addr_t *dma_addr) ++void *dma_pool_alloc(struct pci_pool *pool, dma_addr_t *dma_addr) + { + void *buf; + +- buf = hinic_dma_mem_zalloc(pool->hwdev, pool->elem_size, +- dma_addr, flags, (u32)pool->align); ++ buf = hinic_dma_mem_zalloc(pool->hwdev, pool->elem_size, dma_addr, ++ (u32)pool->align, SOCKET_ID_ANY); + if (buf) + rte_atomic32_inc(&pool->inuse); + +@@ -528,7 +529,7 @@ static int hinic_vf_rx_tx_flush(struct hinic_hwdev *hwdev) + + err = 
hinic_reinit_cmdq_ctxts(hwdev); + if (err) +- PMD_DRV_LOG(WARNING, "Reinit cmdq failed"); ++ PMD_DRV_LOG(WARNING, "Reinit cmdq failed when vf flush"); + + return err; + } +@@ -586,7 +587,7 @@ static int hinic_pf_rx_tx_flush(struct hinic_hwdev *hwdev) + + err = hinic_reinit_cmdq_ctxts(hwdev); + if (err) +- PMD_DRV_LOG(WARNING, "Reinit cmdq failed"); ++ PMD_DRV_LOG(WARNING, "Reinit cmdq failed when pf flush"); + + return 0; + } +@@ -1390,14 +1391,14 @@ static void print_cable_info(struct hinic_link_info *info) + } + + memcpy(tmp_vendor, info->vendor_name, sizeof(info->vendor_name)); +- snprintf(tmp_str, (sizeof(tmp_str) - 1), ++ snprintf(tmp_str, sizeof(tmp_str), + "Vendor: %s, %s, %s, length: %um, max_speed: %uGbps", + tmp_vendor, info->sfp_type ? "SFP" : "QSFP", port_type, + info->cable_length, info->cable_max_speed); + if (info->port_type != LINK_PORT_COPPER) +- snprintf(tmp_str + strlen(tmp_str), (sizeof(tmp_str) - 1), +- "%s, Temperature: %u", tmp_str, +- info->cable_temp); ++ snprintf(tmp_str + strlen(tmp_str), ++ sizeof(tmp_str) - strlen(tmp_str), ++ ", Temperature: %u", info->cable_temp); + + PMD_DRV_LOG(INFO, "Cable information: %s", tmp_str); + } +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.h b/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.h +index ac21947dc2..d6896b3f13 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.h ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.h +@@ -442,7 +442,6 @@ struct hinic_hwdev { + struct hinic_msg_pf_to_mgmt *pf_to_mgmt; + struct hinic_cmdqs *cmdqs; + struct hinic_nic_io *nic_io; +- + }; + + int hinic_osdep_init(struct hinic_hwdev *hwdev); +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_mbox.c b/dpdk/drivers/net/hinic/base/hinic_pmd_mbox.c +index 3d3c1bc4ab..cfe91ddbad 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_mbox.c ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_mbox.c +@@ -404,10 +404,8 @@ static int alloc_mbox_wb_status(struct hinic_mbox_func_to_func *func_to_func) + struct hinic_hwif *hwif = hwdev->hwif; + u32 addr_h, addr_l; + +- send_mbox->wb_vaddr = dma_zalloc_coherent(hwdev, +- MBOX_WB_STATUS_LEN, +- &send_mbox->wb_paddr, +- GFP_KERNEL); ++ send_mbox->wb_vaddr = dma_zalloc_coherent(hwdev, MBOX_WB_STATUS_LEN, ++ &send_mbox->wb_paddr, SOCKET_ID_ANY); + if (!send_mbox->wb_vaddr) { + PMD_DRV_LOG(ERR, "Allocating memory for mailbox wb status failed"); + return -ENOMEM; +@@ -872,7 +870,7 @@ static int hinic_func_to_func_init(struct hinic_hwdev *hwdev) + + err = alloc_mbox_info(func_to_func->mbox_resp); + if (err) { +- PMD_DRV_LOG(ERR, "Allocating memory for mailbox responsing failed"); ++ PMD_DRV_LOG(ERR, "Allocating memory for mailbox responding failed"); + goto alloc_mbox_for_resp_err; + } + +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.c b/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.c +index eee50a80d1..ea79c300af 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.c ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.c +@@ -248,6 +248,19 @@ static void free_msg_buf(struct hinic_msg_pf_to_mgmt *pf_to_mgmt) + free_recv_msg(&pf_to_mgmt->recv_msg_from_mgmt); + } + ++static int hinic_get_mgmt_channel_status(void *hwdev) ++{ ++ struct hinic_hwif *hwif = ((struct hinic_hwdev *)hwdev)->hwif; ++ u32 val; ++ ++ if (hinic_func_type((struct hinic_hwdev *)hwdev) == TYPE_VF) ++ return false; ++ ++ val = hinic_hwif_read_reg(hwif, HINIC_ICPL_RESERVD_ADDR); ++ ++ return HINIC_GET_MGMT_CHANNEL_STATUS(val, MGMT_CHANNEL_STATUS); ++} ++ + /** + * send_msg_to_mgmt_async - send async message + * @pf_to_mgmt: PF to MGMT 
channel +@@ -309,6 +322,14 @@ static int send_msg_to_mgmt_sync(struct hinic_msg_pf_to_mgmt *pf_to_mgmt, + u64 header; + u16 cmd_size = mgmt_msg_len(msg_len); + ++ /* If fw is hot active, return failed */ ++ if (hinic_get_mgmt_channel_status(pf_to_mgmt->hwdev)) { ++ if (mod == HINIC_MOD_COMM || mod == HINIC_MOD_L2NIC) ++ return HINIC_DEV_BUSY_ACTIVE_FW; ++ else ++ return -EBUSY; ++ } ++ + if (direction == HINIC_MSG_RESPONSE) + prepare_header(pf_to_mgmt, &header, msg_len, mod, ack_type, + direction, cmd, resp_msg_id); +@@ -462,19 +483,6 @@ hinic_pf_to_mgmt_sync(struct hinic_hwdev *hwdev, + return err; + } + +-static int hinic_get_mgmt_channel_status(void *hwdev) +-{ +- struct hinic_hwif *hwif = ((struct hinic_hwdev *)hwdev)->hwif; +- u32 val; +- +- if (hinic_func_type((struct hinic_hwdev *)hwdev) == TYPE_VF) +- return false; +- +- val = hinic_hwif_read_reg(hwif, HINIC_ICPL_RESERVD_ADDR); +- +- return HINIC_GET_MGMT_CHANNEL_STATUS(val, MGMT_CHANNEL_STATUS); +-} +- + int hinic_msg_to_mgmt_sync(void *hwdev, enum hinic_mod_type mod, u8 cmd, + void *buf_in, u16 in_size, + void *buf_out, u16 *out_size, u32 timeout) +@@ -484,10 +492,6 @@ int hinic_msg_to_mgmt_sync(void *hwdev, enum hinic_mod_type mod, u8 cmd, + if (!hwdev || in_size > HINIC_MSG_TO_MGMT_MAX_LEN) + return -EINVAL; + +- /* If status is hot upgrading, don't send message to mgmt */ +- if (hinic_get_mgmt_channel_status(hwdev)) +- return -EPERM; +- + if (hinic_func_type(hwdev) == TYPE_VF) { + rc = hinic_mbox_to_pf(hwdev, mod, cmd, buf_in, in_size, + buf_out, out_size, timeout); +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.h b/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.h +index cc18843bf8..52b319ead4 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.h ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.h +@@ -34,6 +34,8 @@ + #define HINIC_MSG_HEADER_P2P_IDX_MASK 0xF + #define HINIC_MSG_HEADER_MSG_ID_MASK 0x3FF + ++#define HINIC_DEV_BUSY_ACTIVE_FW 0xFE ++ + #define HINIC_MSG_HEADER_GET(val, member) \ + (((val) >> HINIC_MSG_HEADER_##member##_SHIFT) & \ + HINIC_MSG_HEADER_##member##_MASK) +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_nicio.c b/dpdk/drivers/net/hinic/base/hinic_pmd_nicio.c +index f6cc03341b..d3e8f2e74d 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_nicio.c ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_nicio.c +@@ -533,6 +533,15 @@ int hinic_init_qp_ctxts(struct hinic_hwdev *hwdev) + return err; + } + ++ if (hwdev->cmdqs->status & HINIC_CMDQ_SET_FAIL) { ++ err = hinic_reinit_cmdq_ctxts(hwdev); ++ if (err) { ++ PMD_DRV_LOG(ERR, "Reinit cmdq context failed when dev start, err: %d", ++ err); ++ return err; ++ } ++ } ++ + err = init_qp_ctxts(nic_io); + if (err) { + PMD_DRV_LOG(ERR, "Init QP ctxts failed, rc: %d", err); +@@ -728,9 +737,10 @@ void hinic_update_rq_local_ci(struct hinic_hwdev *hwdev, u16 q_id, int wqe_cnt) + + static int hinic_alloc_nicio(struct hinic_hwdev *hwdev) + { +- int err; +- u16 max_qps, num_qp; + struct hinic_nic_io *nic_io = hwdev->nic_io; ++ struct rte_pci_device *pdev = hwdev->pcidev_hdl; ++ u16 max_qps, num_qp; ++ int err; + + max_qps = hinic_func_max_qnum(hwdev); + if ((max_qps & (max_qps - 1))) { +@@ -751,10 +761,10 @@ static int hinic_alloc_nicio(struct hinic_hwdev *hwdev) + goto alloc_qps_err; + } + +- nic_io->ci_vaddr_base = +- dma_zalloc_coherent(hwdev, ++ nic_io->ci_vaddr_base = dma_zalloc_coherent(hwdev, + CI_TABLE_SIZE(num_qp, HINIC_PAGE_SIZE), +- &nic_io->ci_dma_base, GFP_KERNEL); ++ &nic_io->ci_dma_base, ++ pdev->device.numa_node); + if (!nic_io->ci_vaddr_base) { + 
PMD_DRV_LOG(ERR, "Failed to allocate ci area"); + err = -ENOMEM; +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_wq.c b/dpdk/drivers/net/hinic/base/hinic_pmd_wq.c +index 04c81f9bc8..345248c3ea 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_wq.c ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_wq.c +@@ -15,13 +15,14 @@ static void free_wq_pages(struct hinic_hwdev *hwdev, struct hinic_wq *wq) + wq->queue_buf_vaddr = 0; + } + +-static int alloc_wq_pages(struct hinic_hwdev *hwdev, struct hinic_wq *wq) ++static int alloc_wq_pages(struct hinic_hwdev *hwdev, struct hinic_wq *wq, ++ unsigned int socket_id) + { + dma_addr_t dma_addr = 0; + + wq->queue_buf_vaddr = (u64)(u64 *) + dma_zalloc_coherent_aligned256k(hwdev, wq->wq_buf_size, +- &dma_addr, GFP_KERNEL); ++ &dma_addr, socket_id); + if (!wq->queue_buf_vaddr) { + PMD_DRV_LOG(ERR, "Failed to allocate wq page"); + return -ENOMEM; +@@ -40,7 +41,7 @@ static int alloc_wq_pages(struct hinic_hwdev *hwdev, struct hinic_wq *wq) + } + + int hinic_wq_allocate(struct hinic_hwdev *hwdev, struct hinic_wq *wq, +- u32 wqebb_shift, u16 q_depth) ++ u32 wqebb_shift, u16 q_depth, unsigned int socket_id) + { + int err; + +@@ -60,7 +61,7 @@ int hinic_wq_allocate(struct hinic_hwdev *hwdev, struct hinic_wq *wq, + return -EINVAL; + } + +- err = alloc_wq_pages(hwdev, wq); ++ err = alloc_wq_pages(hwdev, wq, socket_id); + if (err) { + PMD_DRV_LOG(ERR, "Failed to allocate wq pages"); + return err; +@@ -114,7 +115,7 @@ int hinic_cmdq_alloc(struct hinic_wq *wq, struct hinic_hwdev *hwdev, + wq[i].wq_buf_size = wq_buf_size; + wq[i].q_depth = q_depth; + +- err = alloc_wq_pages(hwdev, &wq[i]); ++ err = alloc_wq_pages(hwdev, &wq[i], SOCKET_ID_ANY); + if (err) { + PMD_DRV_LOG(ERR, "Failed to alloc CMDQ blocks"); + goto cmdq_block_err; +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_wq.h b/dpdk/drivers/net/hinic/base/hinic_pmd_wq.h +index 53ecc225c8..354d0338de 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_wq.h ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_wq.h +@@ -122,7 +122,7 @@ void hinic_cmdq_free(struct hinic_hwdev *hwdev, struct hinic_wq *wq, + int cmdq_blocks); + + int hinic_wq_allocate(struct hinic_hwdev *hwdev, struct hinic_wq *wq, +- u32 wqebb_shift, u16 q_depth); ++ u32 wqebb_shift, u16 q_depth, unsigned int socket_id); + + void hinic_wq_free(struct hinic_hwdev *hwdev, struct hinic_wq *wq); + +diff --git a/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c b/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c +index 803a39e2da..b81ecd0b8f 100644 +--- a/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c ++++ b/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c +@@ -57,6 +57,7 @@ + #define HINIC_DEFAULT_BURST_SIZE 32 + #define HINIC_DEFAULT_NB_QUEUES 1 + #define HINIC_DEFAULT_RING_SIZE 1024 ++#define HINIC_MAX_LRO_SIZE 65536 + + /* + * vlan_id is a 12 bit number. 
+@@ -439,7 +440,7 @@ static int hinic_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + nic_dev->rxqs[queue_idx] = rxq; + + /* alloc rx sq hw wqepage*/ +- rc = hinic_create_rq(hwdev, queue_idx, rq_depth); ++ rc = hinic_create_rq(hwdev, queue_idx, rq_depth, socket_id); + if (rc) { + PMD_DRV_LOG(ERR, "Create rxq[%d] failed, dev_name: %s, rq_depth: %d", + queue_idx, dev->data->name, rq_depth); +@@ -466,6 +467,7 @@ static int hinic_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + rxq->q_depth = rq_depth; + rxq->buf_len = (u16)buf_size; + rxq->rx_free_thresh = rx_free_thresh; ++ rxq->socket_id = socket_id; + + /* the last point cant do mbuf rearm in bulk */ + rxq->rxinfo_align_end = rxq->q_depth - rxq->rx_free_thresh; +@@ -593,7 +595,7 @@ static int hinic_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + nic_dev->txqs[queue_idx] = txq; + + /* alloc tx sq hw wqepage */ +- rc = hinic_create_sq(hwdev, queue_idx, sq_depth); ++ rc = hinic_create_sq(hwdev, queue_idx, sq_depth, socket_id); + if (rc) { + PMD_DRV_LOG(ERR, "Create txq[%d] failed, dev_name: %s, sq_depth: %d", + queue_idx, dev->data->name, sq_depth); +@@ -612,6 +614,7 @@ static int hinic_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + txq->sq_bot_sge_addr = HINIC_GET_WQ_TAIL(txq) - + sizeof(struct hinic_sq_bufdesc); + txq->cos = nic_dev->default_cos; ++ txq->socket_id = socket_id; + + /* alloc software txinfo */ + rc = hinic_setup_tx_resources(txq); +@@ -733,6 +736,7 @@ hinic_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) + info->max_mac_addrs = HINIC_MAX_UC_MAC_ADDRS; + info->min_mtu = HINIC_MIN_MTU_SIZE; + info->max_mtu = HINIC_MAX_MTU_SIZE; ++ info->max_lro_pkt_size = HINIC_MAX_LRO_SIZE; + + hinic_get_speed_capa(dev, &info->speed_capa); + info->rx_queue_offload_capa = 0; +@@ -808,12 +812,10 @@ static int hinic_config_rx_mode(struct hinic_nic_dev *nic_dev, u32 rx_mode_ctrl) + return 0; + } + +- + static int hinic_rxtx_configure(struct rte_eth_dev *dev) + { +- int err; + struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev); +- bool lro_en; ++ int err; + + /* rx configure, if rss enable, need to init default configuration */ + err = hinic_rx_configure(dev); +@@ -830,18 +832,6 @@ static int hinic_rxtx_configure(struct rte_eth_dev *dev) + goto set_rx_mode_fail; + } + +- /* config lro */ +- lro_en = dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_TCP_LRO ? +- true : false; +- +- err = hinic_set_rx_lro(nic_dev->hwdev, lro_en, lro_en, +- HINIC_LRO_WQE_NUM_DEFAULT); +- if (err) { +- PMD_DRV_LOG(ERR, "%s lro failed, err: %d", +- lro_en ? 
"Enable" : "Disable", err); +- goto set_rx_mode_fail; +- } +- + return HINIC_OK; + + set_rx_mode_fail: +diff --git a/dpdk/drivers/net/hinic/hinic_pmd_rx.c b/dpdk/drivers/net/hinic/hinic_pmd_rx.c +index f865f2f470..8572780c62 100644 +--- a/dpdk/drivers/net/hinic/hinic_pmd_rx.c ++++ b/dpdk/drivers/net/hinic/hinic_pmd_rx.c +@@ -209,7 +209,8 @@ void hinic_get_func_rx_buf_size(struct hinic_nic_dev *nic_dev) + nic_dev->hwdev->nic_io->rq_buf_size = buf_size; + } + +-int hinic_create_rq(struct hinic_hwdev *hwdev, u16 q_id, u16 rq_depth) ++int hinic_create_rq(struct hinic_hwdev *hwdev, u16 q_id, ++ u16 rq_depth, unsigned int socket_id) + { + int err; + struct hinic_nic_io *nic_io = hwdev->nic_io; +@@ -223,17 +224,15 @@ int hinic_create_rq(struct hinic_hwdev *hwdev, u16 q_id, u16 rq_depth) + nic_io->rq_depth = rq_depth; + + err = hinic_wq_allocate(hwdev, &nic_io->rq_wq[q_id], +- HINIC_RQ_WQEBB_SHIFT, nic_io->rq_depth); ++ HINIC_RQ_WQEBB_SHIFT, nic_io->rq_depth, socket_id); + if (err) { + PMD_DRV_LOG(ERR, "Failed to allocate WQ for RQ"); + return err; + } + rq->wq = &nic_io->rq_wq[q_id]; + +- rq->pi_virt_addr = +- (volatile u16 *)dma_zalloc_coherent(hwdev, HINIC_PAGE_SIZE, +- &rq->pi_dma_addr, +- GFP_KERNEL); ++ rq->pi_virt_addr = (volatile u16 *)dma_zalloc_coherent(hwdev, ++ HINIC_PAGE_SIZE, &rq->pi_dma_addr, socket_id); + if (!rq->pi_virt_addr) { + PMD_DRV_LOG(ERR, "Failed to allocate rq pi virt addr"); + err = -ENOMEM; +@@ -305,15 +304,13 @@ void hinic_rxq_stats_reset(struct hinic_rxq *rxq) + memset(rxq_stats, 0, sizeof(*rxq_stats)); + } + +-static int hinic_rx_alloc_cqe(struct hinic_rxq *rxq) ++static int hinic_rx_alloc_cqe(struct hinic_rxq *rxq, unsigned int socket_id) + { + size_t cqe_mem_size; + + cqe_mem_size = sizeof(struct hinic_rq_cqe) * rxq->q_depth; +- rxq->cqe_start_vaddr = +- dma_zalloc_coherent(rxq->nic_dev->hwdev, +- cqe_mem_size, &rxq->cqe_start_paddr, +- GFP_KERNEL); ++ rxq->cqe_start_vaddr = dma_zalloc_coherent(rxq->nic_dev->hwdev, ++ cqe_mem_size, &rxq->cqe_start_paddr, socket_id); + if (!rxq->cqe_start_vaddr) { + PMD_DRV_LOG(ERR, "Allocate cqe dma memory failed"); + return -ENOMEM; +@@ -369,11 +366,12 @@ int hinic_setup_rx_resources(struct hinic_rxq *rxq) + int err, pkts; + + rx_info_sz = rxq->q_depth * sizeof(*rxq->rx_info); +- rxq->rx_info = kzalloc_aligned(rx_info_sz, GFP_KERNEL); ++ rxq->rx_info = rte_zmalloc_socket("rx_info", rx_info_sz, ++ RTE_CACHE_LINE_SIZE, rxq->socket_id); + if (!rxq->rx_info) + return -ENOMEM; + +- err = hinic_rx_alloc_cqe(rxq); ++ err = hinic_rx_alloc_cqe(rxq, rxq->socket_id); + if (err) { + PMD_DRV_LOG(ERR, "Allocate rx cqe failed"); + goto rx_cqe_err; +@@ -392,7 +390,7 @@ int hinic_setup_rx_resources(struct hinic_rxq *rxq) + hinic_rx_free_cqe(rxq); + + rx_cqe_err: +- kfree(rxq->rx_info); ++ rte_free(rxq->rx_info); + rxq->rx_info = NULL; + + return err; +@@ -404,7 +402,7 @@ void hinic_free_rx_resources(struct hinic_rxq *rxq) + return; + + hinic_rx_free_cqe(rxq); +- kfree(rxq->rx_info); ++ rte_free(rxq->rx_info); + rxq->rx_info = NULL; + } + +@@ -415,7 +413,8 @@ void hinic_free_all_rx_resources(struct rte_eth_dev *eth_dev) + HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev); + + for (q_id = 0; q_id < nic_dev->num_rq; q_id++) { +- eth_dev->data->rx_queues[q_id] = NULL; ++ if (eth_dev->data->rx_queues != NULL) ++ eth_dev->data->rx_queues[q_id] = NULL; + + if (nic_dev->rxqs[q_id] == NULL) + continue; +@@ -658,6 +657,10 @@ int hinic_rx_configure(struct rte_eth_dev *dev) + struct rte_eth_rss_conf rss_conf = + dev->data->dev_conf.rx_adv_conf.rss_conf; + int err; 
++ bool lro_en; ++ int max_lro_size; ++ int lro_wqe_num; ++ int buf_size; + + if (nic_dev->flags & ETH_MQ_RX_RSS_FLAG) { + if (rss_conf.rss_hf == 0) { +@@ -683,15 +686,42 @@ int hinic_rx_configure(struct rte_eth_dev *dev) + if (err) + goto rx_csum_ofl_err; + ++ /* config lro */ ++ lro_en = dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_TCP_LRO ? ++ true : false; ++ max_lro_size = dev->data->dev_conf.rxmode.max_lro_pkt_size; ++ buf_size = nic_dev->hwdev->nic_io->rq_buf_size; ++ lro_wqe_num = max_lro_size / buf_size ? (max_lro_size / buf_size) : 1; ++ ++ err = hinic_set_rx_lro(nic_dev->hwdev, lro_en, lro_en, lro_wqe_num); ++ if (err) { ++ PMD_DRV_LOG(ERR, "%s %s lro failed, err: %d, max_lro_size: %d", ++ dev->data->name, lro_en ? "Enable" : "Disable", ++ err, max_lro_size); ++ goto set_rx_lro_err; ++ } ++ + return 0; + ++set_rx_lro_err: + rx_csum_ofl_err: + rss_config_err: ++ + hinic_destroy_num_qps(nic_dev); + + return HINIC_ERROR; + } + ++static void hinic_rx_remove_lro(struct hinic_nic_dev *nic_dev) ++{ ++ int err; ++ ++ err = hinic_set_rx_lro(nic_dev->hwdev, false, false, 0); ++ if (err) ++ PMD_DRV_LOG(ERR, "%s disable LRO failed", ++ nic_dev->proc_dev_name); ++} ++ + void hinic_rx_remove_configure(struct rte_eth_dev *dev) + { + struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev); +@@ -700,6 +730,8 @@ void hinic_rx_remove_configure(struct rte_eth_dev *dev) + hinic_rss_deinit(nic_dev); + hinic_destroy_num_qps(nic_dev); + } ++ ++ hinic_rx_remove_lro(nic_dev); + } + + void hinic_free_all_rx_mbufs(struct hinic_rxq *rxq) +@@ -958,7 +990,7 @@ u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts) + volatile struct hinic_rq_cqe *rx_cqe; + u16 rx_buf_len, pkts = 0; + u16 sw_ci, ci_mask, wqebb_cnt = 0; +- u32 pkt_len, status, vlan_len; ++ u32 pkt_len, status, vlan_len, lro_num; + u64 rx_bytes = 0; + struct hinic_rq_cqe cqe; + u32 offload_type, rss_hash; +@@ -1026,6 +1058,13 @@ u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts) + rxm->ol_flags |= hinic_rx_rss_hash(offload_type, rss_hash, + &rxm->hash.rss); + ++ /* lro offload */ ++ lro_num = HINIC_GET_RX_NUM_LRO(cqe.status); ++ if (unlikely(lro_num != 0)) { ++ rxm->ol_flags |= PKT_RX_LRO; ++ rxm->tso_segsz = pkt_len / lro_num; ++ } ++ + /* 6. 
clear done bit */ + rx_cqe->status = 0; + +diff --git a/dpdk/drivers/net/hinic/hinic_pmd_rx.h b/dpdk/drivers/net/hinic/hinic_pmd_rx.h +index 1a80f95af4..49fa565173 100644 +--- a/dpdk/drivers/net/hinic/hinic_pmd_rx.h ++++ b/dpdk/drivers/net/hinic/hinic_pmd_rx.h +@@ -82,6 +82,8 @@ struct hinic_rxq { + u16 rx_free_thresh; + u16 rxinfo_align_end; + ++ u32 socket_id; ++ + unsigned long status; + struct hinic_rxq_stats rxq_stats; + +@@ -121,7 +123,8 @@ void hinic_rx_remove_configure(struct rte_eth_dev *dev); + + void hinic_get_func_rx_buf_size(struct hinic_nic_dev *nic_dev); + +-int hinic_create_rq(struct hinic_hwdev *hwdev, u16 q_id, u16 rq_depth); ++int hinic_create_rq(struct hinic_hwdev *hwdev, u16 q_id, ++ u16 rq_depth, unsigned int socket_id); + + void hinic_destroy_rq(struct hinic_hwdev *hwdev, u16 q_id); + +diff --git a/dpdk/drivers/net/hinic/hinic_pmd_tx.c b/dpdk/drivers/net/hinic/hinic_pmd_tx.c +index 985540a935..05b23d9924 100644 +--- a/dpdk/drivers/net/hinic/hinic_pmd_tx.c ++++ b/dpdk/drivers/net/hinic/hinic_pmd_tx.c +@@ -312,6 +312,8 @@ static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev, + mbuf = mbuf->next; + } + ++ dst_mbuf->pkt_len = dst_mbuf->data_len; ++ + return dst_mbuf; + } + +@@ -421,7 +423,7 @@ static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf, + *poff_info, + struct hinic_wqe_info *sqe_info) + { +- u32 total_len, limit_len, checked_len, left_len; ++ u32 total_len, limit_len, checked_len, left_len, adjust_mss; + u32 i, first_mss_sges, left_sges; + struct rte_mbuf *mbuf_head, *mbuf_pre; + +@@ -431,7 +433,9 @@ static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf, + /* tso sge number validation */ + if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) { + checked_len = 0; +- limit_len = mbuf->tso_segsz + poff_info->payload_offset; ++ adjust_mss = mbuf->tso_segsz >= TX_MSS_MIN ? 
++ mbuf->tso_segsz : TX_MSS_MIN; ++ limit_len = adjust_mss + poff_info->payload_offset; + first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE; + + /* each continues 17 mbufs segmust do one check */ +@@ -445,7 +449,7 @@ static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf, + mbuf_pre = mbuf; + mbuf = mbuf->next; + if (total_len >= limit_len) { +- limit_len = mbuf_head->tso_segsz; ++ limit_len = adjust_mss; + break; + } + } +@@ -1185,7 +1189,8 @@ void hinic_free_all_tx_resources(struct rte_eth_dev *eth_dev) + HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev); + + for (q_id = 0; q_id < nic_dev->num_sq; q_id++) { +- eth_dev->data->tx_queues[q_id] = NULL; ++ if (eth_dev->data->tx_queues != NULL) ++ eth_dev->data->tx_queues[q_id] = NULL; + + if (nic_dev->txqs[q_id] == NULL) + continue; +@@ -1216,7 +1221,8 @@ int hinic_setup_tx_resources(struct hinic_txq *txq) + u64 tx_info_sz; + + tx_info_sz = txq->q_depth * sizeof(*txq->tx_info); +- txq->tx_info = kzalloc_aligned(tx_info_sz, GFP_KERNEL); ++ txq->tx_info = rte_zmalloc_socket("tx_info", tx_info_sz, ++ RTE_CACHE_LINE_SIZE, txq->socket_id); + if (!txq->tx_info) + return -ENOMEM; + +@@ -1228,11 +1234,12 @@ void hinic_free_tx_resources(struct hinic_txq *txq) + if (txq->tx_info == NULL) + return; + +- kfree(txq->tx_info); ++ rte_free(txq->tx_info); + txq->tx_info = NULL; + } + +-int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id, u16 sq_depth) ++int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id, ++ u16 sq_depth, unsigned int socket_id) + { + int err; + struct hinic_nic_io *nic_io = hwdev->nic_io; +@@ -1246,7 +1253,8 @@ int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id, u16 sq_depth) + + /* alloc wq */ + err = hinic_wq_allocate(nic_io->hwdev, &nic_io->sq_wq[q_id], +- HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth); ++ HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth, ++ socket_id); + if (err) { + PMD_DRV_LOG(ERR, "Failed to allocate WQ for SQ"); + return err; +diff --git a/dpdk/drivers/net/hinic/hinic_pmd_tx.h b/dpdk/drivers/net/hinic/hinic_pmd_tx.h +index a1ca580b1b..dabbc6c1d8 100644 +--- a/dpdk/drivers/net/hinic/hinic_pmd_tx.h ++++ b/dpdk/drivers/net/hinic/hinic_pmd_tx.h +@@ -114,6 +114,7 @@ struct hinic_txq { + u16 q_id; + u16 q_depth; + u32 cos; ++ u32 socket_id; + + /* cacheline1 */ + struct hinic_txq_stats txq_stats; +@@ -137,7 +138,8 @@ void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats); + + void hinic_txq_stats_reset(struct hinic_txq *txq); + +-int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id, u16 sq_depth); ++int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id, ++ u16 sq_depth, unsigned int socket_id); + + void hinic_destroy_sq(struct hinic_hwdev *hwdev, u16 q_id); + +diff --git a/dpdk/drivers/net/hns3/hns3_cmd.c b/dpdk/drivers/net/hns3/hns3_cmd.c +index 65a5af8e4f..c7993634e3 100644 +--- a/dpdk/drivers/net/hns3/hns3_cmd.c ++++ b/dpdk/drivers/net/hns3/hns3_cmd.c +@@ -215,12 +215,12 @@ hns3_cmd_csq_clean(struct hns3_hw *hw) + head = hns3_read_dev(hw, HNS3_CMDQ_TX_HEAD_REG); + + if (!is_valid_csq_clean_head(csq, head)) { +- struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + hns3_err(hw, "wrong cmd head (%u, %u-%u)", head, + csq->next_to_use, csq->next_to_clean); +- rte_atomic16_set(&hw->reset.disable_cmd, 1); +- +- hns3_schedule_delayed_reset(hns); ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { ++ rte_atomic16_set(&hw->reset.disable_cmd, 1); ++ hns3_schedule_delayed_reset(HNS3_DEV_HW_TO_ADAPTER(hw)); ++ } + + return -EIO; + } +@@ -289,7 +289,7 @@ hns3_cmd_convert_err_code(uint16_t desc_ret) + case 
HNS3_CMD_INVALID: + return -EBADR; + default: +- return -EIO; ++ return -EREMOTEIO; + } + } + +@@ -349,11 +349,23 @@ static int hns3_cmd_poll_reply(struct hns3_hw *hw) + + /* + * hns3_cmd_send - send command to command queue +- * @hw: pointer to the hw struct +- * @desc: prefilled descriptor for describing the command +- * @num : the number of descriptors to be sent + * +- * This is the main send command for command queue, it ++ * @param hw ++ * pointer to the hw struct ++ * @param desc ++ * prefilled descriptor for describing the command ++ * @param num ++ * the number of descriptors to be sent ++ * @return ++ * - -EBUSY if detect device is in resetting ++ * - -EIO if detect cmd csq corrupted (due to reset) or ++ * there is reset pending ++ * - -ENOMEM/-ETIME/...(Non-Zero) if other error case ++ * - Zero if operation completed successfully ++ * ++ * Note -BUSY/-EIO only used in reset case ++ * ++ * Note this is the main send command for command queue, it + * sends the queue, cleans the queue, etc + */ + int +@@ -517,7 +529,7 @@ hns3_cmd_init(struct hns3_hw *hw) + return 0; + + err_cmd_init: +- hns3_cmd_uninit(hw); ++ rte_atomic16_set(&hw->reset.disable_cmd, 1); + return ret; + } + +diff --git a/dpdk/drivers/net/hns3/hns3_cmd.h b/dpdk/drivers/net/hns3/hns3_cmd.h +index be0ecbe86b..26d4103962 100644 +--- a/dpdk/drivers/net/hns3/hns3_cmd.h ++++ b/dpdk/drivers/net/hns3/hns3_cmd.h +@@ -79,6 +79,7 @@ enum hns3_opcode_type { + HNS3_OPC_GBL_RST_STATUS = 0x0021, + HNS3_OPC_QUERY_FUNC_STATUS = 0x0022, + HNS3_OPC_QUERY_PF_RSRC = 0x0023, ++ HNS3_OPC_QUERY_VF_RSRC = 0x0024, + HNS3_OPC_GET_CFG_PARAM = 0x0025, + HNS3_OPC_PF_RST_DONE = 0x0026, + +@@ -209,6 +210,10 @@ enum hns3_opcode_type { + /* SFP command */ + HNS3_OPC_SFP_GET_SPEED = 0x7104, + ++ /* Interrupts commands */ ++ HNS3_OPC_ADD_RING_TO_VECTOR = 0x1503, ++ HNS3_OPC_DEL_RING_TO_VECTOR = 0x1504, ++ + /* Error INT commands */ + HNS3_QUERY_MSIX_INT_STS_BD_NUM = 0x1513, + HNS3_QUERY_CLEAR_ALL_MPF_MSIX_INT = 0x1514, +@@ -333,8 +338,9 @@ struct hns3_func_status_cmd { + uint8_t rsv[2]; + }; + +-#define HNS3_PF_VEC_NUM_S 0 +-#define HNS3_PF_VEC_NUM_M GENMASK(7, 0) ++#define HNS3_VEC_NUM_S 0 ++#define HNS3_VEC_NUM_M GENMASK(7, 0) ++#define HNS3_MIN_VECTOR_NUM 2 /* one for msi-x, another for IO */ + struct hns3_pf_res_cmd { + uint16_t tqp_num; + uint16_t buf_size; +@@ -347,6 +353,15 @@ struct hns3_pf_res_cmd { + uint32_t rsv[2]; + }; + ++struct hns3_vf_res_cmd { ++ uint16_t tqp_num; ++ uint16_t reserved; ++ uint16_t msixcap_localid_ba_nic; ++ uint16_t msixcap_localid_ba_rocee; ++ uint16_t vf_intr_vector_number; ++ uint16_t rsv[7]; ++}; ++ + #define HNS3_UMV_SPC_ALC_B 0 + struct hns3_umv_spc_alc_cmd { + uint8_t allocate; +@@ -673,6 +688,36 @@ struct hns3_tqp_map_cmd { + uint8_t rsv[18]; + }; + ++enum hns3_ring_type { ++ HNS3_RING_TYPE_TX, ++ HNS3_RING_TYPE_RX ++}; ++ ++enum hns3_int_gl_idx { ++ HNS3_RING_GL_RX, ++ HNS3_RING_GL_TX, ++ HNS3_RING_GL_IMMEDIATE = 3 ++}; ++ ++#define HNS3_RING_GL_IDX_S 0 ++#define HNS3_RING_GL_IDX_M GENMASK(1, 0) ++ ++#define HNS3_VECTOR_ELEMENTS_PER_CMD 10 ++ ++#define HNS3_INT_TYPE_S 0 ++#define HNS3_INT_TYPE_M GENMASK(1, 0) ++#define HNS3_TQP_ID_S 2 ++#define HNS3_TQP_ID_M GENMASK(12, 2) ++#define HNS3_INT_GL_IDX_S 13 ++#define HNS3_INT_GL_IDX_M GENMASK(14, 13) ++struct hns3_ctrl_vector_chain_cmd { ++ uint8_t int_vector_id; ++ uint8_t int_cause_num; ++ uint16_t tqp_type_and_id[HNS3_VECTOR_ELEMENTS_PER_CMD]; ++ uint8_t vfid; ++ uint8_t rsv; ++}; ++ + struct hns3_config_max_frm_size_cmd { + uint16_t max_frm_size; + uint8_t 
min_frm_size; +diff --git a/dpdk/drivers/net/hns3/hns3_dcb.c b/dpdk/drivers/net/hns3/hns3_dcb.c +index 19235dfb92..8688de2a73 100644 +--- a/dpdk/drivers/net/hns3/hns3_dcb.c ++++ b/dpdk/drivers/net/hns3/hns3_dcb.c +@@ -578,17 +578,48 @@ hns3_dcb_pri_shaper_cfg(struct hns3_hw *hw) + } + + void +-hns3_tc_queue_mapping_cfg(struct hns3_hw *hw) ++hns3_set_rss_size(struct hns3_hw *hw, uint16_t nb_rx_q) ++{ ++ struct hns3_rss_conf *rss_cfg = &hw->rss_info; ++ uint16_t rx_qnum_per_tc; ++ int i; ++ ++ rx_qnum_per_tc = nb_rx_q / hw->num_tc; ++ rx_qnum_per_tc = RTE_MIN(hw->rss_size_max, rx_qnum_per_tc); ++ if (hw->alloc_rss_size != rx_qnum_per_tc) { ++ hns3_info(hw, "rss size changes from %u to %u", ++ hw->alloc_rss_size, rx_qnum_per_tc); ++ hw->alloc_rss_size = rx_qnum_per_tc; ++ } ++ hw->used_rx_queues = hw->num_tc * hw->alloc_rss_size; ++ ++ /* ++ * When rss size is changed, we need to update rss redirection table ++ * maintained by driver. Besides, during the entire reset process, we ++ * need to ensure that the rss table information are not overwritten ++ * and configured directly to the hardware in the RESET_STAGE_RESTORE ++ * stage of the reset process. ++ */ ++ if (rte_atomic16_read(&hw->reset.resetting) == 0) { ++ for (i = 0; i < HNS3_RSS_IND_TBL_SIZE; i++) ++ rss_cfg->rss_indirection_tbl[i] = ++ i % hw->alloc_rss_size; ++ } ++} ++ ++void ++hns3_tc_queue_mapping_cfg(struct hns3_hw *hw, uint16_t nb_queue) + { + struct hns3_tc_queue_info *tc_queue; + uint8_t i; + ++ hw->tx_qnum_per_tc = nb_queue / hw->num_tc; + for (i = 0; i < HNS3_MAX_TC_NUM; i++) { + tc_queue = &hw->tc_queue[i]; + if (hw->hw_tc_map & BIT(i) && i < hw->num_tc) { + tc_queue->enable = true; +- tc_queue->tqp_offset = i * hw->alloc_rss_size; +- tc_queue->tqp_count = hw->alloc_rss_size; ++ tc_queue->tqp_offset = i * hw->tx_qnum_per_tc; ++ tc_queue->tqp_count = hw->tx_qnum_per_tc; + tc_queue->tc = i; + } else { + /* Set to default queue if TC is disable */ +@@ -598,30 +629,22 @@ hns3_tc_queue_mapping_cfg(struct hns3_hw *hw) + tc_queue->tc = 0; + } + } ++ hw->used_tx_queues = hw->num_tc * hw->tx_qnum_per_tc; + } + + static void +-hns3_dcb_update_tc_queue_mapping(struct hns3_hw *hw, uint16_t queue_num) ++hns3_dcb_update_tc_queue_mapping(struct hns3_hw *hw, uint16_t nb_rx_q, ++ uint16_t nb_tx_q) + { + struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + struct hns3_pf *pf = &hns->pf; +- uint16_t tqpnum_per_tc; +- uint16_t alloc_tqps; +- +- alloc_tqps = RTE_MIN(hw->tqps_num, queue_num); +- hw->num_tc = RTE_MIN(alloc_tqps, hw->dcb_info.num_tc); +- tqpnum_per_tc = RTE_MIN(hw->rss_size_max, alloc_tqps / hw->num_tc); + +- if (hw->alloc_rss_size != tqpnum_per_tc) { +- PMD_INIT_LOG(INFO, "rss size changes from %d to %d", +- hw->alloc_rss_size, tqpnum_per_tc); +- hw->alloc_rss_size = tqpnum_per_tc; +- } +- hw->alloc_tqps = hw->num_tc * hw->alloc_rss_size; +- +- hns3_tc_queue_mapping_cfg(hw); ++ hw->num_tc = hw->dcb_info.num_tc; ++ hns3_set_rss_size(hw, nb_rx_q); ++ hns3_tc_queue_mapping_cfg(hw, nb_tx_q); + +- memcpy(pf->prio_tc, hw->dcb_info.prio_tc, HNS3_MAX_USER_PRIO); ++ if (!hns->is_vf) ++ memcpy(pf->prio_tc, hw->dcb_info.prio_tc, HNS3_MAX_USER_PRIO); + } + + int +@@ -1309,20 +1332,35 @@ hns3_dcb_info_cfg(struct hns3_adapter *hns) + for (i = 0; i < HNS3_MAX_USER_PRIO; i++) + hw->dcb_info.prio_tc[i] = dcb_rx_conf->dcb_tc[i]; + +- hns3_dcb_update_tc_queue_mapping(hw, hw->data->nb_rx_queues); ++ hns3_dcb_update_tc_queue_mapping(hw, hw->data->nb_rx_queues, ++ hw->data->nb_tx_queues); + } + +-static void ++static int + 
hns3_dcb_info_update(struct hns3_adapter *hns, uint8_t num_tc) + { + struct hns3_pf *pf = &hns->pf; + struct hns3_hw *hw = &hns->hw; ++ uint16_t nb_rx_q = hw->data->nb_rx_queues; ++ uint16_t nb_tx_q = hw->data->nb_tx_queues; + uint8_t bit_map = 0; + uint8_t i; + + if (pf->tx_sch_mode != HNS3_FLAG_TC_BASE_SCH_MODE && + hw->dcb_info.num_pg != 1) +- return; ++ return -EINVAL; ++ ++ if (nb_rx_q < num_tc) { ++ hns3_err(hw, "number of Rx queues(%d) is less than tcs(%d).", ++ nb_rx_q, num_tc); ++ return -EINVAL; ++ } ++ ++ if (nb_tx_q < num_tc) { ++ hns3_err(hw, "number of Tx queues(%d) is less than tcs(%d).", ++ nb_tx_q, num_tc); ++ return -EINVAL; ++ } + + /* Currently not support uncontinuous tc */ + hw->dcb_info.num_tc = num_tc; +@@ -1333,10 +1371,10 @@ hns3_dcb_info_update(struct hns3_adapter *hns, uint8_t num_tc) + bit_map = 1; + hw->dcb_info.num_tc = 1; + } +- + hw->hw_tc_map = bit_map; +- + hns3_dcb_info_cfg(hns); ++ ++ return 0; + } + + static int +@@ -1422,10 +1460,15 @@ hns3_dcb_configure(struct hns3_adapter *hns) + + hns3_dcb_cfg_validate(hns, &num_tc, &map_changed); + if (map_changed || rte_atomic16_read(&hw->reset.resetting)) { +- hns3_dcb_info_update(hns, num_tc); ++ ret = hns3_dcb_info_update(hns, num_tc); ++ if (ret) { ++ hns3_err(hw, "dcb info update failed: %d", ret); ++ return ret; ++ } ++ + ret = hns3_dcb_hw_configure(hns); + if (ret) { +- hns3_err(hw, "dcb sw configure fails: %d", ret); ++ hns3_err(hw, "dcb sw configure failed: %d", ret); + return ret; + } + } +@@ -1479,7 +1522,8 @@ hns3_dcb_init(struct hns3_hw *hw) + hns3_err(hw, "dcb info init failed: %d", ret); + return ret; + } +- hns3_dcb_update_tc_queue_mapping(hw, hw->tqps_num); ++ hns3_dcb_update_tc_queue_mapping(hw, hw->tqps_num, ++ hw->tqps_num); + } + + /* +@@ -1502,10 +1546,11 @@ static int + hns3_update_queue_map_configure(struct hns3_adapter *hns) + { + struct hns3_hw *hw = &hns->hw; +- uint16_t queue_num = hw->data->nb_rx_queues; ++ uint16_t nb_rx_q = hw->data->nb_rx_queues; ++ uint16_t nb_tx_q = hw->data->nb_tx_queues; + int ret; + +- hns3_dcb_update_tc_queue_mapping(hw, queue_num); ++ hns3_dcb_update_tc_queue_mapping(hw, nb_rx_q, nb_tx_q); + ret = hns3_q_to_qs_map(hw); + if (ret) { + hns3_err(hw, "failed to map nq to qs! 
ret = %d", ret); +diff --git a/dpdk/drivers/net/hns3/hns3_dcb.h b/dpdk/drivers/net/hns3/hns3_dcb.h +index 9ec4e704b3..9c2c5f21c1 100644 +--- a/dpdk/drivers/net/hns3/hns3_dcb.h ++++ b/dpdk/drivers/net/hns3/hns3_dcb.h +@@ -159,7 +159,9 @@ hns3_fc_enable(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); + int + hns3_dcb_pfc_enable(struct rte_eth_dev *dev, struct rte_eth_pfc_conf *pfc_conf); + +-void hns3_tc_queue_mapping_cfg(struct hns3_hw *hw); ++void hns3_set_rss_size(struct hns3_hw *hw, uint16_t nb_rx_q); ++ ++void hns3_tc_queue_mapping_cfg(struct hns3_hw *hw, uint16_t nb_queue); + + int hns3_dcb_cfg_update(struct hns3_adapter *hns); + +diff --git a/dpdk/drivers/net/hns3/hns3_ethdev.c b/dpdk/drivers/net/hns3/hns3_ethdev.c +index 72315718a8..3271b61fc8 100644 +--- a/dpdk/drivers/net/hns3/hns3_ethdev.c ++++ b/dpdk/drivers/net/hns3/hns3_ethdev.c +@@ -550,7 +550,7 @@ hns3_set_vlan_filter_ctrl(struct hns3_hw *hw, uint8_t vlan_type, + } + + static int +-hns3_enable_vlan_filter(struct hns3_adapter *hns, bool enable) ++hns3_vlan_filter_init(struct hns3_adapter *hns) + { + struct hns3_hw *hw = &hns->hw; + int ret; +@@ -558,14 +558,29 @@ hns3_enable_vlan_filter(struct hns3_adapter *hns, bool enable) + ret = hns3_set_vlan_filter_ctrl(hw, HNS3_FILTER_TYPE_VF, + HNS3_FILTER_FE_EGRESS, false, 0); + if (ret) { +- hns3_err(hw, "hns3 enable filter fail, ret =%d", ret); ++ hns3_err(hw, "failed to init vf vlan filter, ret = %d", ret); + return ret; + } + ++ ret = hns3_set_vlan_filter_ctrl(hw, HNS3_FILTER_TYPE_PORT, ++ HNS3_FILTER_FE_INGRESS, false, 0); ++ if (ret) ++ hns3_err(hw, "failed to init port vlan filter, ret = %d", ret); ++ ++ return ret; ++} ++ ++static int ++hns3_enable_vlan_filter(struct hns3_adapter *hns, bool enable) ++{ ++ struct hns3_hw *hw = &hns->hw; ++ int ret; ++ + ret = hns3_set_vlan_filter_ctrl(hw, HNS3_FILTER_TYPE_PORT, + HNS3_FILTER_FE_INGRESS, enable, 0); + if (ret) +- hns3_err(hw, "hns3 enable filter fail, ret =%d", ret); ++ hns3_err(hw, "failed to %s port vlan filter, ret = %d", ++ enable ? "enable" : "disable", ret); + + return ret; + } +@@ -583,6 +598,23 @@ hns3_vlan_offload_set(struct rte_eth_dev *dev, int mask) + rte_spinlock_lock(&hw->lock); + rxmode = &dev->data->dev_conf.rxmode; + tmp_mask = (unsigned int)mask; ++ if (tmp_mask & ETH_VLAN_FILTER_MASK) { ++ /* ignore vlan filter configuration during promiscuous mode */ ++ if (!dev->data->promiscuous) { ++ /* Enable or disable VLAN filter */ ++ enable = rxmode->offloads & DEV_RX_OFFLOAD_VLAN_FILTER ? ++ true : false; ++ ++ ret = hns3_enable_vlan_filter(hns, enable); ++ if (ret) { ++ rte_spinlock_unlock(&hw->lock); ++ hns3_err(hw, "failed to %s rx filter, ret = %d", ++ enable ? "enable" : "disable", ret); ++ return ret; ++ } ++ } ++ } ++ + if (tmp_mask & ETH_VLAN_STRIP_MASK) { + /* Enable or disable VLAN stripping */ + enable = rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP ? +@@ -591,7 +623,8 @@ hns3_vlan_offload_set(struct rte_eth_dev *dev, int mask) + ret = hns3_en_hw_strip_rxvtag(hns, enable); + if (ret) { + rte_spinlock_unlock(&hw->lock); +- hns3_err(hw, "failed to enable rx strip, ret =%d", ret); ++ hns3_err(hw, "failed to %s rx strip, ret = %d", ++ enable ? 
"enable" : "disable", ret); + return ret; + } + } +@@ -868,6 +901,12 @@ hns3_vlan_pvid_set(struct rte_eth_dev *dev, uint16_t pvid, int on) + struct hns3_hw *hw = &hns->hw; + int ret; + ++ if (pvid > RTE_ETHER_MAX_VLAN_ID) { ++ hns3_err(hw, "Invalid vlan_id = %u > %d", pvid, ++ RTE_ETHER_MAX_VLAN_ID); ++ return -EINVAL; ++ } ++ + rte_spinlock_lock(&hw->lock); + ret = hns3_vlan_pvid_configure(hns, pvid, on); + rte_spinlock_unlock(&hw->lock); +@@ -912,7 +951,7 @@ hns3_init_vlan_config(struct hns3_adapter *hns) + if (rte_atomic16_read(&hw->reset.resetting) == 0) + init_port_base_vlan_info(hw); + +- ret = hns3_enable_vlan_filter(hns, true); ++ ret = hns3_vlan_filter_init(hns); + if (ret) { + hns3_err(hw, "vlan init fail in pf, ret =%d", ret); + return ret; +@@ -954,17 +993,31 @@ hns3_restore_vlan_conf(struct hns3_adapter *hns) + { + struct hns3_pf *pf = &hns->pf; + struct hns3_hw *hw = &hns->hw; ++ uint64_t offloads; ++ bool enable; + int ret; + ++ if (!hw->data->promiscuous) { ++ /* restore vlan filter states */ ++ offloads = hw->data->dev_conf.rxmode.offloads; ++ enable = offloads & DEV_RX_OFFLOAD_VLAN_FILTER ? true : false; ++ ret = hns3_enable_vlan_filter(hns, enable); ++ if (ret) { ++ hns3_err(hw, "failed to restore vlan rx filter conf, " ++ "ret = %d", ret); ++ return ret; ++ } ++ } ++ + ret = hns3_set_vlan_rx_offload_cfg(hns, &pf->vtag_config.rx_vcfg); + if (ret) { +- hns3_err(hw, "hns3 restore vlan rx conf fail, ret =%d", ret); ++ hns3_err(hw, "failed to restore vlan rx conf, ret = %d", ret); + return ret; + } + + ret = hns3_set_vlan_tx_offload_cfg(hns, &pf->vtag_config.tx_vcfg); + if (ret) +- hns3_err(hw, "hns3 restore vlan tx conf fail, ret =%d", ret); ++ hns3_err(hw, "failed to restore vlan tx conf, ret = %d", ret); + + return ret; + } +@@ -976,6 +1029,7 @@ hns3_dev_configure_vlan(struct rte_eth_dev *dev) + struct rte_eth_dev_data *data = dev->data; + struct rte_eth_txmode *txmode; + struct hns3_hw *hw = &hns->hw; ++ int mask; + int ret; + + txmode = &data->dev_conf.txmode; +@@ -989,17 +1043,26 @@ hns3_dev_configure_vlan(struct rte_eth_dev *dev) + txmode->hw_vlan_reject_untagged); + + /* Apply vlan offload setting */ +- ret = hns3_vlan_offload_set(dev, ETH_VLAN_STRIP_MASK); ++ mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK; ++ ret = hns3_vlan_offload_set(dev, mask); + if (ret) { +- hns3_err(hw, "dev config vlan Strip failed, ret =%d", ret); ++ hns3_err(hw, "dev config rx vlan offload failed, ret = %d", ++ ret); + return ret; + } + ++ /* ++ * If pvid config is not set in rte_eth_conf, driver needn't to set ++ * VLAN pvid related configuration to hardware. ++ */ ++ if (txmode->pvid == 0 && txmode->hw_vlan_insert_pvid == 0) ++ return 0; ++ + /* Apply pvid setting */ + ret = hns3_vlan_pvid_set(dev, txmode->pvid, + txmode->hw_vlan_insert_pvid); + if (ret) +- hns3_err(hw, "dev config vlan pvid(%d) failed, ret =%d", ++ hns3_err(hw, "dev config vlan pvid(%d) failed, ret = %d", + txmode->pvid, ret); + + return ret; +@@ -2021,13 +2084,113 @@ hns3_check_dcb_cfg(struct rte_eth_dev *dev) + return hns3_check_mq_mode(dev); + } + ++static int ++hns3_bind_ring_with_vector(struct hns3_hw *hw, uint8_t vector_id, bool mmap, ++ enum hns3_ring_type queue_type, uint16_t queue_id) ++{ ++ struct hns3_cmd_desc desc; ++ struct hns3_ctrl_vector_chain_cmd *req = ++ (struct hns3_ctrl_vector_chain_cmd *)desc.data; ++ enum hns3_cmd_status status; ++ enum hns3_opcode_type op; ++ uint16_t tqp_type_and_id = 0; ++ const char *op_str; ++ uint16_t type; ++ uint16_t gl; ++ ++ op = mmap ? 
HNS3_OPC_ADD_RING_TO_VECTOR : HNS3_OPC_DEL_RING_TO_VECTOR; ++ hns3_cmd_setup_basic_desc(&desc, op, false); ++ req->int_vector_id = vector_id; ++ ++ if (queue_type == HNS3_RING_TYPE_RX) ++ gl = HNS3_RING_GL_RX; ++ else ++ gl = HNS3_RING_GL_TX; ++ ++ type = queue_type; ++ ++ hns3_set_field(tqp_type_and_id, HNS3_INT_TYPE_M, HNS3_INT_TYPE_S, ++ type); ++ hns3_set_field(tqp_type_and_id, HNS3_TQP_ID_M, HNS3_TQP_ID_S, queue_id); ++ hns3_set_field(tqp_type_and_id, HNS3_INT_GL_IDX_M, HNS3_INT_GL_IDX_S, ++ gl); ++ req->tqp_type_and_id[0] = rte_cpu_to_le_16(tqp_type_and_id); ++ req->int_cause_num = 1; ++ op_str = mmap ? "Map" : "Unmap"; ++ status = hns3_cmd_send(hw, &desc, 1); ++ if (status) { ++ hns3_err(hw, "%s TQP %d fail, vector_id is %d, status is %d.", ++ op_str, queue_id, req->int_vector_id, status); ++ return status; ++ } ++ ++ return 0; ++} ++ ++static int ++hns3_init_ring_with_vector(struct hns3_hw *hw) ++{ ++ uint8_t vec; ++ int ret; ++ int i; ++ ++ /* ++ * In hns3 network engine, vector 0 is always the misc interrupt of this ++ * function, vector 1~N can be used respectively for the queues of the ++ * function. Tx and Rx queues with the same number share the interrupt ++ * vector. In the initialization clearing the all hardware mapping ++ * relationship configurations between queues and interrupt vectors is ++ * needed, so some error caused by the residual configurations, such as ++ * the unexpected Tx interrupt, can be avoid. Because of the hardware ++ * constraints in hns3 hardware engine, we have to implement clearing ++ * the mapping relationship configurations by binding all queues to the ++ * last interrupt vector and reserving the last interrupt vector. This ++ * method results in a decrease of the maximum queues when upper ++ * applications call the rte_eth_dev_configure API function to enable ++ * Rx interrupt. ++ */ ++ vec = hw->num_msi - 1; /* vector 0 for misc interrupt, not for queue */ ++ /* vec - 1: the last interrupt is reserved */ ++ hw->intr_tqps_num = vec > hw->tqps_num ? hw->tqps_num : vec - 1; ++ for (i = 0; i < hw->intr_tqps_num; i++) { ++ /* ++ * Set gap limiter and rate limiter configuration of queue's ++ * interrupt. 
++ */ ++ hns3_set_queue_intr_gl(hw, i, HNS3_RING_GL_RX, ++ HNS3_TQP_INTR_GL_DEFAULT); ++ hns3_set_queue_intr_gl(hw, i, HNS3_RING_GL_TX, ++ HNS3_TQP_INTR_GL_DEFAULT); ++ hns3_set_queue_intr_rl(hw, i, HNS3_TQP_INTR_RL_DEFAULT); ++ ++ ret = hns3_bind_ring_with_vector(hw, vec, false, ++ HNS3_RING_TYPE_TX, i); ++ if (ret) { ++ PMD_INIT_LOG(ERR, "PF fail to unbind TX ring(%d) with " ++ "vector: %d, ret=%d", i, vec, ret); ++ return ret; ++ } ++ ++ ret = hns3_bind_ring_with_vector(hw, vec, false, ++ HNS3_RING_TYPE_RX, i); ++ if (ret) { ++ PMD_INIT_LOG(ERR, "PF fail to unbind RX ring(%d) with " ++ "vector: %d, ret=%d", i, vec, ret); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ + static int + hns3_dev_configure(struct rte_eth_dev *dev) + { +- struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); +- struct hns3_rss_conf *rss_cfg = &hw->rss_info; ++ struct hns3_adapter *hns = dev->data->dev_private; + struct rte_eth_conf *conf = &dev->data->dev_conf; + enum rte_eth_rx_mq_mode mq_mode = conf->rxmode.mq_mode; ++ struct hns3_hw *hw = &hns->hw; ++ struct hns3_rss_conf *rss_cfg = &hw->rss_info; + uint16_t nb_rx_q = dev->data->nb_rx_queues; + uint16_t nb_tx_q = dev->data->nb_tx_queues; + struct rte_eth_rss_conf rss_conf; +@@ -2035,23 +2198,28 @@ hns3_dev_configure(struct rte_eth_dev *dev) + int ret; + + /* +- * Hardware does not support where the number of rx and tx queues is +- * not equal in hip08. ++ * Hardware does not support individually enable/disable/reset the Tx or ++ * Rx queue in hns3 network engine. Driver must enable/disable/reset Tx ++ * and Rx queues at the same time. When the numbers of Tx queues ++ * allocated by upper applications are not equal to the numbers of Rx ++ * queues, driver needs to setup fake Tx or Rx queues to adjust numbers ++ * of Tx/Rx queues. otherwise, network engine can not work as usual. But ++ * these fake queues are imperceptible, and can not be used by upper ++ * applications. + */ +- if (nb_rx_q != nb_tx_q) { +- hns3_err(hw, +- "nb_rx_queues(%u) not equal with nb_tx_queues(%u)! " +- "Hardware does not support this configuration!", +- nb_rx_q, nb_tx_q); +- return -EINVAL; ++ ret = hns3_set_fake_rx_or_tx_queues(dev, nb_rx_q, nb_tx_q); ++ if (ret) { ++ hns3_err(hw, "Failed to set rx/tx fake queues: %d", ret); ++ return ret; + } + ++ hw->adapter_state = HNS3_NIC_CONFIGURING; + if (conf->link_speeds & ETH_LINK_SPEED_FIXED) { + hns3_err(hw, "setting link speed/duplex not supported"); +- return -EINVAL; ++ ret = -EINVAL; ++ goto cfg_err; + } + +- hw->adapter_state = HNS3_NIC_CONFIGURING; + if ((uint32_t)mq_mode & ETH_MQ_RX_DCB_FLAG) { + ret = hns3_check_dcb_cfg(dev); + if (ret) +@@ -2097,7 +2265,9 @@ hns3_dev_configure(struct rte_eth_dev *dev) + return 0; + + cfg_err: ++ (void)hns3_set_fake_rx_or_tx_queues(dev, 0, 0); + hw->adapter_state = HNS3_NIC_INITIALIZED; ++ + return ret; + } + +@@ -2184,8 +2354,16 @@ hns3_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) + { + struct hns3_adapter *hns = eth_dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; ++ uint16_t queue_num = hw->tqps_num; + +- info->max_rx_queues = hw->tqps_num; ++ /* ++ * In interrupt mode, 'max_rx_queues' is set based on the number of ++ * MSI-X interrupt resources of the hardware. 
++ */ ++ if (hw->data->dev_conf.intr_conf.rxq == 1) ++ queue_num = hw->intr_tqps_num; ++ ++ info->max_rx_queues = queue_num; + info->max_tx_queues = hw->tqps_num; + info->max_rx_pktlen = HNS3_MAX_FRAME_LEN; /* CRC included */ + info->min_rx_bufsize = hw->rx_buf_len; +@@ -2203,7 +2381,8 @@ hns3_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) + DEV_RX_OFFLOAD_QINQ_STRIP | + DEV_RX_OFFLOAD_VLAN_FILTER | + DEV_RX_OFFLOAD_VLAN_EXTEND | +- DEV_RX_OFFLOAD_JUMBO_FRAME); ++ DEV_RX_OFFLOAD_JUMBO_FRAME | ++ DEV_RX_OFFLOAD_RSS_HASH); + info->tx_queue_offload_capa = DEV_TX_OFFLOAD_MBUF_FAST_FREE; + info->tx_offload_capa = (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_TX_OFFLOAD_IPV4_CKSUM | +@@ -2358,6 +2537,7 @@ hns3_query_pf_resource(struct hns3_hw *hw) + hw->total_tqps_num = rte_le_to_cpu_16(req->tqp_num); + pf->pkt_buf_size = rte_le_to_cpu_16(req->buf_size) << HNS3_BUF_UNIT_S; + hw->tqps_num = RTE_MIN(hw->total_tqps_num, HNS3_MAX_TQP_NUM_PER_FUNC); ++ pf->func_num = rte_le_to_cpu_16(req->pf_own_fun_number); + + if (req->tx_buf_size) + pf->tx_buf_size = +@@ -2377,7 +2557,7 @@ hns3_query_pf_resource(struct hns3_hw *hw) + + hw->num_msi = + hns3_get_field(rte_le_to_cpu_16(req->pf_intr_vector_number), +- HNS3_PF_VEC_NUM_M, HNS3_PF_VEC_NUM_S); ++ HNS3_VEC_NUM_M, HNS3_VEC_NUM_S); + + return 0; + } +@@ -2534,6 +2714,7 @@ hns3_get_board_configuration(struct hns3_hw *hw) + + hw->mac.media_type = cfg.media_type; + hw->rss_size_max = cfg.rss_size_max; ++ hw->rss_dis_flag = false; + hw->rx_buf_len = cfg.rx_buf_len; + memcpy(hw->mac.mac_addr, cfg.mac_addr, RTE_ETHER_ADDR_LEN); + hw->mac.phy_addr = cfg.phy_addr; +@@ -2634,6 +2815,7 @@ hns3_map_tqp(struct hns3_hw *hw) + uint16_t tqps_num = hw->total_tqps_num; + uint16_t func_id; + uint16_t tqp_id; ++ bool is_pf; + int num; + int ret; + int i; +@@ -2645,10 +2827,11 @@ hns3_map_tqp(struct hns3_hw *hw) + tqp_id = 0; + num = DIV_ROUND_UP(hw->total_tqps_num, HNS3_MAX_TQP_NUM_PER_FUNC); + for (func_id = 0; func_id < num; func_id++) { ++ is_pf = func_id == 0 ? true : false; + for (i = 0; + i < HNS3_MAX_TQP_NUM_PER_FUNC && tqp_id < tqps_num; i++) { + ret = hns3_map_tqps_to_func(hw, func_id, tqp_id++, i, +- true); ++ is_pf); + if (ret) + return ret; + } +@@ -3521,7 +3704,7 @@ hns3_cmd_set_promisc_mode(struct hns3_hw *hw, struct hns3_promisc_param *param) + + ret = hns3_cmd_send(hw, &desc, 1); + if (ret) +- PMD_INIT_LOG(ERR, "Set promisc mode fail, status is %d", ret); ++ PMD_INIT_LOG(ERR, "Set promisc mode fail, ret = %d", ret); + + return ret; + } +@@ -3549,19 +3732,66 @@ hns3_set_promisc_mode(struct hns3_hw *hw, bool en_uc_pmc, bool en_mc_pmc) + return 0; + } + ++static int ++hns3_clear_all_vfs_promisc_mode(struct hns3_hw *hw) ++{ ++ struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); ++ struct hns3_pf *pf = &hns->pf; ++ struct hns3_promisc_param param; ++ uint16_t func_id; ++ int ret; ++ ++ /* func_id 0 is denoted PF, the VFs start from 1 */ ++ for (func_id = 1; func_id < pf->func_num; func_id++) { ++ hns3_promisc_param_init(¶m, false, false, false, func_id); ++ ret = hns3_cmd_set_promisc_mode(hw, ¶m); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ + static int + hns3_dev_promiscuous_enable(struct rte_eth_dev *dev) + { ++ bool allmulti = dev->data->all_multicast ? true : false; + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; +- bool en_mc_pmc = (dev->data->all_multicast == 1) ? 
true : false; ++ uint64_t offloads; ++ int err; + int ret = 0; + + rte_spinlock_lock(&hw->lock); +- ret = hns3_set_promisc_mode(hw, true, en_mc_pmc); ++ ret = hns3_set_promisc_mode(hw, true, true); ++ if (ret) { ++ rte_spinlock_unlock(&hw->lock); ++ hns3_err(hw, "failed to enable promiscuous mode, ret = %d", ++ ret); ++ return ret; ++ } ++ ++ /* ++ * When promiscuous mode was enabled, disable the vlan filter to let ++ * all packets coming in in the receiving direction. ++ */ ++ offloads = dev->data->dev_conf.rxmode.offloads; ++ if (offloads & DEV_RX_OFFLOAD_VLAN_FILTER) { ++ ret = hns3_enable_vlan_filter(hns, false); ++ if (ret) { ++ hns3_err(hw, "failed to enable promiscuous mode due to " ++ "failure to disable vlan filter, ret = %d", ++ ret); ++ err = hns3_set_promisc_mode(hw, false, allmulti); ++ if (err) ++ hns3_err(hw, "failed to restore promiscuous " ++ "status after disable vlan filter " ++ "failed during enabling promiscuous " ++ "mode, ret = %d", ret); ++ } ++ } ++ + rte_spinlock_unlock(&hw->lock); +- if (ret) +- hns3_err(hw, "Failed to enable promiscuous mode: %d", ret); + + return ret; + } +@@ -3569,17 +3799,39 @@ hns3_dev_promiscuous_enable(struct rte_eth_dev *dev) + static int + hns3_dev_promiscuous_disable(struct rte_eth_dev *dev) + { ++ bool allmulti = dev->data->all_multicast ? true : false; + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; +- bool en_mc_pmc = (dev->data->all_multicast == 1) ? true : false; ++ uint64_t offloads; ++ int err; + int ret = 0; + + /* If now in all_multicast mode, must remain in all_multicast mode. */ + rte_spinlock_lock(&hw->lock); +- ret = hns3_set_promisc_mode(hw, false, en_mc_pmc); ++ ret = hns3_set_promisc_mode(hw, false, allmulti); ++ if (ret) { ++ rte_spinlock_unlock(&hw->lock); ++ hns3_err(hw, "failed to disable promiscuous mode, ret = %d", ++ ret); ++ return ret; ++ } ++ /* when promiscuous mode was disabled, restore the vlan filter status */ ++ offloads = dev->data->dev_conf.rxmode.offloads; ++ if (offloads & DEV_RX_OFFLOAD_VLAN_FILTER) { ++ ret = hns3_enable_vlan_filter(hns, true); ++ if (ret) { ++ hns3_err(hw, "failed to disable promiscuous mode due to" ++ " failure to restore vlan filter, ret = %d", ++ ret); ++ err = hns3_set_promisc_mode(hw, true, true); ++ if (err) ++ hns3_err(hw, "failed to restore promiscuous " ++ "status after enabling vlan filter " ++ "failed during disabling promiscuous " ++ "mode, ret = %d", ret); ++ } ++ } + rte_spinlock_unlock(&hw->lock); +- if (ret) +- hns3_err(hw, "Failed to disable promiscuous mode: %d", ret); + + return ret; + } +@@ -3589,14 +3841,17 @@ hns3_dev_allmulticast_enable(struct rte_eth_dev *dev) + { + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; +- bool en_uc_pmc = (dev->data->promiscuous == 1) ? true : false; + int ret = 0; + ++ if (dev->data->promiscuous) ++ return 0; ++ + rte_spinlock_lock(&hw->lock); +- ret = hns3_set_promisc_mode(hw, en_uc_pmc, true); ++ ret = hns3_set_promisc_mode(hw, false, true); + rte_spinlock_unlock(&hw->lock); + if (ret) +- hns3_err(hw, "Failed to enable allmulticast mode: %d", ret); ++ hns3_err(hw, "failed to enable allmulticast mode, ret = %d", ++ ret); + + return ret; + } +@@ -3606,18 +3861,18 @@ hns3_dev_allmulticast_disable(struct rte_eth_dev *dev) + { + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; +- bool en_uc_pmc = (dev->data->promiscuous == 1) ? true : false; + int ret = 0; + + /* If now in promiscuous mode, must remain in all_multicast mode. 
*/ +- if (dev->data->promiscuous == 1) ++ if (dev->data->promiscuous) + return 0; + + rte_spinlock_lock(&hw->lock); +- ret = hns3_set_promisc_mode(hw, en_uc_pmc, false); ++ ret = hns3_set_promisc_mode(hw, false, false); + rte_spinlock_unlock(&hw->lock); + if (ret) +- hns3_err(hw, "Failed to disable allmulticast mode: %d", ret); ++ hns3_err(hw, "failed to disable allmulticast mode, ret = %d", ++ ret); + + return ret; + } +@@ -3626,13 +3881,22 @@ static int + hns3_dev_promisc_restore(struct hns3_adapter *hns) + { + struct hns3_hw *hw = &hns->hw; +- bool en_mc_pmc; +- bool en_uc_pmc; ++ bool allmulti = hw->data->all_multicast ? true : false; ++ int ret; + +- en_uc_pmc = (hw->data->promiscuous == 1) ? true : false; +- en_mc_pmc = (hw->data->all_multicast == 1) ? true : false; ++ if (hw->data->promiscuous) { ++ ret = hns3_set_promisc_mode(hw, true, true); ++ if (ret) ++ hns3_err(hw, "failed to restore promiscuous mode, " ++ "ret = %d", ret); ++ return ret; ++ } + +- return hns3_set_promisc_mode(hw, en_uc_pmc, en_mc_pmc); ++ ret = hns3_set_promisc_mode(hw, false, allmulti); ++ if (ret) ++ hns3_err(hw, "failed to restore allmulticast mode, ret = %d", ++ ret); ++ return ret; + } + + static int +@@ -3763,7 +4027,7 @@ hns3_get_mac_link_status(struct hns3_hw *hw) + ret = hns3_cmd_send(hw, &desc, 1); + if (ret) { + hns3_err(hw, "get link status cmd failed %d", ret); +- return ret; ++ return ETH_LINK_DOWN; + } + + req = (struct hns3_link_status_cmd *)desc.data; +@@ -3834,6 +4098,13 @@ hns3_init_hardware(struct hns3_adapter *hns) + goto err_mac_init; + } + ++ ret = hns3_clear_all_vfs_promisc_mode(hw); ++ if (ret) { ++ PMD_INIT_LOG(ERR, "Failed to clear all vfs promisc mode: %d", ++ ret); ++ goto err_mac_init; ++ } ++ + ret = hns3_init_vlan_config(hns); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to init vlan: %d", ret); +@@ -3863,6 +4134,19 @@ hns3_init_hardware(struct hns3_adapter *hns) + PMD_INIT_LOG(ERR, "Failed to config gro: %d", ret); + goto err_mac_init; + } ++ ++ /* ++ * In the initialization clearing the all hardware mapping relationship ++ * configurations between queues and interrupt vectors is needed, so ++ * some error caused by the residual configurations, such as the ++ * unexpected interrupt, can be avoid. 
++ */ ++ ret = hns3_init_ring_with_vector(hw); ++ if (ret) { ++ PMD_INIT_LOG(ERR, "Failed to init ring intr vector: %d", ret); ++ goto err_mac_init; ++ } ++ + return 0; + + err_mac_init: +@@ -3953,13 +4237,10 @@ hns3_init_pf(struct rte_eth_dev *eth_dev) + rte_intr_disable(&pci_dev->intr_handle); + hns3_intr_unregister(&pci_dev->intr_handle, hns3_interrupt_handler, + eth_dev); +- + err_intr_callback_register: +- hns3_cmd_uninit(hw); +- + err_cmd_init: ++ hns3_cmd_uninit(hw); + hns3_cmd_destroy_queue(hw); +- + err_cmd_init_queue: + hw->io_base = NULL; + +@@ -4020,15 +4301,116 @@ hns3_do_start(struct hns3_adapter *hns, bool reset_queue) + } + + static int +-hns3_dev_start(struct rte_eth_dev *eth_dev) ++hns3_map_rx_interrupt(struct rte_eth_dev *dev) + { +- struct hns3_adapter *hns = eth_dev->data->dev_private; +- struct hns3_hw *hw = &hns->hw; ++ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); ++ struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ uint32_t intr_vector; ++ uint8_t base = 0; ++ uint8_t vec = 0; ++ uint16_t q_id; ++ int ret; ++ ++ if (dev->data->dev_conf.intr_conf.rxq == 0) ++ return 0; ++ ++ /* disable uio/vfio intr/eventfd mapping */ ++ rte_intr_disable(intr_handle); ++ ++ /* check and configure queue intr-vector mapping */ ++ if (rte_intr_cap_multiple(intr_handle) || ++ !RTE_ETH_DEV_SRIOV(dev).active) { ++ intr_vector = hw->used_rx_queues; ++ /* creates event fd for each intr vector when MSIX is used */ ++ if (rte_intr_efd_enable(intr_handle, intr_vector)) ++ return -EINVAL; ++ } ++ if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) { ++ intr_handle->intr_vec = ++ rte_zmalloc("intr_vec", ++ hw->used_rx_queues * sizeof(int), 0); ++ if (intr_handle->intr_vec == NULL) { ++ hns3_err(hw, "Failed to allocate %d rx_queues" ++ " intr_vec", hw->used_rx_queues); ++ ret = -ENOMEM; ++ goto alloc_intr_vec_error; ++ } ++ } ++ ++ if (rte_intr_allow_others(intr_handle)) { ++ vec = RTE_INTR_VEC_RXTX_OFFSET; ++ base = RTE_INTR_VEC_RXTX_OFFSET; ++ } ++ if (rte_intr_dp_is_en(intr_handle)) { ++ for (q_id = 0; q_id < hw->used_rx_queues; q_id++) { ++ ret = hns3_bind_ring_with_vector(hw, vec, true, ++ HNS3_RING_TYPE_RX, ++ q_id); ++ if (ret) ++ goto bind_vector_error; ++ intr_handle->intr_vec[q_id] = vec; ++ if (vec < base + intr_handle->nb_efd - 1) ++ vec++; ++ } ++ } ++ rte_intr_enable(intr_handle); ++ return 0; ++ ++bind_vector_error: ++ rte_intr_efd_disable(intr_handle); ++ if (intr_handle->intr_vec) { ++ free(intr_handle->intr_vec); ++ intr_handle->intr_vec = NULL; ++ } ++ return ret; ++alloc_intr_vec_error: ++ rte_intr_efd_disable(intr_handle); ++ return ret; ++} ++ ++static int ++hns3_restore_rx_interrupt(struct hns3_hw *hw) ++{ ++ struct rte_eth_dev *dev = &rte_eth_devices[hw->data->port_id]; ++ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); ++ struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; ++ uint16_t q_id; + int ret; + ++ if (dev->data->dev_conf.intr_conf.rxq == 0) ++ return 0; ++ ++ if (rte_intr_dp_is_en(intr_handle)) { ++ for (q_id = 0; q_id < hw->used_rx_queues; q_id++) { ++ ret = hns3_bind_ring_with_vector(hw, ++ intr_handle->intr_vec[q_id], true, ++ HNS3_RING_TYPE_RX, q_id); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static void ++hns3_restore_filter(struct rte_eth_dev *dev) ++{ ++ hns3_restore_rss_filter(dev); ++} ++ ++static int ++hns3_dev_start(struct rte_eth_dev *dev) ++{ ++ struct hns3_adapter *hns = dev->data->dev_private; ++ struct 
hns3_hw *hw = &hns->hw; ++ int ret = 0; ++ + PMD_INIT_FUNC_TRACE(); + if (rte_atomic16_read(&hw->reset.resetting)) + return -EBUSY; ++ + rte_spinlock_lock(&hw->lock); + hw->adapter_state = HNS3_NIC_STARTING; + +@@ -4039,10 +4421,29 @@ hns3_dev_start(struct rte_eth_dev *eth_dev) + return ret; + } + ++ ret = hns3_map_rx_interrupt(dev); ++ if (ret) { ++ hw->adapter_state = HNS3_NIC_CONFIGURED; ++ rte_spinlock_unlock(&hw->lock); ++ return ret; ++ } ++ + hw->adapter_state = HNS3_NIC_STARTED; + rte_spinlock_unlock(&hw->lock); +- hns3_set_rxtx_function(eth_dev); +- hns3_mp_req_start_rxtx(eth_dev); ++ ++ hns3_set_rxtx_function(dev); ++ hns3_mp_req_start_rxtx(dev); ++ rte_eal_alarm_set(HNS3_SERVICE_INTERVAL, hns3_service_handler, dev); ++ ++ hns3_restore_filter(dev); ++ ++ /* Enable interrupt of all rx queues before enabling queues */ ++ hns3_dev_all_rx_queue_intr_enable(hw, true); ++ /* ++ * When finished the initialization, enable queues to receive/transmit ++ * packets. ++ */ ++ hns3_enable_all_queues(hw, true); + + hns3_info(hw, "hns3 dev start successful!"); + return 0; +@@ -4070,27 +4471,65 @@ hns3_do_stop(struct hns3_adapter *hns) + } + + static void +-hns3_dev_stop(struct rte_eth_dev *eth_dev) ++hns3_unmap_rx_interrupt(struct rte_eth_dev *dev) + { +- struct hns3_adapter *hns = eth_dev->data->dev_private; ++ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); ++ struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; ++ struct hns3_adapter *hns = dev->data->dev_private; ++ struct hns3_hw *hw = &hns->hw; ++ uint8_t base = 0; ++ uint8_t vec = 0; ++ uint16_t q_id; ++ ++ if (dev->data->dev_conf.intr_conf.rxq == 0) ++ return; ++ ++ /* unmap the ring with vector */ ++ if (rte_intr_allow_others(intr_handle)) { ++ vec = RTE_INTR_VEC_RXTX_OFFSET; ++ base = RTE_INTR_VEC_RXTX_OFFSET; ++ } ++ if (rte_intr_dp_is_en(intr_handle)) { ++ for (q_id = 0; q_id < hw->used_rx_queues; q_id++) { ++ (void)hns3_bind_ring_with_vector(hw, vec, false, ++ HNS3_RING_TYPE_RX, ++ q_id); ++ if (vec < base + intr_handle->nb_efd - 1) ++ vec++; ++ } ++ } ++ /* Clean datapath event and queue/vec mapping */ ++ rte_intr_efd_disable(intr_handle); ++ if (intr_handle->intr_vec) { ++ rte_free(intr_handle->intr_vec); ++ intr_handle->intr_vec = NULL; ++ } ++} ++ ++static void ++hns3_dev_stop(struct rte_eth_dev *dev) ++{ ++ struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; + + PMD_INIT_FUNC_TRACE(); + + hw->adapter_state = HNS3_NIC_STOPPING; +- hns3_set_rxtx_function(eth_dev); ++ hns3_set_rxtx_function(dev); + rte_wmb(); + /* Disable datapath on secondary process. */ +- hns3_mp_req_stop_rxtx(eth_dev); ++ hns3_mp_req_stop_rxtx(dev); + /* Prevent crashes when queues are still in use. 
*/ + rte_delay_ms(hw->tqps_num); + + rte_spinlock_lock(&hw->lock); + if (rte_atomic16_read(&hw->reset.resetting) == 0) { + hns3_do_stop(hns); ++ hns3_unmap_rx_interrupt(dev); + hns3_dev_release_mbufs(hns); + hw->adapter_state = HNS3_NIC_CONFIGURED; + } ++ rte_eal_alarm_cancel(hns3_service_handler, dev); + rte_spinlock_unlock(&hw->lock); + } + +@@ -4112,7 +4551,6 @@ hns3_dev_close(struct rte_eth_dev *eth_dev) + hw->adapter_state = HNS3_NIC_CLOSING; + hns3_reset_abort(hns); + hw->adapter_state = HNS3_NIC_CLOSED; +- rte_eal_alarm_cancel(hns3_service_handler, eth_dev); + + hns3_configure_all_mc_mac_addr(hns, true); + hns3_remove_all_vlan_table(hns); +@@ -4297,15 +4735,13 @@ hns3_get_dcb_info(struct rte_eth_dev *dev, struct rte_eth_dcb_info *dcb_info) + for (i = 0; i < dcb_info->nb_tcs; i++) + dcb_info->tc_bws[i] = hw->dcb_info.pg_info[0].tc_dwrr[i]; + +- for (i = 0; i < HNS3_MAX_TC_NUM; i++) { +- dcb_info->tc_queue.tc_rxq[0][i].base = +- hw->tc_queue[i].tqp_offset; ++ for (i = 0; i < hw->num_tc; i++) { ++ dcb_info->tc_queue.tc_rxq[0][i].base = hw->alloc_rss_size * i; + dcb_info->tc_queue.tc_txq[0][i].base = +- hw->tc_queue[i].tqp_offset; +- dcb_info->tc_queue.tc_rxq[0][i].nb_queue = +- hw->tc_queue[i].tqp_count; ++ hw->tc_queue[i].tqp_offset; ++ dcb_info->tc_queue.tc_rxq[0][i].nb_queue = hw->alloc_rss_size; + dcb_info->tc_queue.tc_txq[0][i].nb_queue = +- hw->tc_queue[i].tqp_count; ++ hw->tc_queue[i].tqp_count; + } + rte_spinlock_unlock(&hw->lock); + +@@ -4327,31 +4763,24 @@ hns3_reinit_dev(struct hns3_adapter *hns) + ret = hns3_reset_all_queues(hns); + if (ret) { + hns3_err(hw, "Failed to reset all queues: %d", ret); +- goto err_init; ++ return ret; + } + + ret = hns3_init_hardware(hns); + if (ret) { + hns3_err(hw, "Failed to init hardware: %d", ret); +- goto err_init; ++ return ret; + } + + ret = hns3_enable_hw_error_intr(hns, true); + if (ret) { + hns3_err(hw, "fail to enable hw error interrupts: %d", + ret); +- goto err_mac_init; ++ return ret; + } + hns3_info(hw, "Reset done, driver initialization finished."); + + return 0; +- +-err_mac_init: +- hns3_uninit_umv_space(hw); +-err_init: +- hns3_cmd_uninit(hw); +- +- return ret; + } + + static bool +@@ -4573,7 +5002,8 @@ hns3_stop_service(struct hns3_adapter *hns) + struct rte_eth_dev *eth_dev; + + eth_dev = &rte_eth_devices[hw->data->port_id]; +- rte_eal_alarm_cancel(hns3_service_handler, eth_dev); ++ if (hw->adapter_state == HNS3_NIC_STARTED) ++ rte_eal_alarm_cancel(hns3_service_handler, eth_dev); + hw->mac.link_status = ETH_LINK_DOWN; + + hns3_set_rxtx_function(eth_dev); +@@ -4614,7 +5044,18 @@ hns3_start_service(struct hns3_adapter *hns) + eth_dev = &rte_eth_devices[hw->data->port_id]; + hns3_set_rxtx_function(eth_dev); + hns3_mp_req_start_rxtx(eth_dev); +- hns3_service_handler(eth_dev); ++ if (hw->adapter_state == HNS3_NIC_STARTED) { ++ hns3_service_handler(eth_dev); ++ ++ /* Enable interrupt of all rx queues before enabling queues */ ++ hns3_dev_all_rx_queue_intr_enable(hw, true); ++ /* ++ * When finished the initialization, enable queues to receive ++ * and transmit packets. 
++ */ ++ hns3_enable_all_queues(hw, true); ++ } ++ + return 0; + } + +@@ -4648,6 +5089,10 @@ hns3_restore_conf(struct hns3_adapter *hns) + if (ret) + goto err_promisc; + ++ ret = hns3_restore_rx_interrupt(hw); ++ if (ret) ++ goto err_promisc; ++ + if (hns->hw.adapter_state == HNS3_NIC_STARTED) { + ret = hns3_do_start(hns, false); + if (ret) +@@ -4748,6 +5193,8 @@ static const struct eth_dev_ops hns3_eth_dev_ops = { + .tx_queue_setup = hns3_tx_queue_setup, + .rx_queue_release = hns3_dev_rx_queue_release, + .tx_queue_release = hns3_dev_tx_queue_release, ++ .rx_queue_intr_enable = hns3_dev_rx_queue_intr_enable, ++ .rx_queue_intr_disable = hns3_dev_rx_queue_intr_disable, + .dev_configure = hns3_dev_configure, + .flow_ctrl_get = hns3_flow_ctrl_get, + .flow_ctrl_set = hns3_flow_ctrl_set, +@@ -4870,7 +5317,6 @@ hns3_dev_init(struct rte_eth_dev *eth_dev) + hns3_notify_reset_ready(hw, false); + } + +- rte_eal_alarm_set(HNS3_SERVICE_INTERVAL, hns3_service_handler, eth_dev); + hns3_info(hw, "hns3 dev initialization successful!"); + return 0; + +diff --git a/dpdk/drivers/net/hns3/hns3_ethdev.h b/dpdk/drivers/net/hns3/hns3_ethdev.h +index e9a3fe4107..cdbfc70b25 100644 +--- a/dpdk/drivers/net/hns3/hns3_ethdev.h ++++ b/dpdk/drivers/net/hns3/hns3_ethdev.h +@@ -154,6 +154,12 @@ struct hns3_mac { + uint32_t link_speed; /* ETH_SPEED_NUM_ */ + }; + ++struct hns3_fake_queue_data { ++ void **rx_queues; /* Array of pointers to fake RX queues. */ ++ void **tx_queues; /* Array of pointers to fake TX queues. */ ++ uint16_t nb_fake_rx_queues; /* Number of fake RX queues. */ ++ uint16_t nb_fake_tx_queues; /* Number of fake TX queues. */ ++}; + + /* Primary process maintains driver state in main thread. + * +@@ -348,6 +354,7 @@ struct hns3_hw { + uint16_t num_msi; + uint16_t total_tqps_num; /* total task queue pairs of this PF */ + uint16_t tqps_num; /* num task queue pairs of this function */ ++ uint16_t intr_tqps_num; /* num queue pairs mapping interrupt */ + uint16_t rss_size_max; /* HW defined max RSS task queue */ + uint16_t rx_buf_len; + uint16_t num_tx_desc; /* desc num of per tx queue */ +@@ -358,6 +365,7 @@ struct hns3_hw { + + /* The configuration info of RSS */ + struct hns3_rss_conf rss_info; ++ bool rss_dis_flag; /* disable rss flag. 
true: disable, false: enable */ + + uint8_t num_tc; /* Total number of enabled TCs */ + uint8_t hw_tc_map; +@@ -366,8 +374,14 @@ struct hns3_hw { + struct hns3_dcb_info dcb_info; + enum hns3_fc_status current_fc_status; /* current flow control status */ + struct hns3_tc_queue_info tc_queue[HNS3_MAX_TC_NUM]; +- uint16_t alloc_tqps; +- uint16_t alloc_rss_size; /* Queue number per TC */ ++ uint16_t used_rx_queues; ++ uint16_t used_tx_queues; ++ ++ /* Config max queue numbers between rx and tx queues from user */ ++ uint16_t cfg_max_queues; ++ struct hns3_fake_queue_data fkq_data; /* fake queue data */ ++ uint16_t alloc_rss_size; /* RX queue number per TC */ ++ uint16_t tx_qnum_per_tc; /* TX queue number per TC */ + + uint32_t flag; + /* +@@ -453,6 +467,7 @@ struct hns3_mp_param { + struct hns3_pf { + struct hns3_adapter *adapter; + bool is_main_pf; ++ uint16_t func_num; /* num functions of this pf, include pf and vfs */ + + uint32_t pkt_buf_size; /* Total pf buf size for tx/rx */ + uint32_t tx_buf_size; /* Tx buffer size for each TC */ +diff --git a/dpdk/drivers/net/hns3/hns3_ethdev_vf.c b/dpdk/drivers/net/hns3/hns3_ethdev_vf.c +index b1736e73ab..251d0efe83 100644 +--- a/dpdk/drivers/net/hns3/hns3_ethdev_vf.c ++++ b/dpdk/drivers/net/hns3/hns3_ethdev_vf.c +@@ -414,6 +414,97 @@ hns3vf_set_promisc_mode(struct hns3_hw *hw, bool en_bc_pmc) + return ret; + } + ++static int ++hns3vf_bind_ring_with_vector(struct hns3_hw *hw, uint8_t vector_id, ++ bool mmap, enum hns3_ring_type queue_type, ++ uint16_t queue_id) ++{ ++ struct hns3_vf_bind_vector_msg bind_msg; ++ const char *op_str; ++ uint16_t code; ++ int ret; ++ ++ memset(&bind_msg, 0, sizeof(bind_msg)); ++ code = mmap ? HNS3_MBX_MAP_RING_TO_VECTOR : ++ HNS3_MBX_UNMAP_RING_TO_VECTOR; ++ bind_msg.vector_id = vector_id; ++ ++ if (queue_type == HNS3_RING_TYPE_RX) ++ bind_msg.param[0].int_gl_index = HNS3_RING_GL_RX; ++ else ++ bind_msg.param[0].int_gl_index = HNS3_RING_GL_TX; ++ ++ bind_msg.param[0].ring_type = queue_type; ++ bind_msg.ring_num = 1; ++ bind_msg.param[0].tqp_index = queue_id; ++ op_str = mmap ? "Map" : "Unmap"; ++ ret = hns3_send_mbx_msg(hw, code, 0, (uint8_t *)&bind_msg, ++ sizeof(bind_msg), false, NULL, 0); ++ if (ret) { ++ hns3_err(hw, "%s TQP %d fail, vector_id is %d, ret is %d.", ++ op_str, queue_id, bind_msg.vector_id, ret); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int ++hns3vf_init_ring_with_vector(struct hns3_hw *hw) ++{ ++ uint8_t vec; ++ int ret; ++ int i; ++ ++ /* ++ * In hns3 network engine, vector 0 is always the misc interrupt of this ++ * function, vector 1~N can be used respectively for the queues of the ++ * function. Tx and Rx queues with the same number share the interrupt ++ * vector. In the initialization clearing the all hardware mapping ++ * relationship configurations between queues and interrupt vectors is ++ * needed, so some error caused by the residual configurations, such as ++ * the unexpected Tx interrupt, can be avoid. Because of the hardware ++ * constraints in hns3 hardware engine, we have to implement clearing ++ * the mapping relationship configurations by binding all queues to the ++ * last interrupt vector and reserving the last interrupt vector. This ++ * method results in a decrease of the maximum queues when upper ++ * applications call the rte_eth_dev_configure API function to enable ++ * Rx interrupt. ++ */ ++ vec = hw->num_msi - 1; /* vector 0 for misc interrupt, not for queue */ ++ /* vec - 1: the last interrupt is reserved */ ++ hw->intr_tqps_num = vec > hw->tqps_num ? 
hw->tqps_num : vec - 1; ++ for (i = 0; i < hw->intr_tqps_num; i++) { ++ /* ++ * Set gap limiter and rate limiter configuration of queue's ++ * interrupt. ++ */ ++ hns3_set_queue_intr_gl(hw, i, HNS3_RING_GL_RX, ++ HNS3_TQP_INTR_GL_DEFAULT); ++ hns3_set_queue_intr_gl(hw, i, HNS3_RING_GL_TX, ++ HNS3_TQP_INTR_GL_DEFAULT); ++ hns3_set_queue_intr_rl(hw, i, HNS3_TQP_INTR_RL_DEFAULT); ++ ++ ret = hns3vf_bind_ring_with_vector(hw, vec, false, ++ HNS3_RING_TYPE_TX, i); ++ if (ret) { ++ PMD_INIT_LOG(ERR, "VF fail to unbind TX ring(%d) with " ++ "vector: %d, ret=%d", i, vec, ret); ++ return ret; ++ } ++ ++ ret = hns3vf_bind_ring_with_vector(hw, vec, false, ++ HNS3_RING_TYPE_RX, i); ++ if (ret) { ++ PMD_INIT_LOG(ERR, "VF fail to unbind RX ring(%d) with " ++ "vector: %d, ret=%d", i, vec, ret); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ + static int + hns3vf_dev_configure(struct rte_eth_dev *dev) + { +@@ -428,24 +519,28 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) + int ret; + + /* +- * Hardware does not support where the number of rx and tx queues is +- * not equal in hip08. ++ * Hardware does not support individually enable/disable/reset the Tx or ++ * Rx queue in hns3 network engine. Driver must enable/disable/reset Tx ++ * and Rx queues at the same time. When the numbers of Tx queues ++ * allocated by upper applications are not equal to the numbers of Rx ++ * queues, driver needs to setup fake Tx or Rx queues to adjust numbers ++ * of Tx/Rx queues. otherwise, network engine can not work as usual. But ++ * these fake queues are imperceptible, and can not be used by upper ++ * applications. + */ +- if (nb_rx_q != nb_tx_q) { +- hns3_err(hw, +- "nb_rx_queues(%u) not equal with nb_tx_queues(%u)! " +- "Hardware does not support this configuration!", +- nb_rx_q, nb_tx_q); +- return -EINVAL; ++ ret = hns3_set_fake_rx_or_tx_queues(dev, nb_rx_q, nb_tx_q); ++ if (ret) { ++ hns3_err(hw, "Failed to set rx/tx fake queues: %d", ret); ++ return ret; + } + ++ hw->adapter_state = HNS3_NIC_CONFIGURING; + if (conf->link_speeds & ETH_LINK_SPEED_FIXED) { + hns3_err(hw, "setting link speed/duplex not supported"); +- return -EINVAL; ++ ret = -EINVAL; ++ goto cfg_err; + } + +- hw->adapter_state = HNS3_NIC_CONFIGURING; +- + /* When RSS is not configured, redirect the packet queue 0 */ + if ((uint32_t)mq_mode & ETH_MQ_RX_RSS_FLAG) { + rss_conf = conf->rx_adv_conf.rss_conf; +@@ -484,7 +579,9 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) + return 0; + + cfg_err: ++ (void)hns3_set_fake_rx_or_tx_queues(dev, 0, 0); + hw->adapter_state = HNS3_NIC_INITIALIZED; ++ + return ret; + } + +@@ -542,8 +639,16 @@ hns3vf_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) + { + struct hns3_adapter *hns = eth_dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; ++ uint16_t q_num = hw->tqps_num; ++ ++ /* ++ * In interrupt mode, 'max_rx_queues' is set based on the number of ++ * MSI-X interrupt resources of the hardware. 
++ */ ++ if (hw->data->dev_conf.intr_conf.rxq == 1) ++ q_num = hw->intr_tqps_num; + +- info->max_rx_queues = hw->tqps_num; ++ info->max_rx_queues = q_num; + info->max_tx_queues = hw->tqps_num; + info->max_rx_pktlen = HNS3_MAX_FRAME_LEN; /* CRC included */ + info->min_rx_bufsize = hw->rx_buf_len; +@@ -561,7 +666,8 @@ hns3vf_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) + DEV_RX_OFFLOAD_VLAN_STRIP | + DEV_RX_OFFLOAD_QINQ_STRIP | + DEV_RX_OFFLOAD_VLAN_FILTER | +- DEV_RX_OFFLOAD_JUMBO_FRAME); ++ DEV_RX_OFFLOAD_JUMBO_FRAME | ++ DEV_RX_OFFLOAD_RSS_HASH); + info->tx_queue_offload_capa = DEV_TX_OFFLOAD_MBUF_FAST_FREE; + info->tx_offload_capa = (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_TX_OFFLOAD_IPV4_CKSUM | +@@ -784,6 +890,7 @@ hns3vf_get_configuration(struct hns3_hw *hw) + int ret; + + hw->mac.media_type = HNS3_MEDIA_TYPE_NONE; ++ hw->rss_dis_flag = false; + + /* Get queue configuration from PF */ + ret = hns3vf_get_queue_info(hw); +@@ -799,12 +906,12 @@ hns3vf_get_configuration(struct hns3_hw *hw) + return hns3vf_get_tc_info(hw); + } + +-static void ++static int + hns3vf_set_tc_info(struct hns3_adapter *hns) + { + struct hns3_hw *hw = &hns->hw; + uint16_t nb_rx_q = hw->data->nb_rx_queues; +- uint16_t new_tqps; ++ uint16_t nb_tx_q = hw->data->nb_tx_queues; + uint8_t i; + + hw->num_tc = 0; +@@ -812,11 +919,22 @@ hns3vf_set_tc_info(struct hns3_adapter *hns) + if (hw->hw_tc_map & BIT(i)) + hw->num_tc++; + +- new_tqps = RTE_MIN(hw->tqps_num, nb_rx_q); +- hw->alloc_rss_size = RTE_MIN(hw->rss_size_max, new_tqps / hw->num_tc); +- hw->alloc_tqps = hw->alloc_rss_size * hw->num_tc; ++ if (nb_rx_q < hw->num_tc) { ++ hns3_err(hw, "number of Rx queues(%d) is less than tcs(%d).", ++ nb_rx_q, hw->num_tc); ++ return -EINVAL; ++ } ++ ++ if (nb_tx_q < hw->num_tc) { ++ hns3_err(hw, "number of Tx queues(%d) is less than tcs(%d).", ++ nb_tx_q, hw->num_tc); ++ return -EINVAL; ++ } ++ ++ hns3_set_rss_size(hw, nb_rx_q); ++ hns3_tc_queue_mapping_cfg(hw, nb_tx_q); + +- hns3_tc_queue_mapping_cfg(hw); ++ return 0; + } + + static void +@@ -895,6 +1013,13 @@ hns3vf_vlan_offload_set(struct rte_eth_dev *dev, int mask) + struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_eth_conf *dev_conf = &dev->data->dev_conf; + unsigned int tmp_mask; ++ int ret = 0; ++ ++ if (rte_atomic16_read(&hw->reset.resetting)) { ++ hns3_err(hw, "vf set vlan offload failed during resetting, " ++ "mask = 0x%x", mask); ++ return -EIO; ++ } + + tmp_mask = (unsigned int)mask; + /* Vlan stripping setting */ +@@ -902,13 +1027,13 @@ hns3vf_vlan_offload_set(struct rte_eth_dev *dev, int mask) + rte_spinlock_lock(&hw->lock); + /* Enable or disable VLAN stripping */ + if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_VLAN_STRIP) +- hns3vf_en_hw_strip_rxvtag(hw, true); ++ ret = hns3vf_en_hw_strip_rxvtag(hw, true); + else +- hns3vf_en_hw_strip_rxvtag(hw, false); ++ ret = hns3vf_en_hw_strip_rxvtag(hw, false); + rte_spinlock_unlock(&hw->lock); + } + +- return 0; ++ return ret; + } + + static int +@@ -1055,6 +1180,35 @@ hns3vf_service_handler(void *param) + eth_dev); + } + ++static int ++hns3_query_vf_resource(struct hns3_hw *hw) ++{ ++ struct hns3_vf_res_cmd *req; ++ struct hns3_cmd_desc desc; ++ uint16_t num_msi; ++ int ret; ++ ++ hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_QUERY_VF_RSRC, true); ++ ret = hns3_cmd_send(hw, &desc, 1); ++ if (ret) { ++ hns3_err(hw, "query vf resource failed, ret = %d", ret); ++ return ret; ++ } ++ ++ req = (struct hns3_vf_res_cmd *)desc.data; ++ num_msi = 
hns3_get_field(rte_le_to_cpu_16(req->vf_intr_vector_number), ++ HNS3_VEC_NUM_M, HNS3_VEC_NUM_S); ++ if (num_msi < HNS3_MIN_VECTOR_NUM) { ++ hns3_err(hw, "Just %u msi resources, not enough for vf(min:%d)", ++ num_msi, HNS3_MIN_VECTOR_NUM); ++ return -EINVAL; ++ } ++ ++ hw->num_msi = num_msi; ++ ++ return 0; ++} ++ + static int + hns3vf_init_hardware(struct hns3_adapter *hns) + { +@@ -1082,6 +1236,18 @@ hns3vf_init_hardware(struct hns3_adapter *hns) + goto err_init_hardware; + } + ++ /* ++ * In the initialization clearing the all hardware mapping relationship ++ * configurations between queues and interrupt vectors is needed, so ++ * some error caused by the residual configurations, such as the ++ * unexpected interrupt, can be avoid. ++ */ ++ ret = hns3vf_init_ring_with_vector(hw); ++ if (ret) { ++ PMD_INIT_LOG(ERR, "Failed to init ring intr vector: %d", ret); ++ goto err_init_hardware; ++ } ++ + ret = hns3vf_set_alive(hw, true); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to VF send alive to PF: %d", ret); +@@ -1131,6 +1297,11 @@ hns3vf_init_vf(struct rte_eth_dev *eth_dev) + goto err_cmd_init; + } + ++ /* Get VF resource */ ++ ret = hns3_query_vf_resource(hw); ++ if (ret) ++ goto err_cmd_init; ++ + rte_spinlock_init(&hw->mbx_resp.lock); + + hns3vf_clear_event_cause(hw, 0); +@@ -1176,11 +1347,9 @@ hns3vf_init_vf(struct rte_eth_dev *eth_dev) + hns3_intr_unregister(&pci_dev->intr_handle, hns3vf_interrupt_handler, + eth_dev); + err_intr_callback_register: +- hns3_cmd_uninit(hw); +- + err_cmd_init: ++ hns3_cmd_uninit(hw); + hns3_cmd_destroy_queue(hw); +- + err_cmd_init_queue: + hw->io_base = NULL; + +@@ -1225,28 +1394,64 @@ hns3vf_do_stop(struct hns3_adapter *hns) + } + + static void +-hns3vf_dev_stop(struct rte_eth_dev *eth_dev) ++hns3vf_unmap_rx_interrupt(struct rte_eth_dev *dev) + { +- struct hns3_adapter *hns = eth_dev->data->dev_private; ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); ++ struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; ++ uint8_t base = 0; ++ uint8_t vec = 0; ++ uint16_t q_id; ++ ++ if (dev->data->dev_conf.intr_conf.rxq == 0) ++ return; ++ ++ /* unmap the ring with vector */ ++ if (rte_intr_allow_others(intr_handle)) { ++ vec = RTE_INTR_VEC_RXTX_OFFSET; ++ base = RTE_INTR_VEC_RXTX_OFFSET; ++ } ++ if (rte_intr_dp_is_en(intr_handle)) { ++ for (q_id = 0; q_id < hw->used_rx_queues; q_id++) { ++ (void)hns3vf_bind_ring_with_vector(hw, vec, false, ++ HNS3_RING_TYPE_RX, ++ q_id); ++ if (vec < base + intr_handle->nb_efd - 1) ++ vec++; ++ } ++ } ++ /* Clean datapath event and queue/vec mapping */ ++ rte_intr_efd_disable(intr_handle); ++ if (intr_handle->intr_vec) { ++ rte_free(intr_handle->intr_vec); ++ intr_handle->intr_vec = NULL; ++ } ++} ++ ++static void ++hns3vf_dev_stop(struct rte_eth_dev *dev) ++{ ++ struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; + + PMD_INIT_FUNC_TRACE(); + + hw->adapter_state = HNS3_NIC_STOPPING; +- hns3_set_rxtx_function(eth_dev); ++ hns3_set_rxtx_function(dev); + rte_wmb(); + /* Disable datapath on secondary process. */ +- hns3_mp_req_stop_rxtx(eth_dev); ++ hns3_mp_req_stop_rxtx(dev); + /* Prevent crashes when queues are still in use. 
*/ + rte_delay_ms(hw->tqps_num); + + rte_spinlock_lock(&hw->lock); + if (rte_atomic16_read(&hw->reset.resetting) == 0) { + hns3vf_do_stop(hns); ++ hns3vf_unmap_rx_interrupt(dev); + hns3_dev_release_mbufs(hns); + hw->adapter_state = HNS3_NIC_CONFIGURED; + } +- rte_eal_alarm_cancel(hns3vf_service_handler, eth_dev); ++ rte_eal_alarm_cancel(hns3vf_service_handler, dev); + rte_spinlock_unlock(&hw->lock); + } + +@@ -1317,7 +1522,9 @@ hns3vf_do_start(struct hns3_adapter *hns, bool reset_queue) + struct hns3_hw *hw = &hns->hw; + int ret; + +- hns3vf_set_tc_info(hns); ++ ret = hns3vf_set_tc_info(hns); ++ if (ret) ++ return ret; + + ret = hns3_start_queues(hns, reset_queue); + if (ret) { +@@ -1329,15 +1536,116 @@ hns3vf_do_start(struct hns3_adapter *hns, bool reset_queue) + } + + static int +-hns3vf_dev_start(struct rte_eth_dev *eth_dev) ++hns3vf_map_rx_interrupt(struct rte_eth_dev *dev) + { +- struct hns3_adapter *hns = eth_dev->data->dev_private; +- struct hns3_hw *hw = &hns->hw; ++ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); ++ struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ uint32_t intr_vector; ++ uint8_t base = 0; ++ uint8_t vec = 0; ++ uint16_t q_id; ++ int ret; ++ ++ if (dev->data->dev_conf.intr_conf.rxq == 0) ++ return 0; ++ ++ /* disable uio/vfio intr/eventfd mapping */ ++ rte_intr_disable(intr_handle); ++ ++ /* check and configure queue intr-vector mapping */ ++ if (rte_intr_cap_multiple(intr_handle) || ++ !RTE_ETH_DEV_SRIOV(dev).active) { ++ intr_vector = hw->used_rx_queues; ++ /* It creates event fd for each intr vector when MSIX is used */ ++ if (rte_intr_efd_enable(intr_handle, intr_vector)) ++ return -EINVAL; ++ } ++ if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) { ++ intr_handle->intr_vec = ++ rte_zmalloc("intr_vec", ++ hw->used_rx_queues * sizeof(int), 0); ++ if (intr_handle->intr_vec == NULL) { ++ hns3_err(hw, "Failed to allocate %d rx_queues" ++ " intr_vec", hw->used_rx_queues); ++ ret = -ENOMEM; ++ goto vf_alloc_intr_vec_error; ++ } ++ } ++ ++ if (rte_intr_allow_others(intr_handle)) { ++ vec = RTE_INTR_VEC_RXTX_OFFSET; ++ base = RTE_INTR_VEC_RXTX_OFFSET; ++ } ++ if (rte_intr_dp_is_en(intr_handle)) { ++ for (q_id = 0; q_id < hw->used_rx_queues; q_id++) { ++ ret = hns3vf_bind_ring_with_vector(hw, vec, true, ++ HNS3_RING_TYPE_RX, ++ q_id); ++ if (ret) ++ goto vf_bind_vector_error; ++ intr_handle->intr_vec[q_id] = vec; ++ if (vec < base + intr_handle->nb_efd - 1) ++ vec++; ++ } ++ } ++ rte_intr_enable(intr_handle); ++ return 0; ++ ++vf_bind_vector_error: ++ rte_intr_efd_disable(intr_handle); ++ if (intr_handle->intr_vec) { ++ free(intr_handle->intr_vec); ++ intr_handle->intr_vec = NULL; ++ } ++ return ret; ++vf_alloc_intr_vec_error: ++ rte_intr_efd_disable(intr_handle); ++ return ret; ++} ++ ++static int ++hns3vf_restore_rx_interrupt(struct hns3_hw *hw) ++{ ++ struct rte_eth_dev *dev = &rte_eth_devices[hw->data->port_id]; ++ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); ++ struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; ++ uint16_t q_id; + int ret; + ++ if (dev->data->dev_conf.intr_conf.rxq == 0) ++ return 0; ++ ++ if (rte_intr_dp_is_en(intr_handle)) { ++ for (q_id = 0; q_id < hw->used_rx_queues; q_id++) { ++ ret = hns3vf_bind_ring_with_vector(hw, ++ intr_handle->intr_vec[q_id], true, ++ HNS3_RING_TYPE_RX, q_id); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static void ++hns3vf_restore_filter(struct rte_eth_dev *dev) ++{ ++ 
hns3_restore_rss_filter(dev); ++} ++ ++static int ++hns3vf_dev_start(struct rte_eth_dev *dev) ++{ ++ struct hns3_adapter *hns = dev->data->dev_private; ++ struct hns3_hw *hw = &hns->hw; ++ int ret = 0; ++ + PMD_INIT_FUNC_TRACE(); + if (rte_atomic16_read(&hw->reset.resetting)) + return -EBUSY; ++ + rte_spinlock_lock(&hw->lock); + hw->adapter_state = HNS3_NIC_STARTING; + ret = hns3vf_do_start(hns, true); +@@ -1346,13 +1654,30 @@ hns3vf_dev_start(struct rte_eth_dev *eth_dev) + rte_spinlock_unlock(&hw->lock); + return ret; + } ++ ret = hns3vf_map_rx_interrupt(dev); ++ if (ret) { ++ hw->adapter_state = HNS3_NIC_CONFIGURED; ++ rte_spinlock_unlock(&hw->lock); ++ return ret; ++ } + hw->adapter_state = HNS3_NIC_STARTED; + rte_spinlock_unlock(&hw->lock); +- hns3_set_rxtx_function(eth_dev); +- hns3_mp_req_start_rxtx(eth_dev); +- rte_eal_alarm_set(HNS3VF_SERVICE_INTERVAL, hns3vf_service_handler, +- eth_dev); +- return 0; ++ ++ hns3_set_rxtx_function(dev); ++ hns3_mp_req_start_rxtx(dev); ++ rte_eal_alarm_set(HNS3VF_SERVICE_INTERVAL, hns3vf_service_handler, dev); ++ ++ hns3vf_restore_filter(dev); ++ ++ /* Enable interrupt of all rx queues before enabling queues */ ++ hns3_dev_all_rx_queue_intr_enable(hw, true); ++ /* ++ * When finished the initialization, enable queues to receive/transmit ++ * packets. ++ */ ++ hns3_enable_all_queues(hw, true); ++ ++ return ret; + } + + static bool +@@ -1464,7 +1789,8 @@ hns3vf_stop_service(struct hns3_adapter *hns) + struct rte_eth_dev *eth_dev; + + eth_dev = &rte_eth_devices[hw->data->port_id]; +- rte_eal_alarm_cancel(hns3vf_service_handler, eth_dev); ++ if (hw->adapter_state == HNS3_NIC_STARTED) ++ rte_eal_alarm_cancel(hns3vf_service_handler, eth_dev); + hw->mac.link_status = ETH_LINK_DOWN; + + hns3_set_rxtx_function(eth_dev); +@@ -1502,8 +1828,18 @@ hns3vf_start_service(struct hns3_adapter *hns) + eth_dev = &rte_eth_devices[hw->data->port_id]; + hns3_set_rxtx_function(eth_dev); + hns3_mp_req_start_rxtx(eth_dev); ++ if (hw->adapter_state == HNS3_NIC_STARTED) { ++ hns3vf_service_handler(eth_dev); ++ ++ /* Enable interrupt of all rx queues before enabling queues */ ++ hns3_dev_all_rx_queue_intr_enable(hw, true); ++ /* ++ * When finished the initialization, enable queues to receive ++ * and transmit packets. 
++ */ ++ hns3_enable_all_queues(hw, true); ++ } + +- hns3vf_service_handler(eth_dev); + return 0; + } + +@@ -1525,6 +1861,10 @@ hns3vf_restore_conf(struct hns3_adapter *hns) + if (ret) + goto err_vlan_table; + ++ ret = hns3vf_restore_rx_interrupt(hw); ++ if (ret) ++ goto err_vlan_table; ++ + if (hw->adapter_state == HNS3_NIC_STARTED) { + ret = hns3vf_do_start(hns, false); + if (ret) +@@ -1630,7 +1970,7 @@ hns3vf_reinit_dev(struct hns3_adapter *hns) + ret = hns3_cmd_init(hw); + if (ret) { + hns3_err(hw, "Failed to init cmd: %d", ret); +- goto err_cmd_init; ++ return ret; + } + + if (hw->reset.level == HNS3_VF_FULL_RESET) { +@@ -1650,22 +1990,16 @@ hns3vf_reinit_dev(struct hns3_adapter *hns) + ret = hns3_reset_all_queues(hns); + if (ret) { + hns3_err(hw, "Failed to reset all queues: %d", ret); +- goto err_init; ++ return ret; + } + + ret = hns3vf_init_hardware(hns); + if (ret) { + hns3_err(hw, "Failed to init hardware: %d", ret); +- goto err_init; ++ return ret; + } + + return 0; +- +-err_cmd_init: +- hns3vf_set_bus_master(pci_dev, false); +-err_init: +- hns3_cmd_uninit(hw); +- return ret; + } + + static const struct eth_dev_ops hns3vf_eth_dev_ops = { +@@ -1685,6 +2019,8 @@ static const struct eth_dev_ops hns3vf_eth_dev_ops = { + .tx_queue_setup = hns3_tx_queue_setup, + .rx_queue_release = hns3_dev_rx_queue_release, + .tx_queue_release = hns3_dev_tx_queue_release, ++ .rx_queue_intr_enable = hns3_dev_rx_queue_intr_enable, ++ .rx_queue_intr_disable = hns3_dev_rx_queue_intr_disable, + .dev_configure = hns3vf_dev_configure, + .mac_addr_add = hns3vf_add_mac_addr, + .mac_addr_remove = hns3vf_remove_mac_addr, +diff --git a/dpdk/drivers/net/hns3/hns3_fdir.c b/dpdk/drivers/net/hns3/hns3_fdir.c +index ca3c78e1ce..53c6448cdd 100644 +--- a/dpdk/drivers/net/hns3/hns3_fdir.c ++++ b/dpdk/drivers/net/hns3/hns3_fdir.c +@@ -772,6 +772,20 @@ static int hns3_config_action(struct hns3_hw *hw, struct hns3_fdir_rule *rule) + return hns3_fd_ad_config(hw, ad_data.ad_id, &ad_data); + } + ++static int hns3_fd_clear_all_rules(struct hns3_hw *hw, uint32_t rule_num) ++{ ++ uint32_t i; ++ int ret; ++ ++ for (i = 0; i < rule_num; i++) { ++ ret = hns3_fd_tcam_config(hw, true, i, NULL, false); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ + int hns3_fdir_filter_init(struct hns3_adapter *hns) + { + struct hns3_pf *pf = &hns->pf; +@@ -785,6 +799,13 @@ int hns3_fdir_filter_init(struct hns3_adapter *hns) + .hash_func = rte_hash_crc, + .hash_func_init_val = 0, + }; ++ int ret; ++ ++ ret = hns3_fd_clear_all_rules(&hns->hw, rule_num); ++ if (ret) { ++ PMD_INIT_LOG(ERR, "Clear all fd rules fail! 
ret = %d", ret); ++ return ret; ++ } + + fdir_hash_params.socket_id = rte_socket_id(); + TAILQ_INIT(&fdir_info->fdir_list); +diff --git a/dpdk/drivers/net/hns3/hns3_flow.c b/dpdk/drivers/net/hns3/hns3_flow.c +index bcd121f48b..1b9dc1dae8 100644 +--- a/dpdk/drivers/net/hns3/hns3_flow.c ++++ b/dpdk/drivers/net/hns3/hns3_flow.c +@@ -224,14 +224,19 @@ hns3_handle_action_queue(struct rte_eth_dev *dev, + struct rte_flow_error *error) + { + struct hns3_adapter *hns = dev->data->dev_private; +- struct hns3_hw *hw = &hns->hw; + const struct rte_flow_action_queue *queue; ++ struct hns3_hw *hw = &hns->hw; + + queue = (const struct rte_flow_action_queue *)action->conf; +- if (queue->index >= hw->data->nb_rx_queues) ++ if (queue->index >= hw->used_rx_queues) { ++ hns3_err(hw, "queue ID(%d) is greater than number of " ++ "available queue (%d) in driver.", ++ queue->index, hw->used_rx_queues); + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "Invalid queue ID in PF"); ++ } ++ + rule->queue_id = queue->index; + rule->action = HNS3_FD_ACTION_ACCEPT_PACKET; + return 0; +@@ -1328,6 +1333,7 @@ hns3_disable_rss(struct hns3_hw *hw) + + /* Disable RSS */ + hw->rss_info.conf.types = 0; ++ hw->rss_dis_flag = true; + + return 0; + } +@@ -1541,6 +1547,19 @@ hns3_clear_rss_filter(struct rte_eth_dev *dev) + return hns3_config_rss_filter(dev, &hw->rss_info, false); + } + ++/* Restore the rss filter */ ++int ++hns3_restore_rss_filter(struct rte_eth_dev *dev) ++{ ++ struct hns3_adapter *hns = dev->data->dev_private; ++ struct hns3_hw *hw = &hns->hw; ++ ++ if (hw->rss_info.conf.queue_num == 0) ++ return 0; ++ ++ return hns3_config_rss_filter(dev, &hw->rss_info, true); ++} ++ + static int + hns3_flow_parse_rss(struct rte_eth_dev *dev, + const struct hns3_rss_conf *conf, bool add) +@@ -1822,8 +1841,11 @@ hns3_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error) + } + + ret = hns3_clear_rss_filter(dev); +- if (ret) ++ if (ret) { ++ rte_flow_error_set(error, ret, RTE_FLOW_ERROR_TYPE_HANDLE, ++ NULL, "Failed to flush rss filter"); + return ret; ++ } + + hns3_filterlist_flush(dev); + +diff --git a/dpdk/drivers/net/hns3/hns3_intr.c b/dpdk/drivers/net/hns3/hns3_intr.c +index 6c3ebd3ee1..9953a1d98e 100644 +--- a/dpdk/drivers/net/hns3/hns3_intr.c ++++ b/dpdk/drivers/net/hns3/hns3_intr.c +@@ -1001,7 +1001,9 @@ hns3_reset_post(struct hns3_adapter *hns) + hw->reset.attempts = 0; + hw->reset.stats.success_cnt++; + hw->reset.stage = RESET_STAGE_NONE; ++ rte_spinlock_lock(&hw->lock); + hw->reset.ops->start_service(hns); ++ rte_spinlock_unlock(&hw->lock); + gettimeofday(&tv, NULL); + timersub(&tv, &hw->reset.start_time, &tv_delta); + hns3_warn(hw, "%s reset done fail_cnt:%" PRIx64 +diff --git a/dpdk/drivers/net/hns3/hns3_mbx.c b/dpdk/drivers/net/hns3/hns3_mbx.c +index c1647af4bf..7b5ef9476d 100644 +--- a/dpdk/drivers/net/hns3/hns3_mbx.c ++++ b/dpdk/drivers/net/hns3/hns3_mbx.c +@@ -150,6 +150,8 @@ hns3_send_mbx_msg(struct hns3_hw *hw, uint16_t code, uint16_t subcode, + { + struct hns3_mbx_vf_to_pf_cmd *req; + struct hns3_cmd_desc desc; ++ bool is_ring_vector_msg; ++ int offset; + int ret; + + req = (struct hns3_mbx_vf_to_pf_cmd *)desc.data; +@@ -164,9 +166,15 @@ hns3_send_mbx_msg(struct hns3_hw *hw, uint16_t code, uint16_t subcode, + + hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_MBX_VF_TO_PF, false); + req->msg[0] = code; +- req->msg[1] = subcode; +- if (msg_data) +- memcpy(&req->msg[HNS3_CMD_CODE_OFFSET], msg_data, msg_len); ++ is_ring_vector_msg = (code == HNS3_MBX_MAP_RING_TO_VECTOR) || ++ 
(code == HNS3_MBX_UNMAP_RING_TO_VECTOR) || ++ (code == HNS3_MBX_GET_RING_VECTOR_MAP); ++ if (!is_ring_vector_msg) ++ req->msg[1] = subcode; ++ if (msg_data) { ++ offset = is_ring_vector_msg ? 1 : HNS3_CMD_CODE_OFFSET; ++ memcpy(&req->msg[offset], msg_data, msg_len); ++ } + + /* synchronous send */ + if (need_resp) { +@@ -211,6 +219,7 @@ hns3_mbx_handler(struct hns3_hw *hw) + struct hns3_mac *mac = &hw->mac; + enum hns3_reset_level reset_level; + uint16_t *msg_q; ++ uint8_t opcode; + uint32_t tail; + + tail = hw->arq.tail; +@@ -219,7 +228,8 @@ hns3_mbx_handler(struct hns3_hw *hw) + while (tail != hw->arq.head) { + msg_q = hw->arq.msg_q[hw->arq.head]; + +- switch (msg_q[0]) { ++ opcode = msg_q[0] & 0xff; ++ switch (opcode) { + case HNS3_MBX_LINK_STAT_CHANGE: + memcpy(&mac->link_speed, &msg_q[2], + sizeof(mac->link_speed)); +@@ -241,7 +251,7 @@ hns3_mbx_handler(struct hns3_hw *hw) + break; + default: + hns3_err(hw, "Fetched unsupported(%d) message from arq", +- msg_q[0]); ++ opcode); + break; + } + +@@ -291,6 +301,7 @@ hns3_dev_handle_mbx_msg(struct hns3_hw *hw) + struct hns3_cmd_desc *desc; + uint32_t msg_data; + uint16_t *msg_q; ++ uint8_t opcode; + uint16_t flag; + uint8_t *temp; + int i; +@@ -301,12 +312,13 @@ hns3_dev_handle_mbx_msg(struct hns3_hw *hw) + + desc = &crq->desc[crq->next_to_use]; + req = (struct hns3_mbx_pf_to_vf_cmd *)desc->data; ++ opcode = req->msg[0] & 0xff; + + flag = rte_le_to_cpu_16(crq->desc[crq->next_to_use].flag); + if (unlikely(!hns3_get_bit(flag, HNS3_CMDQ_RX_OUTVLD_B))) { + hns3_warn(hw, + "dropped invalid mailbox message, code = %d", +- req->msg[0]); ++ opcode); + + /* dropping/not processing this invalid message */ + crq->desc[crq->next_to_use].flag = 0; +@@ -314,7 +326,7 @@ hns3_dev_handle_mbx_msg(struct hns3_hw *hw) + continue; + } + +- switch (req->msg[0]) { ++ switch (opcode) { + case HNS3_MBX_PF_VF_RESP: + resp->resp_status = hns3_resp_to_errno(req->msg[3]); + +diff --git a/dpdk/drivers/net/hns3/hns3_mbx.h b/dpdk/drivers/net/hns3/hns3_mbx.h +index 01eddb845d..7bcfbbebc3 100644 +--- a/dpdk/drivers/net/hns3/hns3_mbx.h ++++ b/dpdk/drivers/net/hns3/hns3_mbx.h +@@ -41,6 +41,7 @@ enum HNS3_MBX_OPCODE { + HNS3_MBX_GET_QID_IN_PF, /* (VF -> PF) get queue id in pf */ + + HNS3_MBX_HANDLE_VF_TBL = 38, /* (VF -> PF) store/clear hw cfg tbl */ ++ HNS3_MBX_GET_RING_VECTOR_MAP, /* (VF -> PF) get ring-to-vector map */ + }; + + /* below are per-VF mac-vlan subcodes */ +@@ -104,6 +105,19 @@ struct hns3_mbx_pf_to_vf_cmd { + uint16_t msg[8]; + }; + ++struct hns3_ring_chain_param { ++ uint8_t ring_type; ++ uint8_t tqp_index; ++ uint8_t int_gl_index; ++}; ++ ++#define HNS3_MBX_MAX_RING_CHAIN_PARAM_NUM 4 ++struct hns3_vf_bind_vector_msg { ++ uint8_t vector_id; ++ uint8_t ring_num; ++ struct hns3_ring_chain_param param[HNS3_MBX_MAX_RING_CHAIN_PARAM_NUM]; ++}; ++ + struct hns3_vf_rst_cmd { + uint8_t dest_vfid; + uint8_t vf_rst; +diff --git a/dpdk/drivers/net/hns3/hns3_regs.c b/dpdk/drivers/net/hns3/hns3_regs.c +index 23405030e7..a3f2a51f9b 100644 +--- a/dpdk/drivers/net/hns3/hns3_regs.c ++++ b/dpdk/drivers/net/hns3/hns3_regs.c +@@ -118,15 +118,9 @@ hns3_get_regs_length(struct hns3_hw *hw, uint32_t *length) + struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + int cmdq_lines, common_lines, ring_lines, tqp_intr_lines; + uint32_t regs_num_32_bit, regs_num_64_bit; ++ uint32_t len; + int ret; + +- ret = hns3_get_regs_num(hw, ®s_num_32_bit, ®s_num_64_bit); +- if (ret) { +- hns3_err(hw, "Get register number failed, ret = %d.", +- ret); +- return -ENOTSUP; +- } +- + cmdq_lines = 
sizeof(cmdq_reg_addrs) / REG_LEN_PER_LINE + 1; + if (hns->is_vf) + common_lines = +@@ -136,11 +130,21 @@ hns3_get_regs_length(struct hns3_hw *hw, uint32_t *length) + ring_lines = sizeof(ring_reg_addrs) / REG_LEN_PER_LINE + 1; + tqp_intr_lines = sizeof(tqp_intr_reg_addrs) / REG_LEN_PER_LINE + 1; + +- *length = (cmdq_lines + common_lines + ring_lines * hw->tqps_num + +- tqp_intr_lines * hw->num_msi) * REG_LEN_PER_LINE + +- regs_num_32_bit * sizeof(uint32_t) + +- regs_num_64_bit * sizeof(uint64_t); ++ len = (cmdq_lines + common_lines + ring_lines * hw->tqps_num + ++ tqp_intr_lines * hw->num_msi) * REG_LEN_PER_LINE; + ++ if (!hns->is_vf) { ++ ret = hns3_get_regs_num(hw, ®s_num_32_bit, ®s_num_64_bit); ++ if (ret) { ++ hns3_err(hw, "Get register number failed, ret = %d.", ++ ret); ++ return -ENOTSUP; ++ } ++ len += regs_num_32_bit * sizeof(uint32_t) + ++ regs_num_64_bit * sizeof(uint64_t); ++ } ++ ++ *length = len; + return 0; + } + +@@ -346,6 +350,9 @@ hns3_get_regs(struct rte_eth_dev *eth_dev, struct rte_dev_reg_info *regs) + /* fetching per-PF registers values from PF PCIe register space */ + hns3_direct_access_regs(hw, data); + ++ if (hns->is_vf) ++ return 0; ++ + ret = hns3_get_regs_num(hw, ®s_num_32_bit, ®s_num_64_bit); + if (ret) { + hns3_err(hw, "Get register number failed, ret = %d", ret); +diff --git a/dpdk/drivers/net/hns3/hns3_regs.h b/dpdk/drivers/net/hns3/hns3_regs.h +index 2f5faafe18..64bd6931b3 100644 +--- a/dpdk/drivers/net/hns3/hns3_regs.h ++++ b/dpdk/drivers/net/hns3/hns3_regs.h +@@ -94,6 +94,16 @@ + #define HNS3_TQP_INTR_RL_REG 0x20900 + + #define HNS3_TQP_INTR_REG_SIZE 4 ++#define HNS3_TQP_INTR_GL_MAX 0x1FE0 ++#define HNS3_TQP_INTR_GL_DEFAULT 20 ++#define HNS3_TQP_INTR_RL_MAX 0xEC ++#define HNS3_TQP_INTR_RL_ENABLE_MASK 0x40 ++#define HNS3_TQP_INTR_RL_DEFAULT 0 ++ ++/* gl_usec convert to hardware count, as writing each 1 represents 2us */ ++#define HNS3_GL_USEC_TO_REG(gl_usec) ((gl_usec) >> 1) ++/* rl_usec convert to hardware count, as writing each 1 represents 4us */ ++#define HNS3_RL_USEC_TO_REG(rl_usec) ((rl_usec) >> 2) + + int hns3_get_regs(struct rte_eth_dev *eth_dev, struct rte_dev_reg_info *regs); + #endif /* _HNS3_REGS_H_ */ +diff --git a/dpdk/drivers/net/hns3/hns3_rss.c b/dpdk/drivers/net/hns3/hns3_rss.c +index b8c20e6d9d..f19b79928c 100644 +--- a/dpdk/drivers/net/hns3/hns3_rss.c ++++ b/dpdk/drivers/net/hns3/hns3_rss.c +@@ -127,7 +127,7 @@ hns3_set_rss_indir_table(struct hns3_hw *hw, uint8_t *indir, uint16_t size) + req->rss_set_bitmap = rte_cpu_to_le_16(HNS3_RSS_SET_BITMAP_MSK); + for (j = 0; j < HNS3_RSS_CFG_TBL_SIZE; j++) { + num = i * HNS3_RSS_CFG_TBL_SIZE + j; +- req->rss_result[j] = indir[num] % hw->alloc_rss_size; ++ req->rss_result[j] = indir[num]; + } + ret = hns3_cmd_send(hw, &desc, 1); + if (ret) { +@@ -257,6 +257,9 @@ hns3_dev_rss_hash_update(struct rte_eth_dev *dev, + uint8_t *key = rss_conf->rss_key; + int ret; + ++ if (hw->rss_dis_flag) ++ return -EINVAL; ++ + rte_spinlock_lock(&hw->lock); + ret = hns3_set_rss_tuple_by_rss_hf(hw, tuple, rss_hf); + if (ret) +@@ -319,8 +322,10 @@ hns3_dev_rss_hash_conf_get(struct rte_eth_dev *dev, + rss_conf->rss_hf = rss_cfg->conf.types; + + /* Get the RSS Key required by the user */ +- if (rss_conf->rss_key) ++ if (rss_conf->rss_key && rss_conf->rss_key_len >= HNS3_RSS_KEY_SIZE) { + memcpy(rss_conf->rss_key, rss_cfg->key, HNS3_RSS_KEY_SIZE); ++ rss_conf->rss_key_len = HNS3_RSS_KEY_SIZE; ++ } + rte_spinlock_unlock(&hw->lock); + + return 0; +@@ -417,7 +422,7 @@ hns3_dev_rss_reta_query(struct rte_eth_dev *dev, + shift = i 
% RTE_RETA_GROUP_SIZE; + if (reta_conf[idx].mask & (1ULL << shift)) + reta_conf[idx].reta[shift] = +- rss_cfg->rss_indirection_tbl[i] % hw->alloc_rss_size; ++ rss_cfg->rss_indirection_tbl[i]; + } + rte_spinlock_unlock(&hw->lock); + return 0; +@@ -524,7 +529,7 @@ hns3_config_rss(struct hns3_adapter *hns) + + enum rte_eth_rx_mq_mode mq_mode = hw->data->dev_conf.rxmode.mq_mode; + +- /* When there is no open RSS, redirect the packet queue 0 */ ++ /* When RSS is off, redirect the packet queue 0 */ + if (((uint32_t)mq_mode & ETH_MQ_RX_RSS_FLAG) == 0) + hns3_rss_uninit(hns); + +@@ -538,10 +543,16 @@ hns3_config_rss(struct hns3_adapter *hns) + if (ret) + return ret; + +- ret = hns3_set_rss_indir_table(hw, rss_cfg->rss_indirection_tbl, +- HNS3_RSS_IND_TBL_SIZE); +- if (ret) +- goto rss_tuple_uninit; ++ /* ++ * When RSS is off, it doesn't need to configure rss redirection table ++ * to hardware. ++ */ ++ if (((uint32_t)mq_mode & ETH_MQ_RX_RSS_FLAG)) { ++ ret = hns3_set_rss_indir_table(hw, rss_cfg->rss_indirection_tbl, ++ HNS3_RSS_IND_TBL_SIZE); ++ if (ret) ++ goto rss_tuple_uninit; ++ } + + ret = hns3_set_rss_tc_mode(hw); + if (ret) +@@ -550,9 +561,11 @@ hns3_config_rss(struct hns3_adapter *hns) + return ret; + + rss_indir_table_uninit: +- ret1 = hns3_rss_reset_indir_table(hw); +- if (ret1 != 0) +- return ret; ++ if (((uint32_t)mq_mode & ETH_MQ_RX_RSS_FLAG)) { ++ ret1 = hns3_rss_reset_indir_table(hw); ++ if (ret1 != 0) ++ return ret; ++ } + + rss_tuple_uninit: + hns3_rss_tuple_uninit(hw); +diff --git a/dpdk/drivers/net/hns3/hns3_rss.h b/dpdk/drivers/net/hns3/hns3_rss.h +index 7ffc151314..8f065af0aa 100644 +--- a/dpdk/drivers/net/hns3/hns3_rss.h ++++ b/dpdk/drivers/net/hns3/hns3_rss.h +@@ -121,4 +121,6 @@ int hns3_set_rss_tuple_by_rss_hf(struct hns3_hw *hw, + uint64_t rss_hf); + int hns3_set_rss_algo_key(struct hns3_hw *hw, uint8_t hash_algo, + const uint8_t *key); ++int hns3_restore_rss_filter(struct rte_eth_dev *dev); ++ + #endif /* _HNS3_RSS_H_ */ +diff --git a/dpdk/drivers/net/hns3/hns3_rxtx.c b/dpdk/drivers/net/hns3/hns3_rxtx.c +index 8166447131..34dc389d99 100644 +--- a/dpdk/drivers/net/hns3/hns3_rxtx.c ++++ b/dpdk/drivers/net/hns3/hns3_rxtx.c +@@ -30,13 +30,14 @@ + #include "hns3_logs.h" + + #define HNS3_CFG_DESC_NUM(num) ((num) / 8 - 1) +-#define DEFAULT_RX_FREE_THRESH 16 ++#define DEFAULT_RX_FREE_THRESH 32 + + static void + hns3_rx_queue_release_mbufs(struct hns3_rx_queue *rxq) + { + uint16_t i; + ++ /* Note: Fake rx queue will not enter here */ + if (rxq->sw_ring) { + for (i = 0; i < rxq->nb_rx_desc; i++) { + if (rxq->sw_ring[i].mbuf) { +@@ -52,6 +53,7 @@ hns3_tx_queue_release_mbufs(struct hns3_tx_queue *txq) + { + uint16_t i; + ++ /* Note: Fake rx queue will not enter here */ + if (txq->sw_ring) { + for (i = 0; i < txq->nb_tx_desc; i++) { + if (txq->sw_ring[i].mbuf) { +@@ -120,22 +122,115 @@ hns3_dev_tx_queue_release(void *queue) + rte_spinlock_unlock(&hns->hw.lock); + } + +-void +-hns3_free_all_queues(struct rte_eth_dev *dev) ++static void ++hns3_fake_rx_queue_release(struct hns3_rx_queue *queue) ++{ ++ struct hns3_rx_queue *rxq = queue; ++ struct hns3_adapter *hns; ++ struct hns3_hw *hw; ++ uint16_t idx; ++ ++ if (rxq == NULL) ++ return; ++ ++ hns = rxq->hns; ++ hw = &hns->hw; ++ idx = rxq->queue_id; ++ if (hw->fkq_data.rx_queues[idx]) { ++ hns3_rx_queue_release(hw->fkq_data.rx_queues[idx]); ++ hw->fkq_data.rx_queues[idx] = NULL; ++ } ++ ++ /* free fake rx queue arrays */ ++ if (idx == (hw->fkq_data.nb_fake_rx_queues - 1)) { ++ hw->fkq_data.nb_fake_rx_queues = 0; ++ 
rte_free(hw->fkq_data.rx_queues); ++ hw->fkq_data.rx_queues = NULL; ++ } ++} ++ ++static void ++hns3_fake_tx_queue_release(struct hns3_tx_queue *queue) ++{ ++ struct hns3_tx_queue *txq = queue; ++ struct hns3_adapter *hns; ++ struct hns3_hw *hw; ++ uint16_t idx; ++ ++ if (txq == NULL) ++ return; ++ ++ hns = txq->hns; ++ hw = &hns->hw; ++ idx = txq->queue_id; ++ if (hw->fkq_data.tx_queues[idx]) { ++ hns3_tx_queue_release(hw->fkq_data.tx_queues[idx]); ++ hw->fkq_data.tx_queues[idx] = NULL; ++ } ++ ++ /* free fake tx queue arrays */ ++ if (idx == (hw->fkq_data.nb_fake_tx_queues - 1)) { ++ hw->fkq_data.nb_fake_tx_queues = 0; ++ rte_free(hw->fkq_data.tx_queues); ++ hw->fkq_data.tx_queues = NULL; ++ } ++} ++ ++static void ++hns3_free_rx_queues(struct rte_eth_dev *dev) + { ++ struct hns3_adapter *hns = dev->data->dev_private; ++ struct hns3_fake_queue_data *fkq_data; ++ struct hns3_hw *hw = &hns->hw; ++ uint16_t nb_rx_q; + uint16_t i; + +- if (dev->data->rx_queues) +- for (i = 0; i < dev->data->nb_rx_queues; i++) { ++ nb_rx_q = hw->data->nb_rx_queues; ++ for (i = 0; i < nb_rx_q; i++) { ++ if (dev->data->rx_queues[i]) { + hns3_rx_queue_release(dev->data->rx_queues[i]); + dev->data->rx_queues[i] = NULL; + } ++ } + +- if (dev->data->tx_queues) +- for (i = 0; i < dev->data->nb_tx_queues; i++) { ++ /* Free fake Rx queues */ ++ fkq_data = &hw->fkq_data; ++ for (i = 0; i < fkq_data->nb_fake_rx_queues; i++) { ++ if (fkq_data->rx_queues[i]) ++ hns3_fake_rx_queue_release(fkq_data->rx_queues[i]); ++ } ++} ++ ++static void ++hns3_free_tx_queues(struct rte_eth_dev *dev) ++{ ++ struct hns3_adapter *hns = dev->data->dev_private; ++ struct hns3_fake_queue_data *fkq_data; ++ struct hns3_hw *hw = &hns->hw; ++ uint16_t nb_tx_q; ++ uint16_t i; ++ ++ nb_tx_q = hw->data->nb_tx_queues; ++ for (i = 0; i < nb_tx_q; i++) { ++ if (dev->data->tx_queues[i]) { + hns3_tx_queue_release(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; + } ++ } ++ ++ /* Free fake Tx queues */ ++ fkq_data = &hw->fkq_data; ++ for (i = 0; i < fkq_data->nb_fake_tx_queues; i++) { ++ if (fkq_data->tx_queues[i]) ++ hns3_fake_tx_queue_release(fkq_data->tx_queues[i]); ++ } ++} ++ ++void ++hns3_free_all_queues(struct rte_eth_dev *dev) ++{ ++ hns3_free_rx_queues(dev); ++ hns3_free_tx_queues(dev); + } + + static int +@@ -220,20 +315,29 @@ hns3_init_tx_queue_hw(struct hns3_tx_queue *txq) + HNS3_CFG_DESC_NUM(txq->nb_tx_desc)); + } + +-static void ++void + hns3_enable_all_queues(struct hns3_hw *hw, bool en) + { ++ uint16_t nb_rx_q = hw->data->nb_rx_queues; ++ uint16_t nb_tx_q = hw->data->nb_tx_queues; + struct hns3_rx_queue *rxq; + struct hns3_tx_queue *txq; + uint32_t rcb_reg; + int i; + +- for (i = 0; i < hw->data->nb_rx_queues; i++) { +- rxq = hw->data->rx_queues[i]; +- txq = hw->data->tx_queues[i]; ++ for (i = 0; i < hw->cfg_max_queues; i++) { ++ if (i < nb_rx_q) ++ rxq = hw->data->rx_queues[i]; ++ else ++ rxq = hw->fkq_data.rx_queues[i - nb_rx_q]; ++ if (i < nb_tx_q) ++ txq = hw->data->tx_queues[i]; ++ else ++ txq = hw->fkq_data.tx_queues[i - nb_tx_q]; + if (rxq == NULL || txq == NULL || + (en && (rxq->rx_deferred_start || txq->tx_deferred_start))) + continue; ++ + rcb_reg = hns3_read_dev(rxq, HNS3_RING_EN_REG); + if (en) + rcb_reg |= BIT(HNS3_RING_EN_B); +@@ -382,10 +486,9 @@ int + hns3_reset_all_queues(struct hns3_adapter *hns) + { + struct hns3_hw *hw = &hns->hw; +- int ret; +- uint16_t i; ++ int ret, i; + +- for (i = 0; i < hw->data->nb_rx_queues; i++) { ++ for (i = 0; i < hw->cfg_max_queues; i++) { + ret = hns3_reset_queue(hns, i); + if 
(ret) { + hns3_err(hw, "Failed to reset No.%d queue: %d", i, ret); +@@ -395,6 +498,99 @@ hns3_reset_all_queues(struct hns3_adapter *hns) + return 0; + } + ++void ++hns3_set_queue_intr_gl(struct hns3_hw *hw, uint16_t queue_id, ++ uint8_t gl_idx, uint16_t gl_value) ++{ ++ uint32_t offset[] = {HNS3_TQP_INTR_GL0_REG, ++ HNS3_TQP_INTR_GL1_REG, ++ HNS3_TQP_INTR_GL2_REG}; ++ uint32_t addr, value; ++ ++ if (gl_idx >= RTE_DIM(offset) || gl_value > HNS3_TQP_INTR_GL_MAX) ++ return; ++ ++ addr = offset[gl_idx] + queue_id * HNS3_TQP_INTR_REG_SIZE; ++ value = HNS3_GL_USEC_TO_REG(gl_value); ++ ++ hns3_write_dev(hw, addr, value); ++} ++ ++void ++hns3_set_queue_intr_rl(struct hns3_hw *hw, uint16_t queue_id, uint16_t rl_value) ++{ ++ uint32_t addr, value; ++ ++ if (rl_value > HNS3_TQP_INTR_RL_MAX) ++ return; ++ ++ addr = HNS3_TQP_INTR_RL_REG + queue_id * HNS3_TQP_INTR_REG_SIZE; ++ value = HNS3_RL_USEC_TO_REG(rl_value); ++ if (value > 0) ++ value |= HNS3_TQP_INTR_RL_ENABLE_MASK; ++ ++ hns3_write_dev(hw, addr, value); ++} ++ ++static void ++hns3_queue_intr_enable(struct hns3_hw *hw, uint16_t queue_id, bool en) ++{ ++ uint32_t addr, value; ++ ++ addr = HNS3_TQP_INTR_CTRL_REG + queue_id * HNS3_TQP_INTR_REG_SIZE; ++ value = en ? 1 : 0; ++ ++ hns3_write_dev(hw, addr, value); ++} ++ ++/* ++ * Enable all rx queue interrupt when in interrupt rx mode. ++ * This api was called before enable queue rx&tx (in normal start or reset ++ * recover scenes), used to fix hardware rx queue interrupt enable was clear ++ * when FLR. ++ */ ++void ++hns3_dev_all_rx_queue_intr_enable(struct hns3_hw *hw, bool en) ++{ ++ struct rte_eth_dev *dev = &rte_eth_devices[hw->data->port_id]; ++ uint16_t nb_rx_q = hw->data->nb_rx_queues; ++ int i; ++ ++ if (dev->data->dev_conf.intr_conf.rxq == 0) ++ return; ++ ++ for (i = 0; i < nb_rx_q; i++) ++ hns3_queue_intr_enable(hw, i, en); ++} ++ ++int ++hns3_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) ++{ ++ struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); ++ struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ ++ if (dev->data->dev_conf.intr_conf.rxq == 0) ++ return -ENOTSUP; ++ ++ hns3_queue_intr_enable(hw, queue_id, true); ++ ++ return rte_intr_ack(intr_handle); ++} ++ ++int ++hns3_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ ++ if (dev->data->dev_conf.intr_conf.rxq == 0) ++ return -ENOTSUP; ++ ++ hns3_queue_intr_enable(hw, queue_id, false); ++ ++ return 0; ++} ++ + static int + hns3_dev_rx_queue_start(struct hns3_adapter *hns, uint16_t idx) + { +@@ -404,32 +600,42 @@ hns3_dev_rx_queue_start(struct hns3_adapter *hns, uint16_t idx) + + PMD_INIT_FUNC_TRACE(); + +- rxq = hw->data->rx_queues[idx]; +- ++ rxq = (struct hns3_rx_queue *)hw->data->rx_queues[idx]; + ret = hns3_alloc_rx_queue_mbufs(hw, rxq); + if (ret) { + hns3_err(hw, "Failed to alloc mbuf for No.%d rx queue: %d", +- idx, ret); ++ idx, ret); + return ret; + } + + rxq->next_to_use = 0; + rxq->next_to_clean = 0; ++ rxq->nb_rx_hold = 0; + hns3_init_rx_queue_hw(rxq); + + return 0; + } + + static void +-hns3_dev_tx_queue_start(struct hns3_adapter *hns, uint16_t idx) ++hns3_fake_rx_queue_start(struct hns3_adapter *hns, uint16_t idx) + { + struct hns3_hw *hw = &hns->hw; +- struct hns3_tx_queue *txq; ++ struct hns3_rx_queue *rxq; ++ ++ rxq = (struct hns3_rx_queue *)hw->fkq_data.rx_queues[idx]; ++ rxq->next_to_use = 0; ++ 
rxq->next_to_clean = 0; ++ rxq->nb_rx_hold = 0; ++ hns3_init_rx_queue_hw(rxq); ++} ++ ++static void ++hns3_init_tx_queue(struct hns3_tx_queue *queue) ++{ ++ struct hns3_tx_queue *txq = queue; + struct hns3_desc *desc; + int i; + +- txq = hw->data->tx_queues[idx]; +- + /* Clear tx bd */ + desc = txq->tx_ring; + for (i = 0; i < txq->nb_tx_desc; i++) { +@@ -439,10 +645,30 @@ hns3_dev_tx_queue_start(struct hns3_adapter *hns, uint16_t idx) + + txq->next_to_use = 0; + txq->next_to_clean = 0; +- txq->tx_bd_ready = txq->nb_tx_desc; ++ txq->tx_bd_ready = txq->nb_tx_desc - 1; + hns3_init_tx_queue_hw(txq); + } + ++static void ++hns3_dev_tx_queue_start(struct hns3_adapter *hns, uint16_t idx) ++{ ++ struct hns3_hw *hw = &hns->hw; ++ struct hns3_tx_queue *txq; ++ ++ txq = (struct hns3_tx_queue *)hw->data->tx_queues[idx]; ++ hns3_init_tx_queue(txq); ++} ++ ++static void ++hns3_fake_tx_queue_start(struct hns3_adapter *hns, uint16_t idx) ++{ ++ struct hns3_hw *hw = &hns->hw; ++ struct hns3_tx_queue *txq; ++ ++ txq = (struct hns3_tx_queue *)hw->fkq_data.tx_queues[idx]; ++ hns3_init_tx_queue(txq); ++} ++ + static void + hns3_init_tx_ring_tc(struct hns3_adapter *hns) + { +@@ -459,7 +685,7 @@ hns3_init_tx_ring_tc(struct hns3_adapter *hns) + + for (j = 0; j < tc_queue->tqp_count; j++) { + num = tc_queue->tqp_offset + j; +- txq = hw->data->tx_queues[num]; ++ txq = (struct hns3_tx_queue *)hw->data->tx_queues[num]; + if (txq == NULL) + continue; + +@@ -468,16 +694,13 @@ hns3_init_tx_ring_tc(struct hns3_adapter *hns) + } + } + +-int +-hns3_start_queues(struct hns3_adapter *hns, bool reset_queue) ++static int ++hns3_start_rx_queues(struct hns3_adapter *hns) + { + struct hns3_hw *hw = &hns->hw; +- struct rte_eth_dev_data *dev_data = hw->data; + struct hns3_rx_queue *rxq; +- struct hns3_tx_queue *txq; ++ int i, j; + int ret; +- int i; +- int j; + + /* Initialize RSS for queues */ + ret = hns3_config_rss(hns); +@@ -486,49 +709,88 @@ hns3_start_queues(struct hns3_adapter *hns, bool reset_queue) + return ret; + } + +- if (reset_queue) { +- ret = hns3_reset_all_queues(hns); +- if (ret) { +- hns3_err(hw, "Failed to reset all queues %d", ret); +- return ret; +- } +- } +- +- /* +- * Hardware does not support where the number of rx and tx queues is +- * not equal in hip08. In .dev_configure callback function we will +- * check the two values, here we think that the number of rx and tx +- * queues is equal. 
+- */ + for (i = 0; i < hw->data->nb_rx_queues; i++) { +- rxq = dev_data->rx_queues[i]; +- txq = dev_data->tx_queues[i]; +- if (rxq == NULL || txq == NULL || rxq->rx_deferred_start || +- txq->tx_deferred_start) ++ rxq = (struct hns3_rx_queue *)hw->data->rx_queues[i]; ++ if (rxq == NULL || rxq->rx_deferred_start) + continue; +- + ret = hns3_dev_rx_queue_start(hns, i); + if (ret) { + hns3_err(hw, "Failed to start No.%d rx queue: %d", i, + ret); + goto out; + } +- hns3_dev_tx_queue_start(hns, i); + } +- hns3_init_tx_ring_tc(hns); + +- hns3_enable_all_queues(hw, true); ++ for (i = 0; i < hw->fkq_data.nb_fake_rx_queues; i++) { ++ rxq = (struct hns3_rx_queue *)hw->fkq_data.rx_queues[i]; ++ if (rxq == NULL || rxq->rx_deferred_start) ++ continue; ++ hns3_fake_rx_queue_start(hns, i); ++ } + return 0; + + out: + for (j = 0; j < i; j++) { +- rxq = dev_data->rx_queues[j]; ++ rxq = (struct hns3_rx_queue *)hw->data->rx_queues[j]; + hns3_rx_queue_release_mbufs(rxq); + } + + return ret; + } + ++static void ++hns3_start_tx_queues(struct hns3_adapter *hns) ++{ ++ struct hns3_hw *hw = &hns->hw; ++ struct hns3_tx_queue *txq; ++ int i; ++ ++ for (i = 0; i < hw->data->nb_tx_queues; i++) { ++ txq = (struct hns3_tx_queue *)hw->data->tx_queues[i]; ++ if (txq == NULL || txq->tx_deferred_start) ++ continue; ++ hns3_dev_tx_queue_start(hns, i); ++ } ++ ++ for (i = 0; i < hw->fkq_data.nb_fake_tx_queues; i++) { ++ txq = (struct hns3_tx_queue *)hw->fkq_data.tx_queues[i]; ++ if (txq == NULL || txq->tx_deferred_start) ++ continue; ++ hns3_fake_tx_queue_start(hns, i); ++ } ++ ++ hns3_init_tx_ring_tc(hns); ++} ++ ++/* ++ * Start all queues. ++ * Note: just init and setup queues, and don't enable queue rx&tx. ++ */ ++int ++hns3_start_queues(struct hns3_adapter *hns, bool reset_queue) ++{ ++ struct hns3_hw *hw = &hns->hw; ++ int ret; ++ ++ if (reset_queue) { ++ ret = hns3_reset_all_queues(hns); ++ if (ret) { ++ hns3_err(hw, "Failed to reset all queues %d", ret); ++ return ret; ++ } ++ } ++ ++ ret = hns3_start_rx_queues(hns); ++ if (ret) { ++ hns3_err(hw, "Failed to start rx queues: %d", ret); ++ return ret; ++ } ++ ++ hns3_start_tx_queues(hns); ++ ++ return 0; ++} ++ + int + hns3_stop_queues(struct hns3_adapter *hns, bool reset_queue) + { +@@ -546,6 +808,333 @@ hns3_stop_queues(struct hns3_adapter *hns, bool reset_queue) + return 0; + } + ++static void* ++hns3_alloc_rxq_and_dma_zone(struct rte_eth_dev *dev, ++ struct hns3_queue_info *q_info) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ const struct rte_memzone *rx_mz; ++ struct hns3_rx_queue *rxq; ++ unsigned int rx_desc; ++ ++ rxq = rte_zmalloc_socket(q_info->type, sizeof(struct hns3_rx_queue), ++ RTE_CACHE_LINE_SIZE, q_info->socket_id); ++ if (rxq == NULL) { ++ hns3_err(hw, "Failed to allocate memory for No.%d rx ring!", ++ q_info->idx); ++ return NULL; ++ } ++ ++ /* Allocate rx ring hardware descriptors. 
*/ ++ rxq->queue_id = q_info->idx; ++ rxq->nb_rx_desc = q_info->nb_desc; ++ rx_desc = rxq->nb_rx_desc * sizeof(struct hns3_desc); ++ rx_mz = rte_eth_dma_zone_reserve(dev, q_info->ring_name, q_info->idx, ++ rx_desc, HNS3_RING_BASE_ALIGN, ++ q_info->socket_id); ++ if (rx_mz == NULL) { ++ hns3_err(hw, "Failed to reserve DMA memory for No.%d rx ring!", ++ q_info->idx); ++ hns3_rx_queue_release(rxq); ++ return NULL; ++ } ++ rxq->mz = rx_mz; ++ rxq->rx_ring = (struct hns3_desc *)rx_mz->addr; ++ rxq->rx_ring_phys_addr = rx_mz->iova; ++ ++ hns3_dbg(hw, "No.%d rx descriptors iova 0x%" PRIx64, q_info->idx, ++ rxq->rx_ring_phys_addr); ++ ++ return rxq; ++} ++ ++static int ++hns3_fake_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, ++ uint16_t nb_desc, unsigned int socket_id) ++{ ++ struct hns3_adapter *hns = dev->data->dev_private; ++ struct hns3_hw *hw = &hns->hw; ++ struct hns3_queue_info q_info; ++ struct hns3_rx_queue *rxq; ++ uint16_t nb_rx_q; ++ ++ if (hw->fkq_data.rx_queues[idx]) { ++ hns3_rx_queue_release(hw->fkq_data.rx_queues[idx]); ++ hw->fkq_data.rx_queues[idx] = NULL; ++ } ++ ++ q_info.idx = idx; ++ q_info.socket_id = socket_id; ++ q_info.nb_desc = nb_desc; ++ q_info.type = "hns3 fake RX queue"; ++ q_info.ring_name = "rx_fake_ring"; ++ rxq = hns3_alloc_rxq_and_dma_zone(dev, &q_info); ++ if (rxq == NULL) { ++ hns3_err(hw, "Failed to setup No.%d fake rx ring.", idx); ++ return -ENOMEM; ++ } ++ ++ /* Don't need alloc sw_ring, because upper applications don't use it */ ++ rxq->sw_ring = NULL; ++ ++ rxq->hns = hns; ++ rxq->rx_deferred_start = false; ++ rxq->port_id = dev->data->port_id; ++ rxq->configured = true; ++ nb_rx_q = dev->data->nb_rx_queues; ++ rxq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET + ++ (nb_rx_q + idx) * HNS3_TQP_REG_SIZE); ++ rxq->rx_buf_len = hw->rx_buf_len; ++ ++ rte_spinlock_lock(&hw->lock); ++ hw->fkq_data.rx_queues[idx] = rxq; ++ rte_spinlock_unlock(&hw->lock); ++ ++ return 0; ++} ++ ++static void* ++hns3_alloc_txq_and_dma_zone(struct rte_eth_dev *dev, ++ struct hns3_queue_info *q_info) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ const struct rte_memzone *tx_mz; ++ struct hns3_tx_queue *txq; ++ struct hns3_desc *desc; ++ unsigned int tx_desc; ++ int i; ++ ++ txq = rte_zmalloc_socket(q_info->type, sizeof(struct hns3_tx_queue), ++ RTE_CACHE_LINE_SIZE, q_info->socket_id); ++ if (txq == NULL) { ++ hns3_err(hw, "Failed to allocate memory for No.%d tx ring!", ++ q_info->idx); ++ return NULL; ++ } ++ ++ /* Allocate tx ring hardware descriptors. 
*/ ++ txq->queue_id = q_info->idx; ++ txq->nb_tx_desc = q_info->nb_desc; ++ tx_desc = txq->nb_tx_desc * sizeof(struct hns3_desc); ++ tx_mz = rte_eth_dma_zone_reserve(dev, q_info->ring_name, q_info->idx, ++ tx_desc, HNS3_RING_BASE_ALIGN, ++ q_info->socket_id); ++ if (tx_mz == NULL) { ++ hns3_err(hw, "Failed to reserve DMA memory for No.%d tx ring!", ++ q_info->idx); ++ hns3_tx_queue_release(txq); ++ return NULL; ++ } ++ txq->mz = tx_mz; ++ txq->tx_ring = (struct hns3_desc *)tx_mz->addr; ++ txq->tx_ring_phys_addr = tx_mz->iova; ++ ++ hns3_dbg(hw, "No.%d tx descriptors iova 0x%" PRIx64, q_info->idx, ++ txq->tx_ring_phys_addr); ++ ++ /* Clear tx bd */ ++ desc = txq->tx_ring; ++ for (i = 0; i < txq->nb_tx_desc; i++) { ++ desc->tx.tp_fe_sc_vld_ra_ri = 0; ++ desc++; ++ } ++ ++ return txq; ++} ++ ++static int ++hns3_fake_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, ++ uint16_t nb_desc, unsigned int socket_id) ++{ ++ struct hns3_adapter *hns = dev->data->dev_private; ++ struct hns3_hw *hw = &hns->hw; ++ struct hns3_queue_info q_info; ++ struct hns3_tx_queue *txq; ++ uint16_t nb_tx_q; ++ ++ if (hw->fkq_data.tx_queues[idx] != NULL) { ++ hns3_tx_queue_release(hw->fkq_data.tx_queues[idx]); ++ hw->fkq_data.tx_queues[idx] = NULL; ++ } ++ ++ q_info.idx = idx; ++ q_info.socket_id = socket_id; ++ q_info.nb_desc = nb_desc; ++ q_info.type = "hns3 fake TX queue"; ++ q_info.ring_name = "tx_fake_ring"; ++ txq = hns3_alloc_txq_and_dma_zone(dev, &q_info); ++ if (txq == NULL) { ++ hns3_err(hw, "Failed to setup No.%d fake tx ring.", idx); ++ return -ENOMEM; ++ } ++ ++ /* Don't need alloc sw_ring, because upper applications don't use it */ ++ txq->sw_ring = NULL; ++ ++ txq->hns = hns; ++ txq->tx_deferred_start = false; ++ txq->port_id = dev->data->port_id; ++ txq->configured = true; ++ nb_tx_q = dev->data->nb_tx_queues; ++ txq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET + ++ (nb_tx_q + idx) * HNS3_TQP_REG_SIZE); ++ ++ rte_spinlock_lock(&hw->lock); ++ hw->fkq_data.tx_queues[idx] = txq; ++ rte_spinlock_unlock(&hw->lock); ++ ++ return 0; ++} ++ ++static int ++hns3_fake_rx_queue_config(struct hns3_hw *hw, uint16_t nb_queues) ++{ ++ uint16_t old_nb_queues = hw->fkq_data.nb_fake_rx_queues; ++ void **rxq; ++ uint8_t i; ++ ++ if (hw->fkq_data.rx_queues == NULL && nb_queues != 0) { ++ /* first time configuration */ ++ uint32_t size; ++ size = sizeof(hw->fkq_data.rx_queues[0]) * nb_queues; ++ hw->fkq_data.rx_queues = rte_zmalloc("fake_rx_queues", size, ++ RTE_CACHE_LINE_SIZE); ++ if (hw->fkq_data.rx_queues == NULL) { ++ hw->fkq_data.nb_fake_rx_queues = 0; ++ return -ENOMEM; ++ } ++ } else if (hw->fkq_data.rx_queues != NULL && nb_queues != 0) { ++ /* re-configure */ ++ rxq = hw->fkq_data.rx_queues; ++ for (i = nb_queues; i < old_nb_queues; i++) ++ hns3_dev_rx_queue_release(rxq[i]); ++ ++ rxq = rte_realloc(rxq, sizeof(rxq[0]) * nb_queues, ++ RTE_CACHE_LINE_SIZE); ++ if (rxq == NULL) ++ return -ENOMEM; ++ if (nb_queues > old_nb_queues) { ++ uint16_t new_qs = nb_queues - old_nb_queues; ++ memset(rxq + old_nb_queues, 0, sizeof(rxq[0]) * new_qs); ++ } ++ ++ hw->fkq_data.rx_queues = rxq; ++ } else if (hw->fkq_data.rx_queues != NULL && nb_queues == 0) { ++ rxq = hw->fkq_data.rx_queues; ++ for (i = nb_queues; i < old_nb_queues; i++) ++ hns3_dev_rx_queue_release(rxq[i]); ++ ++ rte_free(hw->fkq_data.rx_queues); ++ hw->fkq_data.rx_queues = NULL; ++ } ++ ++ hw->fkq_data.nb_fake_rx_queues = nb_queues; ++ ++ return 0; ++} ++ ++static int ++hns3_fake_tx_queue_config(struct hns3_hw *hw, uint16_t nb_queues) ++{ ++ 
uint16_t old_nb_queues = hw->fkq_data.nb_fake_tx_queues; ++ void **txq; ++ uint8_t i; ++ ++ if (hw->fkq_data.tx_queues == NULL && nb_queues != 0) { ++ /* first time configuration */ ++ uint32_t size; ++ size = sizeof(hw->fkq_data.tx_queues[0]) * nb_queues; ++ hw->fkq_data.tx_queues = rte_zmalloc("fake_tx_queues", size, ++ RTE_CACHE_LINE_SIZE); ++ if (hw->fkq_data.tx_queues == NULL) { ++ hw->fkq_data.nb_fake_tx_queues = 0; ++ return -ENOMEM; ++ } ++ } else if (hw->fkq_data.tx_queues != NULL && nb_queues != 0) { ++ /* re-configure */ ++ txq = hw->fkq_data.tx_queues; ++ for (i = nb_queues; i < old_nb_queues; i++) ++ hns3_dev_tx_queue_release(txq[i]); ++ txq = rte_realloc(txq, sizeof(txq[0]) * nb_queues, ++ RTE_CACHE_LINE_SIZE); ++ if (txq == NULL) ++ return -ENOMEM; ++ if (nb_queues > old_nb_queues) { ++ uint16_t new_qs = nb_queues - old_nb_queues; ++ memset(txq + old_nb_queues, 0, sizeof(txq[0]) * new_qs); ++ } ++ ++ hw->fkq_data.tx_queues = txq; ++ } else if (hw->fkq_data.tx_queues != NULL && nb_queues == 0) { ++ txq = hw->fkq_data.tx_queues; ++ for (i = nb_queues; i < old_nb_queues; i++) ++ hns3_dev_tx_queue_release(txq[i]); ++ ++ rte_free(hw->fkq_data.tx_queues); ++ hw->fkq_data.tx_queues = NULL; ++ } ++ hw->fkq_data.nb_fake_tx_queues = nb_queues; ++ ++ return 0; ++} ++ ++int ++hns3_set_fake_rx_or_tx_queues(struct rte_eth_dev *dev, uint16_t nb_rx_q, ++ uint16_t nb_tx_q) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ uint16_t rx_need_add_nb_q; ++ uint16_t tx_need_add_nb_q; ++ uint16_t port_id; ++ uint16_t q; ++ int ret; ++ ++ /* Setup new number of fake RX/TX queues and reconfigure device. */ ++ hw->cfg_max_queues = RTE_MAX(nb_rx_q, nb_tx_q); ++ rx_need_add_nb_q = hw->cfg_max_queues - nb_rx_q; ++ tx_need_add_nb_q = hw->cfg_max_queues - nb_tx_q; ++ ret = hns3_fake_rx_queue_config(hw, rx_need_add_nb_q); ++ if (ret) { ++ hns3_err(hw, "Fail to configure fake rx queues: %d", ret); ++ goto cfg_fake_rx_q_fail; ++ } ++ ++ ret = hns3_fake_tx_queue_config(hw, tx_need_add_nb_q); ++ if (ret) { ++ hns3_err(hw, "Fail to configure fake rx queues: %d", ret); ++ goto cfg_fake_tx_q_fail; ++ } ++ ++ /* Allocate and set up fake RX queue per Ethernet port. */ ++ port_id = hw->data->port_id; ++ for (q = 0; q < rx_need_add_nb_q; q++) { ++ ret = hns3_fake_rx_queue_setup(dev, q, HNS3_MIN_RING_DESC, ++ rte_eth_dev_socket_id(port_id)); ++ if (ret) ++ goto setup_fake_rx_q_fail; ++ } ++ ++ /* Allocate and set up fake TX queue per Ethernet port. 
*/ ++ for (q = 0; q < tx_need_add_nb_q; q++) { ++ ret = hns3_fake_tx_queue_setup(dev, q, HNS3_MIN_RING_DESC, ++ rte_eth_dev_socket_id(port_id)); ++ if (ret) ++ goto setup_fake_tx_q_fail; ++ } ++ ++ return 0; ++ ++setup_fake_tx_q_fail: ++setup_fake_rx_q_fail: ++ (void)hns3_fake_tx_queue_config(hw, 0); ++cfg_fake_tx_q_fail: ++ (void)hns3_fake_rx_queue_config(hw, 0); ++cfg_fake_rx_q_fail: ++ hw->cfg_max_queues = 0; ++ ++ return ret; ++} ++ + void + hns3_dev_release_mbufs(struct hns3_adapter *hns) + { +@@ -577,11 +1166,9 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, + struct rte_mempool *mp) + { + struct hns3_adapter *hns = dev->data->dev_private; +- const struct rte_memzone *rx_mz; + struct hns3_hw *hw = &hns->hw; ++ struct hns3_queue_info q_info; + struct hns3_rx_queue *rxq; +- unsigned int desc_size = sizeof(struct hns3_desc); +- unsigned int rx_desc; + int rx_entry_len; + + if (dev->data->dev_started) { +@@ -601,17 +1188,20 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, + dev->data->rx_queues[idx] = NULL; + } + +- rxq = rte_zmalloc_socket("hns3 RX queue", sizeof(struct hns3_rx_queue), +- RTE_CACHE_LINE_SIZE, socket_id); ++ q_info.idx = idx; ++ q_info.socket_id = socket_id; ++ q_info.nb_desc = nb_desc; ++ q_info.type = "hns3 RX queue"; ++ q_info.ring_name = "rx_ring"; ++ rxq = hns3_alloc_rxq_and_dma_zone(dev, &q_info); + if (rxq == NULL) { +- hns3_err(hw, "Failed to allocate memory for rx queue!"); ++ hns3_err(hw, ++ "Failed to alloc mem and reserve DMA mem for rx ring!"); + return -ENOMEM; + } + + rxq->hns = hns; + rxq->mb_pool = mp; +- rxq->nb_rx_desc = nb_desc; +- rxq->queue_id = idx; + if (conf->rx_free_thresh <= 0) + rxq->rx_free_thresh = DEFAULT_RX_FREE_THRESH; + else +@@ -627,23 +1217,6 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, + return -ENOMEM; + } + +- /* Allocate rx ring hardware descriptors. 
*/ +- rx_desc = rxq->nb_rx_desc * desc_size; +- rx_mz = rte_eth_dma_zone_reserve(dev, "rx_ring", idx, rx_desc, +- HNS3_RING_BASE_ALIGN, socket_id); +- if (rx_mz == NULL) { +- hns3_err(hw, "Failed to reserve DMA memory for No.%d rx ring!", +- idx); +- hns3_rx_queue_release(rxq); +- return -ENOMEM; +- } +- rxq->mz = rx_mz; +- rxq->rx_ring = (struct hns3_desc *)rx_mz->addr; +- rxq->rx_ring_phys_addr = rx_mz->iova; +- +- hns3_dbg(hw, "No.%d rx descriptors iova 0x%" PRIx64, idx, +- rxq->rx_ring_phys_addr); +- + rxq->next_to_use = 0; + rxq->next_to_clean = 0; + rxq->nb_rx_hold = 0; +@@ -654,7 +1227,6 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, + rxq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET + + idx * HNS3_TQP_REG_SIZE); + rxq->rx_buf_len = hw->rx_buf_len; +- rxq->non_vld_descs = 0; + rxq->l2_errors = 0; + rxq->pkt_len_errors = 0; + rxq->l3_csum_erros = 0; +@@ -881,13 +1453,14 @@ hns3_rx_set_cksum_flag(struct rte_mbuf *rxm, uint64_t packet_type, + uint16_t + hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + { ++ volatile struct hns3_desc *rx_ring; /* RX ring (desc) */ ++ volatile struct hns3_desc *rxdp; /* pointer of the current desc */ + struct hns3_rx_queue *rxq; /* RX queue */ +- struct hns3_desc *rx_ring; /* RX ring (desc) */ + struct hns3_entry *sw_ring; + struct hns3_entry *rxe; +- struct hns3_desc *rxdp; /* pointer of the current desc */ + struct rte_mbuf *first_seg; + struct rte_mbuf *last_seg; ++ struct hns3_desc rxd; + struct rte_mbuf *nmb; /* pointer of the new mbuf */ + struct rte_mbuf *rxm; + struct rte_eth_dev *dev; +@@ -901,7 +1474,6 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + uint16_t pkt_len; + uint16_t nb_rx; + uint16_t rx_id; +- int num; /* num of desc in ring */ + int ret; + + nb_rx = 0; +@@ -915,15 +1487,72 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + last_seg = rxq->pkt_last_seg; + sw_ring = rxq->sw_ring; + +- /* Get num of packets in descriptor ring */ +- num = hns3_read_dev(rxq, HNS3_RING_RX_FBDNUM_REG); +- while (nb_rx_bd < num && nb_rx < nb_pkts) { ++ while (nb_rx < nb_pkts) { + rxdp = &rx_ring[rx_id]; + bd_base_info = rte_le_to_cpu_32(rxdp->rx.bd_base_info); +- if (unlikely(!hns3_get_bit(bd_base_info, HNS3_RXD_VLD_B))) { +- rxq->non_vld_descs++; ++ if (unlikely(!hns3_get_bit(bd_base_info, HNS3_RXD_VLD_B))) + break; +- } ++ /* ++ * The interactive process between software and hardware of ++ * receiving a new packet in hns3 network engine: ++ * 1. Hardware network engine firstly writes the packet content ++ * to the memory pointed by the 'addr' field of the Rx Buffer ++ * Descriptor, secondly fills the result of parsing the ++ * packet include the valid field into the Rx Buffer ++ * Descriptor in one write operation. ++ * 2. Driver reads the Rx BD's valid field in the loop to check ++ * whether it's valid, if valid then assign a new address to ++ * the addr field, clear the valid field, get the other ++ * information of the packet by parsing Rx BD's other fields, ++ * finally write back the number of Rx BDs processed by the ++ * driver to the HNS3_RING_RX_HEAD_REG register to inform ++ * hardware. ++ * In the above process, the ordering is very important. We must ++ * make sure that CPU read Rx BD's other fields only after the ++ * Rx BD is valid. ++ * ++ * There are two type of re-ordering: compiler re-ordering and ++ * CPU re-ordering under the ARMv8 architecture. ++ * 1. 
we use volatile to deal with compiler re-ordering, so you ++ * can see that rx_ring/rxdp defined with volatile. ++ * 2. we commonly use memory barrier to deal with CPU ++ * re-ordering, but the cost is high. ++ * ++ * In order to solve the high cost of using memory barrier, we ++ * use the data dependency order under the ARMv8 architecture, ++ * for example: ++ * instr01: load A ++ * instr02: load B <- A ++ * the instr02 will always execute after instr01. ++ * ++ * To construct the data dependency ordering, we use the ++ * following assignment: ++ * rxd = rxdp[(bd_base_info & (1u << HNS3_RXD_VLD_B)) - ++ * (1u<<HNS3_RXD_VLD_B)] ++ * Using gcc compiler under the ARMv8 architecture, the related ++ * assembly code example as follows: ++ * note: (1u << HNS3_RXD_VLD_B) equal 0x10 ++ * instr01: ldr w26, [x22, #28] --read bd_base_info ++ * instr02: and w0, w26, #0x10 --calc bd_base_info & 0x10 ++ * instr03: sub w0, w0, #0x10 --calc (bd_base_info & ++ * 0x10) - 0x10 ++ * instr04: add x0, x22, x0, lsl #5 --calc copy source addr ++ * instr05: ldp x2, x3, [x0] ++ * instr06: stp x2, x3, [x29, #256] --copy BD's [0 ~ 15]B ++ * instr07: ldp x4, x5, [x0, #16] ++ * instr08: stp x4, x5, [x29, #272] --copy BD's [16 ~ 31]B ++ * the instr05~08 depend on x0's value, x0 depent on w26's ++ * value, the w26 is the bd_base_info, this form the data ++ * dependency ordering. ++ * note: if BD is valid, (bd_base_info & (1u<<HNS3_RXD_VLD_B)) - ++ * (1u<<HNS3_RXD_VLD_B) will always zero, so the ++ * assignment is correct. ++ * ++ * So we use the data dependency ordering instead of memory ++ * barrier to improve receive performance. ++ */ ++ rxd = rxdp[(bd_base_info & (1u << HNS3_RXD_VLD_B)) - ++ (1u << HNS3_RXD_VLD_B)]; + + nmb = rte_mbuf_raw_alloc(rxq->mb_pool); + if (unlikely(nmb == NULL)) { +@@ -934,7 +1563,7 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + nb_rx_bd++; + rxe = &sw_ring[rx_id]; + rx_id++; +- if (rx_id == rxq->nb_rx_desc) ++ if (unlikely(rx_id == rxq->nb_rx_desc)) + rx_id = 0; + + rte_prefetch0(sw_ring[rx_id].mbuf); +@@ -947,14 +1576,13 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + rxe->mbuf = nmb; + + dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); +- rxdp->addr = dma_addr; + rxdp->rx.bd_base_info = 0; ++ rxdp->addr = dma_addr; + +- rte_cio_rmb(); + /* Load remained descriptor data and extract necessary fields */ +- data_len = (uint16_t)(rte_le_to_cpu_16(rxdp->rx.size)); +- l234_info = rte_le_to_cpu_32(rxdp->rx.l234_info); +- ol_info = rte_le_to_cpu_32(rxdp->rx.ol_info); ++ data_len = (uint16_t)(rte_le_to_cpu_16(rxd.rx.size)); ++ l234_info = rte_le_to_cpu_32(rxd.rx.l234_info); ++ ol_info = rte_le_to_cpu_32(rxd.rx.ol_info); + + if (first_seg == NULL) { + first_seg = rxm; +@@ -973,14 +1601,14 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + } + + /* The last buffer of the received packet */ +- pkt_len = (uint16_t)(rte_le_to_cpu_16(rxdp->rx.pkt_len)); ++ pkt_len = (uint16_t)(rte_le_to_cpu_16(rxd.rx.pkt_len)); + first_seg->pkt_len = pkt_len; + first_seg->port = rxq->port_id; +- first_seg->hash.rss = rte_le_to_cpu_32(rxdp->rx.rss_hash); +- first_seg->ol_flags |= PKT_RX_RSS_HASH; ++ first_seg->hash.rss = rte_le_to_cpu_32(rxd.rx.rss_hash); ++ first_seg->ol_flags = PKT_RX_RSS_HASH; + if (unlikely(hns3_get_bit(bd_base_info, HNS3_RXD_LUM_B))) { + first_seg->hash.fdir.hi = +- rte_le_to_cpu_32(rxdp->rx.fd_id); ++ rte_le_to_cpu_32(rxd.rx.fd_id); + first_seg->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; + } 
+ rxm->next = NULL; +@@ -994,12 +1622,13 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + ol_info); + + if (bd_base_info & BIT(HNS3_RXD_L3L4P_B)) +- hns3_rx_set_cksum_flag(rxm, first_seg->packet_type, ++ hns3_rx_set_cksum_flag(first_seg, ++ first_seg->packet_type, + cksum_err); + +- first_seg->vlan_tci = rte_le_to_cpu_16(rxdp->rx.vlan_tag); ++ first_seg->vlan_tci = rte_le_to_cpu_16(rxd.rx.vlan_tag); + first_seg->vlan_tci_outer = +- rte_le_to_cpu_16(rxdp->rx.ot_vlan_tag); ++ rte_le_to_cpu_16(rxd.rx.ot_vlan_tag); + rx_pkts[nb_rx++] = first_seg; + first_seg = NULL; + continue; +@@ -1011,7 +1640,13 @@ hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + rxq->next_to_clean = rx_id; + rxq->pkt_first_seg = first_seg; + rxq->pkt_last_seg = last_seg; +- hns3_clean_rx_buffers(rxq, nb_rx_bd); ++ ++ nb_rx_bd = nb_rx_bd + rxq->nb_rx_hold; ++ if (nb_rx_bd > rxq->rx_free_thresh) { ++ hns3_clean_rx_buffers(rxq, nb_rx_bd); ++ nb_rx_bd = 0; ++ } ++ rxq->nb_rx_hold = nb_rx_bd; + + return nb_rx; + } +@@ -1021,14 +1656,10 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, + unsigned int socket_id, const struct rte_eth_txconf *conf) + { + struct hns3_adapter *hns = dev->data->dev_private; +- const struct rte_memzone *tx_mz; + struct hns3_hw *hw = &hns->hw; ++ struct hns3_queue_info q_info; + struct hns3_tx_queue *txq; +- struct hns3_desc *desc; +- unsigned int desc_size = sizeof(struct hns3_desc); +- unsigned int tx_desc; + int tx_entry_len; +- int i; + + if (dev->data->dev_started) { + hns3_err(hw, "tx_queue_setup after dev_start no supported"); +@@ -1047,17 +1678,19 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, + dev->data->tx_queues[idx] = NULL; + } + +- txq = rte_zmalloc_socket("hns3 TX queue", sizeof(struct hns3_tx_queue), +- RTE_CACHE_LINE_SIZE, socket_id); ++ q_info.idx = idx; ++ q_info.socket_id = socket_id; ++ q_info.nb_desc = nb_desc; ++ q_info.type = "hns3 TX queue"; ++ q_info.ring_name = "tx_ring"; ++ txq = hns3_alloc_txq_and_dma_zone(dev, &q_info); + if (txq == NULL) { +- hns3_err(hw, "Failed to allocate memory for tx queue!"); ++ hns3_err(hw, ++ "Failed to alloc mem and reserve DMA mem for tx ring!"); + return -ENOMEM; + } + +- txq->nb_tx_desc = nb_desc; +- txq->queue_id = idx; + txq->tx_deferred_start = conf->tx_deferred_start; +- + tx_entry_len = sizeof(struct hns3_entry) * txq->nb_tx_desc; + txq->sw_ring = rte_zmalloc_socket("hns3 TX sw ring", tx_entry_len, + RTE_CACHE_LINE_SIZE, socket_id); +@@ -1067,34 +1700,10 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, + return -ENOMEM; + } + +- /* Allocate tx ring hardware descriptors. 
*/ +- tx_desc = txq->nb_tx_desc * desc_size; +- tx_mz = rte_eth_dma_zone_reserve(dev, "tx_ring", idx, tx_desc, +- HNS3_RING_BASE_ALIGN, socket_id); +- if (tx_mz == NULL) { +- hns3_err(hw, "Failed to reserve DMA memory for No.%d tx ring!", +- idx); +- hns3_tx_queue_release(txq); +- return -ENOMEM; +- } +- txq->mz = tx_mz; +- txq->tx_ring = (struct hns3_desc *)tx_mz->addr; +- txq->tx_ring_phys_addr = tx_mz->iova; +- +- hns3_dbg(hw, "No.%d tx descriptors iova 0x%" PRIx64, idx, +- txq->tx_ring_phys_addr); +- +- /* Clear tx bd */ +- desc = txq->tx_ring; +- for (i = 0; i < txq->nb_tx_desc; i++) { +- desc->tx.tp_fe_sc_vld_ra_ri = 0; +- desc++; +- } +- + txq->hns = hns; + txq->next_to_use = 0; + txq->next_to_clean = 0; +- txq->tx_bd_ready = txq->nb_tx_desc; ++ txq->tx_bd_ready = txq->nb_tx_desc - 1; + txq->port_id = dev->data->port_id; + txq->configured = true; + txq->io_base = (void *)((char *)hw->io_base + HNS3_TQP_REG_OFFSET + +@@ -1106,19 +1715,6 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, + return 0; + } + +-static inline int +-tx_ring_dist(struct hns3_tx_queue *txq, int begin, int end) +-{ +- return (end - begin + txq->nb_tx_desc) % txq->nb_tx_desc; +-} +- +-static inline int +-tx_ring_space(struct hns3_tx_queue *txq) +-{ +- return txq->nb_tx_desc - +- tx_ring_dist(txq, txq->next_to_clean, txq->next_to_use) - 1; +-} +- + static inline void + hns3_queue_xmit(struct hns3_tx_queue *txq, uint32_t buf_num) + { +@@ -1137,11 +1733,10 @@ hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq) + struct rte_mbuf *mbuf; + + while ((!hns3_get_bit(desc->tx.tp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B)) && +- (tx_next_use != tx_next_clean || tx_bd_ready < tx_bd_max)) { ++ tx_next_use != tx_next_clean) { + mbuf = tx_bak_pkt->mbuf; + if (mbuf) { +- mbuf->next = NULL; +- rte_pktmbuf_free(mbuf); ++ rte_pktmbuf_free_seg(mbuf); + tx_bak_pkt->mbuf = NULL; + } + +@@ -1559,11 +2154,8 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + struct rte_mbuf *new_pkt; + struct rte_mbuf *tx_pkt; + struct rte_mbuf *m_seg; +- struct rte_mbuf *temp; + uint32_t nb_hold = 0; +- uint16_t tx_next_clean; + uint16_t tx_next_use; +- uint16_t tx_bd_ready; + uint16_t tx_pkt_num; + uint16_t tx_bd_max; + uint16_t nb_buf; +@@ -1572,16 +2164,10 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + + /* free useless buffer */ + hns3_tx_free_useless_buffer(txq); +- tx_bd_ready = txq->tx_bd_ready; +- if (tx_bd_ready == 0) +- return 0; + +- tx_next_clean = txq->next_to_clean; + tx_next_use = txq->next_to_use; + tx_bd_max = txq->nb_tx_desc; +- tx_bak_pkt = &txq->sw_ring[tx_next_clean]; +- +- tx_pkt_num = (tx_bd_ready < nb_pkts) ? 
tx_bd_ready : nb_pkts; ++ tx_pkt_num = nb_pkts; + + /* send packets */ + tx_bak_pkt = &txq->sw_ring[tx_next_use]; +@@ -1590,7 +2176,7 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + + nb_buf = tx_pkt->nb_segs; + +- if (nb_buf > tx_ring_space(txq)) { ++ if (nb_buf > txq->tx_bd_ready) { + if (nb_tx == 0) + return 0; + +@@ -1635,9 +2221,8 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + i = 0; + do { + fill_desc(txq, tx_next_use, m_seg, (i == 0), 0); +- temp = m_seg->next; + tx_bak_pkt->mbuf = m_seg; +- m_seg = temp; ++ m_seg = m_seg->next; + tx_next_use++; + tx_bak_pkt++; + if (tx_next_use >= tx_bd_max) { +@@ -1650,15 +2235,13 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + + nb_hold += i; + txq->next_to_use = tx_next_use; ++ txq->tx_bd_ready -= i; + } + + end_of_tx: + +- if (likely(nb_tx)) { ++ if (likely(nb_tx)) + hns3_queue_xmit(txq, nb_hold); +- txq->next_to_clean = tx_next_clean; +- txq->tx_bd_ready = tx_bd_ready - nb_hold; +- } + + return nb_tx; + } +diff --git a/dpdk/drivers/net/hns3/hns3_rxtx.h b/dpdk/drivers/net/hns3/hns3_rxtx.h +index daf51f4095..771f3c9be4 100644 +--- a/dpdk/drivers/net/hns3/hns3_rxtx.h ++++ b/dpdk/drivers/net/hns3/hns3_rxtx.h +@@ -5,7 +5,7 @@ + #ifndef _HNS3_RXTX_H_ + #define _HNS3_RXTX_H_ + +-#define HNS3_MIN_RING_DESC 32 ++#define HNS3_MIN_RING_DESC 64 + #define HNS3_MAX_RING_DESC 32768 + #define HNS3_DEFAULT_RING_DESC 1024 + #define HNS3_ALIGN_RING_DESC 32 +@@ -245,7 +245,6 @@ struct hns3_rx_queue { + bool rx_deferred_start; /* don't start this queue in dev start */ + bool configured; /* indicate if rx queue has been configured */ + +- uint64_t non_vld_descs; /* num of non valid rx descriptors */ + uint64_t l2_errors; + uint64_t pkt_len_errors; + uint64_t l3_csum_erros; +@@ -273,6 +272,14 @@ struct hns3_tx_queue { + bool configured; /* indicate if tx queue has been configured */ + }; + ++struct hns3_queue_info { ++ const char *type; /* point to queue memory name */ ++ const char *ring_name; /* point to hardware ring name */ ++ uint16_t idx; ++ uint16_t nb_desc; ++ unsigned int socket_id; ++}; ++ + #define HNS3_TX_CKSUM_OFFLOAD_MASK ( \ + PKT_TX_OUTER_IPV6 | \ + PKT_TX_OUTER_IPV4 | \ +@@ -295,6 +302,10 @@ void hns3_dev_rx_queue_release(void *queue); + void hns3_dev_tx_queue_release(void *queue); + void hns3_free_all_queues(struct rte_eth_dev *dev); + int hns3_reset_all_queues(struct hns3_adapter *hns); ++void hns3_dev_all_rx_queue_intr_enable(struct hns3_hw *hw, bool en); ++int hns3_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id); ++int hns3_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id); ++void hns3_enable_all_queues(struct hns3_hw *hw, bool en); + int hns3_start_queues(struct hns3_adapter *hns, bool reset_queue); + int hns3_stop_queues(struct hns3_adapter *hns, bool reset_queue); + void hns3_dev_release_mbufs(struct hns3_adapter *hns); +@@ -311,4 +322,11 @@ uint16_t hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + const uint32_t *hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev); + void hns3_set_rxtx_function(struct rte_eth_dev *eth_dev); ++void hns3_set_queue_intr_gl(struct hns3_hw *hw, uint16_t queue_id, ++ uint8_t gl_idx, uint16_t gl_value); ++void hns3_set_queue_intr_rl(struct hns3_hw *hw, uint16_t queue_id, ++ uint16_t rl_value); ++int hns3_set_fake_rx_or_tx_queues(struct rte_eth_dev *dev, uint16_t nb_rx_q, ++ uint16_t nb_tx_q); ++ + #endif /* _HNS3_RXTX_H_ */ +diff --git 
a/dpdk/drivers/net/hns3/hns3_stats.c b/dpdk/drivers/net/hns3/hns3_stats.c +index 9948beb179..10cc7570c2 100644 +--- a/dpdk/drivers/net/hns3/hns3_stats.c ++++ b/dpdk/drivers/net/hns3/hns3_stats.c +@@ -219,8 +219,6 @@ static const struct hns3_xstats_name_offset hns3_reset_stats_strings[] = { + + /* The statistic of errors in Rx BD */ + static const struct hns3_xstats_name_offset hns3_rx_bd_error_strings[] = { +- {"NONE_VALIDATED_DESCRIPTORS", +- HNS3_RX_BD_ERROR_STATS_FIELD_OFFSET(non_vld_descs)}, + {"RX_PKT_LEN_ERRORS", + HNS3_RX_BD_ERROR_STATS_FIELD_OFFSET(pkt_len_errors)}, + {"L2_RX_ERRORS", +@@ -492,6 +490,7 @@ hns3_stats_reset(struct rte_eth_dev *eth_dev) + if (ret) { + hns3_err(hw, "Failed to reset RX No.%d queue stat: %d", + i, ret); ++ return ret; + } + + hns3_cmd_setup_basic_desc(&desc_reset, HNS3_OPC_QUERY_TX_STATUS, +@@ -502,6 +501,7 @@ hns3_stats_reset(struct rte_eth_dev *eth_dev) + if (ret) { + hns3_err(hw, "Failed to reset TX No.%d queue stat: %d", + i, ret); ++ return ret; + } + } + +@@ -510,7 +510,6 @@ hns3_stats_reset(struct rte_eth_dev *eth_dev) + rxq = eth_dev->data->rx_queues[i]; + if (rxq) { + rxq->pkt_len_errors = 0; +- rxq->non_vld_descs = 0; + rxq->l2_errors = 0; + rxq->l3_csum_erros = 0; + rxq->l4_csum_erros = 0; +@@ -524,7 +523,7 @@ hns3_stats_reset(struct rte_eth_dev *eth_dev) + return 0; + } + +-static void ++static int + hns3_mac_stats_reset(__rte_unused struct rte_eth_dev *dev) + { + struct hns3_adapter *hns = dev->data->dev_private; +@@ -533,10 +532,14 @@ hns3_mac_stats_reset(__rte_unused struct rte_eth_dev *dev) + int ret; + + ret = hns3_query_update_mac_stats(dev); +- if (ret) ++ if (ret) { + hns3_err(hw, "Clear Mac stats fail : %d", ret); ++ return ret; ++ } + + memset(mac_stats, 0, sizeof(struct hns3_mac_stats)); ++ ++ return 0; + } + + /* This function calculates the number of xstats based on the current config */ +@@ -911,9 +914,13 @@ hns3_dev_xstats_reset(struct rte_eth_dev *dev) + { + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_pf *pf = &hns->pf; ++ int ret; + + /* Clear tqp stats */ +- (void)hns3_stats_reset(dev); ++ ret = hns3_stats_reset(dev); ++ if (ret) ++ return ret; ++ + /* Clear reset stats */ + memset(&hns->hw.reset.stats, 0, sizeof(struct hns3_reset_stats)); + +@@ -921,7 +928,10 @@ hns3_dev_xstats_reset(struct rte_eth_dev *dev) + return 0; + + /* HW registers are cleared on read */ +- hns3_mac_stats_reset(dev); ++ ret = hns3_mac_stats_reset(dev); ++ if (ret) ++ return ret; ++ + /* Clear error stats */ + memset(&pf->abn_int_stats, 0, sizeof(struct hns3_err_msix_intr_stats)); + +diff --git a/dpdk/drivers/net/i40e/base/README b/dpdk/drivers/net/i40e/base/README +index 8a5339cfff..b46593566b 100644 +--- a/dpdk/drivers/net/i40e/base/README ++++ b/dpdk/drivers/net/i40e/base/README +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2017 Intel Corporation ++ * Copyright(c) 2017-2020 Intel Corporation + */ + + Intel® I40E driver +diff --git a/dpdk/drivers/net/i40e/base/i40e_adminq.c b/dpdk/drivers/net/i40e/base/i40e_adminq.c +index 38214a3731..584da0383c 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_adminq.c ++++ b/dpdk/drivers/net/i40e/base/i40e_adminq.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_status.h" +@@ -835,7 +835,7 @@ enum i40e_status_code i40e_asq_send_command(struct i40e_hw *hw, + if (val >= hw->aq.num_asq_entries) { + i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, + "AQTX: head overrun at 
%d\n", val); +- status = I40E_ERR_QUEUE_EMPTY; ++ status = I40E_ERR_ADMIN_QUEUE_FULL; + goto asq_send_command_error; + } + +diff --git a/dpdk/drivers/net/i40e/base/i40e_adminq.h b/dpdk/drivers/net/i40e/base/i40e_adminq.h +index 769d84809e..6ce262ad4b 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_adminq.h ++++ b/dpdk/drivers/net/i40e/base/i40e_adminq.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_ADMINQ_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h b/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h +index b459be9212..cd7f24cf14 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h ++++ b/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_ADMINQ_CMD_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_alloc.h b/dpdk/drivers/net/i40e/base/i40e_alloc.h +index 4fc1860155..ae14e4d932 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_alloc.h ++++ b/dpdk/drivers/net/i40e/base/i40e_alloc.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_ALLOC_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_common.c b/dpdk/drivers/net/i40e/base/i40e_common.c +index 37911a99e5..2172ea12fc 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_common.c ++++ b/dpdk/drivers/net/i40e/base/i40e_common.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_type.h" +@@ -1700,19 +1700,22 @@ enum i40e_status_code i40e_aq_get_phy_capabilities(struct i40e_hw *hw, + status = i40e_asq_send_command(hw, &desc, abilities, + abilities_size, cmd_details); + +- if (status != I40E_SUCCESS) +- break; +- +- if (hw->aq.asq_last_status == I40E_AQ_RC_EIO) { ++ switch (hw->aq.asq_last_status) { ++ case I40E_AQ_RC_EIO: + status = I40E_ERR_UNKNOWN_PHY; + break; +- } else if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN) { ++ case I40E_AQ_RC_EAGAIN: + i40e_msec_delay(1); + total_delay++; + status = I40E_ERR_TIMEOUT; ++ break; ++ /* also covers I40E_AQ_RC_OK */ ++ default: ++ break; + } +- } while ((hw->aq.asq_last_status != I40E_AQ_RC_OK) && +- (total_delay < max_delay)); ++ ++ } while ((hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN) && ++ (total_delay < max_delay)); + + if (status != I40E_SUCCESS) + return status; +@@ -2025,8 +2028,8 @@ enum i40e_status_code i40e_aq_get_link_info(struct i40e_hw *hw, + hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) + hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; + +- if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && +- hw->aq.api_min_ver >= 7) { ++ if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE && ++ hw->mac.type != I40E_MAC_X722) { + __le32 tmp; + + i40e_memcpy(&tmp, resp->link_type, sizeof(tmp), +@@ -2883,9 +2886,16 @@ enum i40e_status_code i40e_update_link_info(struct i40e_hw *hw) + if (status) + return status; + +- hw->phy.link_info.req_fec_info = +- abilities.fec_cfg_curr_mod_ext_info & +- (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS); ++ if (abilities.fec_cfg_curr_mod_ext_info & ++ I40E_AQ_ENABLE_FEC_AUTO) ++ hw->phy.link_info.req_fec_info = ++ (I40E_AQ_REQUEST_FEC_KR | ++ I40E_AQ_REQUEST_FEC_RS); ++ else ++ hw->phy.link_info.req_fec_info = ++ abilities.fec_cfg_curr_mod_ext_info & ++ 
(I40E_AQ_REQUEST_FEC_KR | ++ I40E_AQ_REQUEST_FEC_RS); + + i40e_memcpy(hw->phy.link_info.module_type, &abilities.module_type, + sizeof(hw->phy.link_info.module_type), I40E_NONDMA_TO_NONDMA); +@@ -4300,7 +4310,7 @@ enum i40e_status_code i40e_aq_set_lldp_mib(struct i40e_hw *hw, + + cmd->type = mib_type; + cmd->length = CPU_TO_LE16(buff_size); +- cmd->address_high = CPU_TO_LE32(I40E_HI_WORD((u64)buff)); ++ cmd->address_high = CPU_TO_LE32(I40E_HI_DWORD((u64)buff)); + cmd->address_low = CPU_TO_LE32(I40E_LO_DWORD((u64)buff)); + + status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details); +diff --git a/dpdk/drivers/net/i40e/base/i40e_dcb.c b/dpdk/drivers/net/i40e/base/i40e_dcb.c +index a26f82b3a6..d99bd6e3f8 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_dcb.c ++++ b/dpdk/drivers/net/i40e/base/i40e_dcb.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_adminq.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_dcb.h b/dpdk/drivers/net/i40e/base/i40e_dcb.h +index 85b0eed3ad..8d36fce430 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_dcb.h ++++ b/dpdk/drivers/net/i40e/base/i40e_dcb.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_DCB_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_devids.h b/dpdk/drivers/net/i40e/base/i40e_devids.h +index f3c59bdea9..64cfe2bb26 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_devids.h ++++ b/dpdk/drivers/net/i40e/base/i40e_devids.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_DEVIDS_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_diag.c b/dpdk/drivers/net/i40e/base/i40e_diag.c +index 3ccbea4829..b3c4cfd3aa 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_diag.c ++++ b/dpdk/drivers/net/i40e/base/i40e_diag.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_diag.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_diag.h b/dpdk/drivers/net/i40e/base/i40e_diag.h +index 4434fc960b..cb59285d9c 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_diag.h ++++ b/dpdk/drivers/net/i40e/base/i40e_diag.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_DIAG_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_hmc.c b/dpdk/drivers/net/i40e/base/i40e_hmc.c +index 11c9ae2072..a47d6e0d79 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_hmc.c ++++ b/dpdk/drivers/net/i40e/base/i40e_hmc.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_osdep.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_hmc.h b/dpdk/drivers/net/i40e/base/i40e_hmc.h +index 289264ed99..f9aad7dc31 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_hmc.h ++++ b/dpdk/drivers/net/i40e/base/i40e_hmc.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_HMC_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c b/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c +index 0afee49b13..d3969396f0 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c ++++ b/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c +@@ 
-1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_osdep.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_lan_hmc.h b/dpdk/drivers/net/i40e/base/i40e_lan_hmc.h +index e531ec490a..aa5dceb792 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_lan_hmc.h ++++ b/dpdk/drivers/net/i40e/base/i40e_lan_hmc.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_LAN_HMC_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_nvm.c b/dpdk/drivers/net/i40e/base/i40e_nvm.c +index 6c8ca87718..d87a6e56ff 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_nvm.c ++++ b/dpdk/drivers/net/i40e/base/i40e_nvm.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_prototype.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_osdep.h b/dpdk/drivers/net/i40e/base/i40e_osdep.h +index 8a2d82a8d0..64b15e1b61 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_osdep.h ++++ b/dpdk/drivers/net/i40e/base/i40e_osdep.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_OSDEP_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_prototype.h b/dpdk/drivers/net/i40e/base/i40e_prototype.h +index 0cf006dadc..f356834206 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_prototype.h ++++ b/dpdk/drivers/net/i40e/base/i40e_prototype.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_PROTOTYPE_H_ +@@ -496,6 +496,10 @@ i40e_virtchnl_link_speed(enum i40e_aq_link_speed link_speed) + return VIRTCHNL_LINK_SPEED_100MB; + case I40E_LINK_SPEED_1GB: + return VIRTCHNL_LINK_SPEED_1GB; ++ case I40E_LINK_SPEED_2_5GB: ++ return VIRTCHNL_LINK_SPEED_2_5GB; ++ case I40E_LINK_SPEED_5GB: ++ return VIRTCHNL_LINK_SPEED_5GB; + case I40E_LINK_SPEED_10GB: + return VIRTCHNL_LINK_SPEED_10GB; + case I40E_LINK_SPEED_40GB: +diff --git a/dpdk/drivers/net/i40e/base/i40e_register.h b/dpdk/drivers/net/i40e/base/i40e_register.h +index e93ec3f58f..2408dcb117 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_register.h ++++ b/dpdk/drivers/net/i40e/base/i40e_register.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_REGISTER_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_status.h b/dpdk/drivers/net/i40e/base/i40e_status.h +index 1dad4f4b83..cd72169f14 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_status.h ++++ b/dpdk/drivers/net/i40e/base/i40e_status.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_STATUS_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_type.h b/dpdk/drivers/net/i40e/base/i40e_type.h +index 06863d772d..cc72cc0d58 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_type.h ++++ b/dpdk/drivers/net/i40e/base/i40e_type.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_TYPE_H_ +@@ -79,8 +79,8 @@ typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *); + #define I40E_HI_BYTE(x) ((u8)(((x) >> 8) & 0xFF)) + #define I40E_LO_BYTE(x) ((u8)((x) & 
0xFF)) + +-/* Number of Transmit Descriptors must be a multiple of 8. */ +-#define I40E_REQ_TX_DESCRIPTOR_MULTIPLE 8 ++/* Number of Transmit Descriptors must be a multiple of 32. */ ++#define I40E_REQ_TX_DESCRIPTOR_MULTIPLE 32 + /* Number of Receive Descriptors must be a multiple of 32 if + * the number of descriptors is greater than 32. + */ +diff --git a/dpdk/drivers/net/i40e/base/meson.build b/dpdk/drivers/net/i40e/base/meson.build +index 3dee8c9754..bfc38ae1a0 100644 +--- a/dpdk/drivers/net/i40e/base/meson.build ++++ b/dpdk/drivers/net/i40e/base/meson.build +@@ -1,5 +1,5 @@ + # SPDX-License-Identifier: BSD-3-Clause +-# Copyright(c) 2017 Intel Corporation ++# Copyright(c) 2017-2020 Intel Corporation + + sources = [ + 'i40e_adminq.c', +diff --git a/dpdk/drivers/net/i40e/base/virtchnl.h b/dpdk/drivers/net/i40e/base/virtchnl.h +index 88096cb45c..0ec84e9dae 100644 +--- a/dpdk/drivers/net/i40e/base/virtchnl.h ++++ b/dpdk/drivers/net/i40e/base/virtchnl.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _VIRTCHNL_H_ +@@ -47,12 +47,14 @@ enum virtchnl_status_code { + VIRTCHNL_STATUS_NOT_SUPPORTED = -64, + }; + ++#define VIRTCHNL_LINK_SPEED_2_5GB_SHIFT 0x0 + #define VIRTCHNL_LINK_SPEED_100MB_SHIFT 0x1 + #define VIRTCHNL_LINK_SPEED_1000MB_SHIFT 0x2 + #define VIRTCHNL_LINK_SPEED_10GB_SHIFT 0x3 + #define VIRTCHNL_LINK_SPEED_40GB_SHIFT 0x4 + #define VIRTCHNL_LINK_SPEED_20GB_SHIFT 0x5 + #define VIRTCHNL_LINK_SPEED_25GB_SHIFT 0x6 ++#define VIRTCHNL_LINK_SPEED_5GB_SHIFT 0x7 + + enum virtchnl_link_speed { + VIRTCHNL_LINK_SPEED_UNKNOWN = 0, +@@ -62,6 +64,8 @@ enum virtchnl_link_speed { + VIRTCHNL_LINK_SPEED_40GB = BIT(VIRTCHNL_LINK_SPEED_40GB_SHIFT), + VIRTCHNL_LINK_SPEED_20GB = BIT(VIRTCHNL_LINK_SPEED_20GB_SHIFT), + VIRTCHNL_LINK_SPEED_25GB = BIT(VIRTCHNL_LINK_SPEED_25GB_SHIFT), ++ VIRTCHNL_LINK_SPEED_2_5GB = BIT(VIRTCHNL_LINK_SPEED_2_5GB_SHIFT), ++ VIRTCHNL_LINK_SPEED_5GB = BIT(VIRTCHNL_LINK_SPEED_5GB_SHIFT), + }; + + /* for hsplit_0 field of Rx HMC context */ +diff --git a/dpdk/drivers/net/i40e/i40e_ethdev.c b/dpdk/drivers/net/i40e/i40e_ethdev.c +index 5999c964bd..67a44e25f1 100644 +--- a/dpdk/drivers/net/i40e/i40e_ethdev.c ++++ b/dpdk/drivers/net/i40e/i40e_ethdev.c +@@ -2241,6 +2241,9 @@ i40e_apply_link_speed(struct rte_eth_dev *dev) + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_eth_conf *conf = &dev->data->dev_conf; + ++ abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK | ++ I40E_AQ_PHY_LINK_ENABLED; ++ + if (conf->link_speeds == ETH_LINK_SPEED_AUTONEG) { + conf->link_speeds = ETH_LINK_SPEED_40G | + ETH_LINK_SPEED_25G | +@@ -2248,11 +2251,12 @@ i40e_apply_link_speed(struct rte_eth_dev *dev) + ETH_LINK_SPEED_10G | + ETH_LINK_SPEED_1G | + ETH_LINK_SPEED_100M; ++ ++ abilities |= I40E_AQ_PHY_AN_ENABLED; ++ } else { ++ abilities &= ~I40E_AQ_PHY_AN_ENABLED; + } + speed = i40e_parse_link_speeds(conf->link_speeds); +- abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK | +- I40E_AQ_PHY_AN_ENABLED | +- I40E_AQ_PHY_LINK_ENABLED; + + return i40e_phy_conf_link(hw, abilities, speed, true); + } +@@ -2268,16 +2272,10 @@ i40e_dev_start(struct rte_eth_dev *dev) + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + uint32_t intr_vector = 0; + struct i40e_vsi *vsi; ++ uint16_t nb_rxq, nb_txq; + + hw->adapter_stopped = 0; + +- if (dev->data->dev_conf.link_speeds & ETH_LINK_SPEED_FIXED) { +- PMD_INIT_LOG(ERR, +- "Invalid link_speeds for port %u, autonegotiation disabled", +- 
dev->data->port_id); +- return -EINVAL; +- } +- + rte_intr_disable(intr_handle); + + if ((rte_intr_cap_multiple(intr_handle) || +@@ -2306,7 +2304,7 @@ i40e_dev_start(struct rte_eth_dev *dev) + ret = i40e_dev_rxtx_init(pf); + if (ret != I40E_SUCCESS) { + PMD_DRV_LOG(ERR, "Failed to init rx/tx queues"); +- goto err_up; ++ return ret; + } + + /* Map queues with MSIX interrupt */ +@@ -2331,10 +2329,16 @@ i40e_dev_start(struct rte_eth_dev *dev) + } + + /* Enable all queues which have been configured */ +- ret = i40e_dev_switch_queues(pf, TRUE); +- if (ret != I40E_SUCCESS) { +- PMD_DRV_LOG(ERR, "Failed to enable VSI"); +- goto err_up; ++ for (nb_rxq = 0; nb_rxq < dev->data->nb_rx_queues; nb_rxq++) { ++ ret = i40e_dev_rx_queue_start(dev, nb_rxq); ++ if (ret) ++ goto rx_err; ++ } ++ ++ for (nb_txq = 0; nb_txq < dev->data->nb_tx_queues; nb_txq++) { ++ ret = i40e_dev_tx_queue_start(dev, nb_txq); ++ if (ret) ++ goto tx_err; + } + + /* Enable receiving broadcast packets */ +@@ -2364,7 +2368,7 @@ i40e_dev_start(struct rte_eth_dev *dev) + ret = i40e_aq_set_lb_modes(hw, dev->data->dev_conf.lpbk_mode, NULL); + if (ret != I40E_SUCCESS) { + PMD_DRV_LOG(ERR, "fail to set loopback link"); +- goto err_up; ++ goto tx_err; + } + } + +@@ -2372,7 +2376,7 @@ i40e_dev_start(struct rte_eth_dev *dev) + ret = i40e_apply_link_speed(dev); + if (I40E_SUCCESS != ret) { + PMD_DRV_LOG(ERR, "Fail to apply link setting"); +- goto err_up; ++ goto tx_err; + } + + if (!rte_intr_allow_others(intr_handle)) { +@@ -2415,9 +2419,12 @@ i40e_dev_start(struct rte_eth_dev *dev) + + return I40E_SUCCESS; + +-err_up: +- i40e_dev_switch_queues(pf, FALSE); +- i40e_dev_clear_queues(dev); ++tx_err: ++ for (i = 0; i < nb_txq; i++) ++ i40e_dev_tx_queue_stop(dev, i); ++rx_err: ++ for (i = 0; i < nb_rxq; i++) ++ i40e_dev_rx_queue_stop(dev, i); + + return ret; + } +@@ -2441,7 +2448,11 @@ i40e_dev_stop(struct rte_eth_dev *dev) + } + + /* Disable all queues */ +- i40e_dev_switch_queues(pf, FALSE); ++ for (i = 0; i < dev->data->nb_tx_queues; i++) ++ i40e_dev_tx_queue_stop(dev, i); ++ ++ for (i = 0; i < dev->data->nb_rx_queues; i++) ++ i40e_dev_rx_queue_stop(dev, i); + + /* un-map queues with interrupt registers */ + i40e_vsi_disable_queues_intr(main_vsi); +@@ -2594,7 +2605,7 @@ i40e_dev_close(struct rte_eth_dev *dev) + do { + ret = rte_intr_callback_unregister(intr_handle, + i40e_dev_interrupt_handler, dev); +- if (ret >= 0) { ++ if (ret >= 0 || ret == -ENOENT) { + break; + } else if (ret != -EAGAIN) { + PMD_INIT_LOG(ERR, +@@ -4930,6 +4941,7 @@ i40e_res_pool_free(struct i40e_res_pool_info *pool, + { + struct pool_entry *entry, *next, *prev, *valid_entry = NULL; + uint32_t pool_offset; ++ uint16_t len; + int insert; + + if (pool == NULL) { +@@ -4968,12 +4980,13 @@ i40e_res_pool_free(struct i40e_res_pool_info *pool, + } + + insert = 0; ++ len = valid_entry->len; + /* Try to merge with next one*/ + if (next != NULL) { + /* Merge with next one */ +- if (valid_entry->base + valid_entry->len == next->base) { ++ if (valid_entry->base + len == next->base) { + next->base = valid_entry->base; +- next->len += valid_entry->len; ++ next->len += len; + rte_free(valid_entry); + valid_entry = next; + insert = 1; +@@ -4983,13 +4996,15 @@ i40e_res_pool_free(struct i40e_res_pool_info *pool, + if (prev != NULL) { + /* Merge with previous one */ + if (prev->base + prev->len == valid_entry->base) { +- prev->len += valid_entry->len; ++ prev->len += len; + /* If it merge with next one, remove next node */ + if (insert == 1) { + LIST_REMOVE(valid_entry, next); + 
rte_free(valid_entry); ++ valid_entry = NULL; + } else { + rte_free(valid_entry); ++ valid_entry = NULL; + insert = 1; + } + } +@@ -5005,8 +5020,8 @@ i40e_res_pool_free(struct i40e_res_pool_info *pool, + LIST_INSERT_HEAD(&pool->free_list, valid_entry, next); + } + +- pool->num_free += valid_entry->len; +- pool->num_alloc -= valid_entry->len; ++ pool->num_free += len; ++ pool->num_alloc -= len; + + return 0; + } +@@ -6277,33 +6292,6 @@ i40e_switch_tx_queue(struct i40e_hw *hw, uint16_t q_idx, bool on) + return I40E_SUCCESS; + } + +-/* Swith on or off the tx queues */ +-static int +-i40e_dev_switch_tx_queues(struct i40e_pf *pf, bool on) +-{ +- struct rte_eth_dev_data *dev_data = pf->dev_data; +- struct i40e_tx_queue *txq; +- struct rte_eth_dev *dev = pf->adapter->eth_dev; +- uint16_t i; +- int ret; +- +- for (i = 0; i < dev_data->nb_tx_queues; i++) { +- txq = dev_data->tx_queues[i]; +- /* Don't operate the queue if not configured or +- * if starting only per queue */ +- if (!txq || !txq->q_set || (on && txq->tx_deferred_start)) +- continue; +- if (on) +- ret = i40e_dev_tx_queue_start(dev, i); +- else +- ret = i40e_dev_tx_queue_stop(dev, i); +- if ( ret != I40E_SUCCESS) +- return ret; +- } +- +- return I40E_SUCCESS; +-} +- + int + i40e_switch_rx_queue(struct i40e_hw *hw, uint16_t q_idx, bool on) + { +@@ -6355,59 +6343,6 @@ i40e_switch_rx_queue(struct i40e_hw *hw, uint16_t q_idx, bool on) + + return I40E_SUCCESS; + } +-/* Switch on or off the rx queues */ +-static int +-i40e_dev_switch_rx_queues(struct i40e_pf *pf, bool on) +-{ +- struct rte_eth_dev_data *dev_data = pf->dev_data; +- struct i40e_rx_queue *rxq; +- struct rte_eth_dev *dev = pf->adapter->eth_dev; +- uint16_t i; +- int ret; +- +- for (i = 0; i < dev_data->nb_rx_queues; i++) { +- rxq = dev_data->rx_queues[i]; +- /* Don't operate the queue if not configured or +- * if starting only per queue */ +- if (!rxq || !rxq->q_set || (on && rxq->rx_deferred_start)) +- continue; +- if (on) +- ret = i40e_dev_rx_queue_start(dev, i); +- else +- ret = i40e_dev_rx_queue_stop(dev, i); +- if (ret != I40E_SUCCESS) +- return ret; +- } +- +- return I40E_SUCCESS; +-} +- +-/* Switch on or off all the rx/tx queues */ +-int +-i40e_dev_switch_queues(struct i40e_pf *pf, bool on) +-{ +- int ret; +- +- if (on) { +- /* enable rx queues before enabling tx queues */ +- ret = i40e_dev_switch_rx_queues(pf, on); +- if (ret) { +- PMD_DRV_LOG(ERR, "Failed to switch rx queues"); +- return ret; +- } +- ret = i40e_dev_switch_tx_queues(pf, on); +- } else { +- /* Stop tx queues before stopping rx queues */ +- ret = i40e_dev_switch_tx_queues(pf, on); +- if (ret) { +- PMD_DRV_LOG(ERR, "Failed to switch tx queues"); +- return ret; +- } +- ret = i40e_dev_switch_rx_queues(pf, on); +- } +- +- return ret; +-} + + /* Initialize VSI for TX */ + static int +@@ -10411,6 +10346,7 @@ i40e_get_swr_pm_cfg(struct i40e_hw *hw, uint32_t *value) + { I40E_GL_SWR_PM_EF_DEVICE(I40E_DEV_ID_KX_C) }, + { I40E_GL_SWR_PM_EF_DEVICE(I40E_DEV_ID_10G_BASE_T) }, + { I40E_GL_SWR_PM_EF_DEVICE(I40E_DEV_ID_10G_BASE_T4) }, ++ { I40E_GL_SWR_PM_EF_DEVICE(I40E_DEV_ID_SFP_X722) }, + + { I40E_GL_SWR_PM_SF_DEVICE(I40E_DEV_ID_KX_B) }, + { I40E_GL_SWR_PM_SF_DEVICE(I40E_DEV_ID_QSFP_A) }, +diff --git a/dpdk/drivers/net/i40e/i40e_ethdev_vf.c b/dpdk/drivers/net/i40e/i40e_ethdev_vf.c +index 5dba0928b9..1f17649081 100644 +--- a/dpdk/drivers/net/i40e/i40e_ethdev_vf.c ++++ b/dpdk/drivers/net/i40e/i40e_ethdev_vf.c +@@ -763,7 +763,6 @@ i40evf_stop_queues(struct rte_eth_dev *dev) + for (i = 0; i < dev->data->nb_tx_queues; i++) { + 
if (i40evf_dev_tx_queue_stop(dev, i) != 0) { + PMD_DRV_LOG(ERR, "Fail to stop queue %u", i); +- return -1; + } + } + +@@ -771,7 +770,6 @@ i40evf_stop_queues(struct rte_eth_dev *dev) + for (i = 0; i < dev->data->nb_rx_queues; i++) { + if (i40evf_dev_rx_queue_stop(dev, i) != 0) { + PMD_DRV_LOG(ERR, "Fail to stop queue %u", i); +- return -1; + } + } + +diff --git a/dpdk/drivers/net/i40e/i40e_fdir.c b/dpdk/drivers/net/i40e/i40e_fdir.c +index dee007daae..239cdbf522 100644 +--- a/dpdk/drivers/net/i40e/i40e_fdir.c ++++ b/dpdk/drivers/net/i40e/i40e_fdir.c +@@ -1559,8 +1559,8 @@ i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev, + struct i40e_fdir_filter check_filter; /* Check if the filter exists */ + int ret = 0; + +- if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_PERFECT) { +- PMD_DRV_LOG(ERR, "FDIR is not enabled, please check the mode in fdir_conf."); ++ if (pf->fdir.fdir_vsi == NULL) { ++ PMD_DRV_LOG(ERR, "FDIR is not enabled"); + return -ENOTSUP; + } + +diff --git a/dpdk/drivers/net/i40e/i40e_flow.c b/dpdk/drivers/net/i40e/i40e_flow.c +index 61021037c8..a83543c26e 100644 +--- a/dpdk/drivers/net/i40e/i40e_flow.c ++++ b/dpdk/drivers/net/i40e/i40e_flow.c +@@ -2542,7 +2542,6 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + if (next_type == RTE_FLOW_ITEM_TYPE_VLAN || + ether_type == RTE_ETHER_TYPE_IPV4 || + ether_type == RTE_ETHER_TYPE_IPV6 || +- ether_type == RTE_ETHER_TYPE_ARP || + ether_type == outer_tpid) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, +@@ -2587,7 +2586,6 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + + if (ether_type == RTE_ETHER_TYPE_IPV4 || + ether_type == RTE_ETHER_TYPE_IPV6 || +- ether_type == RTE_ETHER_TYPE_ARP || + ether_type == outer_tpid) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, +@@ -3208,8 +3206,7 @@ i40e_flow_parse_fdir_filter(struct rte_eth_dev *dev, + + cons_filter_type = RTE_ETH_FILTER_FDIR; + +- if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_PERFECT || +- pf->fdir.fdir_vsi == NULL) { ++ if (pf->fdir.fdir_vsi == NULL) { + /* Enable fdir when fdir flow is added at first time. 
*/ + ret = i40e_fdir_setup(pf); + if (ret != I40E_SUCCESS) { +@@ -3225,10 +3222,12 @@ i40e_flow_parse_fdir_filter(struct rte_eth_dev *dev, + NULL, "Failed to configure fdir."); + goto err; + } +- +- dev->data->dev_conf.fdir_conf.mode = RTE_FDIR_MODE_PERFECT; + } + ++ /* If create the first fdir rule, enable fdir check for rx queues */ ++ if (TAILQ_EMPTY(&pf->fdir.fdir_list)) ++ i40e_fdir_rx_proc_enable(dev, 1); ++ + return 0; + err: + i40e_fdir_teardown(pf); +@@ -4332,7 +4331,34 @@ i40e_flow_parse_rss_action(struct rte_eth_dev *dev, + struct i40e_rte_flow_rss_conf *rss_info = &pf->rss_info; + uint16_t i, j, n, tmp; + uint32_t index = 0; +- uint64_t hf_bit = 1; ++ ++ static const struct { ++ uint64_t rss_type; ++ enum i40e_filter_pctype pctype; ++ } pctype_match_table[] = { ++ {ETH_RSS_FRAG_IPV4, ++ I40E_FILTER_PCTYPE_FRAG_IPV4}, ++ {ETH_RSS_NONFRAG_IPV4_TCP, ++ I40E_FILTER_PCTYPE_NONF_IPV4_TCP}, ++ {ETH_RSS_NONFRAG_IPV4_UDP, ++ I40E_FILTER_PCTYPE_NONF_IPV4_UDP}, ++ {ETH_RSS_NONFRAG_IPV4_SCTP, ++ I40E_FILTER_PCTYPE_NONF_IPV4_SCTP}, ++ {ETH_RSS_NONFRAG_IPV4_OTHER, ++ I40E_FILTER_PCTYPE_NONF_IPV4_OTHER}, ++ {ETH_RSS_FRAG_IPV6, ++ I40E_FILTER_PCTYPE_FRAG_IPV6}, ++ {ETH_RSS_NONFRAG_IPV6_TCP, ++ I40E_FILTER_PCTYPE_NONF_IPV6_TCP}, ++ {ETH_RSS_NONFRAG_IPV6_UDP, ++ I40E_FILTER_PCTYPE_NONF_IPV6_UDP}, ++ {ETH_RSS_NONFRAG_IPV6_SCTP, ++ I40E_FILTER_PCTYPE_NONF_IPV6_SCTP}, ++ {ETH_RSS_NONFRAG_IPV6_OTHER, ++ I40E_FILTER_PCTYPE_NONF_IPV6_OTHER}, ++ {ETH_RSS_L2_PAYLOAD, ++ I40E_FILTER_PCTYPE_L2_PAYLOAD}, ++ }; + + NEXT_ITEM_OF_ACTION(act, actions, index); + rss = act->conf; +@@ -4350,9 +4376,10 @@ i40e_flow_parse_rss_action(struct rte_eth_dev *dev, + } + + if (action_flag) { +- for (n = 0; n < 64; n++) { +- if (rss->types & (hf_bit << n)) { +- conf_info->region[0].hw_flowtype[0] = n; ++ for (j = 0; j < RTE_DIM(pctype_match_table); j++) { ++ if (rss->types & pctype_match_table[j].rss_type) { ++ conf_info->region[0].hw_flowtype[0] = ++ (uint8_t)pctype_match_table[j].pctype; + conf_info->region[0].flowtype_num = 1; + conf_info->queue_region_number = 1; + break; +@@ -4796,9 +4823,6 @@ i40e_flow_destroy(struct rte_eth_dev *dev, + + /* If the last flow is destroyed, disable fdir. */ + if (!ret && TAILQ_EMPTY(&pf->fdir.fdir_list)) { +- i40e_fdir_teardown(pf); +- dev->data->dev_conf.fdir_conf.mode = +- RTE_FDIR_MODE_NONE; + i40e_fdir_rx_proc_enable(dev, 0); + } + break; +@@ -4956,9 +4980,6 @@ i40e_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error) + return -rte_errno; + } + +- /* Disable FDIR processing as all FDIR rules are now flushed */ +- i40e_fdir_rx_proc_enable(dev, 0); +- + return ret; + } + +@@ -4994,9 +5015,10 @@ i40e_flow_flush_fdir_filter(struct i40e_pf *pf) + for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + pctype <= I40E_FILTER_PCTYPE_L2_PAYLOAD; pctype++) + pf->fdir.inset_flag[pctype] = 0; +- } + +- i40e_fdir_teardown(pf); ++ /* Disable FDIR processing as all FDIR rules are now flushed */ ++ i40e_fdir_rx_proc_enable(dev, 0); ++ } + + return ret; + } +diff --git a/dpdk/drivers/net/i40e/i40e_rxtx.c b/dpdk/drivers/net/i40e/i40e_rxtx.c +index 17dc8c78f7..249d0c7976 100644 +--- a/dpdk/drivers/net/i40e/i40e_rxtx.c ++++ b/dpdk/drivers/net/i40e/i40e_rxtx.c +@@ -989,6 +989,24 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload) + return ctx_desc; + } + ++/* HW requires that Tx buffer size ranges from 1B up to (16K-1)B. 
*/ ++#define I40E_MAX_DATA_PER_TXD \ ++ (I40E_TXD_QW1_TX_BUF_SZ_MASK >> I40E_TXD_QW1_TX_BUF_SZ_SHIFT) ++/* Calculate the number of TX descriptors needed for each pkt */ ++static inline uint16_t ++i40e_calc_pkt_desc(struct rte_mbuf *tx_pkt) ++{ ++ struct rte_mbuf *txd = tx_pkt; ++ uint16_t count = 0; ++ ++ while (txd != NULL) { ++ count += DIV_ROUND_UP(txd->data_len, I40E_MAX_DATA_PER_TXD); ++ txd = txd->next; ++ } ++ ++ return count; ++} ++ + uint16_t + i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + { +@@ -1021,7 +1039,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + + /* Check if the descriptor ring needs to be cleaned. */ + if (txq->nb_tx_free < txq->tx_free_thresh) +- i40e_xmit_cleanup(txq); ++ (void)i40e_xmit_cleanup(txq); + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + td_cmd = 0; +@@ -1046,8 +1064,15 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + * The number of descriptors that must be allocated for + * a packet equals to the number of the segments of that + * packet plus 1 context descriptor if needed. ++ * Recalculate the needed tx descs when TSO enabled in case ++ * the mbuf data size exceeds max data size that hw allows ++ * per tx desc. + */ +- nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx); ++ if (ol_flags & PKT_TX_TCP_SEG) ++ nb_used = (uint16_t)(i40e_calc_pkt_desc(tx_pkt) + ++ nb_ctx); ++ else ++ nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx); + tx_last = (uint16_t)(tx_id + nb_used - 1); + + /* Circular ring */ +@@ -1160,6 +1185,24 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + slen = m_seg->data_len; + buf_dma_addr = rte_mbuf_data_iova(m_seg); + ++ while ((ol_flags & PKT_TX_TCP_SEG) && ++ unlikely(slen > I40E_MAX_DATA_PER_TXD)) { ++ txd->buffer_addr = ++ rte_cpu_to_le_64(buf_dma_addr); ++ txd->cmd_type_offset_bsz = ++ i40e_build_ctob(td_cmd, ++ td_offset, I40E_MAX_DATA_PER_TXD, ++ td_tag); ++ ++ buf_dma_addr += I40E_MAX_DATA_PER_TXD; ++ slen -= I40E_MAX_DATA_PER_TXD; ++ ++ txe->last_id = tx_last; ++ tx_id = txe->next_id; ++ txe = txn; ++ txd = &txr[tx_id]; ++ txn = &sw_ring[txe->next_id]; ++ } + PMD_TX_LOG(DEBUG, "mbuf: %p, TDD[%u]:\n" + "buf_dma_addr: %#"PRIx64";\n" + "td_cmd: %#x;\n" +@@ -1205,7 +1248,8 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + (unsigned) txq->port_id, (unsigned) txq->queue_id, + (unsigned) tx_id, (unsigned) nb_tx); + +- I40E_PCI_REG_WRITE(txq->qtx_tail, tx_id); ++ rte_cio_wmb(); ++ I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id); + txq->tx_tail = tx_id; + + return nb_tx; +@@ -1527,6 +1571,15 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) + PMD_INIT_FUNC_TRACE(); + + rxq = dev->data->rx_queues[rx_queue_id]; ++ if (!rxq || !rxq->q_set) { ++ PMD_DRV_LOG(ERR, "RX queue %u not available or setup", ++ rx_queue_id); ++ return -EINVAL; ++ } ++ ++ if (rxq->rx_deferred_start) ++ PMD_DRV_LOG(WARNING, "RX queue %u is deferrd start", ++ rx_queue_id); + + err = i40e_alloc_rx_queue_mbufs(rxq); + if (err) { +@@ -1559,6 +1612,11 @@ i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id) + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + rxq = dev->data->rx_queues[rx_queue_id]; ++ if (!rxq || !rxq->q_set) { ++ PMD_DRV_LOG(ERR, "RX queue %u not available or setup", ++ rx_queue_id); ++ return -EINVAL; ++ } + + /* + * rx_queue_id is queue id application refers to, while +@@ -1587,6 +1645,15 @@ i40e_dev_tx_queue_start(struct rte_eth_dev 
*dev, uint16_t tx_queue_id) + PMD_INIT_FUNC_TRACE(); + + txq = dev->data->tx_queues[tx_queue_id]; ++ if (!txq || !txq->q_set) { ++ PMD_DRV_LOG(ERR, "TX queue %u is not available or setup", ++ tx_queue_id); ++ return -EINVAL; ++ } ++ ++ if (txq->tx_deferred_start) ++ PMD_DRV_LOG(WARNING, "TX queue %u is deferrd start", ++ tx_queue_id); + + /* + * tx_queue_id is queue id application refers to, while +@@ -1611,6 +1678,11 @@ i40e_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id) + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + txq = dev->data->tx_queues[tx_queue_id]; ++ if (!txq || !txq->q_set) { ++ PMD_DRV_LOG(ERR, "TX queue %u is not available or setup", ++ tx_queue_id); ++ return -EINVAL; ++ } + + /* + * tx_queue_id is queue id application refers to, while +diff --git a/dpdk/drivers/net/i40e/i40e_rxtx_vec_altivec.c b/dpdk/drivers/net/i40e/i40e_rxtx_vec_altivec.c +index 310ce1ee2d..5406828afb 100644 +--- a/dpdk/drivers/net/i40e/i40e_rxtx_vec_altivec.c ++++ b/dpdk/drivers/net/i40e/i40e_rxtx_vec_altivec.c +@@ -13,7 +13,7 @@ + #include "i40e_rxtx.h" + #include "i40e_rxtx_vec_common.h" + +-#include <altivec.h> ++#include <rte_altivec.h> + + #pragma GCC diagnostic ignored "-Wcast-qual" + +diff --git a/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h b/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h +index 0e6ffa0078..31f73f6054 100644 +--- a/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h ++++ b/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h +@@ -33,6 +33,7 @@ reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs, + if (!split_flags[buf_idx]) { + /* it's the last packet of the set */ + start->hash = end->hash; ++ start->vlan_tci = end->vlan_tci; + start->ol_flags = end->ol_flags; + /* we need to strip crc for the whole packet */ + start->pkt_len -= rxq->crc_len; +diff --git a/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c b/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c +index deb185fe2f..4376d8911c 100644 +--- a/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c ++++ b/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c +@@ -72,8 +72,9 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq) + rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? 
+ (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); + ++ rte_cio_wmb(); + /* Update the tail pointer on the NIC */ +- I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id); ++ I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); + } + + static inline void +@@ -564,7 +565,8 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts, + + txq->tx_tail = tx_id; + +- I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail); ++ rte_cio_wmb(); ++ I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id); + + return nb_pkts; + } +diff --git a/dpdk/drivers/net/iavf/base/README b/dpdk/drivers/net/iavf/base/README +index f57e1048ff..bc54ab2809 100644 +--- a/dpdk/drivers/net/iavf/base/README ++++ b/dpdk/drivers/net/iavf/base/README +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2017 Intel Corporation ++ * Copyright(c) 2019-2020 Intel Corporation + */ + + Intel® IAVF driver +diff --git a/dpdk/drivers/net/iavf/base/iavf_adminq.c b/dpdk/drivers/net/iavf/base/iavf_adminq.c +index 1ba8b52219..d6943e8260 100644 +--- a/dpdk/drivers/net/iavf/base/iavf_adminq.c ++++ b/dpdk/drivers/net/iavf/base/iavf_adminq.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2013 - 2015 Intel Corporation ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "iavf_status.h" +@@ -84,6 +84,7 @@ enum iavf_status_code iavf_alloc_adminq_arq_ring(struct iavf_hw *hw) + **/ + void iavf_free_adminq_asq(struct iavf_hw *hw) + { ++ iavf_free_virt_mem(hw, &hw->aq.asq.cmd_buf); + iavf_free_dma_mem(hw, &hw->aq.asq.desc_buf); + } + +@@ -367,7 +368,7 @@ enum iavf_status_code iavf_init_asq(struct iavf_hw *hw) + /* initialize base registers */ + ret_code = iavf_config_asq_regs(hw); + if (ret_code != IAVF_SUCCESS) +- goto init_adminq_free_rings; ++ goto init_config_regs; + + /* success! 
*/ + hw->aq.asq.count = hw->aq.num_asq_entries; +@@ -375,6 +376,10 @@ enum iavf_status_code iavf_init_asq(struct iavf_hw *hw) + + init_adminq_free_rings: + iavf_free_adminq_asq(hw); ++ return ret_code; ++ ++init_config_regs: ++ iavf_free_asq_bufs(hw); + + init_adminq_exit: + return ret_code; +@@ -817,6 +822,8 @@ enum iavf_status_code iavf_asq_send_command(struct iavf_hw *hw, + cmd_completed = true; + if ((enum iavf_admin_queue_err)retval == IAVF_AQ_RC_OK) + status = IAVF_SUCCESS; ++ else if ((enum iavf_admin_queue_err)retval == IAVF_AQ_RC_EBUSY) ++ status = IAVF_ERR_NOT_READY; + else + status = IAVF_ERR_ADMIN_QUEUE_ERROR; + hw->aq.asq_last_status = (enum iavf_admin_queue_err)retval; +diff --git a/dpdk/drivers/net/iavf/base/iavf_adminq.h b/dpdk/drivers/net/iavf/base/iavf_adminq.h +index 715621644b..f97fe8818e 100644 +--- a/dpdk/drivers/net/iavf/base/iavf_adminq.h ++++ b/dpdk/drivers/net/iavf/base/iavf_adminq.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2013 - 2015 Intel Corporation ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IAVF_ADMINQ_H_ +diff --git a/dpdk/drivers/net/iavf/base/iavf_alloc.h b/dpdk/drivers/net/iavf/base/iavf_alloc.h +index 4ff307d23d..ef5ce080ad 100644 +--- a/dpdk/drivers/net/iavf/base/iavf_alloc.h ++++ b/dpdk/drivers/net/iavf/base/iavf_alloc.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2013 - 2015 Intel Corporation ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IAVF_ALLOC_H_ +diff --git a/dpdk/drivers/net/iavf/base/iavf_common.c b/dpdk/drivers/net/iavf/base/iavf_common.c +index 916a8388b2..23f18aa2ec 100644 +--- a/dpdk/drivers/net/iavf/base/iavf_common.c ++++ b/dpdk/drivers/net/iavf/base/iavf_common.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2013 - 2015 Intel Corporation ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "iavf_type.h" +diff --git a/dpdk/drivers/net/iavf/base/iavf_devids.h b/dpdk/drivers/net/iavf/base/iavf_devids.h +index 2cd88d62f0..08c523f19e 100644 +--- a/dpdk/drivers/net/iavf/base/iavf_devids.h ++++ b/dpdk/drivers/net/iavf/base/iavf_devids.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2013 - 2015 Intel Corporation ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IAVF_DEVIDS_H_ +diff --git a/dpdk/drivers/net/iavf/base/iavf_osdep.h b/dpdk/drivers/net/iavf/base/iavf_osdep.h +index 6480266937..08b312cbd8 100644 +--- a/dpdk/drivers/net/iavf/base/iavf_osdep.h ++++ b/dpdk/drivers/net/iavf/base/iavf_osdep.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2017 Intel Corporation ++ * Copyright(c) 2017-2020 Intel Corporation + */ + + #ifndef _IAVF_OSDEP_H_ +diff --git a/dpdk/drivers/net/iavf/base/iavf_status.h b/dpdk/drivers/net/iavf/base/iavf_status.h +index cb91afb017..a981360af6 100644 +--- a/dpdk/drivers/net/iavf/base/iavf_status.h ++++ b/dpdk/drivers/net/iavf/base/iavf_status.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2013 - 2015 Intel Corporation ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IAVF_STATUS_H_ +diff --git a/dpdk/drivers/net/iavf/base/virtchnl.h b/dpdk/drivers/net/iavf/base/virtchnl.h +index 50f7c8e49e..fa98417b05 100644 +--- a/dpdk/drivers/net/iavf/base/virtchnl.h ++++ b/dpdk/drivers/net/iavf/base/virtchnl.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2013 - 2015 Intel Corporation ++ * Copyright(c) 2001-2020 Intel Corporation 
+ */ + + #ifndef _VIRTCHNL_H_ +diff --git a/dpdk/drivers/net/iavf/iavf.h b/dpdk/drivers/net/iavf/iavf.h +index bbd4d75d00..84f8213542 100644 +--- a/dpdk/drivers/net/iavf/iavf.h ++++ b/dpdk/drivers/net/iavf/iavf.h +@@ -173,6 +173,17 @@ struct iavf_cmd_info { + uint32_t out_size; /* buffer size for response */ + }; + ++/* notify current command done. Only call in case execute ++ * _atomic_set_cmd successfully. ++ */ ++static inline void ++_notify_cmd(struct iavf_info *vf, uint32_t msg_ret) ++{ ++ vf->cmd_retval = msg_ret; ++ rte_wmb(); ++ vf->pend_cmd = VIRTCHNL_OP_UNKNOWN; ++} ++ + /* clear current command. Only call in case execute + * _atomic_set_cmd successfully. + */ +diff --git a/dpdk/drivers/net/iavf/iavf_ethdev.c b/dpdk/drivers/net/iavf/iavf_ethdev.c +index a39ba1466c..266200dbe6 100644 +--- a/dpdk/drivers/net/iavf/iavf_ethdev.c ++++ b/dpdk/drivers/net/iavf/iavf_ethdev.c +@@ -1068,7 +1068,7 @@ iavf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + if (ret == 0) { + iavf_update_stats(vsi, pstats); + stats->ipackets = pstats->rx_unicast + pstats->rx_multicast + +- pstats->rx_broadcast; ++ pstats->rx_broadcast - pstats->rx_discards; + stats->opackets = pstats->tx_broadcast + pstats->tx_multicast + + pstats->tx_unicast; + stats->imissed = pstats->rx_discards; +@@ -1079,7 +1079,7 @@ iavf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + } else { + PMD_DRV_LOG(ERR, "Get statistics failed"); + } +- return -EIO; ++ return ret; + } + + static int +diff --git a/dpdk/drivers/net/iavf/iavf_rxtx.h b/dpdk/drivers/net/iavf/iavf_rxtx.h +index 225a0c4c42..60d02c521f 100644 +--- a/dpdk/drivers/net/iavf/iavf_rxtx.h ++++ b/dpdk/drivers/net/iavf/iavf_rxtx.h +@@ -28,6 +28,7 @@ + DEV_TX_OFFLOAD_VLAN_INSERT | \ + DEV_TX_OFFLOAD_SCTP_CKSUM | \ + DEV_TX_OFFLOAD_UDP_CKSUM | \ ++ DEV_TX_OFFLOAD_TCP_TSO | \ + DEV_TX_OFFLOAD_TCP_CKSUM) + + #define DEFAULT_TX_RS_THRESH 32 +diff --git a/dpdk/drivers/net/iavf/iavf_rxtx_vec_common.h b/dpdk/drivers/net/iavf/iavf_rxtx_vec_common.h +index a6ba227584..25bb502de2 100644 +--- a/dpdk/drivers/net/iavf/iavf_rxtx_vec_common.h ++++ b/dpdk/drivers/net/iavf/iavf_rxtx_vec_common.h +@@ -33,6 +33,7 @@ reassemble_packets(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_bufs, + if (!split_flags[buf_idx]) { + /* it's the last packet of the set */ + start->hash = end->hash; ++ start->vlan_tci = end->vlan_tci; + start->ol_flags = end->ol_flags; + /* we need to strip crc for the whole packet */ + start->pkt_len -= rxq->crc_len; +diff --git a/dpdk/drivers/net/iavf/iavf_vchnl.c b/dpdk/drivers/net/iavf/iavf_vchnl.c +index 14395fed31..149673b9b3 100644 +--- a/dpdk/drivers/net/iavf/iavf_vchnl.c ++++ b/dpdk/drivers/net/iavf/iavf_vchnl.c +@@ -132,6 +132,38 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args) + return err; + } + ++static uint32_t ++iavf_convert_link_speed(enum virtchnl_link_speed virt_link_speed) ++{ ++ uint32_t speed; ++ ++ switch (virt_link_speed) { ++ case VIRTCHNL_LINK_SPEED_100MB: ++ speed = 100; ++ break; ++ case VIRTCHNL_LINK_SPEED_1GB: ++ speed = 1000; ++ break; ++ case VIRTCHNL_LINK_SPEED_10GB: ++ speed = 10000; ++ break; ++ case VIRTCHNL_LINK_SPEED_40GB: ++ speed = 40000; ++ break; ++ case VIRTCHNL_LINK_SPEED_20GB: ++ speed = 20000; ++ break; ++ case VIRTCHNL_LINK_SPEED_25GB: ++ speed = 25000; ++ break; ++ default: ++ speed = 0; ++ break; ++ } ++ ++ return speed; ++} ++ + static void + iavf_handle_pf_event_msg(struct rte_eth_dev *dev, uint8_t *msg, + uint16_t msglen) +@@ -153,7 +185,14 @@ 
iavf_handle_pf_event_msg(struct rte_eth_dev *dev, uint8_t *msg, + case VIRTCHNL_EVENT_LINK_CHANGE: + PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event"); + vf->link_up = pf_msg->event_data.link_event.link_status; +- vf->link_speed = pf_msg->event_data.link_event_adv.link_speed; ++ if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_CAP_ADV_LINK_SPEED) { ++ vf->link_speed = ++ pf_msg->event_data.link_event_adv.link_speed; ++ } else { ++ enum virtchnl_link_speed speed; ++ speed = pf_msg->event_data.link_event.link_speed; ++ vf->link_speed = iavf_convert_link_speed(speed); ++ } + iavf_dev_link_update(dev, 0); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, + NULL); +@@ -210,12 +249,9 @@ iavf_handle_virtchnl_msg(struct rte_eth_dev *dev) + info.msg_len); + } else { + /* read message and it's expected one */ +- if (msg_opc == vf->pend_cmd) { +- vf->cmd_retval = msg_ret; +- /* prevent compiler reordering */ +- rte_compiler_barrier(); +- _clear_cmd(vf); +- } else ++ if (msg_opc == vf->pend_cmd) ++ _notify_cmd(vf, msg_ret); ++ else + PMD_DRV_LOG(ERR, "command mismatch," + "expect %u, get %u", + vf->pend_cmd, msg_opc); +diff --git a/dpdk/drivers/net/ice/base/ice_adminq_cmd.h b/dpdk/drivers/net/ice/base/ice_adminq_cmd.h +index e6a1350baa..d196434f4d 100644 +--- a/dpdk/drivers/net/ice/base/ice_adminq_cmd.h ++++ b/dpdk/drivers/net/ice/base/ice_adminq_cmd.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_ADMINQ_CMD_H_ +@@ -158,13 +158,11 @@ struct ice_aqc_manage_mac_write { + #define ICE_AQC_MAN_MAC_WR_MC_MAG_EN BIT(0) + #define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP BIT(1) + #define ICE_AQC_MAN_MAC_WR_S 6 +-#define ICE_AQC_MAN_MAC_WR_M (3 << ICE_AQC_MAN_MAC_WR_S) ++#define ICE_AQC_MAN_MAC_WR_M MAKEMASK(3, ICE_AQC_MAN_MAC_WR_S) + #define ICE_AQC_MAN_MAC_UPDATE_LAA 0 +-#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL (BIT(0) << ICE_AQC_MAN_MAC_WR_S) +- /* High 16 bits of MAC address in big endian order */ +- __be16 sah; +- /* Low 32 bits of MAC address in big endian order */ +- __be32 sal; ++#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL BIT(ICE_AQC_MAN_MAC_WR_S) ++ /* byte stream in network order */ ++ u8 mac_addr[ETH_ALEN]; + __le32 addr_high; + __le32 addr_low; + }; +diff --git a/dpdk/drivers/net/ice/base/ice_alloc.h b/dpdk/drivers/net/ice/base/ice_alloc.h +index cf823a2c2a..cfe9199403 100644 +--- a/dpdk/drivers/net/ice/base/ice_alloc.h ++++ b/dpdk/drivers/net/ice/base/ice_alloc.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_ALLOC_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_bitops.h b/dpdk/drivers/net/ice/base/ice_bitops.h +index 32f64cac0c..0344437c9e 100644 +--- a/dpdk/drivers/net/ice/base/ice_bitops.h ++++ b/dpdk/drivers/net/ice/base/ice_bitops.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_BITOPS_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_common.c b/dpdk/drivers/net/ice/base/ice_common.c +index 4ba3ab2028..77f68561e3 100644 +--- a/dpdk/drivers/net/ice/base/ice_common.c ++++ b/dpdk/drivers/net/ice/base/ice_common.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ice_common.h" +@@ -9,7 +9,7 @@ + #include "ice_flow.h" + #include "ice_switch.h" + +-#define ICE_PF_RESET_WAIT_COUNT 
200 ++#define ICE_PF_RESET_WAIT_COUNT 300 + + /** + * ice_set_mac_type - Sets MAC type +@@ -674,6 +674,7 @@ enum ice_status ice_init_hw(struct ice_hw *hw) + "Failed to get scheduler allocated resources\n"); + goto err_unroll_alloc; + } ++ ice_sched_get_psm_clk_freq(hw); + + /* Initialize port_info struct with scheduler data */ + status = ice_sched_init_port(hw->port_info); +@@ -2120,10 +2121,7 @@ ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags, + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_write); + + cmd->flags = flags; +- +- /* Prep values for flags, sah, sal */ +- cmd->sah = HTONS(*((const u16 *)mac_addr)); +- cmd->sal = HTONL(*((const u32 *)(mac_addr + 2))); ++ ice_memcpy(cmd->mac_addr, mac_addr, ETH_ALEN, ICE_NONDMA_TO_DMA); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + } +diff --git a/dpdk/drivers/net/ice/base/ice_common.h b/dpdk/drivers/net/ice/base/ice_common.h +index c73184499f..63b733d74f 100644 +--- a/dpdk/drivers/net/ice/base/ice_common.h ++++ b/dpdk/drivers/net/ice/base/ice_common.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_COMMON_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_controlq.c b/dpdk/drivers/net/ice/base/ice_controlq.c +index 8a65fae40e..feffb51987 100644 +--- a/dpdk/drivers/net/ice/base/ice_controlq.c ++++ b/dpdk/drivers/net/ice/base/ice_controlq.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ice_common.h" +diff --git a/dpdk/drivers/net/ice/base/ice_controlq.h b/dpdk/drivers/net/ice/base/ice_controlq.h +index 8b60465474..423d171141 100644 +--- a/dpdk/drivers/net/ice/base/ice_controlq.h ++++ b/dpdk/drivers/net/ice/base/ice_controlq.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_CONTROLQ_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_dcb.c b/dpdk/drivers/net/ice/base/ice_dcb.c +index 7048dbd02f..8918188193 100644 +--- a/dpdk/drivers/net/ice/base/ice_dcb.c ++++ b/dpdk/drivers/net/ice/base/ice_dcb.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ice_common.h" +diff --git a/dpdk/drivers/net/ice/base/ice_dcb.h b/dpdk/drivers/net/ice/base/ice_dcb.h +index 9a0968f5b8..3ffeb864cc 100644 +--- a/dpdk/drivers/net/ice/base/ice_dcb.h ++++ b/dpdk/drivers/net/ice/base/ice_dcb.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_DCB_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_devids.h b/dpdk/drivers/net/ice/base/ice_devids.h +index 348d1907a4..8f778d918c 100644 +--- a/dpdk/drivers/net/ice/base/ice_devids.h ++++ b/dpdk/drivers/net/ice/base/ice_devids.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_DEVIDS_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_fdir.c b/dpdk/drivers/net/ice/base/ice_fdir.c +index 37b3881696..033f6ccb4e 100644 +--- a/dpdk/drivers/net/ice/base/ice_fdir.c ++++ b/dpdk/drivers/net/ice/base/ice_fdir.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ice_common.h" +@@ 
-430,6 +430,10 @@ ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input, + if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT) { + fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_YES; + fdir_fltr_ctx.qindex = 0; ++ } else if (input->dest_ctl == ++ ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER) { ++ fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO; ++ fdir_fltr_ctx.qindex = 0; + } else { + if (input->dest_ctl == + ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP) +@@ -441,13 +445,16 @@ ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input, + fdir_fltr_ctx.cnt_index = input->cnt_index; + fdir_fltr_ctx.fd_vsi = ice_get_hw_vsi_num(hw, input->dest_vsi); + fdir_fltr_ctx.evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE; +- fdir_fltr_ctx.toq_prio = 3; ++ if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER) ++ fdir_fltr_ctx.toq_prio = 0; ++ else ++ fdir_fltr_ctx.toq_prio = 3; + fdir_fltr_ctx.pcmd = (add) ? ICE_FXD_FLTR_QW1_PCMD_ADD : + ICE_FXD_FLTR_QW1_PCMD_REMOVE; + fdir_fltr_ctx.swap = ICE_FXD_FLTR_QW1_SWAP_NOT_SET; + fdir_fltr_ctx.comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO; + fdir_fltr_ctx.comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW; +- fdir_fltr_ctx.fdid_prio = 3; ++ fdir_fltr_ctx.fdid_prio = input->fdid_prio; + fdir_fltr_ctx.desc_prof = 1; + fdir_fltr_ctx.desc_prof_prio = 3; + ice_set_fd_desc_val(&fdir_fltr_ctx, fdesc); +@@ -786,6 +793,10 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP: + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP: + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER: ++ ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET, ++ input->ip.v4.src_ip); ++ ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET, ++ input->ip.v4.dst_ip); + ice_pkt_insert_u32(loc, ICE_IPV4_GTPU_TEID_OFFSET, + input->gtpu_data.teid); + ice_pkt_insert_u6_qfi(loc, ICE_IPV4_GTPU_QFI_OFFSET, +diff --git a/dpdk/drivers/net/ice/base/ice_fdir.h b/dpdk/drivers/net/ice/base/ice_fdir.h +index db1f8351f9..2cb7291eb1 100644 +--- a/dpdk/drivers/net/ice/base/ice_fdir.h ++++ b/dpdk/drivers/net/ice/base/ice_fdir.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_FDIR_H_ +@@ -151,9 +151,9 @@ struct ice_fdir_v6 { + struct ice_fdir_udp_gtp { + u8 flags; + u8 msg_type; +- u16 rsrvd_len; +- u32 teid; +- u16 rsrvd_seq_nbr; ++ __be16 rsrvd_len; ++ __be32 teid; ++ __be16 rsrvd_seq_nbr; + u8 rsrvd_n_pdu_nbr; + u8 rsrvd_next_ext_type; + u8 rsvrd_ext_len; +@@ -202,6 +202,7 @@ struct ice_fdir_fltr { + u8 cnt_ena; + u8 fltr_status; + u16 cnt_index; ++ u8 fdid_prio; + u32 fltr_id; + }; + +diff --git a/dpdk/drivers/net/ice/base/ice_flex_pipe.c b/dpdk/drivers/net/ice/base/ice_flex_pipe.c +index e8d4bbee40..47adfd07a3 100644 +--- a/dpdk/drivers/net/ice/base/ice_flex_pipe.c ++++ b/dpdk/drivers/net/ice/base/ice_flex_pipe.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ice_common.h" +@@ -610,7 +610,7 @@ ice_gen_key_word(u8 val, u8 valid, u8 dont_care, u8 nvr_mtch, u8 *key, + static bool ice_bits_max_set(const u8 *mask, u16 size, u16 max) + { + u16 count = 0; +- u16 i, j; ++ u16 i; + + /* check each byte */ + for (i = 0; i < size; i++) { +@@ -626,11 +626,9 @@ static bool ice_bits_max_set(const u8 *mask, u16 size, u16 max) + return false; + + /* count the bits in this byte, checking threshold */ +- for (j = 0; j < BITS_PER_BYTE; j++) { +- count += 
(mask[i] & (0x1 << j)) ? 1 : 0; +- if (count > max) +- return false; +- } ++ count += ice_hweight8(mask[i]); ++ if (count > max) ++ return false; + } + + return true; +@@ -914,9 +912,8 @@ ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count) + return status; + + for (i = 0; i < count; i++) { +- bool last = ((i + 1) == count); +- + struct ice_buf_hdr *bh = (struct ice_buf_hdr *)(bufs + i); ++ bool last = ((i + 1) == count); + + status = ice_aq_update_pkg(hw, bh, LE16_TO_CPU(bh->data_end), + last, &offset, &info, NULL); +@@ -1566,7 +1563,7 @@ ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type type, + * allocated for every list entry. + */ + enum ice_status +-ice_get_sw_fv_list(struct ice_hw *hw, u16 *prot_ids, u8 ids_cnt, ++ice_get_sw_fv_list(struct ice_hw *hw, u16 *prot_ids, u16 ids_cnt, + ice_bitmap_t *bm, struct LIST_HEAD_TYPE *fv_list) + { + struct ice_sw_fv_list_entry *fvl; +@@ -1583,7 +1580,7 @@ ice_get_sw_fv_list(struct ice_hw *hw, u16 *prot_ids, u8 ids_cnt, + + ice_seg = hw->seg; + do { +- u8 i; ++ u16 i; + + fv = (struct ice_fv *) + ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW, +@@ -1807,7 +1804,7 @@ static u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld) + } + + /** +- * ice_pkg_buf_header ++ * ice_pkg_buf + * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) + * + * Return a pointer to the buffer's header +@@ -1916,9 +1913,11 @@ ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, + * ice_create_tunnel + * @hw: pointer to the HW structure + * @type: type of tunnel +- * @port: port to use for vxlan tunnel ++ * @port: port of tunnel to create + * +- * Creates a tunnel ++ * Create a tunnel by updating the parse graph in the parser. We do that by ++ * creating a package buffer with the tunnel info and issuing an update package ++ * command. 
+ */ + enum ice_status + ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port) +@@ -3832,6 +3831,7 @@ ice_vsig_get_ref(struct ice_hw *hw, enum ice_block blk, u16 vsig, u16 *refs) + { + u16 idx = vsig & ICE_VSIG_IDX_M; + struct ice_vsig_vsi *ptr; ++ + *refs = 0; + + if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use) +@@ -4038,12 +4038,12 @@ ice_upd_prof_hw(struct ice_hw *hw, enum ice_block blk, + struct ice_buf_build *b; + struct ice_chs_chg *tmp; + enum ice_status status; +- u16 pkg_sects = 0; +- u16 sects = 0; ++ u16 pkg_sects; + u16 xlt1 = 0; + u16 xlt2 = 0; + u16 tcam = 0; + u16 es = 0; ++ u16 sects; + + /* count number of sections we need */ + LIST_FOR_EACH_ENTRY(tmp, chgs, ice_chs_chg, list_entry) { +@@ -4142,8 +4142,6 @@ static void ice_update_fd_mask(struct ice_hw *hw, u16 prof_id, u32 mask_sel) + GLQF_FDMASK_SEL(prof_id), mask_sel); + } + +-#define ICE_SRC_DST_MAX_COUNT 8 +- + struct ice_fd_src_dst_pair { + u8 prot_id; + u8 count; +@@ -4702,9 +4700,7 @@ ice_rem_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, + } while (vsi_cur); + } + +- status = ice_vsig_free(hw, blk, vsig); +- +- return status; ++ return ice_vsig_free(hw, blk, vsig); + } + + /** +@@ -4922,8 +4918,8 @@ static enum ice_status + ice_add_prof_to_lst(struct ice_hw *hw, enum ice_block blk, + struct LIST_HEAD_TYPE *lst, u64 hdl) + { +- struct ice_vsig_prof *p; + struct ice_prof_map *map; ++ struct ice_vsig_prof *p; + u16 i; + + map = ice_search_prof_id(hw, blk, hdl); +@@ -5200,7 +5196,7 @@ ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl, + /* new VSIG profile structure */ + t = (struct ice_vsig_prof *)ice_malloc(hw, sizeof(*t)); + if (!t) +- goto err_ice_add_prof_id_vsig; ++ return ICE_ERR_NO_MEMORY; + + t->profile_cookie = map->profile_cookie; + t->prof_id = map->prof_id; +@@ -5319,7 +5315,7 @@ ice_create_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl, + } + + /** +- * ice_create_vsig_from_list - create a new VSIG with a list of profiles ++ * ice_create_vsig_from_lst - create a new VSIG with a list of profiles + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsi: the initial VSI that will be in VSIG +@@ -5445,13 +5441,11 @@ ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl) + struct ice_vsig_prof *tmp1, *del1; + struct LIST_HEAD_TYPE union_lst; + struct ice_chs_chg *tmp, *del; +- struct LIST_HEAD_TYPE chrs; + struct LIST_HEAD_TYPE chg; + enum ice_status status; +- u16 vsig, or_vsig = 0; ++ u16 vsig; + + INIT_LIST_HEAD(&union_lst); +- INIT_LIST_HEAD(&chrs); + INIT_LIST_HEAD(&chg); + + /* Get profile */ +@@ -5463,6 +5457,7 @@ ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl) + status = ice_vsig_find_vsi(hw, blk, vsi, &vsig); + if (!status && vsig) { + bool only_vsi; ++ u16 or_vsig; + u16 ref; + + /* found in vsig */ +@@ -5572,11 +5567,6 @@ ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl) + ice_free(hw, del1); + } + +- LIST_FOR_EACH_ENTRY_SAFE(del1, tmp1, &chrs, ice_vsig_prof, list) { +- LIST_DEL(&del1->list); +- ice_free(hw, del1); +- } +- + return status; + } + +diff --git a/dpdk/drivers/net/ice/base/ice_flex_pipe.h b/dpdk/drivers/net/ice/base/ice_flex_pipe.h +index ee606af15a..10f328b652 100644 +--- a/dpdk/drivers/net/ice/base/ice_flex_pipe.h ++++ b/dpdk/drivers/net/ice/base/ice_flex_pipe.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef 
_ICE_FLEX_PIPE_H_ +@@ -36,7 +36,7 @@ ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type type, + void + ice_init_prof_result_bm(struct ice_hw *hw); + enum ice_status +-ice_get_sw_fv_list(struct ice_hw *hw, u16 *prot_ids, u8 ids_cnt, ++ice_get_sw_fv_list(struct ice_hw *hw, u16 *prot_ids, u16 ids_cnt, + ice_bitmap_t *bm, struct LIST_HEAD_TYPE *fv_list); + bool + ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, +diff --git a/dpdk/drivers/net/ice/base/ice_flex_type.h b/dpdk/drivers/net/ice/base/ice_flex_type.h +index 1be98ea521..7708ced40a 100644 +--- a/dpdk/drivers/net/ice/base/ice_flex_type.h ++++ b/dpdk/drivers/net/ice/base/ice_flex_type.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_FLEX_TYPE_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_flow.c b/dpdk/drivers/net/ice/base/ice_flow.c +index 391df1b540..9d58d284f3 100644 +--- a/dpdk/drivers/net/ice/base/ice_flow.c ++++ b/dpdk/drivers/net/ice/base/ice_flow.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ice_common.h" +@@ -548,11 +548,41 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) + (const ice_bitmap_t *)ice_ptypes_ipv4_il; + ice_and_bitmap(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); ++ if (hdrs & ICE_FLOW_SEG_HDR_UDP) { ++ src = (const ice_bitmap_t *)ice_ptypes_udp_il; ++ ice_and_bitmap(params->ptypes, ++ params->ptypes, src, ++ ICE_FLOW_PTYPE_MAX); ++ } else if (hdrs & ICE_FLOW_SEG_HDR_TCP) { ++ ice_and_bitmap(params->ptypes, params->ptypes, ++ (const ice_bitmap_t *) ++ ice_ptypes_tcp_il, ++ ICE_FLOW_PTYPE_MAX); ++ } else if (hdrs & ICE_FLOW_SEG_HDR_SCTP) { ++ src = (const ice_bitmap_t *)ice_ptypes_sctp_il; ++ ice_and_bitmap(params->ptypes, params->ptypes, ++ src, ICE_FLOW_PTYPE_MAX); ++ } + } else if (hdrs & ICE_FLOW_SEG_HDR_IPV6) { + src = !i ? 
(const ice_bitmap_t *)ice_ptypes_ipv6_ofos : + (const ice_bitmap_t *)ice_ptypes_ipv6_il; + ice_and_bitmap(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); ++ if (hdrs & ICE_FLOW_SEG_HDR_UDP) { ++ src = (const ice_bitmap_t *)ice_ptypes_udp_il; ++ ice_and_bitmap(params->ptypes, ++ params->ptypes, src, ++ ICE_FLOW_PTYPE_MAX); ++ } else if (hdrs & ICE_FLOW_SEG_HDR_TCP) { ++ ice_and_bitmap(params->ptypes, params->ptypes, ++ (const ice_bitmap_t *) ++ ice_ptypes_tcp_il, ++ ICE_FLOW_PTYPE_MAX); ++ } else if (hdrs & ICE_FLOW_SEG_HDR_SCTP) { ++ src = (const ice_bitmap_t *)ice_ptypes_sctp_il; ++ ice_and_bitmap(params->ptypes, params->ptypes, ++ src, ICE_FLOW_PTYPE_MAX); ++ } + } + + if (hdrs & ICE_FLOW_SEG_HDR_ICMP) { +@@ -560,18 +590,6 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) + (const ice_bitmap_t *)ice_ptypes_icmp_il; + ice_and_bitmap(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); +- } else if (hdrs & ICE_FLOW_SEG_HDR_UDP) { +- src = (const ice_bitmap_t *)ice_ptypes_udp_il; +- ice_and_bitmap(params->ptypes, params->ptypes, src, +- ICE_FLOW_PTYPE_MAX); +- } else if (hdrs & ICE_FLOW_SEG_HDR_TCP) { +- ice_and_bitmap(params->ptypes, params->ptypes, +- (const ice_bitmap_t *)ice_ptypes_tcp_il, +- ICE_FLOW_PTYPE_MAX); +- } else if (hdrs & ICE_FLOW_SEG_HDR_SCTP) { +- src = (const ice_bitmap_t *)ice_ptypes_sctp_il; +- ice_and_bitmap(params->ptypes, params->ptypes, src, +- ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_GRE) { + if (!i) { + src = (const ice_bitmap_t *)ice_ptypes_gre_of; +@@ -586,10 +604,6 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) + src = (const ice_bitmap_t *)ice_ptypes_gtpc_tid; + ice_and_bitmap(params->ptypes, params->ptypes, + src, ICE_FLOW_PTYPE_MAX); +- } else if (hdrs & ICE_FLOW_SEG_HDR_GTPU) { +- src = (const ice_bitmap_t *)ice_ptypes_gtpu; +- ice_and_bitmap(params->ptypes, params->ptypes, +- src, ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_EH) { + src = (const ice_bitmap_t *)ice_ptypes_gtpu; + ice_and_bitmap(params->ptypes, params->ptypes, +@@ -598,6 +612,10 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) + /* Attributes for GTP packet with Extension Header */ + params->attr = ice_attr_gtpu_eh; + params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_eh); ++ } else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_IP) { ++ src = (const ice_bitmap_t *)ice_ptypes_gtpu; ++ ice_and_bitmap(params->ptypes, params->ptypes, ++ src, ICE_FLOW_PTYPE_MAX); + } + } + +@@ -1162,7 +1180,7 @@ ice_flow_add_prof_sync(struct ice_hw *hw, enum ice_block blk, + struct ice_flow_prof **prof) + { + struct ice_flow_prof_params params; +- enum ice_status status = ICE_SUCCESS; ++ enum ice_status status; + u8 i; + + if (!prof || (acts_cnt && !acts)) +@@ -1835,14 +1853,11 @@ void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle) + ice_acquire_lock(&hw->rss_locks); + LIST_FOR_EACH_ENTRY_SAFE(r, tmp, &hw->rss_list_head, + ice_rss_cfg, l_entry) { +- if (ice_is_bit_set(r->vsis, vsi_handle)) { +- ice_clear_bit(vsi_handle, r->vsis); +- ++ if (ice_test_and_clear_bit(vsi_handle, r->vsis)) + if (!ice_is_any_bit_set(r->vsis, ICE_MAX_VSI)) { + LIST_DEL(&r->l_entry); + ice_free(hw, r); + } +- } + } + ice_release_lock(&hw->rss_locks); + } +diff --git a/dpdk/drivers/net/ice/base/ice_flow.h b/dpdk/drivers/net/ice/base/ice_flow.h +index 4686274af8..9b314a1eaa 100644 +--- a/dpdk/drivers/net/ice/base/ice_flow.h ++++ b/dpdk/drivers/net/ice/base/ice_flow.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ 
* Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_FLOW_H_ +@@ -118,6 +118,7 @@ enum ice_flow_seg_hdr { + * ICE_FLOW_SEG_HDR_GTPU_UP 1 1 + */ + #define ICE_FLOW_SEG_HDR_GTPU (ICE_FLOW_SEG_HDR_GTPU_IP | \ ++ ICE_FLOW_SEG_HDR_GTPU_EH | \ + ICE_FLOW_SEG_HDR_GTPU_DWN | \ + ICE_FLOW_SEG_HDR_GTPU_UP) + +@@ -187,6 +188,7 @@ enum ice_flow_avf_hdr_field { + ICE_AVF_FLOW_FIELD_IPV4_SCTP, + ICE_AVF_FLOW_FIELD_IPV4_OTHER, + ICE_AVF_FLOW_FIELD_FRAG_IPV4, ++ /* Values 37-38 are reserved */ + ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP = 39, + ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP, + ICE_AVF_FLOW_FIELD_IPV6_UDP, +diff --git a/dpdk/drivers/net/ice/base/ice_hw_autogen.h b/dpdk/drivers/net/ice/base/ice_hw_autogen.h +index 92d432044d..1c9c84dfb4 100644 +--- a/dpdk/drivers/net/ice/base/ice_hw_autogen.h ++++ b/dpdk/drivers/net/ice/base/ice_hw_autogen.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + /* Machine-generated file; do not edit */ +diff --git a/dpdk/drivers/net/ice/base/ice_lan_tx_rx.h b/dpdk/drivers/net/ice/base/ice_lan_tx_rx.h +index a97c63cc97..51e30848f8 100644 +--- a/dpdk/drivers/net/ice/base/ice_lan_tx_rx.h ++++ b/dpdk/drivers/net/ice/base/ice_lan_tx_rx.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_LAN_TX_RX_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_nvm.c b/dpdk/drivers/net/ice/base/ice_nvm.c +index 1dbfc2dcc7..434258b14c 100644 +--- a/dpdk/drivers/net/ice/base/ice_nvm.c ++++ b/dpdk/drivers/net/ice/base/ice_nvm.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ice_common.h" +diff --git a/dpdk/drivers/net/ice/base/ice_nvm.h b/dpdk/drivers/net/ice/base/ice_nvm.h +index d5b7b2d196..8624d19b19 100644 +--- a/dpdk/drivers/net/ice/base/ice_nvm.h ++++ b/dpdk/drivers/net/ice/base/ice_nvm.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_NVM_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_osdep.h b/dpdk/drivers/net/ice/base/ice_osdep.h +index 27c1830c5e..32ac8ac397 100644 +--- a/dpdk/drivers/net/ice/base/ice_osdep.h ++++ b/dpdk/drivers/net/ice/base/ice_osdep.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018 Intel Corporation ++ * Copyright(c) 2018-2020 Intel Corporation + */ + + #ifndef _ICE_OSDEP_H_ +@@ -24,6 +24,8 @@ + #include <rte_random.h> + #include <rte_io.h> + ++#include "ice_alloc.h" ++ + #include "../ice_logs.h" + + #define INLINE inline +@@ -176,7 +178,6 @@ struct ice_virt_mem { + + #define ice_memset(a, b, c, d) memset((a), (b), (c)) + #define ice_memcpy(a, b, c, d) rte_memcpy((a), (b), (c)) +-#define ice_memdup(a, b, c, d) rte_memcpy(ice_malloc(a, c), b, c) + + #define CPU_TO_BE16(o) rte_cpu_to_be_16(o) + #define CPU_TO_BE32(o) rte_cpu_to_be_32(o) +@@ -223,6 +224,19 @@ ice_destroy_lock(__attribute__((unused)) struct ice_lock *sp) + + struct ice_hw; + ++static __rte_always_inline void * ++ice_memdup(__rte_unused struct ice_hw *hw, const void *src, size_t size, ++ __rte_unused enum ice_memcpy_type dir) ++{ ++ void *p; ++ ++ p = ice_malloc(hw, size); ++ if (p) ++ rte_memcpy(p, src, size); ++ ++ return p; ++} ++ + static inline void * + ice_alloc_dma_mem(__attribute__((unused)) struct ice_hw *hw, + struct ice_dma_mem *mem, u64 size) +@@ 
-343,6 +357,21 @@ static inline void list_add_tail(struct ice_list_entry *entry, + member) : \ + 0) + ++#define LIST_FOR_EACH_ENTRY_SAFE(pos, tmp, head, type, member) \ ++ for ((pos) = (head)->lh_first ? \ ++ container_of((head)->lh_first, struct type, member) : \ ++ 0, \ ++ (tmp) = (pos) == 0 ? 0 : ((pos)->member.next.le_next ? \ ++ container_of((pos)->member.next.le_next, struct type, \ ++ member) : \ ++ 0); \ ++ (pos); \ ++ (pos) = (tmp), \ ++ (tmp) = (pos) == 0 ? 0 : ((tmp)->member.next.le_next ? \ ++ container_of((pos)->member.next.le_next, struct type, \ ++ member) : \ ++ 0)) ++ + #define LIST_REPLACE_INIT(list_head, head) do { \ + (head)->lh_first = (list_head)->lh_first; \ + INIT_LIST_HEAD(list_head); \ +@@ -356,8 +385,6 @@ static inline void list_add_tail(struct ice_list_entry *entry, + #define HLIST_DEL(entry) LIST_DEL(entry) + #define HLIST_FOR_EACH_ENTRY(pos, head, type, member) \ + LIST_FOR_EACH_ENTRY(pos, head, type, member) +-#define LIST_FOR_EACH_ENTRY_SAFE(pos, tmp, head, type, member) \ +- LIST_FOR_EACH_ENTRY(pos, head, type, member) + + #ifndef ICE_DBG_TRACE + #define ICE_DBG_TRACE BIT_ULL(0) +diff --git a/dpdk/drivers/net/ice/base/ice_protocol_type.h b/dpdk/drivers/net/ice/base/ice_protocol_type.h +index fdcbb2cad3..cbc7c37016 100644 +--- a/dpdk/drivers/net/ice/base/ice_protocol_type.h ++++ b/dpdk/drivers/net/ice/base/ice_protocol_type.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_PROTOCOL_TYPE_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_sbq_cmd.h b/dpdk/drivers/net/ice/base/ice_sbq_cmd.h +index 70a019292a..22bfcebc3c 100644 +--- a/dpdk/drivers/net/ice/base/ice_sbq_cmd.h ++++ b/dpdk/drivers/net/ice/base/ice_sbq_cmd.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_SBQ_CMD_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_sched.c b/dpdk/drivers/net/ice/base/ice_sched.c +index 553fc28ff3..0d2c64e33d 100644 +--- a/dpdk/drivers/net/ice/base/ice_sched.c ++++ b/dpdk/drivers/net/ice/base/ice_sched.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ice_sched.h" +@@ -1368,6 +1368,46 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw) + return status; + } + ++/** ++ * ice_sched_get_psm_clk_freq - determine the PSM clock frequency ++ * @hw: pointer to the HW struct ++ * ++ * Determine the PSM clock frequency and store in HW struct ++ */ ++void ice_sched_get_psm_clk_freq(struct ice_hw *hw) ++{ ++ u32 val, clk_src; ++ ++ val = rd32(hw, GLGEN_CLKSTAT_SRC); ++ clk_src = (val & GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M) >> ++ GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S; ++ ++#define PSM_CLK_SRC_367_MHZ 0x0 ++#define PSM_CLK_SRC_416_MHZ 0x1 ++#define PSM_CLK_SRC_446_MHZ 0x2 ++#define PSM_CLK_SRC_390_MHZ 0x3 ++ ++ switch (clk_src) { ++ case PSM_CLK_SRC_367_MHZ: ++ hw->psm_clk_freq = ICE_PSM_CLK_367MHZ_IN_HZ; ++ break; ++ case PSM_CLK_SRC_416_MHZ: ++ hw->psm_clk_freq = ICE_PSM_CLK_416MHZ_IN_HZ; ++ break; ++ case PSM_CLK_SRC_446_MHZ: ++ hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ; ++ break; ++ case PSM_CLK_SRC_390_MHZ: ++ hw->psm_clk_freq = ICE_PSM_CLK_390MHZ_IN_HZ; ++ break; ++ default: ++ ice_debug(hw, ICE_DBG_SCHED, "PSM clk_src unexpected %u\n", ++ clk_src); ++ /* fall back to a safe default */ ++ hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ; ++ } ++} ++ + /** + * 
ice_sched_find_node_in_subtree - Find node in part of base node subtree + * @hw: pointer to the HW struct +@@ -2867,7 +2907,7 @@ ice_sched_update_elem(struct ice_hw *hw, struct ice_sched_node *node, + */ + static enum ice_status + ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node, +- enum ice_rl_type rl_type, u8 bw_alloc) ++ enum ice_rl_type rl_type, u16 bw_alloc) + { + struct ice_aqc_txsched_elem_data buf; + struct ice_aqc_txsched_elem *data; +@@ -3671,11 +3711,12 @@ ice_cfg_agg_bw_alloc(struct ice_port_info *pi, u32 agg_id, u8 ena_tcmap, + + /** + * ice_sched_calc_wakeup - calculate RL profile wakeup parameter ++ * @hw: pointer to the HW struct + * @bw: bandwidth in Kbps + * + * This function calculates the wakeup parameter of RL profile. + */ +-static u16 ice_sched_calc_wakeup(s32 bw) ++static u16 ice_sched_calc_wakeup(struct ice_hw *hw, s32 bw) + { + s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f; + s32 wakeup_f_int; +@@ -3683,7 +3724,7 @@ static u16 ice_sched_calc_wakeup(s32 bw) + + /* Get the wakeup integer value */ + bytes_per_sec = DIV_64BIT(((s64)bw * 1000), BITS_PER_BYTE); +- wakeup_int = DIV_64BIT(ICE_RL_PROF_FREQUENCY, bytes_per_sec); ++ wakeup_int = DIV_64BIT(hw->psm_clk_freq, bytes_per_sec); + if (wakeup_int > 63) { + wakeup = (u16)((1 << 15) | wakeup_int); + } else { +@@ -3692,7 +3733,7 @@ static u16 ice_sched_calc_wakeup(s32 bw) + */ + wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int; + wakeup_a = DIV_64BIT((s64)ICE_RL_PROF_MULTIPLIER * +- ICE_RL_PROF_FREQUENCY, bytes_per_sec); ++ hw->psm_clk_freq, bytes_per_sec); + + /* Get Fraction value */ + wakeup_f = wakeup_a - wakeup_b; +@@ -3712,13 +3753,15 @@ static u16 ice_sched_calc_wakeup(s32 bw) + + /** + * ice_sched_bw_to_rl_profile - convert BW to profile parameters ++ * @hw: pointer to the HW struct + * @bw: bandwidth in Kbps + * @profile: profile parameters to return + * + * This function converts the BW to profile structure format. 
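The ice_sched changes above stop hard-coding a 446 MHz profile clock: ice_sched_get_psm_clk_freq() reads the PSM clock source at init and the rate-limiter math then uses hw->psm_clk_freq. Below is a minimal sketch of that mapping and of the wakeup integer term, using only the frequencies quoted in the patch; psm_clk_hz() and wakeup_int() are names invented for this illustration.

#include <stdint.h>
#include <stdio.h>

/* Map the PSM clock source code to a frequency in Hz, using the values
 * quoted in the patch. */
static uint32_t psm_clk_hz(uint32_t clk_src)
{
    switch (clk_src) {
    case 0x0: return 367647059U;   /* ~367 MHz */
    case 0x1: return 416666667U;   /* ~416 MHz */
    case 0x2: return 446428571U;   /* ~446 MHz */
    case 0x3: return 390625000U;   /* ~390 MHz */
    default:  return 446428571U;   /* unexpected code: fall back, as the patch does */
    }
}

/* Integer part of the RL profile wakeup parameter: PSM clock ticks per byte
 * at the requested rate.  bw_kbps must be non-zero. */
static int64_t wakeup_int(uint32_t clk_src, int64_t bw_kbps)
{
    int64_t bytes_per_sec = bw_kbps * 1000 / 8;

    return psm_clk_hz(clk_src) / bytes_per_sec;
}

int main(void)
{
    /* 100 Mbps (100000 Kbps) on the 446 MHz clock. */
    printf("wakeup_int = %lld\n", (long long)wakeup_int(0x2, 100000));
    return 0;
}

Run as written this prints 35, i.e. roughly 35 PSM clock ticks elapse per byte at 100 Mbps on the 446 MHz clock.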
+ */ + static enum ice_status +-ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile) ++ice_sched_bw_to_rl_profile(struct ice_hw *hw, u32 bw, ++ struct ice_aqc_rl_profile_elem *profile) + { + enum ice_status status = ICE_ERR_PARAM; + s64 bytes_per_sec, ts_rate, mv_tmp; +@@ -3738,7 +3781,7 @@ ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile) + for (i = 0; i < 64; i++) { + u64 pow_result = BIT_ULL(i); + +- ts_rate = DIV_64BIT((s64)ICE_RL_PROF_FREQUENCY, ++ ts_rate = DIV_64BIT((s64)hw->psm_clk_freq, + pow_result * ICE_RL_PROF_TS_MULTIPLIER); + if (ts_rate <= 0) + continue; +@@ -3762,7 +3805,7 @@ ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile) + if (found) { + u16 wm; + +- wm = ice_sched_calc_wakeup(bw); ++ wm = ice_sched_calc_wakeup(hw, bw); + profile->rl_multiply = CPU_TO_LE16(mv); + profile->wake_up_calc = CPU_TO_LE16(wm); + profile->rl_encode = CPU_TO_LE16(encode); +@@ -3831,7 +3874,7 @@ ice_sched_add_rl_profile(struct ice_port_info *pi, + if (!rl_prof_elem) + return NULL; + +- status = ice_sched_bw_to_rl_profile(bw, &rl_prof_elem->profile); ++ status = ice_sched_bw_to_rl_profile(hw, bw, &rl_prof_elem->profile); + if (status != ICE_SUCCESS) + goto exit_add_rl_prof; + +diff --git a/dpdk/drivers/net/ice/base/ice_sched.h b/dpdk/drivers/net/ice/base/ice_sched.h +index d6b467477c..57bf4b59d6 100644 +--- a/dpdk/drivers/net/ice/base/ice_sched.h ++++ b/dpdk/drivers/net/ice/base/ice_sched.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_SCHED_H_ +@@ -25,12 +25,16 @@ + ((BIT(11) - 1) * 64) /* In Bytes */ + #define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY ICE_MAX_BURST_SIZE_ALLOWED + +-#define ICE_RL_PROF_FREQUENCY 446000000 + #define ICE_RL_PROF_ACCURACY_BYTES 128 + #define ICE_RL_PROF_MULTIPLIER 10000 + #define ICE_RL_PROF_TS_MULTIPLIER 32 + #define ICE_RL_PROF_FRACTION 512 + ++#define ICE_PSM_CLK_367MHZ_IN_HZ 367647059 ++#define ICE_PSM_CLK_416MHZ_IN_HZ 416666667 ++#define ICE_PSM_CLK_446MHZ_IN_HZ 446428571 ++#define ICE_PSM_CLK_390MHZ_IN_HZ 390625000 ++ + struct rl_profile_params { + u32 bw; /* in Kbps */ + u16 rl_multiplier; +@@ -83,6 +87,7 @@ ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req, + u16 *elems_ret, struct ice_sq_cd *cd); + enum ice_status ice_sched_init_port(struct ice_port_info *pi); + enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw); ++void ice_sched_get_psm_clk_freq(struct ice_hw *hw); + + /* Functions to cleanup scheduler SW DB */ + void ice_sched_clear_port(struct ice_port_info *pi); +diff --git a/dpdk/drivers/net/ice/base/ice_status.h b/dpdk/drivers/net/ice/base/ice_status.h +index ac120fa300..ba28895a17 100644 +--- a/dpdk/drivers/net/ice/base/ice_status.h ++++ b/dpdk/drivers/net/ice/base/ice_status.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_STATUS_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_switch.c b/dpdk/drivers/net/ice/base/ice_switch.c +index afa4fe30d4..34d0a164ed 100644 +--- a/dpdk/drivers/net/ice/base/ice_switch.c ++++ b/dpdk/drivers/net/ice/base/ice_switch.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ice_switch.h" +@@ -611,7 +611,7 @@ ice_get_recp_frm_fw(struct ice_hw *hw, struct ice_sw_recipe *recps, u8 rid, + /* Complete initialization of the 
root recipe entry */ + lkup_exts->n_val_words = fv_word_idx; + recps[rid].big_recp = (num_recps > 1); +- recps[rid].n_grp_count = num_recps; ++ recps[rid].n_grp_count = (u8)num_recps; + recps[rid].root_buf = (struct ice_aqc_recipe_data_elem *) + ice_memdup(hw, tmp, recps[rid].n_grp_count * + sizeof(*recps[rid].root_buf), ICE_NONDMA_TO_NONDMA); +@@ -4748,7 +4748,7 @@ static bool ice_prot_type_to_id(enum ice_protocol_type type, u16 *id) + { + u16 i; + +- for (i = 0; ice_prot_id_tbl[i].type != ICE_PROTOCOL_LAST; i++) ++ for (i = 0; i < ARRAY_SIZE(ice_prot_id_tbl); i++) + if (ice_prot_id_tbl[i].type == type) { + *id = ice_prot_id_tbl[i].protocol_id; + return true; +@@ -5995,9 +5995,12 @@ ice_adv_add_update_vsi_list(struct ice_hw *hw, + if (status) + return status; + ++ ice_memset(&tmp_fltr, 0, sizeof(tmp_fltr), ICE_NONDMA_MEM); + tmp_fltr.fltr_rule_id = cur_fltr->fltr_rule_id; + tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST; + tmp_fltr.fwd_id.vsi_list_id = vsi_list_id; ++ tmp_fltr.lkup_type = ICE_SW_LKUP_LAST; ++ + /* Update the previous switch rule of "forward to VSI" to + * "fwd to VSI list" + */ +@@ -6238,23 +6241,8 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + sw->recp_list[rid].adv_rule = true; + rule_head = &sw->recp_list[rid].filt_rules; + +- if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI) { +- struct ice_fltr_info tmp_fltr; +- +- tmp_fltr.fltr_rule_id = +- LE16_TO_CPU(s_rule->pdata.lkup_tx_rx.index); +- tmp_fltr.fltr_act = ICE_FWD_TO_VSI; +- tmp_fltr.fwd_id.hw_vsi_id = +- ice_get_hw_vsi_num(hw, vsi_handle); +- tmp_fltr.vsi_handle = vsi_handle; +- /* Update the previous switch rule of "forward to VSI" to +- * "fwd to VSI list" +- */ +- status = ice_update_pkt_fwd_rule(hw, &tmp_fltr); +- if (status) +- goto err_ice_add_adv_rule; ++ if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI) + adv_fltr->vsi_count = 1; +- } + + /* Add rule entry to book keeping list */ + LIST_ADD(&adv_fltr->list_entry, rule_head); +@@ -6325,6 +6313,8 @@ ice_adv_rem_update_vsi_list(struct ice_hw *hw, u16 vsi_handle, + lkup_type); + if (status) + return status; ++ ++ ice_memset(&tmp_fltr, 0, sizeof(tmp_fltr), ICE_NONDMA_MEM); + tmp_fltr.fltr_rule_id = fm_list->rule_info.fltr_rule_id; + fm_list->rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI; + tmp_fltr.fltr_act = ICE_FWD_TO_VSI; +diff --git a/dpdk/drivers/net/ice/base/ice_switch.h b/dpdk/drivers/net/ice/base/ice_switch.h +index 61083738ab..5dc795148e 100644 +--- a/dpdk/drivers/net/ice/base/ice_switch.h ++++ b/dpdk/drivers/net/ice/base/ice_switch.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_SWITCH_H_ +diff --git a/dpdk/drivers/net/ice/base/ice_type.h b/dpdk/drivers/net/ice/base/ice_type.h +index a8e4229a19..bf3c5ffd0a 100644 +--- a/dpdk/drivers/net/ice/base/ice_type.h ++++ b/dpdk/drivers/net/ice/base/ice_type.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2019 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _ICE_TYPE_H_ +@@ -528,7 +528,7 @@ struct ice_sched_node { + #define ICE_TXSCHED_GET_EIR_BWALLOC(x) \ + LE16_TO_CPU((x)->info.eir_bw.bw_alloc) + +-struct ice_sched_rl_profle { ++struct ice_sched_rl_profile { + u32 rate; /* In Kbps */ + struct ice_aqc_rl_profile_elem info; + }; +@@ -745,6 +745,8 @@ struct ice_hw { + struct ice_sched_rl_profile **cir_profiles; + struct ice_sched_rl_profile **eir_profiles; + struct ice_sched_rl_profile **srl_profiles; ++ /* PSM clock frequency for calculating RL 
profile params */ ++ u32 psm_clk_freq; + u64 debug_mask; /* BITMAP for debug mask */ + enum ice_mac_type mac_type; + +diff --git a/dpdk/drivers/net/ice/base/meson.build b/dpdk/drivers/net/ice/base/meson.build +index eff155574d..46c4ffb500 100644 +--- a/dpdk/drivers/net/ice/base/meson.build ++++ b/dpdk/drivers/net/ice/base/meson.build +@@ -1,5 +1,5 @@ + # SPDX-License-Identifier: BSD-3-Clause +-# Copyright(c) 2018 Intel Corporation ++# Copyright(c) 2018-2020 Intel Corporation + + sources = [ + 'ice_controlq.c', +diff --git a/dpdk/drivers/net/ice/ice_ethdev.c b/dpdk/drivers/net/ice/ice_ethdev.c +index de189daba9..ca4371ae3b 100644 +--- a/dpdk/drivers/net/ice/ice_ethdev.c ++++ b/dpdk/drivers/net/ice/ice_ethdev.c +@@ -870,7 +870,7 @@ ice_add_mac_filter(struct ice_vsi *vsi, struct rte_ether_addr *mac_addr) + ret = -ENOMEM; + goto DONE; + } +- rte_memcpy(&f->mac_info.mac_addr, mac_addr, ETH_ADDR_LEN); ++ rte_ether_addr_copy(mac_addr, &f->mac_info.mac_addr); + TAILQ_INSERT_TAIL(&vsi->mac_list, f, next); + vsi->mac_num++; + +@@ -1573,7 +1573,7 @@ ice_setup_vsi(struct ice_pf *pf, enum ice_vsi_type type) + cfg = ICE_AQ_VSI_PROP_SECURITY_VALID | + ICE_AQ_VSI_PROP_FLOW_DIR_VALID; + vsi_ctx.info.valid_sections |= rte_cpu_to_le_16(cfg); +- cfg = ICE_AQ_VSI_FD_ENABLE | ICE_AQ_VSI_FD_PROG_ENABLE; ++ cfg = ICE_AQ_VSI_FD_ENABLE; + vsi_ctx.info.fd_options = rte_cpu_to_le_16(cfg); + vsi_ctx.info.max_fd_fltr_dedicated = + rte_cpu_to_le_16(hw->func_caps.fd_fltr_guar); +@@ -1601,9 +1601,10 @@ ice_setup_vsi(struct ice_pf *pf, enum ice_vsi_type type) + + cfg = ICE_AQ_VSI_PROP_FLOW_DIR_VALID; + vsi_ctx.info.valid_sections |= rte_cpu_to_le_16(cfg); +- cfg = ICE_AQ_VSI_FD_ENABLE | ICE_AQ_VSI_FD_PROG_ENABLE; ++ cfg = ICE_AQ_VSI_FD_PROG_ENABLE; + vsi_ctx.info.fd_options = rte_cpu_to_le_16(cfg); + vsi_ctx.info.sw_id = hw->port_info->sw_id; ++ vsi_ctx.info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; + ret = ice_vsi_config_tc_queue_mapping(vsi, + &vsi_ctx.info, + ICE_DEFAULT_TCMAP); +@@ -1657,16 +1658,16 @@ ice_setup_vsi(struct ice_pf *pf, enum ice_vsi_type type) + + if (type == ICE_VSI_PF) { + /* MAC configuration */ +- rte_memcpy(pf->dev_addr.addr_bytes, +- hw->port_info->mac.perm_addr, +- ETH_ADDR_LEN); ++ rte_ether_addr_copy((struct rte_ether_addr *) ++ hw->port_info->mac.perm_addr, ++ &pf->dev_addr); + +- rte_memcpy(&mac_addr, &pf->dev_addr, RTE_ETHER_ADDR_LEN); ++ rte_ether_addr_copy(&pf->dev_addr, &mac_addr); + ret = ice_add_mac_filter(vsi, &mac_addr); + if (ret != ICE_SUCCESS) + PMD_INIT_LOG(ERR, "Failed to add dflt MAC filter"); + +- rte_memcpy(&mac_addr, &broadcast, RTE_ETHER_ADDR_LEN); ++ rte_ether_addr_copy(&broadcast, &mac_addr); + ret = ice_add_mac_filter(vsi, &mac_addr); + if (ret != ICE_SUCCESS) + PMD_INIT_LOG(ERR, "Failed to add MAC filter"); +@@ -2439,24 +2440,6 @@ ice_dev_uninit(struct rte_eth_dev *dev) + return 0; + } + +-static int +-ice_dev_configure(struct rte_eth_dev *dev) +-{ +- struct ice_adapter *ad = +- ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); +- +- /* Initialize to TRUE. If any of Rx queues doesn't meet the +- * bulk allocation or vector Rx preconditions we will reset it. 
+- */ +- ad->rx_bulk_alloc_allowed = true; +- ad->tx_simple_allowed = true; +- +- if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) +- dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; +- +- return 0; +-} +- + static int ice_init_rss(struct ice_pf *pf) + { + struct ice_hw *hw = ICE_PF_TO_HW(pf); +@@ -2587,6 +2570,32 @@ static int ice_init_rss(struct ice_pf *pf) + return 0; + } + ++static int ++ice_dev_configure(struct rte_eth_dev *dev) ++{ ++ struct ice_adapter *ad = ++ ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); ++ struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private); ++ int ret; ++ ++ /* Initialize to TRUE. If any of Rx queues doesn't meet the ++ * bulk allocation or vector Rx preconditions we will reset it. ++ */ ++ ad->rx_bulk_alloc_allowed = true; ++ ad->tx_simple_allowed = true; ++ ++ if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) ++ dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; ++ ++ ret = ice_init_rss(pf); ++ if (ret) { ++ PMD_DRV_LOG(ERR, "Failed to enable rss for PF"); ++ return ret; ++ } ++ ++ return 0; ++} ++ + static void + __vsi_queues_bind_intr(struct ice_vsi *vsi, uint16_t msix_vect, + int base_queue, int nb_queue) +@@ -2598,9 +2607,9 @@ __vsi_queues_bind_intr(struct ice_vsi *vsi, uint16_t msix_vect, + for (i = 0; i < nb_queue; i++) { + /*do actual bind*/ + val = (msix_vect & QINT_RQCTL_MSIX_INDX_M) | +- (0 < QINT_RQCTL_ITR_INDX_S) | QINT_RQCTL_CAUSE_ENA_M; ++ (0 << QINT_RQCTL_ITR_INDX_S) | QINT_RQCTL_CAUSE_ENA_M; + val_tx = (msix_vect & QINT_TQCTL_MSIX_INDX_M) | +- (0 < QINT_TQCTL_ITR_INDX_S) | QINT_TQCTL_CAUSE_ENA_M; ++ (0 << QINT_TQCTL_ITR_INDX_S) | QINT_TQCTL_CAUSE_ENA_M; + + PMD_DRV_LOG(INFO, "queue %d is binding to vect %d", + base_queue + i, msix_vect); +@@ -2790,12 +2799,6 @@ ice_dev_start(struct rte_eth_dev *dev) + } + } + +- ret = ice_init_rss(pf); +- if (ret) { +- PMD_DRV_LOG(ERR, "Failed to enable rss for PF"); +- goto rx_err; +- } +- + ice_set_rx_function(dev); + ice_set_tx_function(dev); + +@@ -3264,7 +3267,7 @@ static int ice_macaddr_set(struct rte_eth_dev *dev, + PMD_DRV_LOG(ERR, "Failed to add mac filter"); + return -EIO; + } +- memcpy(&pf->dev_addr, mac_addr, ETH_ADDR_LEN); ++ rte_ether_addr_copy(mac_addr, &pf->dev_addr); + + flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL; + ret = ice_aq_manage_mac_write(hw, mac_addr->addr_bytes, flags, NULL); +diff --git a/dpdk/drivers/net/ice/ice_ethdev.h b/dpdk/drivers/net/ice/ice_ethdev.h +index f2186e1ff9..da557a2540 100644 +--- a/dpdk/drivers/net/ice/ice_ethdev.h ++++ b/dpdk/drivers/net/ice/ice_ethdev.h +@@ -254,6 +254,7 @@ enum ice_fdir_tunnel_type { + ICE_FDIR_TUNNEL_TYPE_NONE = 0, + ICE_FDIR_TUNNEL_TYPE_VXLAN, + ICE_FDIR_TUNNEL_TYPE_GTPU, ++ ICE_FDIR_TUNNEL_TYPE_GTPU_EH, + }; + + struct rte_flow; +diff --git a/dpdk/drivers/net/ice/ice_fdir_filter.c b/dpdk/drivers/net/ice/ice_fdir_filter.c +index fa87074556..91fef712c1 100644 +--- a/dpdk/drivers/net/ice/ice_fdir_filter.c ++++ b/dpdk/drivers/net/ice/ice_fdir_filter.c +@@ -67,10 +67,11 @@ + ICE_FDIR_INSET_VXLAN_IPV4 | \ + ICE_INSET_TUN_SCTP_SRC_PORT | ICE_INSET_TUN_SCTP_DST_PORT) + +-#define ICE_FDIR_INSET_GTPU_IPV4 (\ +- ICE_INSET_GTPU_TEID) ++#define ICE_FDIR_INSET_GTPU (\ ++ ICE_INSET_IPV4_SRC | ICE_INSET_IPV4_DST | ICE_INSET_GTPU_TEID) + +-#define ICE_FDIR_INSET_GTPU_EH_IPV4 (\ ++#define ICE_FDIR_INSET_GTPU_EH (\ ++ ICE_INSET_IPV4_SRC | ICE_INSET_IPV4_DST | \ + ICE_INSET_GTPU_TEID | ICE_INSET_GTPU_QFI) + + static struct ice_pattern_match_item ice_fdir_pattern_os[] = { +@@ -125,14 +126,16 @@ 
static struct ice_pattern_match_item ice_fdir_pattern_comms[] = { + ICE_FDIR_INSET_VXLAN_IPV4_TCP, ICE_INSET_NONE}, + {pattern_eth_ipv4_udp_vxlan_eth_ipv4_sctp, + ICE_FDIR_INSET_VXLAN_IPV4_SCTP, ICE_INSET_NONE}, +- {pattern_eth_ipv4_gtpu_ipv4, ICE_FDIR_INSET_GTPU_IPV4, ICE_INSET_NONE}, +- {pattern_eth_ipv4_gtpu_eh_ipv4, +- ICE_FDIR_INSET_GTPU_EH_IPV4, ICE_INSET_NONE}, ++ {pattern_eth_ipv4_gtpu, ICE_FDIR_INSET_GTPU, ICE_INSET_NONE}, ++ {pattern_eth_ipv4_gtpu_eh, ICE_FDIR_INSET_GTPU_EH, ICE_INSET_NONE}, + }; + + static struct ice_flow_parser ice_fdir_parser_os; + static struct ice_flow_parser ice_fdir_parser_comms; + ++static int ++ice_fdir_is_tunnel_profile(enum ice_fdir_tunnel_type tunnel_type); ++ + static const struct rte_memzone * + ice_memzone_reserve(const char *name, uint32_t len, int socket_id) + { +@@ -915,7 +918,7 @@ ice_fdir_input_set_parse(uint64_t inset, enum ice_flow_field *field) + {ICE_INSET_TUN_UDP_DST_PORT, ICE_FLOW_FIELD_IDX_UDP_DST_PORT}, + {ICE_INSET_TUN_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT}, + {ICE_INSET_TUN_SCTP_DST_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT}, +- {ICE_INSET_GTPU_TEID, ICE_FLOW_FIELD_IDX_GTPU_EH_TEID}, ++ {ICE_INSET_GTPU_TEID, ICE_FLOW_FIELD_IDX_GTPU_IP_TEID}, + {ICE_INSET_GTPU_QFI, ICE_FLOW_FIELD_IDX_GTPU_EH_QFI}, + }; + +@@ -928,11 +931,12 @@ ice_fdir_input_set_parse(uint64_t inset, enum ice_flow_field *field) + + static int + ice_fdir_input_set_conf(struct ice_pf *pf, enum ice_fltr_ptype flow, +- uint64_t input_set, bool is_tunnel) ++ uint64_t input_set, enum ice_fdir_tunnel_type ttype) + { + struct ice_flow_seg_info *seg; + struct ice_flow_seg_info *seg_tun = NULL; + enum ice_flow_field field[ICE_FLOW_FIELD_IDX_MAX]; ++ bool is_tunnel; + int i, ret; + + if (!input_set) +@@ -984,9 +988,15 @@ ice_fdir_input_set_conf(struct ice_pf *pf, enum ice_fltr_ptype flow, + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP: + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP: + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER: +- ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_EH | +- ICE_FLOW_SEG_HDR_GTPU_IP | +- ICE_FLOW_SEG_HDR_IPV4); ++ if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU) ++ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_IP | ++ ICE_FLOW_SEG_HDR_IPV4); ++ else if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU_EH) ++ ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_EH | ++ ICE_FLOW_SEG_HDR_GTPU_IP | ++ ICE_FLOW_SEG_HDR_IPV4); ++ else ++ PMD_DRV_LOG(ERR, "not supported tunnel type."); + break; + default: + PMD_DRV_LOG(ERR, "not supported filter type."); +@@ -1000,6 +1010,7 @@ ice_fdir_input_set_conf(struct ice_pf *pf, enum ice_fltr_ptype flow, + ICE_FLOW_FLD_OFF_INVAL, false); + } + ++ is_tunnel = ice_fdir_is_tunnel_profile(ttype); + if (!is_tunnel) { + ret = ice_fdir_hw_tbl_conf(pf, pf->main_vsi, pf->fdir.fdir_vsi, + seg, flow, false); +@@ -1224,7 +1235,7 @@ ice_fdir_create_filter(struct ice_adapter *ad, + is_tun = ice_fdir_is_tunnel_profile(filter->tunnel_type); + + ret = ice_fdir_input_set_conf(pf, filter->input.flow_type, +- filter->input_set, is_tun); ++ filter->input_set, filter->tunnel_type); + if (ret) { + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, +@@ -1488,8 +1499,7 @@ ice_fdir_parse_action(struct ice_adapter *ad, + dest_num++; + + filter->input.dest_ctl = +- ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX; +- filter->input.q_index = 0; ++ ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER; + break; + case RTE_FLOW_ACTION_TYPE_RSS: + dest_num++; +@@ -1504,6 +1514,7 @@ ice_fdir_parse_action(struct ice_adapter *ad, + + mark_spec = actions->conf; + 
filter->input.fltr_id = mark_spec->id; ++ filter->input.fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_ONE; + break; + case RTE_FLOW_ACTION_TYPE_COUNT: + counter_num++; +@@ -1522,7 +1533,7 @@ ice_fdir_parse_action(struct ice_adapter *ad, + } + } + +- if (dest_num == 0 || dest_num >= 2) { ++ if (dest_num >= 2) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, actions, + "Unsupported action combination"); +@@ -1543,6 +1554,18 @@ ice_fdir_parse_action(struct ice_adapter *ad, + return -rte_errno; + } + ++ if (dest_num + mark_num + counter_num == 0) { ++ rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ACTION, actions, ++ "Empty action"); ++ return -rte_errno; ++ } ++ ++ /* set default action to PASSTHRU mode, in "mark/count only" case. */ ++ if (dest_num == 0) ++ filter->input.dest_ctl = ++ ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER; ++ + return 0; + } + +@@ -1896,6 +1919,7 @@ ice_fdir_parse_pattern(__rte_unused struct ice_adapter *ad, + filter->input.gtpu_data.qfi = + gtp_psc_spec->qfi; + } ++ tunnel_type = ICE_FDIR_TUNNEL_TYPE_GTPU_EH; + break; + default: + rte_flow_error_set(error, EINVAL, +@@ -1906,7 +1930,8 @@ ice_fdir_parse_pattern(__rte_unused struct ice_adapter *ad, + } + } + +- if (tunnel_type == ICE_FDIR_TUNNEL_TYPE_GTPU) ++ if (tunnel_type == ICE_FDIR_TUNNEL_TYPE_GTPU || ++ tunnel_type == ICE_FDIR_TUNNEL_TYPE_GTPU_EH) + flow_type = ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER; + + filter->tunnel_type = tunnel_type; +@@ -1938,23 +1963,26 @@ ice_fdir_parse(struct ice_adapter *ad, + + ret = ice_fdir_parse_pattern(ad, pattern, error, filter); + if (ret) +- return ret; ++ goto error; + input_set = filter->input_set; + if (!input_set || input_set & ~item->input_set_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, + pattern, + "Invalid input set"); +- return -rte_errno; ++ ret = -rte_errno; ++ goto error; + } + + ret = ice_fdir_parse_action(ad, actions, error, filter); + if (ret) +- return ret; +- +- *meta = filter; ++ goto error; + +- return 0; ++ if (meta) ++ *meta = filter; ++error: ++ rte_free(item); ++ return ret; + } + + static struct ice_flow_parser ice_fdir_parser_os = { +diff --git a/dpdk/drivers/net/ice/ice_generic_flow.c b/dpdk/drivers/net/ice/ice_generic_flow.c +index c9efa8356f..a1648eeb58 100644 +--- a/dpdk/drivers/net/ice/ice_generic_flow.c ++++ b/dpdk/drivers/net/ice/ice_generic_flow.c +@@ -1034,6 +1034,13 @@ enum rte_flow_item_type pattern_eth_ipv6_nvgre_eth_ipv6_icmp6[] = { + }; + + /* GTPU */ ++enum rte_flow_item_type pattern_eth_ipv4_gtpu[] = { ++ RTE_FLOW_ITEM_TYPE_ETH, ++ RTE_FLOW_ITEM_TYPE_IPV4, ++ RTE_FLOW_ITEM_TYPE_UDP, ++ RTE_FLOW_ITEM_TYPE_GTPU, ++ RTE_FLOW_ITEM_TYPE_END, ++}; + enum rte_flow_item_type pattern_eth_ipv4_gtpu_ipv4[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, +@@ -1042,6 +1049,14 @@ enum rte_flow_item_type pattern_eth_ipv4_gtpu_ipv4[] = { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_END, + }; ++enum rte_flow_item_type pattern_eth_ipv4_gtpu_eh[] = { ++ RTE_FLOW_ITEM_TYPE_ETH, ++ RTE_FLOW_ITEM_TYPE_IPV4, ++ RTE_FLOW_ITEM_TYPE_UDP, ++ RTE_FLOW_ITEM_TYPE_GTPU, ++ RTE_FLOW_ITEM_TYPE_GTP_PSC, ++ RTE_FLOW_ITEM_TYPE_END, ++}; + enum rte_flow_item_type pattern_eth_ipv4_gtpu_eh_ipv4[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, +@@ -1360,7 +1375,6 @@ typedef struct ice_flow_engine * (*parse_engine_t)(struct ice_adapter *ad, + struct ice_parser_list *parser_list, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], +- void **meta, + struct rte_flow_error *error); 
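The flow-director action parsing above is relaxed so that a rule carrying only MARK and/or COUNT actions is accepted and defaults to passthrough, while more than one destination action is still rejected, as is an empty action list. The compact sketch below mirrors that decision logic; act_kind and parse_actions() are stand-ins for the rte_flow action types and the driver routine.

#include <stdbool.h>

/* Stand-ins for the subset of rte_flow action types the parser handles. */
enum act_kind {
    ACT_QUEUE, ACT_DROP, ACT_PASSTHRU, ACT_RSS,  /* destination actions */
    ACT_MARK, ACT_COUNT,                         /* non-destination actions */
    ACT_END
};

/* More than one destination action is rejected, an empty action list is
 * rejected, and a rule with only mark/count actions falls back to
 * passthrough.  Returns 0 on success, -1 on invalid input. */
static int parse_actions(const enum act_kind *acts, bool *passthru)
{
    int dest = 0, mark = 0, cnt = 0;

    for (; *acts != ACT_END; acts++) {
        switch (*acts) {
        case ACT_QUEUE:
        case ACT_DROP:
        case ACT_PASSTHRU:
        case ACT_RSS:
            dest++;
            break;
        case ACT_MARK:
            mark++;
            break;
        case ACT_COUNT:
            cnt++;
            break;
        default:
            return -1;       /* unknown action type */
        }
    }

    if (dest >= 2)
        return -1;           /* unsupported action combination */
    if (dest + mark + cnt == 0)
        return -1;           /* empty action list */

    *passthru = (dest == 0); /* "mark/count only": default to passthrough */
    return 0;
}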
+ + void +@@ -1698,11 +1712,11 @@ ice_parse_engine_create(struct ice_adapter *ad, + struct ice_parser_list *parser_list, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], +- void **meta, + struct rte_flow_error *error) + { + struct ice_flow_engine *engine = NULL; + struct ice_flow_parser_node *parser_node; ++ void *meta = NULL; + void *temp; + + TAILQ_FOREACH_SAFE(parser_node, parser_list, node, temp) { +@@ -1711,18 +1725,12 @@ ice_parse_engine_create(struct ice_adapter *ad, + if (parser_node->parser->parse_pattern_action(ad, + parser_node->parser->array, + parser_node->parser->array_len, +- pattern, actions, meta, error) < 0) ++ pattern, actions, &meta, error) < 0) + continue; + + engine = parser_node->parser->engine; +- if (engine->create == NULL) { +- rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_HANDLE, +- NULL, "Invalid engine"); +- continue; +- } +- +- ret = engine->create(ad, flow, *meta, error); ++ RTE_ASSERT(engine->create != NULL); ++ ret = engine->create(ad, flow, meta, error); + if (ret == 0) + return engine; + else if (ret == -EEXIST) +@@ -1737,7 +1745,6 @@ ice_parse_engine_validate(struct ice_adapter *ad, + struct ice_parser_list *parser_list, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], +- void **meta, + struct rte_flow_error *error) + { + struct ice_flow_engine *engine = NULL; +@@ -1748,7 +1755,7 @@ ice_parse_engine_validate(struct ice_adapter *ad, + if (parser_node->parser->parse_pattern_action(ad, + parser_node->parser->array, + parser_node->parser->array_len, +- pattern, actions, meta, error) < 0) ++ pattern, actions, NULL, error) < 0) + continue; + + engine = parser_node->parser->engine; +@@ -1764,7 +1771,6 @@ ice_flow_process_filter(struct rte_eth_dev *dev, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ice_flow_engine **engine, +- void **meta, + parse_engine_t ice_parse_engine, + struct rte_flow_error *error) + { +@@ -1799,7 +1805,7 @@ ice_flow_process_filter(struct rte_eth_dev *dev, + return ret; + + *engine = ice_parse_engine(ad, flow, &pf->rss_parser_list, +- pattern, actions, meta, error); ++ pattern, actions, error); + if (*engine != NULL) + return 0; + +@@ -1807,11 +1813,11 @@ ice_flow_process_filter(struct rte_eth_dev *dev, + case ICE_FLOW_CLASSIFY_STAGE_DISTRIBUTOR_ONLY: + case ICE_FLOW_CLASSIFY_STAGE_DISTRIBUTOR: + *engine = ice_parse_engine(ad, flow, &pf->dist_parser_list, +- pattern, actions, meta, error); ++ pattern, actions, error); + break; + case ICE_FLOW_CLASSIFY_STAGE_PERMISSION: + *engine = ice_parse_engine(ad, flow, &pf->perm_parser_list, +- pattern, actions, meta, error); ++ pattern, actions, error); + break; + default: + return -EINVAL; +@@ -1830,11 +1836,10 @@ ice_flow_validate(struct rte_eth_dev *dev, + const struct rte_flow_action actions[], + struct rte_flow_error *error) + { +- void *meta; + struct ice_flow_engine *engine; + + return ice_flow_process_filter(dev, NULL, attr, pattern, actions, +- &engine, &meta, ice_parse_engine_validate, error); ++ &engine, ice_parse_engine_validate, error); + } + + static struct rte_flow * +@@ -1848,7 +1853,6 @@ ice_flow_create(struct rte_eth_dev *dev, + struct rte_flow *flow = NULL; + int ret; + struct ice_flow_engine *engine = NULL; +- void *meta; + + flow = rte_zmalloc("ice_flow", sizeof(struct rte_flow), 0); + if (!flow) { +@@ -1859,7 +1863,7 @@ ice_flow_create(struct rte_eth_dev *dev, + } + + ret = ice_flow_process_filter(dev, flow, attr, pattern, actions, +- &engine, &meta, 
ice_parse_engine_create, error); ++ &engine, ice_parse_engine_create, error); + if (ret < 0) + goto free_flow; + flow->engine = engine; +diff --git a/dpdk/drivers/net/ice/ice_generic_flow.h b/dpdk/drivers/net/ice/ice_generic_flow.h +index ac085b47fa..adc30ee2aa 100644 +--- a/dpdk/drivers/net/ice/ice_generic_flow.h ++++ b/dpdk/drivers/net/ice/ice_generic_flow.h +@@ -336,7 +336,9 @@ extern enum rte_flow_item_type pattern_eth_ipv6_nvgre_eth_ipv6_sctp[]; + extern enum rte_flow_item_type pattern_eth_ipv6_nvgre_eth_ipv6_icmp6[]; + + /* GTPU */ ++extern enum rte_flow_item_type pattern_eth_ipv4_gtpu[]; + extern enum rte_flow_item_type pattern_eth_ipv4_gtpu_ipv4[]; ++extern enum rte_flow_item_type pattern_eth_ipv4_gtpu_eh[]; + extern enum rte_flow_item_type pattern_eth_ipv4_gtpu_eh_ipv4[]; + extern enum rte_flow_item_type pattern_eth_ipv4_gtpu_eh_ipv4_udp[]; + extern enum rte_flow_item_type pattern_eth_ipv4_gtpu_eh_ipv4_tcp[]; +diff --git a/dpdk/drivers/net/ice/ice_hash.c b/dpdk/drivers/net/ice/ice_hash.c +index b145a3f0d5..23c455762b 100644 +--- a/dpdk/drivers/net/ice/ice_hash.c ++++ b/dpdk/drivers/net/ice/ice_hash.c +@@ -95,7 +95,7 @@ struct rss_type_match_hdr hint_7 = { + struct rss_type_match_hdr hint_8 = { + ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_SCTP, ETH_RSS_NONFRAG_IPV6_SCTP}; + struct rss_type_match_hdr hint_9 = { +- ICE_FLOW_SEG_HDR_GTPU_IP, ETH_RSS_IPV4}; ++ ICE_FLOW_SEG_HDR_GTPU_EH, ETH_RSS_IPV4}; + struct rss_type_match_hdr hint_10 = { + ICE_FLOW_SEG_HDR_PPPOE, ETH_RSS_IPV4}; + struct rss_type_match_hdr hint_11 = { +@@ -104,6 +104,10 @@ struct rss_type_match_hdr hint_12 = { + ICE_FLOW_SEG_HDR_PPPOE, ETH_RSS_NONFRAG_IPV4_TCP}; + struct rss_type_match_hdr hint_13 = { + ICE_FLOW_SEG_HDR_PPPOE, ETH_RSS_NONFRAG_IPV4_SCTP}; ++struct rss_type_match_hdr hint_14 = { ++ ICE_FLOW_SEG_HDR_GTPU_EH, ETH_RSS_NONFRAG_IPV4_UDP}; ++struct rss_type_match_hdr hint_15 = { ++ ICE_FLOW_SEG_HDR_GTPU_EH, ETH_RSS_NONFRAG_IPV4_TCP}; + + /* Supported pattern for os default package. */ + static struct ice_pattern_match_item ice_hash_pattern_list_os[] = { +@@ -130,8 +134,8 @@ static struct ice_pattern_match_item ice_hash_pattern_list_comms[] = { + {pattern_eth_ipv6_sctp, ICE_INSET_NONE, &hint_8}, + {pattern_empty, ICE_INSET_NONE, &hint_0}, + {pattern_eth_ipv4_gtpu_eh_ipv4, ICE_INSET_NONE, &hint_9}, +- {pattern_eth_ipv4_gtpu_eh_ipv4_udp, ICE_INSET_NONE, &hint_9}, +- {pattern_eth_ipv4_gtpu_eh_ipv4_tcp, ICE_INSET_NONE, &hint_9}, ++ {pattern_eth_ipv4_gtpu_eh_ipv4_udp, ICE_INSET_NONE, &hint_14}, ++ {pattern_eth_ipv4_gtpu_eh_ipv4_tcp, ICE_INSET_NONE, &hint_15}, + {pattern_eth_pppoes_ipv4, ICE_INSET_NONE, &hint_10}, + {pattern_eth_pppoes_ipv4_udp, ICE_INSET_NONE, &hint_11}, + {pattern_eth_pppoes_ipv4_tcp, ICE_INSET_NONE, &hint_12}, +@@ -424,26 +428,31 @@ ice_hash_parse_pattern_action(__rte_unused struct ice_adapter *ad, + /* Check rss supported pattern and find matched pattern. */ + pattern_match_item = ice_search_pattern_match_item(pattern, + array, array_len, error); +- if (!pattern_match_item) +- return -rte_errno; ++ if (!pattern_match_item) { ++ ret = -rte_errno; ++ goto error; ++ } + + ret = ice_hash_check_inset(pattern, error); + if (ret) +- return -rte_errno; ++ goto error; + + /* Save protocol header to rss_meta. */ +- *meta = rss_meta_ptr; +- ((struct rss_meta *)*meta)->pkt_hdr = ((struct rss_type_match_hdr *) ++ rss_meta_ptr->pkt_hdr = ((struct rss_type_match_hdr *) + (pattern_match_item->meta))->hdr_mask; + + /* Check rss action. 
*/ +- ret = ice_hash_parse_action(pattern_match_item, actions, meta, error); +- if (ret) +- return -rte_errno; ++ ret = ice_hash_parse_action(pattern_match_item, actions, ++ (void **)&rss_meta_ptr, error); + ++error: ++ if (!ret && meta) ++ *meta = rss_meta_ptr; ++ else ++ rte_free(rss_meta_ptr); + rte_free(pattern_match_item); + +- return 0; ++ return ret; + } + + static int +diff --git a/dpdk/drivers/net/ice/ice_rxtx.c b/dpdk/drivers/net/ice/ice_rxtx.c +index 2db1744562..110b38646d 100644 +--- a/dpdk/drivers/net/ice/ice_rxtx.c ++++ b/dpdk/drivers/net/ice/ice_rxtx.c +@@ -236,17 +236,12 @@ _ice_rx_queue_release_mbufs(struct ice_rx_queue *rxq) + rxq->sw_ring[i].mbuf = NULL; + } + } +-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC +- if (rxq->rx_nb_avail == 0) +- return; +- for (i = 0; i < rxq->rx_nb_avail; i++) { +- struct rte_mbuf *mbuf; +- +- mbuf = rxq->rx_stage[rxq->rx_next_avail + i]; +- rte_pktmbuf_free_seg(mbuf); +- } +- rxq->rx_nb_avail = 0; +-#endif /* RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC */ ++ if (rxq->rx_nb_avail == 0) ++ return; ++ for (i = 0; i < rxq->rx_nb_avail; i++) ++ rte_pktmbuf_free_seg(rxq->rx_stage[rxq->rx_next_avail + i]); ++ ++ rxq->rx_nb_avail = 0; + } + + static void +@@ -309,16 +304,10 @@ ice_switch_rx_queue(struct ice_hw *hw, uint16_t q_idx, bool on) + } + + static inline int +-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC + ice_check_rx_burst_bulk_alloc_preconditions(struct ice_rx_queue *rxq) +-#else +-ice_check_rx_burst_bulk_alloc_preconditions +- (__rte_unused struct ice_rx_queue *rxq) +-#endif + { + int ret = 0; + +-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC + if (!(rxq->rx_free_thresh >= ICE_RX_MAX_BURST)) { + PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: " + "rxq->rx_free_thresh=%d, " +@@ -338,9 +327,6 @@ ice_check_rx_burst_bulk_alloc_preconditions + rxq->nb_rx_desc, rxq->rx_free_thresh); + ret = -EINVAL; + } +-#else +- ret = -EINVAL; +-#endif + + return ret; + } +@@ -357,17 +343,11 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq) + return; + } + +-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC +- if (ice_check_rx_burst_bulk_alloc_preconditions(rxq) == 0) +- len = (uint16_t)(rxq->nb_rx_desc + ICE_RX_MAX_BURST); +- else +-#endif /* RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC */ +- len = rxq->nb_rx_desc; ++ len = (uint16_t)(rxq->nb_rx_desc + ICE_RX_MAX_BURST); + + for (i = 0; i < len * sizeof(union ice_rx_flex_desc); i++) + ((volatile char *)rxq->rx_ring)[i] = 0; + +-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC + memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf)); + for (i = 0; i < ICE_RX_MAX_BURST; ++i) + rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf; +@@ -375,7 +355,6 @@ ice_reset_rx_queue(struct ice_rx_queue *rxq) + rxq->rx_nb_avail = 0; + rxq->rx_next_avail = 0; + rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1); +-#endif /* RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC */ + + rxq->rx_tail = 0; + rxq->nb_rx_hold = 0; +@@ -926,13 +905,11 @@ ice_rx_queue_setup(struct rte_eth_dev *dev, + /* Allocate the maximun number of RX ring hardware descriptor. */ + len = ICE_MAX_RING_DESC; + +-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC + /** + * Allocating a little more memory because vectorized/bulk_alloc Rx + * functions doesn't check boundaries each time. + */ + len += ICE_RX_MAX_BURST; +-#endif + + /* Allocate the maximum number of RX ring hardware descriptor. 
*/ + ring_size = sizeof(union ice_rx_flex_desc) * len; +@@ -952,11 +929,8 @@ ice_rx_queue_setup(struct rte_eth_dev *dev, + rxq->rx_ring_dma = rz->iova; + rxq->rx_ring = rz->addr; + +-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC ++ /* always reserve more for bulk alloc */ + len = (uint16_t)(nb_desc + ICE_RX_MAX_BURST); +-#else +- len = nb_desc; +-#endif + + /* Allocate the software ring. */ + rxq->sw_ring = rte_zmalloc_socket(NULL, +@@ -977,17 +951,14 @@ ice_rx_queue_setup(struct rte_eth_dev *dev, + use_def_burst_func = ice_check_rx_burst_bulk_alloc_preconditions(rxq); + + if (!use_def_burst_func) { +-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC + PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are " + "satisfied. Rx Burst Bulk Alloc function will be " + "used on port=%d, queue=%d.", + rxq->port_id, rxq->queue_id); +-#endif /* RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC */ + } else { + PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are " +- "not satisfied, Scattered Rx is requested, " +- "or RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC is " +- "not enabled on port=%d, queue=%d.", ++ "not satisfied, Scattered Rx is requested. " ++ "on port=%d, queue=%d.", + rxq->port_id, rxq->queue_id); + ad->rx_bulk_alloc_allowed = false; + } +@@ -1399,7 +1370,6 @@ ice_rxd_to_pkt_fields(struct rte_mbuf *mb, + #endif + } + +-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC + #define ICE_LOOK_AHEAD 8 + #if (ICE_LOOK_AHEAD != 8) + #error "PMD ICE: ICE_LOOK_AHEAD must be 8\n" +@@ -1620,15 +1590,6 @@ ice_recv_pkts_bulk_alloc(void *rx_queue, + + return nb_rx; + } +-#else +-static uint16_t +-ice_recv_pkts_bulk_alloc(void __rte_unused *rx_queue, +- struct rte_mbuf __rte_unused **rx_pkts, +- uint16_t __rte_unused nb_pkts) +-{ +- return 0; +-} +-#endif /* RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC */ + + static uint16_t + ice_recv_scattered_pkts(void *rx_queue, +@@ -1872,9 +1833,7 @@ ice_dev_supported_ptypes_get(struct rte_eth_dev *dev) + ptypes = ptypes_os; + + if (dev->rx_pkt_burst == ice_recv_pkts || +-#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC + dev->rx_pkt_burst == ice_recv_pkts_bulk_alloc || +-#endif + dev->rx_pkt_burst == ice_recv_scattered_pkts) + return ptypes; + +@@ -2421,6 +2380,24 @@ ice_set_tso_ctx(struct rte_mbuf *mbuf, union ice_tx_offload tx_offload) + return ctx_desc; + } + ++/* HW requires that TX buffer size ranges from 1B up to (16K-1)B. */ ++#define ICE_MAX_DATA_PER_TXD \ ++ (ICE_TXD_QW1_TX_BUF_SZ_M >> ICE_TXD_QW1_TX_BUF_SZ_S) ++/* Calculate the number of TX descriptors needed for each pkt */ ++static inline uint16_t ++ice_calc_pkt_desc(struct rte_mbuf *tx_pkt) ++{ ++ struct rte_mbuf *txd = tx_pkt; ++ uint16_t count = 0; ++ ++ while (txd != NULL) { ++ count += DIV_ROUND_UP(txd->data_len, ICE_MAX_DATA_PER_TXD); ++ txd = txd->next; ++ } ++ ++ return count; ++} ++ + uint16_t + ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + { +@@ -2440,6 +2417,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + uint32_t td_offset = 0; + uint32_t td_tag = 0; + uint16_t tx_last; ++ uint16_t slen; + uint64_t buf_dma_addr; + uint64_t ol_flags; + union ice_tx_offload tx_offload = {0}; +@@ -2452,7 +2430,7 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + + /* Check if the descriptor ring needs to be cleaned. 
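The Tx path above gains handling for oversized TSO segments: ice_calc_pkt_desc() counts how many descriptors a packet needs when each descriptor carries at most (16K - 1) bytes, and the transmit loop later splits any segment that exceeds that limit. A self-contained sketch of the descriptor count follows, with struct seg standing in for rte_mbuf.

#include <stddef.h>
#include <stdint.h>

/* Hardware limit quoted in the patch: one Tx descriptor carries at most
 * (16K - 1) bytes of data. */
#define MAX_DATA_PER_TXD   ((16u * 1024u) - 1u)

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Minimal stand-in for an mbuf chain: per-segment length plus next pointer. */
struct seg {
    uint16_t    data_len;
    struct seg *next;
};

/* Same shape as ice_calc_pkt_desc(): with TSO a segment larger than
 * MAX_DATA_PER_TXD needs several descriptors, so round up per segment. */
static uint16_t calc_pkt_desc(const struct seg *s)
{
    uint16_t count = 0;

    for (; s != NULL; s = s->next)
        count += DIV_ROUND_UP(s->data_len, MAX_DATA_PER_TXD);

    return count;
}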
*/ + if (txq->nb_tx_free < txq->tx_free_thresh) +- ice_xmit_cleanup(txq); ++ (void)ice_xmit_cleanup(txq); + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + tx_pkt = *tx_pkts++; +@@ -2471,8 +2449,15 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + /* The number of descriptors that must be allocated for + * a packet equals to the number of the segments of that + * packet plus the number of context descriptor if needed. ++ * Recalculate the needed tx descs when TSO enabled in case ++ * the mbuf data size exceeds max data size that hw allows ++ * per tx desc. + */ +- nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx); ++ if (ol_flags & PKT_TX_TCP_SEG) ++ nb_used = (uint16_t)(ice_calc_pkt_desc(tx_pkt) + ++ nb_ctx); ++ else ++ nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx); + tx_last = (uint16_t)(tx_id + nb_used - 1); + + /* Circular ring */ +@@ -2562,15 +2547,37 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + txe->mbuf = m_seg; + + /* Setup TX Descriptor */ ++ slen = m_seg->data_len; + buf_dma_addr = rte_mbuf_data_iova(m_seg); ++ ++ while ((ol_flags & PKT_TX_TCP_SEG) && ++ unlikely(slen > ICE_MAX_DATA_PER_TXD)) { ++ txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr); ++ txd->cmd_type_offset_bsz = ++ rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA | ++ ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) | ++ ((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) | ++ ((uint64_t)ICE_MAX_DATA_PER_TXD << ++ ICE_TXD_QW1_TX_BUF_SZ_S) | ++ ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S)); ++ ++ buf_dma_addr += ICE_MAX_DATA_PER_TXD; ++ slen -= ICE_MAX_DATA_PER_TXD; ++ ++ txe->last_id = tx_last; ++ tx_id = txe->next_id; ++ txe = txn; ++ txd = &tx_ring[tx_id]; ++ txn = &sw_ring[txe->next_id]; ++ } ++ + txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr); + txd->cmd_type_offset_bsz = + rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA | +- ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) | ++ ((uint64_t)td_cmd << ICE_TXD_QW1_CMD_S) | + ((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) | +- ((uint64_t)m_seg->data_len << +- ICE_TXD_QW1_TX_BUF_SZ_S) | +- ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S)); ++ ((uint64_t)slen << ICE_TXD_QW1_TX_BUF_SZ_S) | ++ ((uint64_t)td_tag << ICE_TXD_QW1_L2TAG1_S)); + + txe->last_id = tx_last; + tx_id = txe->next_id; +@@ -3268,7 +3275,7 @@ ice_get_default_pkt_type(uint16_t ptype) + RTE_PTYPE_L4_TCP, + [93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_SCTP, +- [94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | ++ [94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_ICMP, + + /* IPv6 --> IPv4 */ +diff --git a/dpdk/drivers/net/ice/ice_rxtx_vec_common.h b/dpdk/drivers/net/ice/ice_rxtx_vec_common.h +index 5e6f89642a..46e3be98a6 100644 +--- a/dpdk/drivers/net/ice/ice_rxtx_vec_common.h ++++ b/dpdk/drivers/net/ice/ice_rxtx_vec_common.h +@@ -29,6 +29,7 @@ ice_rx_reassemble_packets(struct ice_rx_queue *rxq, struct rte_mbuf **rx_bufs, + if (!split_flags[buf_idx]) { + /* it's the last packet of the set */ + start->hash = end->hash; ++ start->vlan_tci = end->vlan_tci; + start->ol_flags = end->ol_flags; + /* we need to strip crc for the whole packet */ + start->pkt_len -= rxq->crc_len; +@@ -245,6 +246,7 @@ ice_rx_vec_queue_default(struct ice_rx_queue *rxq) + DEV_TX_OFFLOAD_VLAN_INSERT | \ + DEV_TX_OFFLOAD_SCTP_CKSUM | \ + DEV_TX_OFFLOAD_UDP_CKSUM | \ ++ DEV_TX_OFFLOAD_TCP_TSO | \ + DEV_TX_OFFLOAD_TCP_CKSUM) + + static inline int +diff --git a/dpdk/drivers/net/ice/ice_switch_filter.c b/dpdk/drivers/net/ice/ice_switch_filter.c +index 
4a9356b317..6c24731638 100644 +--- a/dpdk/drivers/net/ice/ice_switch_filter.c ++++ b/dpdk/drivers/net/ice/ice_switch_filter.c +@@ -871,7 +871,7 @@ ice_switch_inset_get(const struct rte_flow_item pattern[], + vlan_spec->inner_type; + list[t].m_u.vlan_hdr.type = + UINT16_MAX; +- input_set |= ICE_INSET_VLAN_OUTER; ++ input_set |= ICE_INSET_ETHERTYPE; + } + t++; + } else if (!vlan_spec && !vlan_mask) { +@@ -937,6 +937,8 @@ ice_switch_parse_action(struct ice_pf *pf, + switch (action_type) { + case RTE_FLOW_ACTION_TYPE_RSS: + act_qgrop = action->conf; ++ if (act_qgrop->queue_num <= 1) ++ goto error; + rule_info->sw_act.fltr_act = + ICE_FWD_TO_QGRP; + rule_info->sw_act.fwd_id.q_id = +@@ -997,6 +999,46 @@ ice_switch_parse_action(struct ice_pf *pf, + return -rte_errno; + } + ++static int ++ice_switch_check_action(const struct rte_flow_action *actions, ++ struct rte_flow_error *error) ++{ ++ const struct rte_flow_action *action; ++ enum rte_flow_action_type action_type; ++ uint16_t actions_num = 0; ++ ++ for (action = actions; action->type != ++ RTE_FLOW_ACTION_TYPE_END; action++) { ++ action_type = action->type; ++ switch (action_type) { ++ case RTE_FLOW_ACTION_TYPE_VF: ++ case RTE_FLOW_ACTION_TYPE_RSS: ++ case RTE_FLOW_ACTION_TYPE_QUEUE: ++ case RTE_FLOW_ACTION_TYPE_DROP: ++ actions_num++; ++ break; ++ case RTE_FLOW_ACTION_TYPE_VOID: ++ continue; ++ default: ++ rte_flow_error_set(error, ++ EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, ++ actions, ++ "Invalid action type"); ++ return -rte_errno; ++ } ++ } ++ ++ if (actions_num > 1) { ++ rte_flow_error_set(error, ++ EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, ++ actions, ++ "Invalid action number"); ++ return -rte_errno; ++ } ++ ++ return 0; ++} ++ + static int + ice_switch_parse_pattern_action(struct ice_adapter *ad, + struct ice_pattern_match_item *array, +@@ -1015,7 +1057,8 @@ ice_switch_parse_pattern_action(struct ice_adapter *ad, + uint16_t lkups_num = 0; + const struct rte_flow_item *item = pattern; + uint16_t item_num = 0; +- enum ice_sw_tunnel_type tun_type = ICE_NON_TUN; ++ enum ice_sw_tunnel_type tun_type = ++ ICE_SW_TUN_AND_NON_TUN; + struct ice_pattern_match_item *pattern_match_item = NULL; + + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { +@@ -1051,6 +1094,7 @@ ice_switch_parse_pattern_action(struct ice_adapter *ad, + return -rte_errno; + } + ++ memset(&rule_info, 0, sizeof(rule_info)); + rule_info.tun_type = tun_type; + + sw_meta_ptr = +@@ -1081,6 +1125,14 @@ ice_switch_parse_pattern_action(struct ice_adapter *ad, + goto error; + } + ++ ret = ice_switch_check_action(actions, error); ++ if (ret) { ++ rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_HANDLE, NULL, ++ "Invalid input action number"); ++ goto error; ++ } ++ + ret = ice_switch_parse_action(pf, actions, error, &rule_info); + if (ret) { + rte_flow_error_set(error, EINVAL, +@@ -1088,10 +1140,17 @@ ice_switch_parse_pattern_action(struct ice_adapter *ad, + "Invalid input action"); + goto error; + } +- *meta = sw_meta_ptr; +- ((struct sw_meta *)*meta)->list = list; +- ((struct sw_meta *)*meta)->lkups_num = lkups_num; +- ((struct sw_meta *)*meta)->rule_info = rule_info; ++ ++ if (meta) { ++ *meta = sw_meta_ptr; ++ ((struct sw_meta *)*meta)->list = list; ++ ((struct sw_meta *)*meta)->lkups_num = lkups_num; ++ ((struct sw_meta *)*meta)->rule_info = rule_info; ++ } else { ++ rte_free(list); ++ rte_free(sw_meta_ptr); ++ } ++ + rte_free(pattern_match_item); + + return 0; +diff --git a/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c b/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c +index 
8d9ebef915..b673c49149 100644 +--- a/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c ++++ b/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c +@@ -701,7 +701,7 @@ struct ipn3ke_rpst_hw_port_stats *hw_stats) + &tmp, + IPN3KE_25G_TX_STATISTICS_STATUS, + port_id, +- 1); ++ 0); + if (tmp & IPN3KE_25G_TX_STATISTICS_STATUS_SHADOW_REQUEST_MASK) { + tmp = 0x00000000; + (*hw->f_mac_read)(hw, +@@ -2598,7 +2598,8 @@ ipn3ke_rpst_scan_check(void) + int ret; + + if (ipn3ke_rpst_scan_num == 1) { +- ret = pthread_create(&ipn3ke_rpst_scan_thread, ++ ret = rte_ctrl_thread_create(&ipn3ke_rpst_scan_thread, ++ "ipn3ke scanner", + NULL, + ipn3ke_rpst_scan_handle_request, NULL); + if (ret) { +diff --git a/dpdk/drivers/net/ipn3ke/meson.build b/dpdk/drivers/net/ipn3ke/meson.build +index e3c8a6768c..bfec592aba 100644 +--- a/dpdk/drivers/net/ipn3ke/meson.build ++++ b/dpdk/drivers/net/ipn3ke/meson.build +@@ -21,9 +21,11 @@ endif + if build + allow_experimental_apis = true + ++ includes += include_directories('../../raw/ifpga') ++ + sources += files('ipn3ke_ethdev.c', + 'ipn3ke_representor.c', + 'ipn3ke_tm.c', + 'ipn3ke_flow.c') +- deps += ['bus_ifpga', 'sched', 'pmd_i40e', 'rawdev', 'rawdev_ifpga'] ++ deps += ['bus_ifpga', 'ethdev', 'sched'] + endif +diff --git a/dpdk/drivers/net/ixgbe/Makefile b/dpdk/drivers/net/ixgbe/Makefile +index 85762e2f2a..31523025b3 100644 +--- a/dpdk/drivers/net/ixgbe/Makefile ++++ b/dpdk/drivers/net/ixgbe/Makefile +@@ -57,6 +57,7 @@ endif + LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring + LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash + LDLIBS += -lrte_bus_pci ++LDLIBS += -lpthread + + # + # Add extra flags for base driver files (also known as shared code) +diff --git a/dpdk/drivers/net/ixgbe/base/README b/dpdk/drivers/net/ixgbe/base/README +index b6b420e2f3..a48b14ed27 100644 +--- a/dpdk/drivers/net/ixgbe/base/README ++++ b/dpdk/drivers/net/ixgbe/base/README +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2010-2018 Intel Corporation ++ * Copyright(c) 2010-2020 Intel Corporation + */ + + Intel® IXGBE driver +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c b/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c +index 245ff75d55..c83e1c6b30 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_type.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_82598.h b/dpdk/drivers/net/ixgbe/base/ixgbe_82598.h +index 8013f495ec..7bad5e12d3 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_82598.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_82598.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_82598_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c b/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c +index 96bdde62c8..9cd0b1428c 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_type.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_82599.h b/dpdk/drivers/net/ixgbe/base/ixgbe_82599.h +index a32eb1f517..238481983f 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_82599.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_82599.h +@@ -1,5 +1,5 @@ + /* 
SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_82599_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_api.c b/dpdk/drivers/net/ixgbe/base/ixgbe_api.c +index 873c07999c..0a22df3d06 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_api.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_api.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_api.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_api.h b/dpdk/drivers/net/ixgbe/base/ixgbe_api.h +index ff8f7b2611..33e7c3c215 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_api.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_api.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_API_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_common.c b/dpdk/drivers/net/ixgbe/base/ixgbe_common.c +index 62ff767230..4eb98dc198 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_common.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_common.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_common.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_common.h b/dpdk/drivers/net/ixgbe/base/ixgbe_common.h +index 3bb2475119..7a31f088c4 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_common.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_common.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_COMMON_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.c b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.c +index a590e0e07c..53def2146e 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.h b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.h +index 503d06018f..c2a1013ac0 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_DCB_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.c b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.c +index d87cb58857..bb309e28fd 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.h b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.h +index 1a14744482..8f36881378 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_DCB_82598_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.c b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.c +index f4f0ff0190..04e0d1fb7d 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.c ++++ 
b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.h b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.h +index 085ada27f7..7bd1d6a325 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_DCB_82599_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c b/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c +index 67a124d8d1..6005c4ac93 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_vf.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h b/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h +index 9664f3bdbf..dd2e1eee4e 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_HV_VF_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.c b/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.c +index cb82942dfa..13bdb5f68f 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_type.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h b/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h +index 5d32cbc074..1a45e49c2f 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_MBX_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h b/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h +index ea8dc1cbe5..a4eb71777c 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_OS_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c b/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c +index dd118f9170..a8243fa974 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_api.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h b/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h +index f1605f2cc9..a06c3be170 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_PHY_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_type.h b/dpdk/drivers/net/ixgbe/base/ixgbe_type.h +index 077b8f01c7..15e9370105 100644 +--- 
a/dpdk/drivers/net/ixgbe/base/ixgbe_type.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_type.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_TYPE_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c b/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c +index aac37822e4..7f69ece107 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h b/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h +index dba643fced..be58b4f76e 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_VF_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c b/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c +index f00f0eae7e..d65f47c181 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_x540.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_x540.h b/dpdk/drivers/net/ixgbe/base/ixgbe_x540.h +index 231dfe56e5..ba79847d11 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_x540.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_x540.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_X540_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c b/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c +index 930a61a20b..3de406fd35 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_x550.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h b/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h +index 3bd98f243d..10086ab423 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_X550_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/meson.build b/dpdk/drivers/net/ixgbe/base/meson.build +index bbd0f51ea5..20677ab034 100644 +--- a/dpdk/drivers/net/ixgbe/base/meson.build ++++ b/dpdk/drivers/net/ixgbe/base/meson.build +@@ -1,5 +1,5 @@ + # SPDX-License-Identifier: BSD-3-Clause +-# Copyright(c) 2017 Intel Corporation ++# Copyright(c) 2017-2020 Intel Corporation + + sources = [ + 'ixgbe_82598.c', +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c +index 2c6fd0f131..50edb69cb2 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c ++++ b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c +@@ -229,7 +229,9 @@ static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev); + static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev); + static void ixgbe_dev_interrupt_handler(void *param); + static void ixgbe_dev_interrupt_delayed_handler(void *param); +-static void ixgbe_dev_setup_link_alarm_handler(void 
*param); ++static void *ixgbe_dev_setup_link_thread_handler(void *param); ++static int ixgbe_dev_wait_setup_link_complete(struct rte_eth_dev *dev, ++ uint32_t timeout_ms); + + static int ixgbe_add_rar(struct rte_eth_dev *dev, + struct rte_ether_addr *mac_addr, +@@ -378,6 +380,7 @@ static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, + struct rte_eth_udp_tunnel *udp_tunnel); + static int ixgbe_filter_restore(struct rte_eth_dev *dev); + static void ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev); ++static int ixgbe_wait_for_link_up(struct ixgbe_hw *hw); + + /* + * Define VF Stats MACRO for Non "cleared on read" register +@@ -1075,6 +1078,7 @@ ixgbe_swfw_lock_reset(struct ixgbe_hw *hw) + static int + eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused) + { ++ struct ixgbe_adapter *ad = eth_dev->data->dev_private; + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + struct ixgbe_hw *hw = +@@ -1126,6 +1130,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused) + return 0; + } + ++ rte_atomic32_clear(&ad->link_thread_running); + rte_eth_copy_pci_info(eth_dev, pci_dev); + + /* Vendor and Device ID need to be set before init of shared code */ +@@ -1170,8 +1175,8 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused) + memset(dcb_config, 0, sizeof(struct ixgbe_dcb_config)); + ixgbe_dcb_init(hw, dcb_config); + /* Get Hardware Flow Control setting */ +- hw->fc.requested_mode = ixgbe_fc_full; +- hw->fc.current_mode = ixgbe_fc_full; ++ hw->fc.requested_mode = ixgbe_fc_none; ++ hw->fc.current_mode = ixgbe_fc_none; + hw->fc.pause_time = IXGBE_FC_PAUSE; + for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { + hw->fc.low_water[i] = IXGBE_FC_LO; +@@ -1190,7 +1195,6 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused) + diag = ixgbe_bypass_init_hw(hw); + #else + diag = ixgbe_init_hw(hw); +- hw->mac.autotry_restart = false; + #endif /* RTE_LIBRTE_IXGBE_BYPASS */ + + /* +@@ -1301,8 +1305,6 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused) + /* enable support intr */ + ixgbe_enable_intr(eth_dev); + +- ixgbe_dev_set_link_down(eth_dev); +- + /* initialize filter info */ + memset(filter_info, 0, + sizeof(struct ixgbe_filter_info)); +@@ -1564,6 +1566,7 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev) + { + int diag; + uint32_t tc, tcs; ++ struct ixgbe_adapter *ad = eth_dev->data->dev_private; + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + struct ixgbe_hw *hw = +@@ -1604,6 +1607,7 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev) + return 0; + } + ++ rte_atomic32_clear(&ad->link_thread_running); + ixgbevf_parse_devargs(eth_dev->data->dev_private, + pci_dev->device.devargs); + +@@ -2530,6 +2534,41 @@ ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf, + return 0; + } + ++static int ++ixgbe_flow_ctrl_enable(struct rte_eth_dev *dev, struct ixgbe_hw *hw) ++{ ++ struct ixgbe_adapter *adapter = dev->data->dev_private; ++ int err; ++ uint32_t mflcn; ++ ++ ixgbe_setup_fc(hw); ++ ++ err = ixgbe_fc_enable(hw); ++ ++ /* Not negotiated is not an error case */ ++ if (err == IXGBE_SUCCESS || err == IXGBE_ERR_FC_NOT_NEGOTIATED) { ++ /* ++ *check if we want to forward MAC frames - driver doesn't ++ *have native capability to do that, ++ *so we'll write the registers ourselves ++ */ ++ ++ mflcn = 
IXGBE_READ_REG(hw, IXGBE_MFLCN); ++ ++ /* set or clear MFLCN.PMCF bit depending on configuration */ ++ if (adapter->mac_ctrl_frame_fwd != 0) ++ mflcn |= IXGBE_MFLCN_PMCF; ++ else ++ mflcn &= ~IXGBE_MFLCN_PMCF; ++ ++ IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn); ++ IXGBE_WRITE_FLUSH(hw); ++ ++ return 0; ++ } ++ return err; ++} ++ + /* + * Configure device link speed and setup link. + * It returns 0 on success. +@@ -2558,19 +2597,8 @@ ixgbe_dev_start(struct rte_eth_dev *dev) + + PMD_INIT_FUNC_TRACE(); + +- /* IXGBE devices don't support: +- * - half duplex (checked afterwards for valid speeds) +- * - fixed speed: TODO implement +- */ +- if (dev->data->dev_conf.link_speeds & ETH_LINK_SPEED_FIXED) { +- PMD_INIT_LOG(ERR, +- "Invalid link_speeds for port %u, fix speed not supported", +- dev->data->port_id); +- return -EINVAL; +- } +- + /* Stop the link setup handler before resetting the HW. */ +- rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev); ++ ixgbe_dev_wait_setup_link_complete(dev, 0); + + /* disable uio/vfio intr/eventfd mapping */ + rte_intr_disable(intr_handle); +@@ -2666,6 +2694,12 @@ ixgbe_dev_start(struct rte_eth_dev *dev) + + ixgbe_restore_statistics_mapping(dev); + ++ err = ixgbe_flow_ctrl_enable(dev, hw); ++ if (err < 0) { ++ PMD_INIT_LOG(ERR, "enable flow ctrl err"); ++ goto error; ++ } ++ + err = ixgbe_dev_rxtx_start(dev); + if (err < 0) { + PMD_INIT_LOG(ERR, "Unable to start rxtx queues"); +@@ -2724,7 +2758,11 @@ ixgbe_dev_start(struct rte_eth_dev *dev) + } + + link_speeds = &dev->data->dev_conf.link_speeds; +- if (*link_speeds & ~allowed_speeds) { ++ ++ /* Ignore autoneg flag bit and check the validity of ++ * link_speed ++ */ ++ if (((*link_speeds) >> 1) & ~(allowed_speeds >> 1)) { + PMD_INIT_LOG(ERR, "Invalid link setting"); + goto error; + } +@@ -2801,6 +2839,11 @@ ixgbe_dev_start(struct rte_eth_dev *dev) + "please call hierarchy_commit() " + "before starting the port"); + ++ /* wait for the controller to acquire link */ ++ err = ixgbe_wait_for_link_up(hw); ++ if (err) ++ goto error; ++ + /* + * Update link status right before return, because it may + * start link configuration process in a separate thread. +@@ -2842,7 +2885,7 @@ ixgbe_dev_stop(struct rte_eth_dev *dev) + + PMD_INIT_FUNC_TRACE(); + +- rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev); ++ ixgbe_dev_wait_setup_link_complete(dev, 0); + + /* disable interrupts */ + ixgbe_disable_intr(hw); +@@ -2893,6 +2936,8 @@ ixgbe_dev_stop(struct rte_eth_dev *dev) + + adapter->rss_reta_updated = 0; + ++ adapter->mac_ctrl_frame_fwd = 0; ++ + hw->adapter_stopped = true; + } + +@@ -4095,16 +4140,46 @@ ixgbevf_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + return ret_val; + } + +-static void +-ixgbe_dev_setup_link_alarm_handler(void *param) ++/* ++ * If @timeout_ms was 0, it means that it will not return until link complete. ++ * It returns 1 on complete, return 0 on timeout. ++ */ ++static int ++ixgbe_dev_wait_setup_link_complete(struct rte_eth_dev *dev, uint32_t timeout_ms) ++{ ++#define WARNING_TIMEOUT 9000 /* 9s in total */ ++ struct ixgbe_adapter *ad = dev->data->dev_private; ++ uint32_t timeout = timeout_ms ? 
timeout_ms : WARNING_TIMEOUT; ++ ++ while (rte_atomic32_read(&ad->link_thread_running)) { ++ msec_delay(1); ++ timeout--; ++ ++ if (timeout_ms) { ++ if (!timeout) ++ return 0; ++ } else if (!timeout) { ++ /* It will not return until link complete */ ++ timeout = WARNING_TIMEOUT; ++ PMD_DRV_LOG(ERR, "IXGBE link thread not complete too long time!"); ++ } ++ } ++ ++ return 1; ++} ++ ++static void * ++ixgbe_dev_setup_link_thread_handler(void *param) + { + struct rte_eth_dev *dev = (struct rte_eth_dev *)param; ++ struct ixgbe_adapter *ad = dev->data->dev_private; + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_interrupt *intr = + IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + u32 speed; + bool autoneg = false; + ++ pthread_detach(pthread_self()); + speed = hw->phy.autoneg_advertised; + if (!speed) + ixgbe_get_link_capabilities(hw, &speed, &autoneg); +@@ -4112,6 +4187,40 @@ ixgbe_dev_setup_link_alarm_handler(void *param) + ixgbe_setup_link(hw, speed, true); + + intr->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG; ++ rte_atomic32_clear(&ad->link_thread_running); ++ return NULL; ++} ++ ++/* ++ * In freebsd environment, nic_uio drivers do not support interrupts, ++ * rte_intr_callback_register() will fail to register interrupts. ++ * We can not make link status to change from down to up by interrupt ++ * callback. So we need to wait for the controller to acquire link ++ * when ports start. ++ * It returns 0 on link up. ++ */ ++static int ++ixgbe_wait_for_link_up(struct ixgbe_hw *hw) ++{ ++#ifdef RTE_EXEC_ENV_FREEBSD ++ int err, i, link_up = 0; ++ uint32_t speed = 0; ++ const int nb_iter = 25; ++ ++ for (i = 0; i < nb_iter; i++) { ++ err = ixgbe_check_link(hw, &speed, &link_up, 0); ++ if (err) ++ return err; ++ if (link_up) ++ return 0; ++ msec_delay(200); ++ } ++ ++ return 0; ++#else ++ RTE_SET_USED(hw); ++ return 0; ++#endif + } + + /* return 0 means link status changed, -1 means not changed */ +@@ -4120,6 +4229,7 @@ ixgbe_dev_link_update_share(struct rte_eth_dev *dev, + int wait_to_complete, int vf) + { + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ struct ixgbe_adapter *ad = dev->data->dev_private; + struct rte_eth_link link; + ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN; + struct ixgbe_interrupt *intr = +@@ -4133,7 +4243,8 @@ ixgbe_dev_link_update_share(struct rte_eth_dev *dev, + link.link_status = ETH_LINK_DOWN; + link.link_speed = ETH_SPEED_NUM_NONE; + link.link_duplex = ETH_LINK_HALF_DUPLEX; +- link.link_autoneg = ETH_LINK_AUTONEG; ++ link.link_autoneg = !(dev->data->dev_conf.link_speeds & ++ ETH_LINK_SPEED_FIXED); + + hw->mac.get_link_status = true; + +@@ -4144,6 +4255,11 @@ ixgbe_dev_link_update_share(struct rte_eth_dev *dev, + if (wait_to_complete == 0 || dev->data->dev_conf.intr_conf.lsc != 0) + wait = 0; + ++/* BSD has no interrupt mechanism, so force NIC status synchronization. 
*/ ++#ifdef RTE_EXEC_ENV_FREEBSD ++ wait = 1; ++#endif ++ + if (vf) + diag = ixgbevf_check_link(hw, &link_speed, &link_up, wait); + else +@@ -4155,15 +4271,34 @@ ixgbe_dev_link_update_share(struct rte_eth_dev *dev, + return rte_eth_linkstatus_set(dev, &link); + } + +- esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); +- if ((esdp_reg & IXGBE_ESDP_SDP3)) +- link_up = 0; ++ if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) { ++ esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); ++ if ((esdp_reg & IXGBE_ESDP_SDP3)) ++ link_up = 0; ++ } + + if (link_up == 0) { + if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) { +- intr->flags |= IXGBE_FLAG_NEED_LINK_CONFIG; +- rte_eal_alarm_set(10, +- ixgbe_dev_setup_link_alarm_handler, dev); ++ ixgbe_dev_wait_setup_link_complete(dev, 0); ++ if (rte_atomic32_test_and_set(&ad->link_thread_running)) { ++ /* To avoid race condition between threads, set ++ * the IXGBE_FLAG_NEED_LINK_CONFIG flag only ++ * when there is no link thread running. ++ */ ++ intr->flags |= IXGBE_FLAG_NEED_LINK_CONFIG; ++ if (rte_ctrl_thread_create(&ad->link_thread_tid, ++ "ixgbe-link-handler", ++ NULL, ++ ixgbe_dev_setup_link_thread_handler, ++ dev) < 0) { ++ PMD_DRV_LOG(ERR, ++ "Create link thread failed!"); ++ rte_atomic32_clear(&ad->link_thread_running); ++ } ++ } else { ++ PMD_DRV_LOG(ERR, ++ "Other link thread is running now!"); ++ } + } + return rte_eth_linkstatus_set(dev, &link); + } +@@ -4646,10 +4781,10 @@ static int + ixgbe_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) + { + struct ixgbe_hw *hw; ++ struct ixgbe_adapter *adapter = dev->data->dev_private; + int err; + uint32_t rx_buf_size; + uint32_t max_high_water; +- uint32_t mflcn; + enum ixgbe_fc_mode rte_fcmode_2_ixgbe_fcmode[] = { + ixgbe_fc_none, + ixgbe_fc_rx_pause, +@@ -4682,31 +4817,14 @@ ixgbe_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) + hw->fc.low_water[0] = fc_conf->low_water; + hw->fc.send_xon = fc_conf->send_xon; + hw->fc.disable_fc_autoneg = !fc_conf->autoneg; ++ adapter->mac_ctrl_frame_fwd = fc_conf->mac_ctrl_frame_fwd; + +- err = ixgbe_fc_enable(hw); +- +- /* Not negotiated is not an error case */ +- if ((err == IXGBE_SUCCESS) || (err == IXGBE_ERR_FC_NOT_NEGOTIATED)) { +- +- /* check if we want to forward MAC frames - driver doesn't have native +- * capability to do that, so we'll write the registers ourselves */ +- +- mflcn = IXGBE_READ_REG(hw, IXGBE_MFLCN); +- +- /* set or clear MFLCN.PMCF bit depending on configuration */ +- if (fc_conf->mac_ctrl_frame_fwd != 0) +- mflcn |= IXGBE_MFLCN_PMCF; +- else +- mflcn &= ~IXGBE_MFLCN_PMCF; +- +- IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn); +- IXGBE_WRITE_FLUSH(hw); +- +- return 0; ++ err = ixgbe_flow_ctrl_enable(dev, hw); ++ if (err < 0) { ++ PMD_INIT_LOG(ERR, "ixgbe_flow_ctrl_enable = 0x%x", err); ++ return -EIO; + } +- +- PMD_INIT_LOG(ERR, "ixgbe_fc_enable = 0x%x", err); +- return -EIO; ++ return err; + } + + /** +@@ -5207,7 +5325,7 @@ ixgbevf_dev_start(struct rte_eth_dev *dev) + PMD_INIT_FUNC_TRACE(); + + /* Stop the link setup handler before resetting the HW. 
*/ +- rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev); ++ ixgbe_dev_wait_setup_link_complete(dev, 0); + + err = hw->mac.ops.reset_hw(hw); + if (err) { +@@ -5305,7 +5423,7 @@ ixgbevf_dev_stop(struct rte_eth_dev *dev) + + PMD_INIT_FUNC_TRACE(); + +- rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev); ++ ixgbe_dev_wait_setup_link_complete(dev, 0); + + ixgbevf_intr_disable(dev); + +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h +index 76a1b9d184..0334c226be 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h ++++ b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h +@@ -510,6 +510,9 @@ struct ixgbe_adapter { + * mailbox status) link status. + */ + uint8_t pflink_fullchk; ++ uint8_t mac_ctrl_frame_fwd; ++ rte_atomic32_t link_thread_running; ++ pthread_t link_thread_tid; + }; + + struct ixgbe_vf_representor { +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c +index fa572d184d..a7b24cd053 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c ++++ b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c +@@ -87,11 +87,6 @@ + #define rte_ixgbe_prefetch(p) do {} while (0) + #endif + +-#ifdef RTE_IXGBE_INC_VECTOR +-uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts, +- uint16_t nb_pkts); +-#endif +- + /********************************************************************* + * + * TX functions +@@ -993,6 +988,12 @@ ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + return i; + } + ++ /* check the size of packet */ ++ if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) { ++ rte_errno = EINVAL; ++ return i; ++ } ++ + #ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h +index 505d344b9c..5e1ac44942 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h ++++ b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h +@@ -53,6 +53,8 @@ + + #define IXGBE_TX_MAX_SEG 40 + ++#define IXGBE_TX_MIN_PKT_LEN 14 ++ + #define IXGBE_PACKET_TYPE_MASK_82599 0X7F + #define IXGBE_PACKET_TYPE_MASK_X550 0X10FF + #define IXGBE_PACKET_TYPE_MASK_TUNNEL 0XFF +diff --git a/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.c b/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.c +index 8bcaded6e5..9bff557f97 100644 +--- a/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.c ++++ b/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.c +@@ -522,6 +522,9 @@ rte_pmd_ixgbe_macsec_enable(uint16_t port, uint8_t en, uint8_t rp) + + dev = &rte_eth_devices[port]; + ++ if (!is_ixgbe_supported(dev)) ++ return -ENOTSUP; ++ + macsec_setting.offload_en = 1; + macsec_setting.encrypt_en = en; + macsec_setting.replayprotect_en = rp; +@@ -542,6 +545,9 @@ rte_pmd_ixgbe_macsec_disable(uint16_t port) + + dev = &rte_eth_devices[port]; + ++ if (!is_ixgbe_supported(dev)) ++ return -ENOTSUP; ++ + ixgbe_dev_macsec_setting_reset(dev); + + ixgbe_dev_macsec_register_disable(dev); +diff --git a/dpdk/drivers/net/memif/memif_socket.c b/dpdk/drivers/net/memif/memif_socket.c +index ad5e30b96e..c1967c67bf 100644 +--- a/dpdk/drivers/net/memif/memif_socket.c ++++ b/dpdk/drivers/net/memif/memif_socket.c +@@ -204,6 +204,13 @@ memif_msg_receive_init(struct memif_control_channel *cc, memif_msg_t *msg) + pmd = dev->data->dev_private; + if (((pmd->flags & ETH_MEMIF_FLAG_DISABLED) == 0) && + pmd->id == i->id) { ++ if (pmd->flags & (ETH_MEMIF_FLAG_CONNECTING | ++ ETH_MEMIF_FLAG_CONNECTED)) { ++ memif_msg_enq_disconnect(cc, ++ "Already connected", 0); ++ return -1; ++ } ++ + /* assign control channel to device */ + cc->dev = 
dev; + pmd->cc = cc; +@@ -215,12 +222,6 @@ memif_msg_receive_init(struct memif_control_channel *cc, memif_msg_t *msg) + return -1; + } + +- if (pmd->flags & (ETH_MEMIF_FLAG_CONNECTING | +- ETH_MEMIF_FLAG_CONNECTED)) { +- memif_msg_enq_disconnect(pmd->cc, +- "Already connected", 0); +- return -1; +- } + strlcpy(pmd->remote_name, (char *)i->name, + sizeof(pmd->remote_name)); + +@@ -765,6 +766,7 @@ memif_intr_handler(void *arg) + ret = memif_msg_receive(cc); + /* if driver failed to assign device */ + if (cc->dev == NULL) { ++ memif_msg_send_from_queue(cc); + ret = rte_intr_callback_unregister_pending(&cc->intr_handle, + memif_intr_handler, + cc, +diff --git a/dpdk/drivers/net/memif/rte_eth_memif.c b/dpdk/drivers/net/memif/rte_eth_memif.c +index 8dd1d0d63d..aa83df3652 100644 +--- a/dpdk/drivers/net/memif/rte_eth_memif.c ++++ b/dpdk/drivers/net/memif/rte_eth_memif.c +@@ -1501,7 +1501,7 @@ memif_create(struct rte_vdev_device *vdev, enum memif_role_t role, + } + + +- eth_dev->data->dev_flags &= RTE_ETH_DEV_CLOSE_REMOVE; ++ eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; + + rte_eth_dev_probing_finish(eth_dev); + +diff --git a/dpdk/drivers/net/mlx4/meson.build b/dpdk/drivers/net/mlx4/meson.build +index 9eb4988420..ff7386aee2 100644 +--- a/dpdk/drivers/net/mlx4/meson.build ++++ b/dpdk/drivers/net/mlx4/meson.build +@@ -9,11 +9,12 @@ if not is_linux + endif + build = true + +-pmd_dlopen = (get_option('ibverbs_link') == 'dlopen') ++static_ibverbs = (get_option('ibverbs_link') == 'static') ++dlopen_ibverbs = (get_option('ibverbs_link') == 'dlopen') + LIB_GLUE_BASE = 'librte_pmd_mlx4_glue.so' + LIB_GLUE_VERSION = '18.02.0' + LIB_GLUE = LIB_GLUE_BASE + '.' + LIB_GLUE_VERSION +-if pmd_dlopen ++if dlopen_ibverbs + dpdk_conf.set('RTE_IBVERBS_LINK_DLOPEN', 1) + cflags += [ + '-DMLX4_GLUE="@0@"'.format(LIB_GLUE), +@@ -24,12 +25,15 @@ endif + libnames = [ 'mlx4', 'ibverbs' ] + libs = [] + foreach libname:libnames +- lib = dependency('lib' + libname, required:false) +- if not lib.found() ++ lib = dependency('lib' + libname, static:static_ibverbs, required:false) ++ if not lib.found() and not static_ibverbs + lib = cc.find_library(libname, required:false) + endif + if lib.found() +- libs += [ lib ] ++ libs += lib ++ if not static_ibverbs and not dlopen_ibverbs ++ ext_deps += lib ++ endif + else + build = false + reason = 'missing dependency, "' + libname + '"' +@@ -37,8 +41,17 @@ foreach libname:libnames + endforeach + + if build ++ if static_ibverbs or dlopen_ibverbs ++ # Build without adding shared libs to Requires.private ++ ibv_cflags = run_command(pkgconf, '--cflags', 'libibverbs').stdout() ++ ext_deps += declare_dependency(compile_args: ibv_cflags.split()) ++ endif ++ if static_ibverbs ++ # Add static deps ldflags to internal apps and Libs.private ++ ibv_ldflags = run_command(ldflags_ibverbs_static, check:true).stdout() ++ ext_deps += declare_dependency(link_args:ibv_ldflags.split()) ++ endif + allow_experimental_apis = true +- ext_deps += libs + sources = files( + 'mlx4.c', + 'mlx4_ethdev.c', +@@ -51,7 +64,7 @@ if build + 'mlx4_txq.c', + 'mlx4_utils.c', + ) +- if not pmd_dlopen ++ if not dlopen_ibverbs + sources += files('mlx4_glue.c') + endif + cflags_options = [ +@@ -103,7 +116,7 @@ if build + configure_file(output : 'mlx4_autoconf.h', configuration : config) + endif + # Build Glue Library +-if pmd_dlopen and build ++if dlopen_ibverbs and build + dlopen_name = 'mlx4_glue' + dlopen_lib_name = driver_name_fmt.format(dlopen_name) + dlopen_so_version = LIB_GLUE_VERSION +diff --git 
a/dpdk/drivers/net/mlx4/mlx4.c b/dpdk/drivers/net/mlx4/mlx4.c +index ab5e6c66cb..7771417ebe 100644 +--- a/dpdk/drivers/net/mlx4/mlx4.c ++++ b/dpdk/drivers/net/mlx4/mlx4.c +@@ -49,6 +49,10 @@ + #include "mlx4_rxtx.h" + #include "mlx4_utils.h" + ++#ifdef MLX4_GLUE ++const struct mlx4_glue *mlx4_glue; ++#endif ++ + static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data"; + + /* Shared memory between primary and secondary processes. */ +diff --git a/dpdk/drivers/net/mlx4/mlx4_flow.c b/dpdk/drivers/net/mlx4/mlx4_flow.c +index 96479b83dd..2da4f6d965 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_flow.c ++++ b/dpdk/drivers/net/mlx4/mlx4_flow.c +@@ -981,12 +981,13 @@ mlx4_drop_get(struct mlx4_priv *priv) + priv->drop = drop; + return drop; + error: +- if (drop->qp) +- claim_zero(mlx4_glue->destroy_qp(drop->qp)); +- if (drop->cq) +- claim_zero(mlx4_glue->destroy_cq(drop->cq)); +- if (drop) ++ if (drop) { ++ if (drop->qp) ++ claim_zero(mlx4_glue->destroy_qp(drop->qp)); ++ if (drop->cq) ++ claim_zero(mlx4_glue->destroy_cq(drop->cq)); + rte_free(drop); ++ } + rte_errno = ENOMEM; + return NULL; + } +diff --git a/dpdk/drivers/net/mlx4/mlx4_glue.h b/dpdk/drivers/net/mlx4/mlx4_glue.h +index 668ca86700..5d9e985495 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_glue.h ++++ b/dpdk/drivers/net/mlx4/mlx4_glue.h +@@ -84,6 +84,6 @@ struct mlx4_glue { + void *attr); + }; + +-const struct mlx4_glue *mlx4_glue; ++extern const struct mlx4_glue *mlx4_glue; + + #endif /* MLX4_GLUE_H_ */ +diff --git a/dpdk/drivers/net/mlx4/mlx4_rxtx.h b/dpdk/drivers/net/mlx4/mlx4_rxtx.h +index 8baf33fa94..9de6c59411 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_rxtx.h ++++ b/dpdk/drivers/net/mlx4/mlx4_rxtx.h +@@ -124,7 +124,7 @@ struct txq { + + /* mlx4_rxq.c */ + +-uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE]; ++extern uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE]; + int mlx4_rss_init(struct mlx4_priv *priv); + void mlx4_rss_deinit(struct mlx4_priv *priv); + struct mlx4_rss *mlx4_rss_get(struct mlx4_priv *priv, uint64_t fields, +diff --git a/dpdk/drivers/net/mlx4/mlx4_utils.h b/dpdk/drivers/net/mlx4/mlx4_utils.h +index 74b9d2ecdc..5718b9c742 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_utils.h ++++ b/dpdk/drivers/net/mlx4/mlx4_utils.h +@@ -79,9 +79,10 @@ pmd_drv_log_basename(const char *s) + + /** Allocate a buffer on the stack and fill it with a printf format string. */ + #define MKSTR(name, ...) \ +- char name[snprintf(NULL, 0, __VA_ARGS__) + 1]; \ ++ int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \ ++ char name[mkstr_size_##name + 1]; \ + \ +- snprintf(name, sizeof(name), __VA_ARGS__) ++ snprintf(name, sizeof(name), "" __VA_ARGS__) + + /** Generate a string out of the provided arguments. */ + #define MLX4_STR(...) 
# __VA_ARGS__ +diff --git a/dpdk/drivers/net/mlx5/Makefile b/dpdk/drivers/net/mlx5/Makefile +index c5cf4397ac..605975c245 100644 +--- a/dpdk/drivers/net/mlx5/Makefile ++++ b/dpdk/drivers/net/mlx5/Makefile +@@ -193,6 +193,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh + infiniband/mlx5dv.h \ + func mlx5dv_devx_obj_query_async \ + $(AUTOCONF_OUTPUT) ++ $Q sh -- '$<' '$@' \ ++ HAVE_IBV_DEVX_QP \ ++ infiniband/mlx5dv.h \ ++ func mlx5dv_devx_qp_query \ ++ $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_MLX5DV_DR_ACTION_DEST_DEVX_TIR \ + infiniband/mlx5dv.h \ +diff --git a/dpdk/drivers/net/mlx5/meson.build b/dpdk/drivers/net/mlx5/meson.build +index d6b32db794..a5775d18e3 100644 +--- a/dpdk/drivers/net/mlx5/meson.build ++++ b/dpdk/drivers/net/mlx5/meson.build +@@ -9,11 +9,12 @@ if not is_linux + endif + build = true + +-pmd_dlopen = (get_option('ibverbs_link') == 'dlopen') ++static_ibverbs = (get_option('ibverbs_link') == 'static') ++dlopen_ibverbs = (get_option('ibverbs_link') == 'dlopen') + LIB_GLUE_BASE = 'librte_pmd_mlx5_glue.so' + LIB_GLUE_VERSION = '19.08.0' + LIB_GLUE = LIB_GLUE_BASE + '.' + LIB_GLUE_VERSION +-if pmd_dlopen ++if dlopen_ibverbs + dpdk_conf.set('RTE_IBVERBS_LINK_DLOPEN', 1) + cflags += [ + '-DMLX5_GLUE="@0@"'.format(LIB_GLUE), +@@ -24,12 +25,15 @@ endif + libnames = [ 'mlx5', 'ibverbs' ] + libs = [] + foreach libname:libnames +- lib = dependency('lib' + libname, required:false) +- if not lib.found() ++ lib = dependency('lib' + libname, static:static_ibverbs, required:false) ++ if not lib.found() and not static_ibverbs + lib = cc.find_library(libname, required:false) + endif + if lib.found() + libs += [ lib ] ++ if not static_ibverbs ++ ext_deps += lib ++ endif + else + build = false + reason = 'missing dependency, "' + libname + '"' +@@ -37,9 +41,18 @@ foreach libname:libnames + endforeach + + if build ++ if static_ibverbs or dlopen_ibverbs ++ # Build without adding shared libs to Requires.private ++ ibv_cflags = run_command(pkgconf, '--cflags', 'libibverbs').stdout() ++ ext_deps += declare_dependency(compile_args: ibv_cflags.split()) ++ endif ++ if static_ibverbs ++ # Add static deps ldflags to internal apps and Libs.private ++ ibv_ldflags = run_command(ldflags_ibverbs_static, check:true).stdout() ++ ext_deps += declare_dependency(link_args:ibv_ldflags.split()) ++ endif + allow_experimental_apis = true + deps += ['hash'] +- ext_deps += libs + sources = files( + 'mlx5.c', + 'mlx5_ethdev.c', +@@ -67,7 +80,7 @@ if build + or dpdk_conf.has('RTE_ARCH_PPC_64')) + sources += files('mlx5_rxtx_vec.c') + endif +- if not pmd_dlopen ++ if not dlopen_ibverbs + sources += files('mlx5_glue.c') + endif + cflags_options = [ +@@ -130,6 +143,8 @@ if build + 'MLX5DV_FLOW_ACTION_COUNTERS_DEVX' ], + [ 'HAVE_IBV_DEVX_ASYNC', 'infiniband/mlx5dv.h', + 'mlx5dv_devx_obj_query_async' ], ++ [ 'HAVE_IBV_DEVX_QP', 'infiniband/mlx5dv.h', ++ 'mlx5dv_devx_qp_query' ], + [ 'HAVE_MLX5DV_DR_ACTION_DEST_DEVX_TIR', 'infiniband/mlx5dv.h', + 'mlx5dv_dr_action_create_dest_devx_tir' ], + [ 'HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER', 'infiniband/mlx5dv.h', +@@ -200,7 +215,7 @@ if build + configure_file(output : 'mlx5_autoconf.h', configuration : config) + endif + # Build Glue Library +-if pmd_dlopen and build ++if dlopen_ibverbs and build + dlopen_name = 'mlx5_glue' + dlopen_lib_name = driver_name_fmt.format(dlopen_name) + dlopen_so_version = LIB_GLUE_VERSION +diff --git a/dpdk/drivers/net/mlx5/mlx5.c b/dpdk/drivers/net/mlx5/mlx5.c +index d84a6f91b4..8879df317d 100644 +--- 
a/dpdk/drivers/net/mlx5/mlx5.c ++++ b/dpdk/drivers/net/mlx5/mlx5.c +@@ -62,6 +62,9 @@ + /* Device parameter to configure log 2 of the number of strides for MPRQ. */ + #define MLX5_RX_MPRQ_LOG_STRIDE_NUM "mprq_log_stride_num" + ++/* Device parameter to configure log 2 of the stride size for MPRQ. */ ++#define MLX5_RX_MPRQ_LOG_STRIDE_SIZE "mprq_log_stride_size" ++ + /* Device parameter to limit the size of memcpy'd packet for MPRQ. */ + #define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len" + +@@ -184,6 +187,10 @@ struct mlx5_dev_spawn_data { + struct rte_pci_device *pci_dev; /**< Backend PCI device. */ + }; + ++#ifdef MLX5_GLUE ++const struct mlx5_glue *mlx5_glue; ++#endif ++ + static LIST_HEAD(, mlx5_ibv_shared) mlx5_ibv_list = LIST_HEAD_INITIALIZER(); + static pthread_mutex_t mlx5_ibv_list_mutex = PTHREAD_MUTEX_INITIALIZER; + +@@ -196,11 +203,14 @@ static pthread_mutex_t mlx5_ibv_list_mutex = PTHREAD_MUTEX_INITIALIZER; + /** + * Allocate ID pool structure. + * ++ * @param[in] max_id ++ * The maximum id can be allocated from the pool. ++ * + * @return + * Pointer to pool object, NULL value otherwise. + */ + struct mlx5_flow_id_pool * +-mlx5_flow_id_pool_alloc(void) ++mlx5_flow_id_pool_alloc(uint32_t max_id) + { + struct mlx5_flow_id_pool *pool; + void *mem; +@@ -223,6 +233,7 @@ mlx5_flow_id_pool_alloc(void) + pool->curr = pool->free_arr; + pool->last = pool->free_arr + MLX5_FLOW_MIN_ID_POOL_SIZE; + pool->base_index = 0; ++ pool->max_id = max_id; + return pool; + error: + rte_free(pool); +@@ -257,7 +268,7 @@ uint32_t + mlx5_flow_id_get(struct mlx5_flow_id_pool *pool, uint32_t *id) + { + if (pool->curr == pool->free_arr) { +- if (pool->base_index == UINT32_MAX) { ++ if (pool->base_index == pool->max_id) { + rte_errno = ENOMEM; + DRV_LOG(ERR, "no free id"); + return -rte_errno; +@@ -590,7 +601,7 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn, + goto error; + } + } +- sh->flow_id_pool = mlx5_flow_id_pool_alloc(); ++ sh->flow_id_pool = mlx5_flow_id_pool_alloc(UINT32_MAX); + if (!sh->flow_id_pool) { + DRV_LOG(ERR, "can't create flow id pool"); + err = ENOMEM; +@@ -673,12 +684,12 @@ mlx5_free_shared_ibctx(struct mlx5_ibv_shared *sh) + assert(rte_eal_process_type() == RTE_PROC_PRIMARY); + if (--sh->refcnt) + goto exit; +- /* Release created Memory Regions. */ +- mlx5_mr_release(sh); + /* Remove from memory callback device list. */ + rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock); + LIST_REMOVE(sh, mem_event_cb); + rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock); ++ /* Release created Memory Regions. */ ++ mlx5_mr_release(sh); + /* Remove context from the global device list. */ + LIST_REMOVE(sh, next); + /* +@@ -868,8 +879,13 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv) + { + struct mlx5_ibv_shared *sh = priv->sh; + char s[MLX5_HLIST_NAMESIZE]; +- int err = mlx5_alloc_table_hash_list(priv); ++ int err = 0; + ++ if (!sh->flow_tbls) ++ err = mlx5_alloc_table_hash_list(priv); ++ else ++ DRV_LOG(DEBUG, "sh->flow_tbls[%p] already created, reuse\n", ++ (void *)sh->flow_tbls); + if (err) + return err; + /* Create tags hash list table. 
*/ +@@ -1490,6 +1506,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque) + config->mprq.enabled = !!tmp; + } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) { + config->mprq.stride_num_n = tmp; ++ } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_SIZE, key) == 0) { ++ config->mprq.stride_size_n = tmp; + } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) { + config->mprq.max_memcpy_len = tmp; + } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) { +@@ -1582,6 +1600,7 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) + MLX5_RXQ_PKT_PAD_EN, + MLX5_RX_MPRQ_EN, + MLX5_RX_MPRQ_LOG_STRIDE_NUM, ++ MLX5_RX_MPRQ_LOG_STRIDE_SIZE, + MLX5_RX_MPRQ_MAX_MEMCPY_LEN, + MLX5_RXQS_MIN_MPRQ, + MLX5_TXQ_INLINE, +@@ -1697,7 +1716,7 @@ mlx5_init_once(void) + * key is specified in devargs + * - if DevX is enabled the inline mode is queried from the + * device (HCA attributes and NIC vport context if needed). +- * - otherwise L2 mode (18 bytes) is assumed for ConnectX-4/4LX ++ * - otherwise L2 mode (18 bytes) is assumed for ConnectX-4/4 Lx + * and none (0 bytes) for other NICs + * + * @param spawn +@@ -1931,9 +1950,9 @@ mlx5_get_dbr(struct rte_eth_dev *dev, struct mlx5_devx_dbr_page **dbr_page) + i++) + ; /* Empty. */ + /* Find the first clear bit. */ ++ assert(i < MLX5_DBR_BITMAP_SIZE); + j = rte_bsf64(~page->dbr_bitmap[i]); +- assert(i < (MLX5_DBR_PER_PAGE / 64)); +- page->dbr_bitmap[i] |= (1 << j); ++ page->dbr_bitmap[i] |= (UINT64_C(1) << j); + page->dbr_count++; + *dbr_page = page; + return (((i * 64) + j) * sizeof(uint64_t)); +@@ -1978,7 +1997,7 @@ mlx5_release_dbr(struct rte_eth_dev *dev, uint32_t umem_id, uint64_t offset) + int i = offset / 64; + int j = offset % 64; + +- page->dbr_bitmap[i] &= ~(1 << j); ++ page->dbr_bitmap[i] &= ~(UINT64_C(1) << j); + } + return ret; + } +@@ -2236,8 +2255,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + mprq_caps.min_single_wqe_log_num_of_strides; + mprq_max_stride_num_n = + mprq_caps.max_single_wqe_log_num_of_strides; +- config.mprq.stride_num_n = RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N, +- mprq_min_stride_num_n); + } + #endif + if (RTE_CACHE_LINE_SIZE == 128 && +@@ -2543,6 +2560,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + priv->mtr_color_reg = ffs(reg_c_mask) - 1 + + REG_C_0; + priv->mtr_en = 1; ++ priv->mtr_reg_share = ++ config.hca_attr.qos.flow_meter_reg_share; + DRV_LOG(DEBUG, "The REG_C meter uses is %d", + priv->mtr_color_reg); + } +@@ -2550,17 +2569,32 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + #endif + } + if (config.mprq.enabled && mprq) { +- if (config.mprq.stride_num_n > mprq_max_stride_num_n || +- config.mprq.stride_num_n < mprq_min_stride_num_n) { ++ if (config.mprq.stride_num_n && ++ (config.mprq.stride_num_n > mprq_max_stride_num_n || ++ config.mprq.stride_num_n < mprq_min_stride_num_n)) { + config.mprq.stride_num_n = +- RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N, +- mprq_min_stride_num_n); ++ RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N, ++ mprq_min_stride_num_n), ++ mprq_max_stride_num_n); + DRV_LOG(WARNING, + "the number of strides" + " for Multi-Packet RQ is out of range," + " setting default value (%u)", + 1 << config.mprq.stride_num_n); + } ++ if (config.mprq.stride_size_n && ++ (config.mprq.stride_size_n > mprq_max_stride_size_n || ++ config.mprq.stride_size_n < mprq_min_stride_size_n)) { ++ config.mprq.stride_size_n = ++ RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_SIZE_N, ++ mprq_min_stride_size_n), ++ mprq_max_stride_size_n); ++ DRV_LOG(WARNING, ++ "the size of a stride" ++ " for Multi-Packet RQ is out of range," ++ " 
setting default value (%u)", ++ 1 << config.mprq.stride_size_n); ++ } + config.mprq.min_stride_size_n = mprq_min_stride_size_n; + config.mprq.max_stride_size_n = mprq_max_stride_size_n; + } else if (config.mprq.enabled && !mprq) { +@@ -2675,7 +2709,12 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + err = mlx5_alloc_shared_dr(priv); + if (err) + goto error; +- priv->qrss_id_pool = mlx5_flow_id_pool_alloc(); ++ /* ++ * RSS id is shared with meter flow id. Meter flow id can only ++ * use the 24 MSB of the register. ++ */ ++ priv->qrss_id_pool = mlx5_flow_id_pool_alloc(UINT32_MAX >> ++ MLX5_MTR_COLOR_BITS); + if (!priv->qrss_id_pool) { + DRV_LOG(ERR, "can't create flow id pool"); + err = ENOMEM; +@@ -3074,7 +3113,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + /* + * Single IB device with multiple ports found, + * it may be E-Switch master device and representors. +- * We have to perform identification trough the ports. ++ * We have to perform identification through the ports. + */ + assert(nl_rdma >= 0); + assert(ns == 0); +@@ -3274,7 +3313,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + .mr_ext_memseg_en = 1, + .mprq = { + .enabled = 0, /* Disabled by default. */ +- .stride_num_n = MLX5_MPRQ_STRIDE_NUM_N, ++ .stride_num_n = 0, ++ .stride_size_n = 0, + .max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN, + .min_rxqs_num = MLX5_MPRQ_MIN_RXQS, + }, +diff --git a/dpdk/drivers/net/mlx5/mlx5.h b/dpdk/drivers/net/mlx5/mlx5.h +index 0c3a90e1bf..e4af5d40db 100644 +--- a/dpdk/drivers/net/mlx5/mlx5.h ++++ b/dpdk/drivers/net/mlx5/mlx5.h +@@ -148,12 +148,15 @@ struct mlx5_xstats_ctrl { + /* Index in the device counters table. */ + uint16_t dev_table_idx[MLX5_MAX_XSTATS]; + uint64_t base[MLX5_MAX_XSTATS]; ++ uint64_t xstats[MLX5_MAX_XSTATS]; ++ uint64_t hw_stats[MLX5_MAX_XSTATS]; + struct mlx5_counter_ctrl info[MLX5_MAX_XSTATS]; + }; + + struct mlx5_stats_ctrl { + /* Base for imissed counter. */ + uint64_t imissed_base; ++ uint64_t imissed; + }; + + /* devX creation object */ +@@ -173,6 +176,8 @@ struct mlx5_devx_mkey_attr { + struct mlx5_hca_qos_attr { + uint32_t sup:1; /* Whether QOS is supported. */ + uint32_t srtcm_sup:1; /* Whether srTCM mode is supported. */ ++ uint32_t flow_meter_reg_share:1; ++ /* Whether reg_c share is supported. */ + uint8_t log_max_flow_meter; + /* Power of the maximum supported meters. */ + uint8_t flow_meter_reg_c_ids; +@@ -262,6 +267,7 @@ struct mlx5_dev_config { + struct { + unsigned int enabled:1; /* Whether MPRQ is enabled. */ + unsigned int stride_num_n; /* Number of strides. */ ++ unsigned int stride_size_n; /* Size of a stride. */ + unsigned int min_stride_size_n; /* Min size of a stride. */ + unsigned int max_stride_size_n; /* Max size of a stride. */ + unsigned int max_memcpy_len; +@@ -364,7 +370,7 @@ struct mlx5_devx_tir_attr { + uint32_t rx_hash_fn:4; + uint32_t self_lb_block:2; + uint32_t transport_domain:24; +- uint32_t rx_hash_toeplitz_key[10]; ++ uint8_t rx_hash_toeplitz_key[MLX5_RSS_HASH_KEY_LEN]; + struct mlx5_rx_hash_field_select rx_hash_field_selector_outer; + struct mlx5_rx_hash_field_select rx_hash_field_selector_inner; + }; +@@ -626,6 +632,7 @@ struct mlx5_flow_id_pool { + /**< The next index that can be used without any free elements. */ + uint32_t *curr; /**< Pointer to the index to pop. */ + uint32_t *last; /**< Pointer to the last element in the empty arrray. */ ++ uint32_t max_id; /**< Maximum id can be allocated from the pool. 
*/ + }; + + /* +@@ -660,14 +667,8 @@ struct mlx5_ibv_shared { + uint32_t dv_regc0_mask; /* available bits of metatada reg_c[0]. */ + uint32_t dv_refcnt; /* DV/DR data reference counter. */ + void *fdb_domain; /* FDB Direct Rules name space handle. */ +- struct mlx5_flow_tbl_resource *fdb_mtr_sfx_tbl; +- /* FDB meter suffix rules table. */ + void *rx_domain; /* RX Direct Rules name space handle. */ +- struct mlx5_flow_tbl_resource *rx_mtr_sfx_tbl; +- /* RX meter suffix rules table. */ + void *tx_domain; /* TX Direct Rules name space handle. */ +- struct mlx5_flow_tbl_resource *tx_mtr_sfx_tbl; +- /* TX meter suffix rules table. */ + struct mlx5_hlist *flow_tbls; + /* Direct Rules tables for FDB, NIC TX+RX */ + void *esw_drop_action; /* Pointer to DR E-Switch drop action. */ +@@ -727,6 +728,7 @@ struct mlx5_priv { + unsigned int dr_shared:1; /* DV/DR data is shared. */ + unsigned int counter_fallback:1; /* Use counter fallback management. */ + unsigned int mtr_en:1; /* Whether support meter. */ ++ unsigned int mtr_reg_share:1; /* Whether support meter REG_C share. */ + uint16_t domain_id; /* Switch domain identifier. */ + uint16_t vport_id; /* Associated VF vport index (if any). */ + uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */ +@@ -784,6 +786,7 @@ struct mlx5_priv { + /* UAR same-page access control required in 32bit implementations. */ + #endif + uint8_t skip_default_rss_reta; /* Skip configuration of default reta. */ ++ uint8_t fdb_def_rule; /* Whether fdb jump to table 1 is configured. */ + }; + + #define PORT_ID(priv) ((priv)->dev_data->port_id) +@@ -972,6 +975,7 @@ struct mlx5_flow_counter *mlx5_counter_alloc(struct rte_eth_dev *dev); + void mlx5_counter_free(struct rte_eth_dev *dev, struct mlx5_flow_counter *cnt); + int mlx5_counter_query(struct rte_eth_dev *dev, struct mlx5_flow_counter *cnt, + bool clear, uint64_t *pkts, uint64_t *bytes); ++void mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev); + + /* mlx5_mp.c */ + void mlx5_mp_req_start_rxtx(struct rte_eth_dev *dev); +diff --git a/dpdk/drivers/net/mlx5/mlx5_defs.h b/dpdk/drivers/net/mlx5/mlx5_defs.h +index 042e1f31ee..418e744d65 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_defs.h ++++ b/dpdk/drivers/net/mlx5/mlx5_defs.h +@@ -146,6 +146,9 @@ + /* Log 2 of the default number of strides per WQE for Multi-Packet RQ. */ + #define MLX5_MPRQ_STRIDE_NUM_N 6U + ++/* Log 2 of the default size of a stride per WQE for Multi-Packet RQ. */ ++#define MLX5_MPRQ_STRIDE_SIZE_N 11U ++ + /* Two-byte shift is disabled for Multi-Packet RQ. */ + #define MLX5_MPRQ_TWO_BYTE_SHIFT 0 + +@@ -176,6 +179,10 @@ + #define MLX5_FLOW_MREG_HNAME "MARK_COPY_TABLE" + #define MLX5_DEFAULT_COPY_ID UINT32_MAX + ++/* Hairpin TX/RX queue configuration parameters. 
*/ ++#define MLX5_HAIRPIN_QUEUE_STRIDE 6 ++#define MLX5_HAIRPIN_JUMBO_LOG_SIZE (15 + 2) ++ + /* Definition of static_assert found in /usr/include/assert.h */ + #ifndef HAVE_STATIC_ASSERT + #define static_assert _Static_assert +diff --git a/dpdk/drivers/net/mlx5/mlx5_devx_cmds.c b/dpdk/drivers/net/mlx5/mlx5_devx_cmds.c +index 9893287ba8..e223ee9b18 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_devx_cmds.c ++++ b/dpdk/drivers/net/mlx5/mlx5_devx_cmds.c +@@ -362,6 +362,8 @@ mlx5_devx_cmd_query_hca_attr(struct ibv_context *ctx, + MLX5_GET(qos_cap, hcattr, log_max_flow_meter); + attr->qos.flow_meter_reg_c_ids = + MLX5_GET(qos_cap, hcattr, flow_meter_reg_id); ++ attr->qos.flow_meter_reg_share = ++ MLX5_GET(qos_cap, hcattr, flow_meter_reg_share); + } + if (!attr->eth_net_offloads) + return 0; +@@ -633,9 +635,8 @@ mlx5_devx_cmd_create_tir(struct ibv_context *ctx, + { + uint32_t in[MLX5_ST_SZ_DW(create_tir_in)] = {0}; + uint32_t out[MLX5_ST_SZ_DW(create_tir_out)] = {0}; +- void *tir_ctx, *outer, *inner; ++ void *tir_ctx, *outer, *inner, *rss_key; + struct mlx5_devx_obj *tir = NULL; +- int i; + + tir = rte_calloc(__func__, 1, sizeof(*tir), 0); + if (!tir) { +@@ -658,10 +659,8 @@ mlx5_devx_cmd_create_tir(struct ibv_context *ctx, + MLX5_SET(tirc, tir_ctx, rx_hash_fn, tir_attr->rx_hash_fn); + MLX5_SET(tirc, tir_ctx, self_lb_block, tir_attr->self_lb_block); + MLX5_SET(tirc, tir_ctx, transport_domain, tir_attr->transport_domain); +- for (i = 0; i < 10; i++) { +- MLX5_SET(tirc, tir_ctx, rx_hash_toeplitz_key[i], +- tir_attr->rx_hash_toeplitz_key[i]); +- } ++ rss_key = MLX5_ADDR_OF(tirc, tir_ctx, rx_hash_toeplitz_key); ++ memcpy(rss_key, tir_attr->rx_hash_toeplitz_key, MLX5_RSS_HASH_KEY_LEN); + outer = MLX5_ADDR_OF(tirc, tir_ctx, rx_hash_field_selector_outer); + MLX5_SET(rx_hash_field_select, outer, l3_prot_type, + tir_attr->rx_hash_field_selector_outer.l3_prot_type); +diff --git a/dpdk/drivers/net/mlx5/mlx5_ethdev.c b/dpdk/drivers/net/mlx5/mlx5_ethdev.c +index d80ae458bc..3b4c5dbe7a 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_ethdev.c ++++ b/dpdk/drivers/net/mlx5/mlx5_ethdev.c +@@ -476,7 +476,7 @@ mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev) + + rxq_data = (*priv->rxqs)[i]; + rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); +- if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) ++ if (rxq_ctrl && rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) + rss_queue_arr[j++] = i; + } + rss_queue_n = j; +diff --git a/dpdk/drivers/net/mlx5/mlx5_flow.c b/dpdk/drivers/net/mlx5/mlx5_flow.c +index 008716367c..e05c35a417 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_flow.c ++++ b/dpdk/drivers/net/mlx5/mlx5_flow.c +@@ -165,7 +165,9 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = { + .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, + }, + [MLX5_EXPANSION_VXLAN] = { +- .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH), ++ .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, ++ MLX5_EXPANSION_IPV4, ++ MLX5_EXPANSION_IPV6), + .type = RTE_FLOW_ITEM_TYPE_VXLAN, + }, + [MLX5_EXPANSION_VXLAN_GPE] = { +@@ -336,7 +338,7 @@ static struct mlx5_flow_tunnel_info tunnels_info[] = { + * The request register on success, a negative errno + * value otherwise and rte_errno is set. 
+ */ +-enum modify_reg ++int + mlx5_flow_get_reg_id(struct rte_eth_dev *dev, + enum mlx5_feature_name feature, + uint32_t id, +@@ -345,6 +347,7 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev, + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_dev_config *config = &priv->config; + enum modify_reg start_reg; ++ bool skip_mtr_reg = false; + + switch (feature) { + case MLX5_HAIRPIN_RX: +@@ -383,29 +386,36 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev, + return REG_C_0; + } + break; +- case MLX5_COPY_MARK: + case MLX5_MTR_SFX: + /* +- * Metadata COPY_MARK register using is in meter suffix sub +- * flow while with meter. It's safe to share the same register. ++ * If meter color and flow match share one register, flow match ++ * should use the meter color register for match. + */ +- return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3; ++ if (priv->mtr_reg_share) ++ return priv->mtr_color_reg; ++ else ++ return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : ++ REG_C_3; + case MLX5_MTR_COLOR: + RTE_ASSERT(priv->mtr_color_reg != REG_NONE); + return priv->mtr_color_reg; ++ case MLX5_COPY_MARK: ++ /* ++ * Metadata COPY_MARK register using is in meter suffix sub ++ * flow while with meter. It's safe to share the same register. ++ */ ++ return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3; + case MLX5_APP_TAG: + /* +- * If meter is enable, it will engage two registers for color ++ * If meter is enable, it will engage the register for color + * match and flow match. If meter color match is not using the + * REG_C_2, need to skip the REG_C_x be used by meter color + * match. + * If meter is disable, free to use all available registers. + */ +- if (priv->mtr_color_reg != REG_NONE) +- start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_3 : +- REG_C_4; +- else +- start_reg = REG_C_2; ++ start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 : ++ (priv->mtr_reg_share ? REG_C_3 : REG_C_4); ++ skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2); + if (id > (REG_C_7 - start_reg)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, +@@ -420,12 +430,16 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev, + * If the available index REG_C_y >= REG_C_x, skip the + * color register. + */ +- if (start_reg == REG_C_3 && config->flow_mreg_c +- [id + REG_C_3 - REG_C_0] >= priv->mtr_color_reg) { +- if (config->flow_mreg_c[id + 1 + REG_C_3 - REG_C_0] != +- REG_NONE) ++ if (skip_mtr_reg && config->flow_mreg_c ++ [id + start_reg - REG_C_0] >= priv->mtr_color_reg) { ++ if (id >= (REG_C_7 - start_reg)) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, ++ NULL, "invalid tag id"); ++ if (config->flow_mreg_c ++ [id + 1 + start_reg - REG_C_0] != REG_NONE) + return config->flow_mreg_c +- [id + 1 + REG_C_3 - REG_C_0]; ++ [id + 1 + start_reg - REG_C_0]; + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "unsupported tag id"); +@@ -859,6 +873,35 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev) + } + } + ++/** ++ * Set the Rx queue dynamic metadata (mask and offset) for a flow ++ * ++ * @param[in] dev ++ * Pointer to the Ethernet device structure. 
++ */ ++void ++mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev) ++{ ++ struct mlx5_priv *priv = dev->data->dev_private; ++ struct mlx5_rxq_data *data; ++ unsigned int i; ++ ++ for (i = 0; i != priv->rxqs_n; ++i) { ++ if (!(*priv->rxqs)[i]) ++ continue; ++ data = (*priv->rxqs)[i]; ++ if (!rte_flow_dynf_metadata_avail()) { ++ data->dynf_meta = 0; ++ data->flow_meta_mask = 0; ++ data->flow_meta_offset = -1; ++ } else { ++ data->dynf_meta = 1; ++ data->flow_meta_mask = rte_flow_dynf_metadata_mask; ++ data->flow_meta_offset = rte_flow_dynf_metadata_offs; ++ } ++ } ++} ++ + /* + * return a pointer to the desired action in the list of actions. + * +@@ -900,11 +943,6 @@ mlx5_flow_validate_action_flag(uint64_t action_flags, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) + { +- +- if (action_flags & MLX5_FLOW_ACTION_DROP) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't drop and flag in same flow"); + if (action_flags & MLX5_FLOW_ACTION_MARK) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, +@@ -956,10 +994,6 @@ mlx5_flow_validate_action_mark(const struct rte_flow_action *action, + &mark->id, + "mark id must in 0 <= id < " + RTE_STR(MLX5_FLOW_MARK_MAX)); +- if (action_flags & MLX5_FLOW_ACTION_DROP) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't drop and mark in same flow"); + if (action_flags & MLX5_FLOW_ACTION_FLAG) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, +@@ -991,24 +1025,10 @@ mlx5_flow_validate_action_mark(const struct rte_flow_action *action, + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ + int +-mlx5_flow_validate_action_drop(uint64_t action_flags, ++mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) + { +- if (action_flags & MLX5_FLOW_ACTION_FLAG) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't drop and flag in same flow"); +- if (action_flags & MLX5_FLOW_ACTION_MARK) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't drop and mark in same flow"); +- if (action_flags & (MLX5_FLOW_FATE_ACTIONS | +- MLX5_FLOW_FATE_ESWITCH_ACTIONS)) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't have 2 fate actions in" +- " same flow"); + if (attr->egress) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, +@@ -1634,7 +1654,6 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, + "\xff\xff\xff\xff\xff\xff\xff\xff", + .vtc_flow = RTE_BE32(0xffffffff), + .proto = 0xff, +- .hop_limits = 0xff, + }, + }; + const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); +@@ -1831,7 +1850,6 @@ mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, + uint32_t vlan_id; + uint8_t vni[4]; + } id = { .vlan_id = 0, }; +- uint32_t vlan_id = 0; + + + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) +@@ -1858,23 +1876,8 @@ mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, + return ret; + if (spec) { + memcpy(&id.vni[1], spec->vni, 3); +- vlan_id = id.vlan_id; + memcpy(&id.vni[1], mask->vni, 3); +- vlan_id &= id.vlan_id; + } +- /* +- * Tunnel id 0 is equivalent as not adding a VXLAN layer, if +- * only this layer is defined in the Verbs specification it is +- * interpreted as wildcard and all packets will match this +- * rule, if it follows a full stack 
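The new mlx5_flow_rxq_dynf_metadata_set() above copies the dynamic metadata field offset and mask into every Rx queue when the field has been registered. As a rough illustration of the application side, the sketch below registers that dynamic mbuf field and reads it back with the generic rte_flow helpers of this DPDK release (the helpers are experimental API at this point); the snippet is illustrative only and not part of the patch.

/* Illustrative sketch only -- not part of the patch above. */
#include <rte_mbuf.h>
#include <rte_flow.h>

/* Register the dynamic metadata field once, before starting the port.
 * Afterwards rte_flow_dynf_metadata_avail() returns true and the Rx
 * queues above pick up rte_flow_dynf_metadata_offs/_mask. */
int
app_enable_flow_metadata(void)
{
        return rte_flow_dynf_metadata_register();
}

/* Read the 32-bit metadata delivered with a received packet; the value
 * is meaningful only when the PMD has flagged it in ol_flags using the
 * mask stored in data->flow_meta_mask above. */
uint32_t
app_read_flow_metadata(struct rte_mbuf *m)
{
        return *RTE_FLOW_DYNF_METADATA(m);
}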
layer (ex: eth / ipv4 / +- * udp), all packets matching the layers before will also +- * match this rule. To avoid such situation, VNI 0 is +- * currently refused. +- */ +- if (!vlan_id) +- return rte_flow_error_set(error, ENOTSUP, +- RTE_FLOW_ERROR_TYPE_ITEM, item, +- "VXLAN vni cannot be 0"); + if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, +@@ -1913,7 +1916,6 @@ mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, + uint32_t vlan_id; + uint8_t vni[4]; + } id = { .vlan_id = 0, }; +- uint32_t vlan_id = 0; + + if (!priv->config.l3_vxlan_en) + return rte_flow_error_set(error, ENOTSUP, +@@ -1951,22 +1953,8 @@ mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, + "VxLAN-GPE protocol" + " not supported"); + memcpy(&id.vni[1], spec->vni, 3); +- vlan_id = id.vlan_id; + memcpy(&id.vni[1], mask->vni, 3); +- vlan_id &= id.vlan_id; + } +- /* +- * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this +- * layer is defined in the Verbs specification it is interpreted as +- * wildcard and all packets will match this rule, if it follows a full +- * stack layer (ex: eth / ipv4 / udp), all packets matching the layers +- * before will also match this rule. To avoid such situation, VNI 0 +- * is currently refused. +- */ +- if (!vlan_id) +- return rte_flow_error_set(error, ENOTSUP, +- RTE_FLOW_ERROR_TYPE_ITEM, item, +- "VXLAN-GPE vni cannot be 0"); + if (!(item_flags & MLX5_FLOW_LAYER_OUTER)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, +@@ -2131,9 +2119,7 @@ mlx5_flow_validate_item_geneve(const struct rte_flow_item *item, + .protocol = RTE_BE16(UINT16_MAX), + }; + +- if (!(priv->config.hca_attr.flex_parser_protocols & +- MLX5_HCA_FLEX_GENEVE_ENABLED) || +- !priv->config.hca_attr.tunnel_stateless_geneve_rx) ++ if (!priv->config.hca_attr.tunnel_stateless_geneve_rx) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "L3 Geneve is not enabled by device" +@@ -2349,6 +2335,7 @@ flow_null_validate(struct rte_eth_dev *dev __rte_unused, + const struct rte_flow_item items[] __rte_unused, + const struct rte_flow_action actions[] __rte_unused, + bool external __rte_unused, ++ int hairpin __rte_unused, + struct rte_flow_error *error) + { + return rte_flow_error_set(error, ENOTSUP, +@@ -2463,6 +2450,8 @@ flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) + * Pointer to the list of actions. + * @param[in] external + * This flow rule is created by request external to PMD. ++ * @param[in] hairpin ++ * Number of hairpin TX actions, 0 means classic flow. + * @param[out] error + * Pointer to the error structure. + * +@@ -2474,13 +2463,14 @@ flow_drv_validate(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], +- bool external, struct rte_flow_error *error) ++ bool external, int hairpin, struct rte_flow_error *error) + { + const struct mlx5_flow_driver_ops *fops; + enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr); + + fops = flow_get_drv_ops(type); +- return fops->validate(dev, attr, items, actions, external, error); ++ return fops->validate(dev, attr, items, actions, external, ++ hairpin, error); + } + + /** +@@ -2638,47 +2628,6 @@ flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) + fops->destroy(dev, flow); + } + +-/** +- * Validate a flow supported by the NIC. 
+- * +- * @see rte_flow_validate() +- * @see rte_flow_ops +- */ +-int +-mlx5_flow_validate(struct rte_eth_dev *dev, +- const struct rte_flow_attr *attr, +- const struct rte_flow_item items[], +- const struct rte_flow_action actions[], +- struct rte_flow_error *error) +-{ +- int ret; +- +- ret = flow_drv_validate(dev, attr, items, actions, true, error); +- if (ret < 0) +- return ret; +- return 0; +-} +- +-/** +- * Get port id item from the item list. +- * +- * @param[in] item +- * Pointer to the list of items. +- * +- * @return +- * Pointer to the port id item if exist, else return NULL. +- */ +-static const struct rte_flow_item * +-find_port_id_item(const struct rte_flow_item *item) +-{ +- assert(item); +- for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { +- if (item->type == RTE_FLOW_ITEM_TYPE_PORT_ID) +- return item; +- } +- return NULL; +-} +- + /** + * Get RSS action from the action list. + * +@@ -2723,7 +2672,44 @@ find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) + } + + /** +- * Get QUEUE/RSS action from the action list. ++ * Get layer flags from the prefix flow. ++ * ++ * Some flows may be split to several subflows, the prefix subflow gets the ++ * match items and the suffix sub flow gets the actions. ++ * Some actions need the user defined match item flags to get the detail for ++ * the action. ++ * This function helps the suffix flow to get the item layer flags from prefix ++ * subflow. ++ * ++ * @param[in] dev_flow ++ * Pointer the created preifx subflow. ++ * ++ * @return ++ * The layers get from prefix subflow. ++ */ ++static inline uint64_t ++flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow) ++{ ++ uint64_t layers = 0; ++ ++ /* If no decap actions, use the layers directly. */ ++ if (!(dev_flow->actions & MLX5_FLOW_ACTION_DECAP)) ++ return dev_flow->layers; ++ /* Convert L3 layers with decap action. */ ++ if (dev_flow->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4) ++ layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; ++ else if (dev_flow->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6) ++ layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; ++ /* Convert L4 layers with decap action. */ ++ if (dev_flow->layers & MLX5_FLOW_LAYER_INNER_L4_TCP) ++ layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP; ++ else if (dev_flow->layers & MLX5_FLOW_LAYER_INNER_L4_UDP) ++ layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP; ++ return layers; ++} ++ ++/** ++ * Get metadata split action information. + * + * @param[in] actions + * Pointer to the list of actions. +@@ -2732,18 +2718,38 @@ find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) + * @param[out] qrss_type + * Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned + * if no QUEUE/RSS is found. ++ * @param[out] encap_idx ++ * Pointer to the index of the encap action if exists, otherwise the last ++ * action index. + * + * @return + * Total number of actions. 
+ */ + static int +-flow_parse_qrss_action(const struct rte_flow_action actions[], +- const struct rte_flow_action **qrss) ++flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[], ++ const struct rte_flow_action **qrss, ++ int *encap_idx) + { ++ const struct rte_flow_action_raw_encap *raw_encap; + int actions_n = 0; ++ int raw_decap_idx = -1; + ++ *encap_idx = -1; + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { + switch (actions->type) { ++ case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: ++ case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: ++ *encap_idx = actions_n; ++ break; ++ case RTE_FLOW_ACTION_TYPE_RAW_DECAP: ++ raw_decap_idx = actions_n; ++ break; ++ case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: ++ raw_encap = actions->conf; ++ if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) ++ *encap_idx = raw_decap_idx != -1 ? ++ raw_decap_idx : actions_n; ++ break; + case RTE_FLOW_ACTION_TYPE_QUEUE: + case RTE_FLOW_ACTION_TYPE_RSS: + *qrss = actions; +@@ -2753,6 +2759,8 @@ flow_parse_qrss_action(const struct rte_flow_action actions[], + } + actions_n++; + } ++ if (*encap_idx == -1) ++ *encap_idx = actions_n; + /* Count RTE_FLOW_ACTION_TYPE_END. */ + return actions_n + 1; + } +@@ -2958,18 +2966,21 @@ flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, + /* Build a new flow. */ + if (mark_id != MLX5_DEFAULT_COPY_ID) { + items[0] = (struct rte_flow_item){ +- .type = MLX5_RTE_FLOW_ITEM_TYPE_TAG, ++ .type = (enum rte_flow_item_type) ++ MLX5_RTE_FLOW_ITEM_TYPE_TAG, + .spec = &tag_spec, + }; + items[1] = (struct rte_flow_item){ + .type = RTE_FLOW_ITEM_TYPE_END, + }; + actions[0] = (struct rte_flow_action){ +- .type = MLX5_RTE_FLOW_ACTION_TYPE_MARK, ++ .type = (enum rte_flow_action_type) ++ MLX5_RTE_FLOW_ACTION_TYPE_MARK, + .conf = &ftag, + }; + actions[1] = (struct rte_flow_action){ +- .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, ++ .type = (enum rte_flow_action_type) ++ MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, + .conf = &cp_mreg, + }; + actions[2] = (struct rte_flow_action){ +@@ -2986,7 +2997,8 @@ flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, + .type = RTE_FLOW_ITEM_TYPE_END, + }; + actions[0] = (struct rte_flow_action){ +- .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, ++ .type = (enum rte_flow_action_type) ++ MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, + .conf = &cp_mreg, + }; + actions[1] = (struct rte_flow_action){ +@@ -3360,7 +3372,8 @@ flow_hairpin_split(struct rte_eth_dev *dev, + } + /* Add set meta action and end action for the Rx flow. */ + tag_action = actions_rx; +- tag_action->type = MLX5_RTE_FLOW_ACTION_TYPE_TAG; ++ tag_action->type = (enum rte_flow_action_type) ++ MLX5_RTE_FLOW_ACTION_TYPE_TAG; + actions_rx++; + rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action)); + actions_rx++; +@@ -3373,7 +3386,8 @@ flow_hairpin_split(struct rte_eth_dev *dev, + rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); + addr = (void *)&pattern_tx[2]; + item = pattern_tx; +- item->type = MLX5_RTE_FLOW_ITEM_TYPE_TAG; ++ item->type = (enum rte_flow_item_type) ++ MLX5_RTE_FLOW_ITEM_TYPE_TAG; + tag_item = (void *)addr; + tag_item->data = *flow_id; + tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); +@@ -3401,6 +3415,8 @@ flow_hairpin_split(struct rte_eth_dev *dev, + * Parent flow structure pointer. + * @param[in, out] sub_flow + * Pointer to return the created subflow, may be NULL. ++ * @param[in] prefix_layers ++ * Prefix subflow layers, may be 0. + * @param[in] attr + * Flow rule attributes. 
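flow_parse_metadata_split_actions_info() above walks the action array once to find the QUEUE/RSS action and the index of the first "large" encapsulation (or the raw decap paired with it). The worked example below is a hypothetical input rule, not taken from the patch, and the conf payloads are omitted for brevity; the expected results assume the raw encap data exceeds MLX5_ENCAPSULATION_DECISION_SIZE (L2 + IPv4 headers).

/* Hypothetical rule: raw_decap / raw_encap / rss / end.
 * With the assumption above, the scan would report:
 *   *qrss      -> &acts[2]  (the RSS action)
 *   *encap_idx -> 0         (raw decap index, since decap precedes encap)
 *   return     -> 4         (three actions plus the END terminator)
 */
#include <rte_flow.h>

static const struct rte_flow_action acts[] = {
        [0] = { .type = RTE_FLOW_ACTION_TYPE_RAW_DECAP },
        [1] = { .type = RTE_FLOW_ACTION_TYPE_RAW_ENCAP },
        [2] = { .type = RTE_FLOW_ACTION_TYPE_RSS },
        [3] = { .type = RTE_FLOW_ACTION_TYPE_END },
};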
+ * @param[in] items +@@ -3418,6 +3434,7 @@ static int + flow_create_split_inner(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct mlx5_flow **sub_flow, ++ uint64_t prefix_layers, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], +@@ -3432,6 +3449,12 @@ flow_create_split_inner(struct rte_eth_dev *dev, + dev_flow->external = external; + /* Subflow object was created, we must include one in the list. */ + LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next); ++ /* ++ * If dev_flow is as one of the suffix flow, some actions in suffix ++ * flow may need some user defined item layer flags. ++ */ ++ if (prefix_layers) ++ dev_flow->layers = prefix_layers; + if (sub_flow) + *sub_flow = dev_flow; + return flow_drv_translate(dev, dev_flow, attr, items, actions, error); +@@ -3451,6 +3474,10 @@ flow_create_split_inner(struct rte_eth_dev *dev, + * + * @param dev + * Pointer to Ethernet device. ++ * @param[in] items ++ * Pattern specification (list terminated by the END pattern item). ++ * @param[out] sfx_items ++ * Suffix flow match items (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] actions_sfx +@@ -3467,66 +3494,61 @@ flow_create_split_inner(struct rte_eth_dev *dev, + */ + static int + flow_meter_split_prep(struct rte_eth_dev *dev, ++ const struct rte_flow_item items[], ++ struct rte_flow_item sfx_items[], + const struct rte_flow_action actions[], + struct rte_flow_action actions_sfx[], + struct rte_flow_action actions_pre[]) + { +- struct rte_flow_action *tag_action; ++ struct rte_flow_action *tag_action = NULL; ++ struct rte_flow_item *tag_item; + struct mlx5_rte_flow_action_set_tag *set_tag; + struct rte_flow_error error; + const struct rte_flow_action_raw_encap *raw_encap; + const struct rte_flow_action_raw_decap *raw_decap; ++ struct mlx5_rte_flow_item_tag *tag_spec; ++ struct mlx5_rte_flow_item_tag *tag_mask; + uint32_t tag_id; ++ bool copy_vlan = false; + +- /* Add the extra tag action first. */ +- tag_action = actions_pre; +- tag_action->type = MLX5_RTE_FLOW_ACTION_TYPE_TAG; +- actions_pre++; + /* Prepare the actions for prefix and suffix flow. */ + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { ++ struct rte_flow_action **action_cur = NULL; ++ + switch (actions->type) { + case RTE_FLOW_ACTION_TYPE_METER: ++ /* Add the extra tag action first. */ ++ tag_action = actions_pre; ++ tag_action->type = (enum rte_flow_action_type) ++ MLX5_RTE_FLOW_ACTION_TYPE_TAG; ++ actions_pre++; ++ action_cur = &actions_pre; ++ break; + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: + case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: +- memcpy(actions_pre, actions, +- sizeof(struct rte_flow_action)); +- actions_pre++; ++ action_cur = &actions_pre; + break; + case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: + raw_encap = actions->conf; +- if (raw_encap->size > +- (sizeof(struct rte_flow_item_eth) + +- sizeof(struct rte_flow_item_ipv4))) { +- memcpy(actions_sfx, actions, +- sizeof(struct rte_flow_action)); +- actions_sfx++; +- } else { +- rte_memcpy(actions_pre, actions, +- sizeof(struct rte_flow_action)); +- actions_pre++; +- } ++ if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE) ++ action_cur = &actions_pre; + break; + case RTE_FLOW_ACTION_TYPE_RAW_DECAP: + raw_decap = actions->conf; +- /* Size 0 decap means 50 bytes as vxlan decap. 
*/ +- if (raw_decap->size && (raw_decap->size < +- (sizeof(struct rte_flow_item_eth) + +- sizeof(struct rte_flow_item_ipv4)))) { +- memcpy(actions_sfx, actions, +- sizeof(struct rte_flow_action)); +- actions_sfx++; +- } else { +- rte_memcpy(actions_pre, actions, +- sizeof(struct rte_flow_action)); +- actions_pre++; +- } ++ if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) ++ action_cur = &actions_pre; ++ break; ++ case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: ++ case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: ++ copy_vlan = true; + break; + default: +- memcpy(actions_sfx, actions, +- sizeof(struct rte_flow_action)); +- actions_sfx++; + break; + } ++ if (!action_cur) ++ action_cur = &actions_sfx; ++ memcpy(*action_cur, actions, sizeof(struct rte_flow_action)); ++ (*action_cur)++; + } + /* Add end action to the actions. */ + actions_sfx->type = RTE_FLOW_ACTION_TYPE_END; +@@ -3539,8 +3561,47 @@ flow_meter_split_prep(struct rte_eth_dev *dev, + * Get the id from the qrss_pool to make qrss share the id with meter. + */ + tag_id = flow_qrss_get_id(dev); +- set_tag->data = rte_cpu_to_be_32(tag_id); ++ set_tag->data = tag_id << MLX5_MTR_COLOR_BITS; ++ assert(tag_action); + tag_action->conf = set_tag; ++ /* Prepare the suffix subflow items. */ ++ tag_item = sfx_items++; ++ for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { ++ int item_type = items->type; ++ ++ switch (item_type) { ++ case RTE_FLOW_ITEM_TYPE_PORT_ID: ++ memcpy(sfx_items, items, sizeof(*sfx_items)); ++ sfx_items++; ++ break; ++ case RTE_FLOW_ITEM_TYPE_VLAN: ++ if (copy_vlan) { ++ memcpy(sfx_items, items, sizeof(*sfx_items)); ++ /* ++ * Convert to internal match item, it is used ++ * for vlan push and set vid. ++ */ ++ sfx_items->type = (enum rte_flow_item_type) ++ MLX5_RTE_FLOW_ITEM_TYPE_VLAN; ++ sfx_items++; ++ } ++ break; ++ default: ++ break; ++ } ++ } ++ sfx_items->type = RTE_FLOW_ITEM_TYPE_END; ++ sfx_items++; ++ tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items; ++ tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS; ++ tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error); ++ tag_mask = tag_spec + 1; ++ tag_mask->data = 0xffffff00; ++ tag_item->type = (enum rte_flow_item_type) ++ MLX5_RTE_FLOW_ITEM_TYPE_TAG; ++ tag_item->spec = tag_spec; ++ tag_item->last = NULL; ++ tag_item->mask = tag_mask; + return tag_id; + } + +@@ -3640,7 +3701,8 @@ flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, + /* Construct new actions array. */ + /* Replace QUEUE/RSS action. */ + split_actions[qrss_idx] = (struct rte_flow_action){ +- .type = MLX5_RTE_FLOW_ACTION_TYPE_TAG, ++ .type = (enum rte_flow_action_type) ++ MLX5_RTE_FLOW_ACTION_TYPE_TAG, + .conf = set_tag, + }; + } +@@ -3673,6 +3735,8 @@ flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, + * Number of actions in the list. + * @param[out] error + * Perform verbose error reporting if not NULL. ++ * @param[in] encap_idx ++ * The encap action inndex. 
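The meter split above now keeps the flow id in the upper bits of the tag register and leaves the low bits to the meter color, which is why the suffix flow matches with the 0xffffff00 mask. The arithmetic sketch below assumes MLX5_MTR_COLOR_BITS is 8, consistent with that mask; check mlx5.h for the authoritative value.

#include <stdint.h>
#include <assert.h>

#define EXAMPLE_MTR_COLOR_BITS 8            /* assumed; matches 0xffffff00 */
#define EXAMPLE_SFX_MATCH_MASK 0xffffff00u

int main(void)
{
        uint32_t tag_id = 5;                                    /* id from the qrss pool */
        uint32_t reg_value = tag_id << EXAMPLE_MTR_COLOR_BITS;  /* 0x00000500 */
        /* The meter may later write a color code into the low bits... */
        uint32_t after_meter = reg_value | 0x2;
        /* ...and the suffix flow still matches, since the mask ignores them. */
        assert((after_meter & EXAMPLE_SFX_MATCH_MASK) ==
               (reg_value & EXAMPLE_SFX_MATCH_MASK));
        return 0;
}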
+ * + * @return + * 0 on success, negative value otherwise +@@ -3681,7 +3745,8 @@ static int + flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, + struct rte_flow_action *ext_actions, + const struct rte_flow_action *actions, +- int actions_n, struct rte_flow_error *error) ++ int actions_n, struct rte_flow_error *error, ++ int encap_idx) + { + struct mlx5_flow_action_copy_mreg *cp_mreg = + (struct mlx5_flow_action_copy_mreg *) +@@ -3696,15 +3761,26 @@ flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, + if (ret < 0) + return ret; + cp_mreg->src = ret; +- memcpy(ext_actions, actions, +- sizeof(*ext_actions) * actions_n); +- ext_actions[actions_n - 1] = (struct rte_flow_action){ +- .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, +- .conf = cp_mreg, +- }; +- ext_actions[actions_n] = (struct rte_flow_action){ +- .type = RTE_FLOW_ACTION_TYPE_END, +- }; ++ if (encap_idx != 0) ++ memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx); ++ if (encap_idx == actions_n - 1) { ++ ext_actions[actions_n - 1] = (struct rte_flow_action){ ++ .type = (enum rte_flow_action_type) ++ MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, ++ .conf = cp_mreg, ++ }; ++ ext_actions[actions_n] = (struct rte_flow_action){ ++ .type = RTE_FLOW_ACTION_TYPE_END, ++ }; ++ } else { ++ ext_actions[encap_idx] = (struct rte_flow_action){ ++ .type = (enum rte_flow_action_type) ++ MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, ++ .conf = cp_mreg, ++ }; ++ memcpy(ext_actions + encap_idx + 1, actions + encap_idx, ++ sizeof(*ext_actions) * (actions_n - encap_idx)); ++ } + return 0; + } + +@@ -3722,6 +3798,8 @@ flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, + * Pointer to Ethernet device. + * @param[in] flow + * Parent flow structure pointer. ++ * @param[in] prefix_layers ++ * Prefix flow layer flags. + * @param[in] attr + * Flow rule attributes. + * @param[in] items +@@ -3738,6 +3816,7 @@ flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, + static int + flow_create_split_metadata(struct rte_eth_dev *dev, + struct rte_flow *flow, ++ uint64_t prefix_layers, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], +@@ -3752,15 +3831,18 @@ flow_create_split_metadata(struct rte_eth_dev *dev, + int mtr_sfx = 0; + size_t act_size; + int actions_n; ++ int encap_idx; + int ret; + + /* Check whether extensive metadata feature is engaged. */ + if (!config->dv_flow_en || + config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || + !mlx5_flow_ext_mreg_supported(dev)) +- return flow_create_split_inner(dev, flow, NULL, attr, items, +- actions, external, error); +- actions_n = flow_parse_qrss_action(actions, &qrss); ++ return flow_create_split_inner(dev, flow, NULL, prefix_layers, ++ attr, items, actions, external, ++ error); ++ actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, ++ &encap_idx); + if (qrss) { + /* Exclude hairpin flows from splitting. */ + if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) { +@@ -3807,6 +3889,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev, + RTE_FLOW_ACTION_TYPE_VOID; + else + ext_actions[qrss - actions].type = ++ (enum rte_flow_action_type) + MLX5_RTE_FLOW_ACTION_TYPE_TAG; + /* + * Create the new actions list with removed Q/RSS action +@@ -3835,14 +3918,14 @@ flow_create_split_metadata(struct rte_eth_dev *dev, + "metadata flow"); + /* Create the action list appended with copy register. 
*/ + ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions, +- actions_n, error); ++ actions_n, error, encap_idx); + if (ret < 0) + goto exit; + } + /* Add the unmodified original or prefix subflow. */ +- ret = flow_create_split_inner(dev, flow, &dev_flow, attr, items, +- ext_actions ? ext_actions : actions, +- external, error); ++ ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr, ++ items, ext_actions ? ext_actions : ++ actions, external, error); + if (ret < 0) + goto exit; + assert(dev_flow); +@@ -3858,7 +3941,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev, + }; + struct rte_flow_item q_items[] = { + { +- .type = MLX5_RTE_FLOW_ITEM_TYPE_TAG, ++ .type = (enum rte_flow_item_type) ++ MLX5_RTE_FLOW_ITEM_TYPE_TAG, + .spec = &q_tag_spec, + .last = NULL, + .mask = NULL, +@@ -3876,7 +3960,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev, + .type = RTE_FLOW_ACTION_TYPE_END, + }, + }; +- uint64_t hash_fields = dev_flow->hash_fields; ++ uint64_t layers = flow_get_prefix_layer_flags(dev_flow); + + /* + * Configure the tag item only if there is no meter subflow. +@@ -3903,14 +3987,13 @@ flow_create_split_metadata(struct rte_eth_dev *dev, + } + dev_flow = NULL; + /* Add suffix subflow to execute Q/RSS. */ +- ret = flow_create_split_inner(dev, flow, &dev_flow, ++ ret = flow_create_split_inner(dev, flow, &dev_flow, layers, + &q_attr, mtr_sfx ? items : + q_items, q_actions, + external, error); + if (ret < 0) + goto exit; + assert(dev_flow); +- dev_flow->hash_fields = hash_fields; + } + + exit: +@@ -3963,7 +4046,6 @@ flow_create_split_meter(struct rte_eth_dev *dev, + struct rte_flow_action *sfx_actions = NULL; + struct rte_flow_action *pre_actions = NULL; + struct rte_flow_item *sfx_items = NULL; +- const struct rte_flow_item *sfx_port_id_item; + struct mlx5_flow *dev_flow = NULL; + struct rte_flow_attr sfx_attr = *attr; + uint32_t mtr = 0; +@@ -3976,63 +4058,47 @@ flow_create_split_meter(struct rte_eth_dev *dev, + if (priv->mtr_en) + actions_n = flow_check_meter_action(actions, &mtr); + if (mtr) { +- struct mlx5_rte_flow_item_tag *tag_spec; + /* The five prefix actions: meter, decap, encap, tag, end. */ + act_size = sizeof(struct rte_flow_action) * (actions_n + 5) + +- sizeof(struct rte_flow_action_set_tag); +- /* tag, end. */ +-#define METER_SUFFIX_ITEM 3 ++ sizeof(struct mlx5_rte_flow_action_set_tag); ++ /* tag, vlan, port id, end. */ ++#define METER_SUFFIX_ITEM 4 + item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM + +- sizeof(struct mlx5_rte_flow_item_tag); ++ sizeof(struct mlx5_rte_flow_item_tag) * 2; + sfx_actions = rte_zmalloc(__func__, (act_size + item_size), 0); + if (!sfx_actions) + return rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "no memory to split " + "meter flow"); ++ sfx_items = (struct rte_flow_item *)((char *)sfx_actions + ++ act_size); + pre_actions = sfx_actions + actions_n; +- mtr_tag_id = flow_meter_split_prep(dev, actions, sfx_actions, +- pre_actions); ++ mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items, ++ actions, sfx_actions, ++ pre_actions); + if (!mtr_tag_id) { + ret = -rte_errno; + goto exit; + } + /* Add the prefix subflow. */ +- ret = flow_create_split_inner(dev, flow, &dev_flow, attr, items, +- pre_actions, external, error); ++ ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr, ++ items, pre_actions, external, ++ error); + if (ret) { + ret = -rte_errno; + goto exit; + } + dev_flow->mtr_flow_id = mtr_tag_id; +- /* Prepare the suffix flow match pattern. 
*/ +- sfx_items = (struct rte_flow_item *)((char *)sfx_actions + +- act_size); +- tag_spec = (struct mlx5_rte_flow_item_tag *)(sfx_items + +- METER_SUFFIX_ITEM); +- tag_spec->data = rte_cpu_to_be_32(dev_flow->mtr_flow_id); +- tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, +- error); +- sfx_items->type = MLX5_RTE_FLOW_ITEM_TYPE_TAG; +- sfx_items->spec = tag_spec; +- sfx_items->last = NULL; +- sfx_items->mask = NULL; +- sfx_items++; +- sfx_port_id_item = find_port_id_item(items); +- if (sfx_port_id_item) { +- memcpy(sfx_items, sfx_port_id_item, +- sizeof(*sfx_items)); +- sfx_items++; +- } +- sfx_items->type = RTE_FLOW_ITEM_TYPE_END; +- sfx_items -= METER_SUFFIX_ITEM; + /* Setting the sfx group atrr. */ + sfx_attr.group = sfx_attr.transfer ? + (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) : + MLX5_FLOW_TABLE_LEVEL_SUFFIX; + } + /* Add the prefix subflow. */ +- ret = flow_create_split_metadata(dev, flow, &sfx_attr, ++ ret = flow_create_split_metadata(dev, flow, dev_flow ? ++ flow_get_prefix_layer_flags(dev_flow) : ++ 0, &sfx_attr, + sfx_items ? sfx_items : items, + sfx_actions ? sfx_actions : actions, + external, error); +@@ -4146,14 +4212,18 @@ flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list, + } items_tx; + struct rte_flow_expand_rss *buf = &expand_buffer.buf; + const struct rte_flow_action *p_actions_rx = actions; +- int ret; + uint32_t i; + uint32_t flow_size; +- int hairpin_flow = 0; ++ int hairpin_flow; + uint32_t hairpin_id = 0; + struct rte_flow_attr attr_tx = { .priority = 0 }; ++ int ret; + + hairpin_flow = flow_check_hairpin_split(dev, attr, actions); ++ ret = flow_drv_validate(dev, attr, items, p_actions_rx, ++ external, hairpin_flow, error); ++ if (ret < 0) ++ return NULL; + if (hairpin_flow > 0) { + if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { + rte_errno = EINVAL; +@@ -4164,10 +4234,6 @@ flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list, + &hairpin_id); + p_actions_rx = actions_rx.actions; + } +- ret = flow_drv_validate(dev, attr, items, p_actions_rx, external, +- error); +- if (ret < 0) +- goto error_before_flow; + flow_size = sizeof(struct rte_flow); + rss = flow_get_rss_action(p_actions_rx); + if (rss) +@@ -4334,6 +4400,26 @@ mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev) + actions, false, &error); + } + ++/** ++ * Validate a flow supported by the NIC. ++ * ++ * @see rte_flow_validate() ++ * @see rte_flow_ops ++ */ ++int ++mlx5_flow_validate(struct rte_eth_dev *dev, ++ const struct rte_flow_attr *attr, ++ const struct rte_flow_item items[], ++ const struct rte_flow_action actions[], ++ struct rte_flow_error *error) ++{ ++ int hairpin_flow; ++ ++ hairpin_flow = flow_check_hairpin_split(dev, attr, actions); ++ return flow_drv_validate(dev, attr, items, actions, ++ true, hairpin_flow, error); ++} ++ + /** + * Create a flow. 
+ * +@@ -4518,7 +4604,8 @@ mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, + }; + struct rte_flow_item items[] = { + { +- .type = MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, ++ .type = (enum rte_flow_item_type) ++ MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, + .spec = &queue_spec, + .last = NULL, + .mask = &queue_mask, +@@ -4623,6 +4710,8 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, + if (!priv->reta_idx_n || !priv->rxqs_n) { + return 0; + } ++ if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) ++ action_rss.types = 0; + for (i = 0; i != priv->reta_idx_n; ++i) + queue[i] = (*priv->reta_idx)[i]; + flow = flow_list_create(dev, &priv->ctrl_flows, +@@ -5570,6 +5659,8 @@ mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh, + * Value is part of flow rule created by request external to PMD. + * @param[in] group + * rte_flow group index value. ++ * @param[out] fdb_def_rule ++ * Whether fdb jump to table 1 is configured. + * @param[out] table + * HW table value. + * @param[out] error +@@ -5580,10 +5671,10 @@ mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh, + */ + int + mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external, +- uint32_t group, uint32_t *table, ++ uint32_t group, bool fdb_def_rule, uint32_t *table, + struct rte_flow_error *error) + { +- if (attributes->transfer && external) { ++ if (attributes->transfer && external && fdb_def_rule) { + if (group == UINT32_MAX) + return rte_flow_error_set + (error, EINVAL, +@@ -5633,7 +5724,8 @@ mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev) + }; + struct rte_flow_action actions[] = { + [0] = { +- .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, ++ .type = (enum rte_flow_action_type) ++ MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, + .conf = &(struct mlx5_flow_action_copy_mreg){ + .src = REG_C_1, + .dst = idx, +diff --git a/dpdk/drivers/net/mlx5/mlx5_flow.h b/dpdk/drivers/net/mlx5/mlx5_flow.h +index 3fff5dd7da..f8046119ec 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_flow.h ++++ b/dpdk/drivers/net/mlx5/mlx5_flow.h +@@ -33,6 +33,7 @@ enum mlx5_rte_flow_item_type { + MLX5_RTE_FLOW_ITEM_TYPE_END = INT_MIN, + MLX5_RTE_FLOW_ITEM_TYPE_TAG, + MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, ++ MLX5_RTE_FLOW_ITEM_TYPE_VLAN, + }; + + /* Private (internal) rte flow actions. 
*/ +@@ -188,20 +189,16 @@ enum mlx5_feature_name { + #define MLX5_FLOW_ACTION_DEC_TTL (1u << 19) + #define MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20) + #define MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21) +-#define MLX5_FLOW_ACTION_VXLAN_ENCAP (1u << 22) +-#define MLX5_FLOW_ACTION_VXLAN_DECAP (1u << 23) +-#define MLX5_FLOW_ACTION_NVGRE_ENCAP (1u << 24) +-#define MLX5_FLOW_ACTION_NVGRE_DECAP (1u << 25) +-#define MLX5_FLOW_ACTION_RAW_ENCAP (1u << 26) +-#define MLX5_FLOW_ACTION_RAW_DECAP (1u << 27) +-#define MLX5_FLOW_ACTION_INC_TCP_SEQ (1u << 28) +-#define MLX5_FLOW_ACTION_DEC_TCP_SEQ (1u << 29) +-#define MLX5_FLOW_ACTION_INC_TCP_ACK (1u << 30) +-#define MLX5_FLOW_ACTION_DEC_TCP_ACK (1u << 31) +-#define MLX5_FLOW_ACTION_SET_TAG (1ull << 32) +-#define MLX5_FLOW_ACTION_MARK_EXT (1ull << 33) +-#define MLX5_FLOW_ACTION_SET_META (1ull << 34) +-#define MLX5_FLOW_ACTION_METER (1ull << 35) ++#define MLX5_FLOW_ACTION_ENCAP (1u << 22) ++#define MLX5_FLOW_ACTION_DECAP (1u << 23) ++#define MLX5_FLOW_ACTION_INC_TCP_SEQ (1u << 24) ++#define MLX5_FLOW_ACTION_DEC_TCP_SEQ (1u << 25) ++#define MLX5_FLOW_ACTION_INC_TCP_ACK (1u << 26) ++#define MLX5_FLOW_ACTION_DEC_TCP_ACK (1u << 27) ++#define MLX5_FLOW_ACTION_SET_TAG (1ull << 28) ++#define MLX5_FLOW_ACTION_MARK_EXT (1ull << 29) ++#define MLX5_FLOW_ACTION_SET_META (1ull << 30) ++#define MLX5_FLOW_ACTION_METER (1ull << 31) + + #define MLX5_FLOW_FATE_ACTIONS \ + (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \ +@@ -211,15 +208,6 @@ enum mlx5_feature_name { + (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \ + MLX5_FLOW_ACTION_JUMP) + +-#define MLX5_FLOW_ENCAP_ACTIONS (MLX5_FLOW_ACTION_VXLAN_ENCAP | \ +- MLX5_FLOW_ACTION_NVGRE_ENCAP | \ +- MLX5_FLOW_ACTION_RAW_ENCAP | \ +- MLX5_FLOW_ACTION_OF_PUSH_VLAN) +- +-#define MLX5_FLOW_DECAP_ACTIONS (MLX5_FLOW_ACTION_VXLAN_DECAP | \ +- MLX5_FLOW_ACTION_NVGRE_DECAP | \ +- MLX5_FLOW_ACTION_RAW_DECAP | \ +- MLX5_FLOW_ACTION_OF_POP_VLAN) + + #define MLX5_FLOW_MODIFY_HDR_ACTIONS (MLX5_FLOW_ACTION_SET_IPV4_SRC | \ + MLX5_FLOW_ACTION_SET_IPV4_DST | \ +@@ -242,6 +230,9 @@ enum mlx5_feature_name { + + #define MLX5_FLOW_VLAN_ACTIONS (MLX5_FLOW_ACTION_OF_POP_VLAN | \ + MLX5_FLOW_ACTION_OF_PUSH_VLAN) ++ ++#define MLX5_FLOW_XCAP_ACTIONS (MLX5_FLOW_ACTION_ENCAP | MLX5_FLOW_ACTION_DECAP) ++ + #ifndef IPPROTO_MPLS + #define IPPROTO_MPLS 137 + #endif +@@ -288,6 +279,27 @@ enum mlx5_feature_name { + /* IBV hash source bits for IPV6. */ + #define MLX5_IPV6_IBV_RX_HASH (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6) + ++/* IBV hash bits for L3 SRC. */ ++#define MLX5_L3_SRC_IBV_RX_HASH (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_SRC_IPV6) ++ ++/* IBV hash bits for L3 DST. */ ++#define MLX5_L3_DST_IBV_RX_HASH (IBV_RX_HASH_DST_IPV4 | IBV_RX_HASH_DST_IPV6) ++ ++/* IBV hash bits for TCP. */ ++#define MLX5_TCP_IBV_RX_HASH (IBV_RX_HASH_SRC_PORT_TCP | \ ++ IBV_RX_HASH_DST_PORT_TCP) ++ ++/* IBV hash bits for UDP. */ ++#define MLX5_UDP_IBV_RX_HASH (IBV_RX_HASH_SRC_PORT_UDP | \ ++ IBV_RX_HASH_DST_PORT_UDP) ++ ++/* IBV hash bits for L4 SRC. */ ++#define MLX5_L4_SRC_IBV_RX_HASH (IBV_RX_HASH_SRC_PORT_TCP | \ ++ IBV_RX_HASH_SRC_PORT_UDP) ++ ++/* IBV hash bits for L4 DST. 
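The consolidated MLX5_FLOW_ACTION_ENCAP/_DECAP bits and the new MLX5_FLOW_XCAP_ACTIONS mask above let validation test both reshaping actions with a single AND. The self-contained sketch below mirrors that pattern with local copies of the two bits; it only illustrates the bit test and is not the driver's actual validation code.

#include <stdint.h>
#include <stdio.h>

/* Local stand-ins mirroring the definitions above. */
#define ACTION_ENCAP (1u << 22)
#define ACTION_DECAP (1u << 23)
#define XCAP_ACTIONS (ACTION_ENCAP | ACTION_DECAP)

int main(void)
{
        uint64_t action_flags = ACTION_ENCAP | ACTION_DECAP;

        /* Both bits set at once, e.g. to refuse encap and decap in one flow
         * (hedged: the real check may differ in wording and placement). */
        if ((action_flags & XCAP_ACTIONS) == XCAP_ACTIONS)
                printf("encap and decap requested in the same flow\n");
        return 0;
}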
*/ ++#define MLX5_L4_DST_IBV_RX_HASH (IBV_RX_HASH_DST_PORT_TCP | \ ++ IBV_RX_HASH_DST_PORT_UDP) + + /* Geneve header first 16Bit */ + #define MLX5_GENEVE_VER_MASK 0x3 +@@ -315,6 +327,26 @@ enum mlx5_feature_name { + #define MLX5_GENEVE_OPT_LEN_0 14 + #define MLX5_GENEVE_OPT_LEN_1 63 + ++#define MLX5_ENCAPSULATION_DECISION_SIZE (sizeof(struct rte_flow_item_eth) + \ ++ sizeof(struct rte_flow_item_ipv4)) ++ ++/* Software header modify action numbers of a flow. */ ++#define MLX5_ACT_NUM_MDF_IPV4 1 ++#define MLX5_ACT_NUM_MDF_IPV6 4 ++#define MLX5_ACT_NUM_MDF_MAC 2 ++#define MLX5_ACT_NUM_MDF_VID 1 ++#define MLX5_ACT_NUM_MDF_PORT 2 ++#define MLX5_ACT_NUM_MDF_TTL 1 ++#define MLX5_ACT_NUM_DEC_TTL MLX5_ACT_NUM_MDF_TTL ++#define MLX5_ACT_NUM_MDF_TCPSEQ 1 ++#define MLX5_ACT_NUM_MDF_TCPACK 1 ++#define MLX5_ACT_NUM_SET_REG 1 ++#define MLX5_ACT_NUM_SET_TAG 1 ++#define MLX5_ACT_NUM_CPY_MREG MLX5_ACT_NUM_SET_TAG ++#define MLX5_ACT_NUM_SET_MARK MLX5_ACT_NUM_SET_TAG ++#define MLX5_ACT_NUM_SET_META MLX5_ACT_NUM_SET_TAG ++#define MLX5_ACT_NUM_SET_DSCP 1 ++ + enum mlx5_flow_drv_type { + MLX5_FLOW_TYPE_MIN, + MLX5_FLOW_TYPE_DV, +@@ -370,11 +402,16 @@ struct mlx5_flow_dv_tag_resource { + + /* + * Number of modification commands. +- * If extensive metadata registers are supported +- * the maximal actions amount is 16 and 8 otherwise. ++ * The maximal actions amount in FW is some constant, and it is 16 in the ++ * latest releases. In some old releases, it will be limited to 8. ++ * Since there is no interface to query the capacity, the maximal value should ++ * be used to allow PMD to create the flow. The validation will be done in the ++ * lower driver layer or FW. A failure will be returned if exceeds the maximal ++ * supported actions number on the root table. ++ * On non-root tables, there is no limitation, but 32 is enough right now. + */ +-#define MLX5_MODIFY_NUM 16 +-#define MLX5_MODIFY_NUM_NO_MREG 8 ++#define MLX5_MAX_MODIFY_NUM 32 ++#define MLX5_ROOT_TBL_MODIFY_NUM 16 + + /* Modify resource structure */ + struct mlx5_flow_dv_modify_hdr_resource { +@@ -385,9 +422,9 @@ struct mlx5_flow_dv_modify_hdr_resource { + /**< Verbs modify header action object. */ + uint8_t ft_type; /**< Flow table type, Rx or Tx. */ + uint32_t actions_num; /**< Number of modification actions. */ +- struct mlx5_modification_cmd actions[MLX5_MODIFY_NUM]; +- /**< Modification actions. */ + uint64_t flags; /**< Flags for RDMA API. */ ++ struct mlx5_modification_cmd actions[]; ++ /**< Modification actions. */ + }; + + /* Jump action resource structure. */ +@@ -554,6 +591,8 @@ struct mlx5_flow_policer_stats { + struct mlx5_meter_domain_info { + struct mlx5_flow_tbl_resource *tbl; + /**< Meter table. */ ++ struct mlx5_flow_tbl_resource *sfx_tbl; ++ /**< Meter suffix table. */ + void *any_matcher; + /**< Meter color not match default criteria. 
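Turning the fixed actions[MLX5_MODIFY_NUM] array into a trailing flexible array, as the hunk above does, means the resource must now be allocated with the command count folded into the size. The generic sketch below shows the usual sizing idiom for such a struct; the struct and allocator names here are stand-ins, not the driver's.

#include <stdint.h>
#include <stdlib.h>

struct example_cmd { uint32_t data0, data1; };

struct example_modify_hdr {
        uint32_t actions_num;
        struct example_cmd actions[];   /* flexible array member */
};

/* Allocate room for the header plus 'n' trailing commands. */
struct example_modify_hdr *
example_modify_hdr_alloc(uint32_t n)
{
        struct example_modify_hdr *res;

        res = calloc(1, sizeof(*res) + n * sizeof(res->actions[0]));
        if (res != NULL)
                res->actions_num = n;
        return res;
}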
*/ + void *color_matcher; +@@ -657,6 +696,7 @@ typedef int (*mlx5_flow_validate_t)(struct rte_eth_dev *dev, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + bool external, ++ int hairpin, + struct rte_flow_error *error); + typedef struct mlx5_flow *(*mlx5_flow_prepare_t) + (const struct rte_flow_attr *attr, const struct rte_flow_item items[], +@@ -724,20 +764,20 @@ struct mlx5_flow_driver_ops { + + /* mlx5_flow.c */ + +-struct mlx5_flow_id_pool *mlx5_flow_id_pool_alloc(void); ++struct mlx5_flow_id_pool *mlx5_flow_id_pool_alloc(uint32_t max_id); + void mlx5_flow_id_pool_release(struct mlx5_flow_id_pool *pool); + uint32_t mlx5_flow_id_get(struct mlx5_flow_id_pool *pool, uint32_t *id); + uint32_t mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, + uint32_t id); + int mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, +- bool external, uint32_t group, uint32_t *table, +- struct rte_flow_error *error); ++ bool external, uint32_t group, bool fdb_def_rule, ++ uint32_t *table, struct rte_flow_error *error); + uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, int tunnel, + uint64_t layer_types, + uint64_t hash_fields); + uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority, + uint32_t subpriority); +-enum modify_reg mlx5_flow_get_reg_id(struct rte_eth_dev *dev, ++int mlx5_flow_get_reg_id(struct rte_eth_dev *dev, + enum mlx5_feature_name feature, + uint32_t id, + struct rte_flow_error *error); +diff --git a/dpdk/drivers/net/mlx5/mlx5_flow_dv.c b/dpdk/drivers/net/mlx5/mlx5_flow_dv.c +index 73aaea4536..d83e49f954 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_flow_dv.c ++++ b/dpdk/drivers/net/mlx5/mlx5_flow_dv.c +@@ -51,8 +51,6 @@ + #define MLX5DV_DR_ACTION_FLAGS_ROOT_LEVEL 1 + #endif + +-#define MLX5_ENCAPSULATION_DECISION_SIZE (sizeof(struct rte_flow_item_eth) + \ +- sizeof(struct rte_flow_item_ipv4)) + /* VLAN header definitions */ + #define MLX5DV_FLOW_VLAN_PCP_SHIFT 13 + #define MLX5DV_FLOW_VLAN_PCP_MASK (0x7 << MLX5DV_FLOW_VLAN_PCP_SHIFT) +@@ -72,6 +70,10 @@ union flow_dv_attr { + uint32_t attr; + }; + ++static int ++flow_dv_tbl_resource_release(struct rte_eth_dev *dev, ++ struct mlx5_flow_tbl_resource *tbl); ++ + /** + * Initialize flow attributes structure according to flow items' types. + * +@@ -82,19 +84,74 @@ union flow_dv_attr { + * Pointer to item specification. + * @param[out] attr + * Pointer to flow attributes structure. ++ * @param[in] dev_flow ++ * Pointer to the sub flow. ++ * @param[in] tunnel_decap ++ * Whether action is after tunnel decapsulation. + */ + static void +-flow_dv_attr_init(const struct rte_flow_item *item, union flow_dv_attr *attr) ++flow_dv_attr_init(const struct rte_flow_item *item, union flow_dv_attr *attr, ++ struct mlx5_flow *dev_flow, bool tunnel_decap) + { ++ /* ++ * If layers is already initialized, it means this dev_flow is the ++ * suffix flow, the layers flags is set by the prefix flow. Need to ++ * use the layer flags from prefix flow as the suffix flow may not ++ * have the user defined items as the flow is split. 
++ */ ++ if (dev_flow->layers) { ++ if (dev_flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4) ++ attr->ipv4 = 1; ++ else if (dev_flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV6) ++ attr->ipv6 = 1; ++ if (dev_flow->layers & MLX5_FLOW_LAYER_OUTER_L4_TCP) ++ attr->tcp = 1; ++ else if (dev_flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP) ++ attr->udp = 1; ++ attr->valid = 1; ++ return; ++ } + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { ++ uint8_t next_protocol = 0xff; + switch (item->type) { ++ case RTE_FLOW_ITEM_TYPE_GRE: ++ case RTE_FLOW_ITEM_TYPE_NVGRE: ++ case RTE_FLOW_ITEM_TYPE_VXLAN: ++ case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: ++ case RTE_FLOW_ITEM_TYPE_GENEVE: ++ case RTE_FLOW_ITEM_TYPE_MPLS: ++ if (tunnel_decap) ++ attr->attr = 0; ++ break; + case RTE_FLOW_ITEM_TYPE_IPV4: + if (!attr->ipv6) + attr->ipv4 = 1; ++ if (item->mask != NULL && ++ ((const struct rte_flow_item_ipv4 *) ++ item->mask)->hdr.next_proto_id) ++ next_protocol = ++ ((const struct rte_flow_item_ipv4 *) ++ (item->spec))->hdr.next_proto_id & ++ ((const struct rte_flow_item_ipv4 *) ++ (item->mask))->hdr.next_proto_id; ++ if ((next_protocol == IPPROTO_IPIP || ++ next_protocol == IPPROTO_IPV6) && tunnel_decap) ++ attr->attr = 0; + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + if (!attr->ipv4) + attr->ipv6 = 1; ++ if (item->mask != NULL && ++ ((const struct rte_flow_item_ipv6 *) ++ item->mask)->hdr.proto) ++ next_protocol = ++ ((const struct rte_flow_item_ipv6 *) ++ (item->spec))->hdr.proto & ++ ((const struct rte_flow_item_ipv6 *) ++ (item->mask))->hdr.proto; ++ if ((next_protocol == IPPROTO_IPIP || ++ next_protocol == IPPROTO_IPV6) && tunnel_decap) ++ attr->attr = 0; + break; + case RTE_FLOW_ITEM_TYPE_UDP: + if (!attr->tcp) +@@ -363,7 +420,7 @@ flow_dv_convert_modify_action(struct rte_flow_item *item, + uint32_t mask; + uint32_t data; + +- if (i >= MLX5_MODIFY_NUM) ++ if (i >= MLX5_MAX_MODIFY_NUM) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "too many items to modify"); +@@ -380,10 +437,12 @@ flow_dv_convert_modify_action(struct rte_flow_item *item, + off_b - __builtin_clz(mask); + assert(size_b); + size_b = size_b == sizeof(uint32_t) * CHAR_BIT ? 0 : size_b; +- actions[i].action_type = type; +- actions[i].field = field->id; +- actions[i].offset = off_b; +- actions[i].length = size_b; ++ actions[i] = (struct mlx5_modification_cmd) { ++ .action_type = type, ++ .field = field->id, ++ .offset = off_b, ++ .length = size_b, ++ }; + /* Convert entire record to expected big-endian format. 
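flow_dv_attr_init() above derives the effective next protocol by AND-ing the spec and mask bytes of the IPv4/IPv6 item, then clears the collected attributes when that protocol indicates IP-in-IP and the flow carries a decap action. A minimal sketch of the spec-and-mask step for a hypothetical IPv4 item follows.

#include <netinet/in.h>      /* IPPROTO_IPIP, IPPROTO_IPV6 */
#include <rte_flow.h>

/* Effective protocol the matcher would actually see: bits not covered
 * by the mask are wildcarded away, 0xff meaning "unknown" as above. */
uint8_t
effective_next_proto(const struct rte_flow_item *item)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;

        if (spec == NULL || mask == NULL || mask->hdr.next_proto_id == 0)
                return 0xff;
        return spec->hdr.next_proto_id & mask->hdr.next_proto_id;
}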
*/ + actions[i].data0 = rte_cpu_to_be_32(actions[i].data0); + if (type == MLX5_MODIFICATION_TYPE_COPY) { +@@ -404,11 +463,11 @@ flow_dv_convert_modify_action(struct rte_flow_item *item, + ++i; + ++field; + } while (field->size); +- resource->actions_num = i; +- if (!resource->actions_num) ++ if (resource->actions_num == i) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "invalid modification flow item"); ++ resource->actions_num = i; + return 0; + } + +@@ -566,17 +625,19 @@ flow_dv_convert_action_modify_vlan_vid + const struct rte_flow_action_of_set_vlan_vid *conf = + (const struct rte_flow_action_of_set_vlan_vid *)(action->conf); + int i = resource->actions_num; +- struct mlx5_modification_cmd *actions = &resource->actions[i]; ++ struct mlx5_modification_cmd *actions = resource->actions; + struct field_modify_info *field = modify_vlan_out_first_vid; + +- if (i >= MLX5_MODIFY_NUM) ++ if (i >= MLX5_MAX_MODIFY_NUM) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "too many items to modify"); +- actions[i].action_type = MLX5_MODIFICATION_TYPE_SET; +- actions[i].field = field->id; +- actions[i].length = field->size; +- actions[i].offset = field->offset; ++ actions[i] = (struct mlx5_modification_cmd) { ++ .action_type = MLX5_MODIFICATION_TYPE_SET, ++ .field = field->id, ++ .length = field->size, ++ .offset = field->offset, ++ }; + actions[i].data0 = rte_cpu_to_be_32(actions[i].data0); + actions[i].data1 = conf->vlan_vid; + actions[i].data1 = actions[i].data1 << 16; +@@ -595,6 +656,10 @@ flow_dv_convert_action_modify_vlan_vid + * Pointer to rte_flow_item objects list. + * @param[in] attr + * Pointer to flow attributes structure. ++ * @param[in] dev_flow ++ * Pointer to the sub flow. ++ * @param[in] tunnel_decap ++ * Whether action is after tunnel decapsulation. + * @param[out] error + * Pointer to the error structure. + * +@@ -606,8 +671,8 @@ flow_dv_convert_action_modify_tp + (struct mlx5_flow_dv_modify_hdr_resource *resource, + const struct rte_flow_action *action, + const struct rte_flow_item *items, +- union flow_dv_attr *attr, +- struct rte_flow_error *error) ++ union flow_dv_attr *attr, struct mlx5_flow *dev_flow, ++ bool tunnel_decap, struct rte_flow_error *error) + { + const struct rte_flow_action_set_tp *conf = + (const struct rte_flow_action_set_tp *)(action->conf); +@@ -619,7 +684,7 @@ flow_dv_convert_action_modify_tp + struct field_modify_info *field; + + if (!attr->valid) +- flow_dv_attr_init(items, attr); ++ flow_dv_attr_init(items, attr, dev_flow, tunnel_decap); + if (attr->udp) { + memset(&udp, 0, sizeof(udp)); + memset(&udp_mask, 0, sizeof(udp_mask)); +@@ -636,8 +701,8 @@ flow_dv_convert_action_modify_tp + item.spec = &udp; + item.mask = &udp_mask; + field = modify_udp; +- } +- if (attr->tcp) { ++ } else { ++ assert(attr->tcp); + memset(&tcp, 0, sizeof(tcp)); + memset(&tcp_mask, 0, sizeof(tcp_mask)); + if (action->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC) { +@@ -669,6 +734,10 @@ flow_dv_convert_action_modify_tp + * Pointer to rte_flow_item objects list. + * @param[in] attr + * Pointer to flow attributes structure. ++ * @param[in] dev_flow ++ * Pointer to the sub flow. ++ * @param[in] tunnel_decap ++ * Whether action is after tunnel decapsulation. + * @param[out] error + * Pointer to the error structure. 
+ * +@@ -680,8 +749,8 @@ flow_dv_convert_action_modify_ttl + (struct mlx5_flow_dv_modify_hdr_resource *resource, + const struct rte_flow_action *action, + const struct rte_flow_item *items, +- union flow_dv_attr *attr, +- struct rte_flow_error *error) ++ union flow_dv_attr *attr, struct mlx5_flow *dev_flow, ++ bool tunnel_decap, struct rte_flow_error *error) + { + const struct rte_flow_action_set_ttl *conf = + (const struct rte_flow_action_set_ttl *)(action->conf); +@@ -693,7 +762,7 @@ flow_dv_convert_action_modify_ttl + struct field_modify_info *field; + + if (!attr->valid) +- flow_dv_attr_init(items, attr); ++ flow_dv_attr_init(items, attr, dev_flow, tunnel_decap); + if (attr->ipv4) { + memset(&ipv4, 0, sizeof(ipv4)); + memset(&ipv4_mask, 0, sizeof(ipv4_mask)); +@@ -703,8 +772,8 @@ flow_dv_convert_action_modify_ttl + item.spec = &ipv4; + item.mask = &ipv4_mask; + field = modify_ipv4; +- } +- if (attr->ipv6) { ++ } else { ++ assert(attr->ipv6); + memset(&ipv6, 0, sizeof(ipv6)); + memset(&ipv6_mask, 0, sizeof(ipv6_mask)); + ipv6.hdr.hop_limits = conf->ttl_value; +@@ -729,6 +798,10 @@ flow_dv_convert_action_modify_ttl + * Pointer to rte_flow_item objects list. + * @param[in] attr + * Pointer to flow attributes structure. ++ * @param[in] dev_flow ++ * Pointer to the sub flow. ++ * @param[in] tunnel_decap ++ * Whether action is after tunnel decapsulation. + * @param[out] error + * Pointer to the error structure. + * +@@ -739,8 +812,8 @@ static int + flow_dv_convert_action_modify_dec_ttl + (struct mlx5_flow_dv_modify_hdr_resource *resource, + const struct rte_flow_item *items, +- union flow_dv_attr *attr, +- struct rte_flow_error *error) ++ union flow_dv_attr *attr, struct mlx5_flow *dev_flow, ++ bool tunnel_decap, struct rte_flow_error *error) + { + struct rte_flow_item item; + struct rte_flow_item_ipv4 ipv4; +@@ -750,7 +823,7 @@ flow_dv_convert_action_modify_dec_ttl + struct field_modify_info *field; + + if (!attr->valid) +- flow_dv_attr_init(items, attr); ++ flow_dv_attr_init(items, attr, dev_flow, tunnel_decap); + if (attr->ipv4) { + memset(&ipv4, 0, sizeof(ipv4)); + memset(&ipv4_mask, 0, sizeof(ipv4_mask)); +@@ -760,8 +833,8 @@ flow_dv_convert_action_modify_dec_ttl + item.spec = &ipv4; + item.mask = &ipv4_mask; + field = modify_ipv4; +- } +- if (attr->ipv6) { ++ } else { ++ assert(attr->ipv6); + memset(&ipv6, 0, sizeof(ipv6)); + memset(&ipv6_mask, 0, sizeof(ipv6_mask)); + ipv6.hdr.hop_limits = 0xFF; +@@ -902,22 +975,20 @@ flow_dv_convert_action_set_reg + struct mlx5_modification_cmd *actions = resource->actions; + uint32_t i = resource->actions_num; + +- if (i >= MLX5_MODIFY_NUM) ++ if (i >= MLX5_MAX_MODIFY_NUM) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "too many items to modify"); + assert(conf->id != REG_NONE); + assert(conf->id < RTE_DIM(reg_to_field)); +- actions[i].action_type = MLX5_MODIFICATION_TYPE_SET; +- actions[i].field = reg_to_field[conf->id]; ++ actions[i] = (struct mlx5_modification_cmd) { ++ .action_type = MLX5_MODIFICATION_TYPE_SET, ++ .field = reg_to_field[conf->id], ++ }; + actions[i].data0 = rte_cpu_to_be_32(actions[i].data0); + actions[i].data1 = rte_cpu_to_be_32(conf->data); + ++i; + resource->actions_num = i; +- if (!resource->actions_num) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "invalid modification flow item"); + return 0; + } + +@@ -1078,7 +1149,7 @@ flow_dv_convert_action_mark(struct rte_eth_dev *dev, + {4, 0, 0}, /* dynamic instead of MLX5_MODI_META_REG_C_1. 
*/ + {0, 0, 0}, + }; +- enum modify_reg reg; ++ int reg; + + if (!mask) + return rte_flow_error_set(error, EINVAL, +@@ -1088,6 +1159,14 @@ flow_dv_convert_action_mark(struct rte_eth_dev *dev, + if (reg < 0) + return reg; + assert(reg > 0); ++ if (reg == REG_C_0) { ++ uint32_t msk_c0 = priv->sh->dv_regc0_mask; ++ uint32_t shl_c0 = rte_bsf32(msk_c0); ++ ++ data = rte_cpu_to_be_32(rte_cpu_to_be_32(data) << shl_c0); ++ mask = rte_cpu_to_be_32(mask) & msk_c0; ++ mask = rte_cpu_to_be_32(mask << shl_c0); ++ } + reg_c_x[0].id = reg_to_field[reg]; + return flow_dv_convert_modify_action(&item, reg_c_x, NULL, resource, + MLX5_MODIFICATION_TYPE_SET, error); +@@ -1112,7 +1191,7 @@ flow_dv_get_metadata_reg(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) + { +- enum modify_reg reg = ++ int reg = + mlx5_flow_get_reg_id(dev, attr->transfer ? + MLX5_METADATA_FDB : + attr->egress ? +@@ -1160,7 +1239,7 @@ flow_dv_convert_action_set_meta + struct field_modify_info reg_c_x[] = { + [1] = {0, 0, 0}, + }; +- enum modify_reg reg = flow_dv_get_metadata_reg(dev, attr, error); ++ int reg = flow_dv_get_metadata_reg(dev, attr, error); + + if (reg < 0) + return reg; +@@ -1250,6 +1329,11 @@ flow_dv_validate_item_mark(struct rte_eth_dev *dev, + "mark id exceeds the limit"); + if (!mask) + mask = &nic_mask; ++ if (!mask->id) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM_SPEC, NULL, ++ "mask cannot be zero"); ++ + ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, + (const uint8_t *)&nic_mask, + sizeof(struct rte_flow_item_mark), +@@ -1287,7 +1371,7 @@ flow_dv_validate_item_meta(struct rte_eth_dev *dev __rte_unused, + struct rte_flow_item_meta nic_mask = { + .data = UINT32_MAX + }; +- enum modify_reg reg; ++ int reg; + int ret; + + if (!spec) +@@ -1295,10 +1379,6 @@ flow_dv_validate_item_meta(struct rte_eth_dev *dev __rte_unused, + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, + item->spec, + "data cannot be empty"); +- if (!spec->data) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ITEM_SPEC, NULL, +- "data cannot be zero"); + if (config->dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) { + if (!mlx5_flow_ext_mreg_supported(dev)) + return rte_flow_error_set(error, ENOTSUP, +@@ -1318,6 +1398,11 @@ flow_dv_validate_item_meta(struct rte_eth_dev *dev __rte_unused, + } + if (!mask) + mask = &rte_flow_item_meta_mask; ++ if (!mask->data) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM_SPEC, NULL, ++ "mask cannot be zero"); ++ + ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, + (const uint8_t *)&nic_mask, + sizeof(struct rte_flow_item_meta), +@@ -1366,6 +1451,11 @@ flow_dv_validate_item_tag(struct rte_eth_dev *dev, + "data cannot be empty"); + if (!mask) + mask = &rte_flow_item_tag_mask; ++ if (!mask->data) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM_SPEC, NULL, ++ "mask cannot be zero"); ++ + ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, + (const uint8_t *)&nic_mask, + sizeof(struct rte_flow_item_tag), +@@ -1465,6 +1555,79 @@ flow_dv_validate_item_port_id(struct rte_eth_dev *dev, + return 0; + } + ++/** ++ * Validate VLAN item. ++ * ++ * @param[in] item ++ * Item specification. ++ * @param[in] item_flags ++ * Bit-fields that holds the items detected until now. ++ * @param[in] dev ++ * Ethernet device flow is being created on. ++ * @param[out] error ++ * Pointer to error structure. ++ * ++ * @return ++ * 0 on success, a negative errno value otherwise and rte_errno is set. 
++ */ ++static int ++flow_dv_validate_item_vlan(const struct rte_flow_item *item, ++ uint64_t item_flags, ++ struct rte_eth_dev *dev, ++ struct rte_flow_error *error) ++{ ++ const struct rte_flow_item_vlan *mask = item->mask; ++ const struct rte_flow_item_vlan nic_mask = { ++ .tci = RTE_BE16(UINT16_MAX), ++ .inner_type = RTE_BE16(UINT16_MAX), ++ }; ++ const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); ++ int ret; ++ const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | ++ MLX5_FLOW_LAYER_INNER_L4) : ++ (MLX5_FLOW_LAYER_OUTER_L3 | ++ MLX5_FLOW_LAYER_OUTER_L4); ++ const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : ++ MLX5_FLOW_LAYER_OUTER_VLAN; ++ ++ if (item_flags & vlanm) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, item, ++ "multiple VLAN layers not supported"); ++ else if ((item_flags & l34m) != 0) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, item, ++ "VLAN cannot follow L3/L4 layer"); ++ if (!mask) ++ mask = &rte_flow_item_vlan_mask; ++ ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, ++ (const uint8_t *)&nic_mask, ++ sizeof(struct rte_flow_item_vlan), ++ error); ++ if (ret) ++ return ret; ++ if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { ++ struct mlx5_priv *priv = dev->data->dev_private; ++ ++ if (priv->vmwa_context) { ++ /* ++ * Non-NULL context means we have a virtual machine ++ * and SR-IOV enabled, we have to create VLAN interface ++ * to make hypervisor to setup E-Switch vport ++ * context correctly. We avoid creating the multiple ++ * VLAN interfaces, so we cannot support VLAN tag mask. ++ */ ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, ++ item, ++ "VLAN tag mask is not" ++ " supported in virtual" ++ " environment"); ++ } ++ } ++ return 0; ++} ++ + /** + * Validate the pop VLAN action. + * +@@ -1492,7 +1655,7 @@ flow_dv_validate_action_pop_vlan(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) + { +- struct mlx5_priv *priv = dev->data->dev_private; ++ const struct mlx5_priv *priv = dev->data->dev_private; + + (void)action; + (void)attr; +@@ -1501,17 +1664,16 @@ flow_dv_validate_action_pop_vlan(struct rte_eth_dev *dev, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "pop vlan action is not supported"); +- /* +- * Check for inconsistencies: +- * fail strip_vlan in a flow that matches packets without VLAN tags. +- * fail strip_vlan in a flow that matches packets without explicitly a +- * matching on VLAN tag ? 
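flow_dv_validate_item_vlan() above accepts a partial TCI mask only when no VLAN workaround context is active; in a VM/SR-IOV setup the rule must match the full 12-bit VLAN ID. A hedged example of a spec/mask pair that passes that check is shown below (the VLAN ID value is arbitrary).

#include <rte_byteorder.h>
#include <rte_flow.h>

/* Match VLAN ID 100 exactly: the 0x0fff mask covers the whole VID field,
 * which is what the validation above requires when vmwa_context is set. */
static const struct rte_flow_item_vlan vlan_spec = {
        .tci = RTE_BE16(100),
};
static const struct rte_flow_item_vlan vlan_mask = {
        .tci = RTE_BE16(0x0fff),
};
static const struct rte_flow_item vlan_item = {
        .type = RTE_FLOW_ITEM_TYPE_VLAN,
        .spec = &vlan_spec,
        .mask = &vlan_mask,
};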
+- */ +- if (action_flags & MLX5_FLOW_ACTION_OF_POP_VLAN) ++ if (attr->egress) + return rte_flow_error_set(error, ENOTSUP, +- RTE_FLOW_ERROR_TYPE_UNSPECIFIED, ++ RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + NULL, +- "no support for multiple vlan pop " ++ "pop vlan action not supported for " ++ "egress"); ++ if (action_flags & MLX5_FLOW_VLAN_ACTIONS) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ACTION, action, ++ "no support for multiple VLAN " + "actions"); + if (!(item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) + return rte_flow_error_set(error, ENOTSUP, +@@ -1524,20 +1686,21 @@ flow_dv_validate_action_pop_vlan(struct rte_eth_dev *dev, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "wrong action order, port_id should " + "be after pop VLAN action"); ++ if (!attr->transfer && priv->representor) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, ++ "pop vlan action for VF representor " ++ "not supported on NIC table"); + return 0; + } + + /** + * Get VLAN default info from vlan match info. + * +- * @param[in] dev +- * Pointer to the rte_eth_dev structure. +- * @param[in] item ++ * @param[in] items + * the list of item specifications. + * @param[out] vlan + * pointer VLAN info to fill to. +- * @param[out] error +- * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. +@@ -1554,19 +1717,26 @@ flow_dev_get_vlan_info_from_items(const struct rte_flow_item *items, + + if (items == NULL) + return; +- for (; items->type != RTE_FLOW_ITEM_TYPE_END && +- items->type != RTE_FLOW_ITEM_TYPE_VLAN; items++) +- ; +- if (items->type == RTE_FLOW_ITEM_TYPE_VLAN) { ++ for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { ++ int type = items->type; ++ ++ if (type == RTE_FLOW_ITEM_TYPE_VLAN || ++ type == MLX5_RTE_FLOW_ITEM_TYPE_VLAN) ++ break; ++ } ++ if (items->type != RTE_FLOW_ITEM_TYPE_END) { + const struct rte_flow_item_vlan *vlan_m = items->mask; + const struct rte_flow_item_vlan *vlan_v = items->spec; + ++ /* If VLAN item in pattern doesn't contain data, return here. */ ++ if (!vlan_v) ++ return; + if (!vlan_m) + vlan_m = &nic_mask; + /* Only full match values are accepted */ + if ((vlan_m->tci & MLX5DV_FLOW_VLAN_PCP_MASK_BE) == + MLX5DV_FLOW_VLAN_PCP_MASK_BE) { +- vlan->vlan_tci &= MLX5DV_FLOW_VLAN_PCP_MASK; ++ vlan->vlan_tci &= ~MLX5DV_FLOW_VLAN_PCP_MASK; + vlan->vlan_tci |= + rte_be_to_cpu_16(vlan_v->tci & + MLX5DV_FLOW_VLAN_PCP_MASK_BE); +@@ -1587,10 +1757,14 @@ flow_dev_get_vlan_info_from_items(const struct rte_flow_item *items, + /** + * Validate the push VLAN action. + * ++ * @param[in] dev ++ * Pointer to the rte_eth_dev structure. + * @param[in] action_flags + * Holds the actions detected until now. ++ * @param[in] item_flags ++ * The items found in this flow rule. + * @param[in] action +- * Pointer to the encap action. ++ * Pointer to the action structure. + * @param[in] attr + * Pointer to flow attributes + * @param[out] error +@@ -1600,38 +1774,68 @@ flow_dev_get_vlan_info_from_items(const struct rte_flow_item *items, + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ + static int +-flow_dv_validate_action_push_vlan(uint64_t action_flags, +- uint64_t item_flags, ++flow_dv_validate_action_push_vlan(struct rte_eth_dev *dev, ++ uint64_t action_flags, ++ const struct rte_flow_item_vlan *vlan_m, + const struct rte_flow_action *action, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) + { + const struct rte_flow_action_of_push_vlan *push_vlan = action->conf; ++ const struct mlx5_priv *priv = dev->data->dev_private; + ++ if (!attr->transfer && attr->ingress) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, ++ NULL, ++ "push VLAN action not supported for " ++ "ingress"); + if (push_vlan->ethertype != RTE_BE16(RTE_ETHER_TYPE_VLAN) && + push_vlan->ethertype != RTE_BE16(RTE_ETHER_TYPE_QINQ)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "invalid vlan ethertype"); +- if (action_flags & +- (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN)) ++ if (action_flags & MLX5_FLOW_VLAN_ACTIONS) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "no support for multiple VLAN " + "actions"); +- if (!mlx5_flow_find_action +- (action + 1, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) && +- !(item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) +- return rte_flow_error_set(error, ENOTSUP, +- RTE_FLOW_ERROR_TYPE_ACTION, action, +- "push VLAN needs to match on VLAN in order to " +- "get VLAN VID information because there is " +- "no followed set VLAN VID action"); + if (action_flags & MLX5_FLOW_ACTION_PORT_ID) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "wrong action order, port_id should " + "be after push VLAN"); ++ if (!attr->transfer && priv->representor) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, ++ "push vlan action for VF representor " ++ "not supported on NIC table"); ++ if (vlan_m && ++ (vlan_m->tci & MLX5DV_FLOW_VLAN_PCP_MASK_BE) && ++ (vlan_m->tci & MLX5DV_FLOW_VLAN_PCP_MASK_BE) != ++ MLX5DV_FLOW_VLAN_PCP_MASK_BE && ++ !(action_flags & MLX5_FLOW_ACTION_OF_SET_VLAN_PCP) && ++ !(mlx5_flow_find_action ++ (action + 1, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP))) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ACTION, action, ++ "not full match mask on VLAN PCP and " ++ "there is no of_set_vlan_pcp action, " ++ "push VLAN action cannot figure out " ++ "PCP value"); ++ if (vlan_m && ++ (vlan_m->tci & MLX5DV_FLOW_VLAN_VID_MASK_BE) && ++ (vlan_m->tci & MLX5DV_FLOW_VLAN_VID_MASK_BE) != ++ MLX5DV_FLOW_VLAN_VID_MASK_BE && ++ !(action_flags & MLX5_FLOW_ACTION_OF_SET_VLAN_VID) && ++ !(mlx5_flow_find_action ++ (action + 1, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID))) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ACTION, action, ++ "not full match mask on VLAN VID and " ++ "there is no of_set_vlan_vid action, " ++ "push VLAN action cannot figure out " ++ "VID value"); + (void)attr; + return 0; + } +@@ -1643,8 +1847,6 @@ flow_dv_validate_action_push_vlan(uint64_t action_flags, + * Holds the actions detected until now. + * @param[in] actions + * Pointer to the list of actions remaining in the flow rule. +- * @param[in] attr +- * Pointer to flow attributes + * @param[out] error + * Pointer to error structure. + * +@@ -1686,10 +1888,10 @@ flow_dv_validate_action_set_vlan_pcp(uint64_t action_flags, + * + * @param[in] item_flags + * Holds the items detected in this rule. ++ * @param[in] action_flags ++ * Holds the actions detected until now. 
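[Annotation] flow_dev_get_vlan_info_from_items() above copies the PCP out of the matched TCI only when the corresponding mask bits are a full match, and the push-VLAN validator requires the same for PCP/VID unless a later set_vlan_pcp / set_vlan_vid action supplies the value. A standalone sketch of splitting an 802.1Q TCI into its sub-fields and testing a sub-field mask for a full match; the mask constants follow the 802.1Q layout, the function names are illustrative.

#include <stdint.h>
#include <stdio.h>

#define VLAN_PCP_MASK 0xE000u   /* priority: top 3 bits of the TCI */
#define VLAN_DEI_MASK 0x1000u   /* drop-eligible indicator: 1 bit  */
#define VLAN_VID_MASK 0x0FFFu   /* VLAN id: low 12 bits            */

/* A sub-field can only be taken from the match if it is fully masked. */
static int full_match(uint16_t mask, uint16_t field_mask)
{
    return (mask & field_mask) == field_mask;
}

int main(void)
{
    uint16_t tci  = 0xA123;     /* PCP=5, DEI=0, VID=0x123 (host order) */
    uint16_t mask = 0xEFFF;     /* full PCP and VID masks, DEI ignored  */

    if (full_match(mask, VLAN_PCP_MASK))
        printf("pcp = %u\n", (tci & VLAN_PCP_MASK) >> 13);
    if (full_match(mask, VLAN_VID_MASK))
        printf("vid = 0x%03x\n", tci & VLAN_VID_MASK);
    return 0;
}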
+ * @param[in] actions + * Pointer to the list of actions remaining in the flow rule. +- * @param[in] attr +- * Pointer to flow attributes + * @param[out] error + * Pointer to error structure. + * +@@ -1705,37 +1907,21 @@ flow_dv_validate_action_set_vlan_vid(uint64_t item_flags, + const struct rte_flow_action *action = actions; + const struct rte_flow_action_of_set_vlan_vid *conf = action->conf; + +- if (conf->vlan_vid > RTE_BE16(0xFFE)) ++ if (rte_be_to_cpu_16(conf->vlan_vid) > 0xFFE) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "VLAN VID value is too big"); +- /* there is an of_push_vlan action before us */ +- if (action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) { +- if (mlx5_flow_find_action(actions + 1, +- RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID)) +- return rte_flow_error_set(error, ENOTSUP, +- RTE_FLOW_ERROR_TYPE_ACTION, action, +- "Multiple VLAN VID modifications are " +- "not supported"); +- else +- return 0; +- } +- +- /* +- * Action is on an existing VLAN header: +- * Need to verify this is a single modify CID action. +- * Rule mast include a match on outer VLAN. +- */ ++ if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) && ++ !(item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ACTION, action, ++ "set VLAN VID action must follow push" ++ " VLAN action or match on VLAN item"); + if (action_flags & MLX5_FLOW_ACTION_OF_SET_VLAN_VID) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "Multiple VLAN VID modifications are " + "not supported"); +- if (!(item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, action, +- "match on VLAN is required in order " +- "to set VLAN VID"); + if (action_flags & MLX5_FLOW_ACTION_PORT_ID) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, +@@ -1788,10 +1974,6 @@ flow_dv_validate_action_flag(struct rte_eth_dev *dev, + if (ret < 0) + return ret; + assert(ret > 0); +- if (action_flags & MLX5_FLOW_ACTION_DROP) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't drop and flag in same flow"); + if (action_flags & MLX5_FLOW_ACTION_MARK) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, +@@ -1861,10 +2043,6 @@ flow_dv_validate_action_mark(struct rte_eth_dev *dev, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + &mark->id, + "mark id exceeds the limit"); +- if (action_flags & MLX5_FLOW_ACTION_DROP) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't drop and mark in same flow"); + if (action_flags & MLX5_FLOW_ACTION_FLAG) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, +@@ -1883,7 +2061,7 @@ flow_dv_validate_action_mark(struct rte_eth_dev *dev, + * @param[in] dev + * Pointer to the rte_eth_dev structure. + * @param[in] action +- * Pointer to the encap action. ++ * Pointer to the action structure. + * @param[in] action_flags + * Holds the actions detected until now. 
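[Annotation] One fix above replaces the comparison "conf->vlan_vid > RTE_BE16(0xFFE)" with "rte_be_to_cpu_16(conf->vlan_vid) > 0xFFE": the VID field is stored big-endian, so comparing the raw bit pattern on a little-endian host tests the wrong number and lets invalid VIDs through. A standalone illustration of the difference, using a hand-rolled byte swap instead of the rte_byteorder helpers and assuming a little-endian host.

#include <stdint.h>
#include <stdio.h>

/* Big-endian <-> host conversion on a little-endian host (x86);
 * DPDK's rte_cpu_to_be_16()/rte_be_to_cpu_16() handle both orders. */
static uint16_t bswap16(uint16_t v) { return (uint16_t)((v >> 8) | (v << 8)); }

int main(void)
{
    uint16_t vid_host = 0x1000;            /* 4096: not a valid 12-bit VID */
    uint16_t vid_be   = bswap16(vid_host); /* as stored in the action conf */
    uint16_t limit_be = bswap16(0x0FFE);

    /* Buggy form: compares two big-endian bit patterns as host integers. */
    int accepted_buggy = !(vid_be > limit_be);
    /* Fixed form: convert to host order before the range check. */
    int accepted_fixed = !(bswap16(vid_be) > 0x0FFE);

    printf("buggy check accepts 4096: %d\n", accepted_buggy); /* 1 (wrong) */
    printf("fixed check accepts 4096: %d\n", accepted_fixed); /* 0 (right) */
    return 0;
}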
+ * @param[in] attr +@@ -1903,7 +2081,7 @@ flow_dv_validate_action_set_meta(struct rte_eth_dev *dev, + { + const struct rte_flow_action_set_meta *conf; + uint32_t nic_mask = UINT32_MAX; +- enum modify_reg reg; ++ int reg; + + if (!mlx5_flow_ext_mreg_supported(dev)) + return rte_flow_error_set(error, ENOTSUP, +@@ -1931,10 +2109,6 @@ flow_dv_validate_action_set_meta(struct rte_eth_dev *dev, + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "meta data must be within reg C0"); +- if (!(conf->data & conf->mask)) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, action, +- "zero value has no effect"); + return 0; + } + +@@ -1944,7 +2118,7 @@ flow_dv_validate_action_set_meta(struct rte_eth_dev *dev, + * @param[in] dev + * Pointer to the rte_eth_dev structure. + * @param[in] action +- * Pointer to the encap action. ++ * Pointer to the action structure. + * @param[in] action_flags + * Holds the actions detected until now. + * @param[in] attr +@@ -1998,7 +2172,7 @@ flow_dv_validate_action_set_tag(struct rte_eth_dev *dev, + * Validate count action. + * + * @param[in] dev +- * device otr. ++ * Pointer to rte_eth_dev structure. + * @param[out] error + * Pointer to error structure. + * +@@ -2027,12 +2201,14 @@ flow_dv_validate_action_count(struct rte_eth_dev *dev, + /** + * Validate the L2 encap action. + * ++ * @param[in] dev ++ * Pointer to the rte_eth_dev structure. + * @param[in] action_flags + * Holds the actions detected until now. + * @param[in] action +- * Pointer to the encap action. ++ * Pointer to the action structure. + * @param[in] attr +- * Pointer to flow attributes ++ * Pointer to flow attributes. + * @param[out] error + * Pointer to error structure. + * +@@ -2040,36 +2216,36 @@ flow_dv_validate_action_count(struct rte_eth_dev *dev, + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ + static int +-flow_dv_validate_action_l2_encap(uint64_t action_flags, ++flow_dv_validate_action_l2_encap(struct rte_eth_dev *dev, ++ uint64_t action_flags, + const struct rte_flow_action *action, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) + { ++ const struct mlx5_priv *priv = dev->data->dev_private; ++ + if (!(action->conf)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "configuration cannot be null"); +- if (action_flags & MLX5_FLOW_ACTION_DROP) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't drop and encap in same flow"); +- if (action_flags & (MLX5_FLOW_ENCAP_ACTIONS | MLX5_FLOW_DECAP_ACTIONS)) ++ if (action_flags & MLX5_FLOW_ACTION_ENCAP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can only have a single encap or" +- " decap action in a flow"); +- if (!attr->transfer && attr->ingress) ++ "can only have a single encap action " ++ "in a flow"); ++ if (!attr->transfer && priv->representor) + return rte_flow_error_set(error, ENOTSUP, +- RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, +- NULL, +- "encap action not supported for " +- "ingress"); ++ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, ++ "encap action for VF representor " ++ "not supported on NIC table"); + return 0; + } + + /** +- * Validate the L2 decap action. ++ * Validate a decap action. + * ++ * @param[in] dev ++ * Pointer to the rte_eth_dev structure. + * @param[in] action_flags + * Holds the actions detected until now. 
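[Annotation] flow_dv_validate_action_set_meta() above only insists that the requested metadata mask fits into the register C0 bits the device actually exposes, and no longer rejects an all-zero value. A standalone sketch of the "mask must fit the available field" check; the register layout below is illustrative, not the real C0 split.

#include <stdint.h>
#include <stdio.h>

/* Bits of a 32-bit register available for META; the rest is reserved
 * (for example, for a source-vport field). */
#define AVAILABLE_BITS 0x00FFFF00u

/* Reject a set-meta request whose mask would spill outside the field. */
static int validate_set_meta(uint32_t data, uint32_t mask)
{
    if (!mask)
        return -1;                  /* nothing would be written */
    if (mask & ~AVAILABLE_BITS)
        return -1;                  /* mask exceeds the field   */
    (void)data;                     /* data is applied under the mask */
    return 0;
}

int main(void)
{
    printf("%d\n", validate_set_meta(0x00123400, 0x00FFFF00)); /*  0 */
    printf("%d\n", validate_set_meta(0x00000001, 0x000000FF)); /* -1 */
    printf("%d\n", validate_set_meta(0x00000000, 0x00FFFF00)); /*  0: zero value is fine */
    return 0;
}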
+ * @param[in] attr +@@ -2081,19 +2257,20 @@ flow_dv_validate_action_l2_encap(uint64_t action_flags, + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ + static int +-flow_dv_validate_action_l2_decap(uint64_t action_flags, +- const struct rte_flow_attr *attr, +- struct rte_flow_error *error) ++flow_dv_validate_action_decap(struct rte_eth_dev *dev, ++ uint64_t action_flags, ++ const struct rte_flow_attr *attr, ++ struct rte_flow_error *error) + { +- if (action_flags & MLX5_FLOW_ACTION_DROP) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't drop and decap in same flow"); +- if (action_flags & (MLX5_FLOW_ENCAP_ACTIONS | MLX5_FLOW_DECAP_ACTIONS)) +- return rte_flow_error_set(error, EINVAL, ++ const struct mlx5_priv *priv = dev->data->dev_private; ++ ++ if (action_flags & MLX5_FLOW_XCAP_ACTIONS) ++ return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can only have a single encap or" +- " decap action in a flow"); ++ action_flags & ++ MLX5_FLOW_ACTION_DECAP ? "can only " ++ "have a single decap action" : "decap " ++ "after encap is not supported"); + if (action_flags & MLX5_FLOW_MODIFY_HDR_ACTIONS) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, +@@ -2105,69 +2282,31 @@ flow_dv_validate_action_l2_decap(uint64_t action_flags, + NULL, + "decap action not supported for " + "egress"); +- return 0; +-} +- +-/** +- * Validate the raw encap action. +- * +- * @param[in] action_flags +- * Holds the actions detected until now. +- * @param[in] action +- * Pointer to the encap action. +- * @param[in] attr +- * Pointer to flow attributes +- * @param[out] error +- * Pointer to error structure. +- * +- * @return +- * 0 on success, a negative errno value otherwise and rte_errno is set. +- */ +-static int +-flow_dv_validate_action_raw_encap(uint64_t action_flags, +- const struct rte_flow_action *action, +- const struct rte_flow_attr *attr, +- struct rte_flow_error *error) +-{ +- const struct rte_flow_action_raw_encap *raw_encap = +- (const struct rte_flow_action_raw_encap *)action->conf; +- if (!(action->conf)) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, action, +- "configuration cannot be null"); +- if (action_flags & MLX5_FLOW_ACTION_DROP) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't drop and encap in same flow"); +- if (action_flags & MLX5_FLOW_ENCAP_ACTIONS) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can only have a single encap" +- " action in a flow"); +- /* encap without preceding decap is not supported for ingress */ +- if (!attr->transfer && attr->ingress && +- !(action_flags & MLX5_FLOW_ACTION_RAW_DECAP)) ++ if (!attr->transfer && priv->representor) + return rte_flow_error_set(error, ENOTSUP, +- RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, +- NULL, +- "encap action not supported for " +- "ingress"); +- if (!raw_encap->size || !raw_encap->data) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, action, +- "raw encap data cannot be empty"); ++ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, ++ "decap action for VF representor " ++ "not supported on NIC table"); + return 0; + } + ++const struct rte_flow_action_raw_decap empty_decap = {.data = NULL, .size = 0,}; ++ + /** +- * Validate the raw decap action. ++ * Validate the raw encap and decap actions. + * +- * @param[in] action_flags +- * Holds the actions detected until now. 
+- * @param[in] action ++ * @param[in] dev ++ * Pointer to the rte_eth_dev structure. ++ * @param[in] decap ++ * Pointer to the decap action. ++ * @param[in] encap + * Pointer to the encap action. + * @param[in] attr + * Pointer to flow attributes ++ * @param[in/out] action_flags ++ * Holds the actions detected until now. ++ * @param[out] actions_n ++ * pointer to the number of actions counter. + * @param[out] error + * Pointer to error structure. + * +@@ -2175,41 +2314,72 @@ flow_dv_validate_action_raw_encap(uint64_t action_flags, + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ + static int +-flow_dv_validate_action_raw_decap(uint64_t action_flags, +- const struct rte_flow_action *action, +- const struct rte_flow_attr *attr, +- struct rte_flow_error *error) +-{ +- const struct rte_flow_action_raw_decap *decap = action->conf; ++flow_dv_validate_action_raw_encap_decap ++ (struct rte_eth_dev *dev, ++ const struct rte_flow_action_raw_decap *decap, ++ const struct rte_flow_action_raw_encap *encap, ++ const struct rte_flow_attr *attr, uint64_t *action_flags, ++ int *actions_n, struct rte_flow_error *error) ++{ ++ const struct mlx5_priv *priv = dev->data->dev_private; ++ int ret; + +- if (action_flags & MLX5_FLOW_ACTION_DROP) ++ if (encap && (!encap->size || !encap->data)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't drop and decap in same flow"); +- if (action_flags & MLX5_FLOW_ENCAP_ACTIONS) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can't have encap action before" +- " decap action"); +- if (action_flags & MLX5_FLOW_DECAP_ACTIONS) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, NULL, +- "can only have a single decap" +- " action in a flow"); +- /* decap action is valid on egress only if it is followed by encap */ +- if (attr->egress && decap && +- decap->size > MLX5_ENCAPSULATION_DECISION_SIZE) { +- return rte_flow_error_set(error, ENOTSUP, +- RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, +- NULL, "decap action not supported" +- " for egress"); +- } else if (decap && decap->size > MLX5_ENCAPSULATION_DECISION_SIZE && +- (action_flags & MLX5_FLOW_MODIFY_HDR_ACTIONS)) { +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, +- NULL, +- "can't have decap action " +- "after modify action"); ++ "raw encap data cannot be empty"); ++ if (decap && encap) { ++ if (decap->size <= MLX5_ENCAPSULATION_DECISION_SIZE && ++ encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) ++ /* L3 encap. */ ++ decap = NULL; ++ else if (encap->size <= ++ MLX5_ENCAPSULATION_DECISION_SIZE && ++ decap->size > ++ MLX5_ENCAPSULATION_DECISION_SIZE) ++ /* L3 decap. */ ++ encap = NULL; ++ else if (encap->size > ++ MLX5_ENCAPSULATION_DECISION_SIZE && ++ decap->size > ++ MLX5_ENCAPSULATION_DECISION_SIZE) ++ /* 2 L2 actions: encap and decap. 
*/ ++ ; ++ else ++ return rte_flow_error_set(error, ++ ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ NULL, "unsupported too small " ++ "raw decap and too small raw " ++ "encap combination"); ++ } ++ if (decap) { ++ ret = flow_dv_validate_action_decap(dev, *action_flags, attr, ++ error); ++ if (ret < 0) ++ return ret; ++ *action_flags |= MLX5_FLOW_ACTION_DECAP; ++ ++(*actions_n); ++ } ++ if (encap) { ++ if (encap->size <= MLX5_ENCAPSULATION_DECISION_SIZE) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ NULL, ++ "small raw encap size"); ++ if (*action_flags & MLX5_FLOW_ACTION_ENCAP) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ NULL, ++ "more than one encap action"); ++ if (!attr->transfer && priv->representor) ++ return rte_flow_error_set ++ (error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, ++ "encap action for VF representor " ++ "not supported on NIC table"); ++ *action_flags |= MLX5_FLOW_ACTION_ENCAP; ++ ++(*actions_n); + } + return 0; + } +@@ -2248,7 +2418,6 @@ flow_dv_encap_decap_resource_register + domain = sh->rx_domain; + else + domain = sh->tx_domain; +- + /* Lookup a matching resource from cache. */ + LIST_FOREACH(cache_resource, &sh->encaps_decaps, next) { + if (resource->reformat_type == cache_resource->reformat_type && +@@ -2334,6 +2503,8 @@ flow_dv_jump_tbl_resource_register + DRV_LOG(DEBUG, "new jump table resource %p: refcnt %d++", + (void *)&tbl_data->jump, cnt); + } else { ++ /* old jump should not make the table ref++. */ ++ flow_dv_tbl_resource_release(dev, &tbl_data->tbl); + assert(tbl_data->jump.action); + DRV_LOG(DEBUG, "existed jump table resource %p: refcnt %d++", + (void *)&tbl_data->jump, cnt); +@@ -2799,8 +2970,6 @@ flow_dv_create_action_l2_encap(struct rte_eth_dev *dev, + (const struct rte_flow_action_raw_encap *)action->conf; + res.size = raw_encap_data->size; + memcpy(res.buf, raw_encap_data->data, res.size); +- if (flow_dv_zero_encap_udp_csum(res.buf, error)) +- return -rte_errno; + } else { + if (action->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) + encap_data = +@@ -2814,6 +2983,8 @@ flow_dv_create_action_l2_encap(struct rte_eth_dev *dev, + &res.size, error)) + return -rte_errno; + } ++ if (flow_dv_zero_encap_udp_csum(res.buf, error)) ++ return -rte_errno; + if (flow_dv_encap_decap_resource_register(dev, &res, dev_flow, error)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, +@@ -2907,12 +3078,12 @@ flow_dv_create_action_raw_encap(struct rte_eth_dev *dev, + * + * @param[in] dev + * Pointer to rte_eth_dev structure. +- * @param[in] vlan_tag +- * the vlan tag to push to the Ethernet header. +- * @param[in, out] dev_flow +- * Pointer to the mlx5_flow. + * @param[in] attr + * Pointer to the flow attributes. ++ * @param[in] vlan ++ * Pointer to the vlan to push to the Ethernet header. ++ * @param[in, out] dev_flow ++ * Pointer to the mlx5_flow. + * @param[out] error + * Pointer to the error structure. 
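[Annotation] flow_dv_validate_action_raw_encap_decap() above classifies a decap/encap pair purely by buffer sizes against MLX5_ENCAPSULATION_DECISION_SIZE: a small decap plus a large encap is treated as an L3 encapsulation (the decap half drops out), the mirror case as an L3 decapsulation, and two large buffers as an ordinary L2 decap followed by L2 encap. A standalone sketch of that decision; the threshold value and enum names are illustrative, not the driver's.

#include <stdint.h>
#include <stdio.h>

/* Illustrative threshold; the real MLX5_ENCAPSULATION_DECISION_SIZE
 * is defined by the driver, not by this sketch. */
#define DECISION_SIZE 18

enum reformat_kind { L3_ENCAP, L3_DECAP, L2_ENCAP_DECAP, UNSUPPORTED };

static enum reformat_kind
classify(uint32_t decap_size, uint32_t encap_size)
{
    if (decap_size <= DECISION_SIZE && encap_size > DECISION_SIZE)
        return L3_ENCAP;        /* strip L2, push a full tunnel header */
    if (encap_size <= DECISION_SIZE && decap_size > DECISION_SIZE)
        return L3_DECAP;        /* strip a full tunnel header, push L2 */
    if (encap_size > DECISION_SIZE && decap_size > DECISION_SIZE)
        return L2_ENCAP_DECAP;  /* two independent L2 actions          */
    return UNSUPPORTED;         /* both buffers too small              */
}

int main(void)
{
    printf("%d %d %d %d\n",
           classify(14, 50),    /* L3_ENCAP       */
           classify(50, 14),    /* L3_DECAP       */
           classify(50, 50),    /* L2_ENCAP_DECAP */
           classify(10, 10));   /* UNSUPPORTED    */
    return 0;
}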
+ * +@@ -2962,7 +3133,7 @@ flow_dv_validate_action_modify_hdr(const uint64_t action_flags, + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + NULL, "action configuration not set"); +- if (action_flags & MLX5_FLOW_ENCAP_ACTIONS) ++ if (action_flags & MLX5_FLOW_ACTION_ENCAP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can't have encap action before" +@@ -3026,10 +3197,14 @@ flow_dv_validate_action_modify_ipv4(const uint64_t action_flags, + struct rte_flow_error *error) + { + int ret = 0; ++ uint64_t layer; + + ret = flow_dv_validate_action_modify_hdr(action_flags, action, error); + if (!ret) { +- if (!(item_flags & MLX5_FLOW_LAYER_L3_IPV4)) ++ layer = (action_flags & MLX5_FLOW_ACTION_DECAP) ? ++ MLX5_FLOW_LAYER_INNER_L3_IPV4 : ++ MLX5_FLOW_LAYER_OUTER_L3_IPV4; ++ if (!(item_flags & layer)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, +@@ -3060,10 +3235,14 @@ flow_dv_validate_action_modify_ipv6(const uint64_t action_flags, + struct rte_flow_error *error) + { + int ret = 0; ++ uint64_t layer; + + ret = flow_dv_validate_action_modify_hdr(action_flags, action, error); + if (!ret) { +- if (!(item_flags & MLX5_FLOW_LAYER_L3_IPV6)) ++ layer = (action_flags & MLX5_FLOW_ACTION_DECAP) ? ++ MLX5_FLOW_LAYER_INNER_L3_IPV6 : ++ MLX5_FLOW_LAYER_OUTER_L3_IPV6; ++ if (!(item_flags & layer)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, +@@ -3094,10 +3273,14 @@ flow_dv_validate_action_modify_tp(const uint64_t action_flags, + struct rte_flow_error *error) + { + int ret = 0; ++ uint64_t layer; + + ret = flow_dv_validate_action_modify_hdr(action_flags, action, error); + if (!ret) { +- if (!(item_flags & MLX5_FLOW_LAYER_L4)) ++ layer = (action_flags & MLX5_FLOW_ACTION_DECAP) ? ++ MLX5_FLOW_LAYER_INNER_L4 : ++ MLX5_FLOW_LAYER_OUTER_L4; ++ if (!(item_flags & layer)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "no transport layer " +@@ -3129,10 +3312,14 @@ flow_dv_validate_action_modify_tcp_seq(const uint64_t action_flags, + struct rte_flow_error *error) + { + int ret = 0; ++ uint64_t layer; + + ret = flow_dv_validate_action_modify_hdr(action_flags, action, error); + if (!ret) { +- if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)) ++ layer = (action_flags & MLX5_FLOW_ACTION_DECAP) ? ++ MLX5_FLOW_LAYER_INNER_L4_TCP : ++ MLX5_FLOW_LAYER_OUTER_L4_TCP; ++ if (!(item_flags & layer)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "no TCP item in" +@@ -3174,10 +3361,14 @@ flow_dv_validate_action_modify_tcp_ack(const uint64_t action_flags, + struct rte_flow_error *error) + { + int ret = 0; ++ uint64_t layer; + + ret = flow_dv_validate_action_modify_hdr(action_flags, action, error); + if (!ret) { +- if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)) ++ layer = (action_flags & MLX5_FLOW_ACTION_DECAP) ? ++ MLX5_FLOW_LAYER_INNER_L4_TCP : ++ MLX5_FLOW_LAYER_OUTER_L4_TCP; ++ if (!(item_flags & layer)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "no TCP item in" +@@ -3218,10 +3409,14 @@ flow_dv_validate_action_modify_ttl(const uint64_t action_flags, + struct rte_flow_error *error) + { + int ret = 0; ++ uint64_t layer; + + ret = flow_dv_validate_action_modify_hdr(action_flags, action, error); + if (!ret) { +- if (!(item_flags & MLX5_FLOW_LAYER_L3)) ++ layer = (action_flags & MLX5_FLOW_ACTION_DECAP) ? 
++ MLX5_FLOW_LAYER_INNER_L3 : ++ MLX5_FLOW_LAYER_OUTER_L3; ++ if (!(item_flags & layer)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, +@@ -3273,7 +3468,7 @@ flow_dv_validate_action_jump(const struct rte_flow_action *action, + target_group = + ((const struct rte_flow_action_jump *)action->conf)->group; + ret = mlx5_flow_group_to_table(attributes, external, target_group, +- &table, error); ++ true, &table, error); + if (ret) + return ret; + if (attributes->group == target_group) +@@ -3359,21 +3554,24 @@ flow_dv_validate_action_port_id(struct rte_eth_dev *dev, + * + * @param dev + * Pointer to rte_eth_dev structure. ++ * @param flags ++ * Flags bits to check if root level. + * + * @return + * Max number of modify header actions device can support. + */ +-static unsigned int +-flow_dv_modify_hdr_action_max(struct rte_eth_dev *dev) ++static inline unsigned int ++flow_dv_modify_hdr_action_max(struct rte_eth_dev *dev __rte_unused, ++ uint64_t flags) + { + /* +- * There's no way to directly query the max cap. Although it has to be +- * acquried by iterative trial, it is a safe assumption that more +- * actions are supported by FW if extensive metadata register is +- * supported. ++ * There's no way to directly query the max capacity from FW. ++ * The maximal value on root table should be assumed to be supported. + */ +- return mlx5_flow_ext_mreg_supported(dev) ? MLX5_MODIFY_NUM : +- MLX5_MODIFY_NUM_NO_MREG; ++ if (!(flags & MLX5DV_DR_ACTION_FLAGS_ROOT_LEVEL)) ++ return MLX5_MAX_MODIFY_NUM; ++ else ++ return MLX5_ROOT_TBL_MODIFY_NUM; + } + + /** +@@ -3402,7 +3600,12 @@ mlx5_flow_validate_action_meter(struct rte_eth_dev *dev, + { + struct mlx5_priv *priv = dev->data->dev_private; + const struct rte_flow_action_meter *am = action->conf; +- struct mlx5_flow_meter *fm = mlx5_flow_meter_find(priv, am->mtr_id); ++ struct mlx5_flow_meter *fm; ++ ++ if (!am) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ACTION, NULL, ++ "meter action conf is NULL"); + + if (action_flags & MLX5_FLOW_ACTION_METER) + return rte_flow_error_set(error, ENOTSUP, +@@ -3417,6 +3620,7 @@ mlx5_flow_validate_action_meter(struct rte_eth_dev *dev, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "meter action not supported"); ++ fm = mlx5_flow_meter_find(priv, am->mtr_id); + if (!fm) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, +@@ -3458,8 +3662,12 @@ flow_dv_modify_hdr_resource_register + struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_flow_dv_modify_hdr_resource *cache_resource; + struct mlx5dv_dr_domain *ns; ++ uint32_t actions_len; + +- if (resource->actions_num > flow_dv_modify_hdr_action_max(dev)) ++ resource->flags = ++ dev_flow->group ? 0 : MLX5DV_DR_ACTION_FLAGS_ROOT_LEVEL; ++ if (resource->actions_num > flow_dv_modify_hdr_action_max(dev, ++ resource->flags)) + return rte_flow_error_set(error, EOVERFLOW, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "too many modify header items"); +@@ -3469,17 +3677,15 @@ flow_dv_modify_hdr_resource_register + ns = sh->tx_domain; + else + ns = sh->rx_domain; +- resource->flags = +- dev_flow->group ? 0 : MLX5DV_DR_ACTION_FLAGS_ROOT_LEVEL; + /* Lookup a matching resource from cache. 
*/ ++ actions_len = resource->actions_num * sizeof(resource->actions[0]); + LIST_FOREACH(cache_resource, &sh->modify_cmds, next) { + if (resource->ft_type == cache_resource->ft_type && + resource->actions_num == cache_resource->actions_num && + resource->flags == cache_resource->flags && + !memcmp((const void *)resource->actions, + (const void *)cache_resource->actions, +- (resource->actions_num * +- sizeof(resource->actions[0])))) { ++ actions_len)) { + DRV_LOG(DEBUG, "modify-header resource %p: refcnt %d++", + (void *)cache_resource, + rte_atomic32_read(&cache_resource->refcnt)); +@@ -3489,18 +3695,18 @@ flow_dv_modify_hdr_resource_register + } + } + /* Register new modify-header resource. */ +- cache_resource = rte_calloc(__func__, 1, sizeof(*cache_resource), 0); ++ cache_resource = rte_calloc(__func__, 1, ++ sizeof(*cache_resource) + actions_len, 0); + if (!cache_resource) + return rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "cannot allocate resource memory"); + *cache_resource = *resource; ++ rte_memcpy(cache_resource->actions, resource->actions, actions_len); + cache_resource->verbs_action = + mlx5_glue->dv_create_flow_action_modify_header +- (sh->ctx, cache_resource->ft_type, +- ns, cache_resource->flags, +- cache_resource->actions_num * +- sizeof(cache_resource->actions[0]), ++ (sh->ctx, cache_resource->ft_type, ns, ++ cache_resource->flags, actions_len, + (uint64_t *)cache_resource->actions); + if (!cache_resource->verbs_action) { + rte_free(cache_resource); +@@ -3846,11 +4052,13 @@ _flow_dv_query_count(struct rte_eth_dev *dev, + * The devX counter handle. + * @param[in] batch + * Whether the pool is for counter that was allocated by batch command. ++ * @param[in/out] cont_cur ++ * Pointer to the container pointer, it will be update in pool resize. + * + * @return +- * A new pool pointer on success, NULL otherwise and rte_errno is set. ++ * The pool container pointer on success, NULL otherwise and rte_errno is set. + */ +-static struct mlx5_flow_counter_pool * ++static struct mlx5_pools_container * + flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, + uint32_t batch) + { +@@ -3884,12 +4092,12 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, + */ + rte_atomic64_set(&pool->query_gen, 0x2); + TAILQ_INIT(&pool->counters); +- TAILQ_INSERT_TAIL(&cont->pool_list, pool, next); ++ TAILQ_INSERT_HEAD(&cont->pool_list, pool, next); + cont->pools[n_valid] = pool; + /* Pool initialization must be updated before host thread access. */ + rte_cio_wmb(); + rte_atomic16_add(&cont->n_valid, 1); +- return pool; ++ return cont; + } + + /** +@@ -3903,33 +4111,35 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, + * Whether the pool is for counter that was allocated by batch command. + * + * @return +- * The free counter pool pointer and @p cnt_free is set on success, ++ * The counter container pointer and @p cnt_free is set on success, + * NULL otherwise and rte_errno is set. 
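[Annotation] flow_dv_modify_hdr_resource_register() above switches the cached resource to a variable-length allocation: the action array is carried behind the fixed header (sizeof(*cache_resource) + actions_len) and cache hits are detected with a memcmp over exactly actions_len bytes. A standalone sketch of the same pattern with plain calloc/memcpy instead of the rte_* helpers; the struct layout is illustrative.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct mod_hdr_res {
    struct mod_hdr_res *next;
    uint32_t actions_num;
    uint64_t actions[];          /* flexible array: actions_num entries */
};

static struct mod_hdr_res *cache;

/* Return a cached entry with the same action list, or allocate one. */
static struct mod_hdr_res *
register_res(const uint64_t *actions, uint32_t num)
{
    size_t len = num * sizeof(actions[0]);
    struct mod_hdr_res *r;

    for (r = cache; r; r = r->next)
        if (r->actions_num == num && !memcmp(r->actions, actions, len))
            return r;            /* cache hit: reuse and (here) share  */
    r = calloc(1, sizeof(*r) + len);
    if (!r)
        return NULL;
    r->actions_num = num;
    memcpy(r->actions, actions, len);
    r->next = cache;
    cache = r;
    return r;
}

int main(void)
{
    uint64_t a[] = { 1, 2, 3 };
    struct mod_hdr_res *r1 = register_res(a, 3);
    struct mod_hdr_res *r2 = register_res(a, 3);

    printf("same entry reused: %d\n", r1 == r2);   /* 1 */
    return 0;
}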
+ */ +-static struct mlx5_flow_counter_pool * ++static struct mlx5_pools_container * + flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, + struct mlx5_flow_counter **cnt_free, + uint32_t batch) + { + struct mlx5_priv *priv = dev->data->dev_private; ++ struct mlx5_pools_container *cont; + struct mlx5_flow_counter_pool *pool; + struct mlx5_devx_obj *dcs = NULL; + struct mlx5_flow_counter *cnt; + uint32_t i; + ++ cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0); + if (!batch) { + /* bulk_bitmap must be 0 for single counter allocation. */ + dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0); + if (!dcs) + return NULL; +- pool = flow_dv_find_pool_by_id +- (MLX5_CNT_CONTAINER(priv->sh, batch, 0), dcs->id); ++ pool = flow_dv_find_pool_by_id(cont, dcs->id); + if (!pool) { +- pool = flow_dv_pool_create(dev, dcs, batch); +- if (!pool) { ++ cont = flow_dv_pool_create(dev, dcs, batch); ++ if (!cont) { + mlx5_devx_cmd_destroy(dcs); + return NULL; + } ++ pool = TAILQ_FIRST(&cont->pool_list); + } else if (dcs->id < pool->min_dcs->id) { + rte_atomic64_set(&pool->a64_dcs, + (int64_t)(uintptr_t)dcs); +@@ -3938,7 +4148,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, + TAILQ_INSERT_HEAD(&pool->counters, cnt, next); + cnt->dcs = dcs; + *cnt_free = cnt; +- return pool; ++ return cont; + } + /* bulk_bitmap is in 128 counters units. */ + if (priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4) +@@ -3947,18 +4157,19 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, + rte_errno = ENODATA; + return NULL; + } +- pool = flow_dv_pool_create(dev, dcs, batch); +- if (!pool) { ++ cont = flow_dv_pool_create(dev, dcs, batch); ++ if (!cont) { + mlx5_devx_cmd_destroy(dcs); + return NULL; + } ++ pool = TAILQ_FIRST(&cont->pool_list); + for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { + cnt = &pool->counters_raw[i]; + cnt->pool = pool; + TAILQ_INSERT_HEAD(&pool->counters, cnt, next); + } + *cnt_free = &pool->counters_raw[0]; +- return pool; ++ return cont; + } + + /** +@@ -4059,9 +4270,10 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, + cnt_free = NULL; + } + if (!cnt_free) { +- pool = flow_dv_counter_pool_prepare(dev, &cnt_free, batch); +- if (!pool) ++ cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch); ++ if (!cont) + return NULL; ++ pool = TAILQ_FIRST(&cont->pool_list); + } + cnt_free->batch = batch; + /* Create a DV counter action only in the first time usage. */ +@@ -4146,7 +4358,9 @@ flow_dv_counter_release(struct rte_eth_dev *dev, + * Pointer to error structure. + * + * @return +- * 0 on success, a negative errno value otherwise and rte_errno is set. ++ * - 0 on success and non root table. ++ * - 1 on success and root table. ++ * - a negative errno value otherwise and rte_errno is set. 
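[Annotation] The counter allocation helpers above now return the pools container rather than a pool pointer: a pool-array resize can replace the container, so the caller must re-read it and then take the freshly created pool from the head of the list. A much simplified sketch of that contract; the real container also carries a DevX object array, atomics and a resize path, which are omitted here.

#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct pool {
    TAILQ_ENTRY(pool) next;
    int id;
};

struct container {
    TAILQ_HEAD(, pool) pool_list;
    int n_pools;
};

/* Create a pool, insert it at the head, and return the container so
 * the caller always works with the (possibly replaced) current one. */
static struct container *pool_create(struct container *cont, int id)
{
    struct pool *p = calloc(1, sizeof(*p));

    if (!p)
        return NULL;
    p->id = id;
    TAILQ_INSERT_HEAD(&cont->pool_list, p, next);
    cont->n_pools++;
    return cont;
}

int main(void)
{
    struct container c;

    TAILQ_INIT(&c.pool_list);
    c.n_pools = 0;
    struct container *cur = pool_create(&c, 42);
    /* The freshly created pool is always the list head. */
    printf("newest pool id: %d\n", TAILQ_FIRST(&cur->pool_list)->id);
    return 0;
}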
+ */ + static int + flow_dv_validate_attributes(struct rte_eth_dev *dev, +@@ -4156,6 +4370,7 @@ flow_dv_validate_attributes(struct rte_eth_dev *dev, + { + struct mlx5_priv *priv = dev->data->dev_private; + uint32_t priority_max = priv->config.flow_prio - 1; ++ int ret = 0; + + #ifndef HAVE_MLX5DV_DR + if (attributes->group) +@@ -4164,14 +4379,15 @@ flow_dv_validate_attributes(struct rte_eth_dev *dev, + NULL, + "groups are not supported"); + #else +- uint32_t table; +- int ret; ++ uint32_t table = 0; + + ret = mlx5_flow_group_to_table(attributes, external, +- attributes->group, ++ attributes->group, !!priv->fdb_def_rule, + &table, error); + if (ret) + return ret; ++ if (!table) ++ ret = MLX5DV_DR_ACTION_FLAGS_ROOT_LEVEL; + #endif + if (attributes->priority != MLX5_FLOW_PRIO_RSVD && + attributes->priority >= priority_max) +@@ -4201,7 +4417,7 @@ flow_dv_validate_attributes(struct rte_eth_dev *dev, + RTE_FLOW_ERROR_TYPE_ATTR, NULL, + "must specify exactly one of " + "ingress or egress"); +- return 0; ++ return ret; + } + + /** +@@ -4217,6 +4433,8 @@ flow_dv_validate_attributes(struct rte_eth_dev *dev, + * Pointer to the list of actions. + * @param[in] external + * This flow rule is created by request external to PMD. ++ * @param[in] hairpin ++ * Number of hairpin TX actions, 0 means classic flow. + * @param[out] error + * Pointer to the error structure. + * +@@ -4227,7 +4445,7 @@ static int + flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], +- bool external, struct rte_flow_error *error) ++ bool external, int hairpin, struct rte_flow_error *error) + { + int ret; + uint64_t action_flags = 0; +@@ -4236,7 +4454,11 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + uint8_t next_protocol = 0xff; + uint16_t ether_type = 0; + int actions_n = 0; ++ uint8_t item_ipv6_proto = 0; + const struct rte_flow_item *gre_item = NULL; ++ const struct rte_flow_action_raw_decap *decap; ++ const struct rte_flow_action_raw_encap *encap; ++ const struct rte_flow_action_rss *rss; + struct rte_flow_item_tcp nic_tcp_mask = { + .hdr = { + .tcp_flags = 0xFF, +@@ -4246,12 +4468,17 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + }; + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_dev_config *dev_conf = &priv->config; ++ uint16_t queue_index = 0xFFFF; ++ const struct rte_flow_item_vlan *vlan_m = NULL; ++ int16_t rw_act_num = 0; ++ uint64_t is_root; + + if (items == NULL) + return -1; + ret = flow_dv_validate_attributes(dev, attr, external, error); + if (ret < 0) + return ret; ++ is_root = (uint64_t)ret; + for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { + int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + int type = items->type; +@@ -4286,8 +4513,8 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + } + break; + case RTE_FLOW_ITEM_TYPE_VLAN: +- ret = mlx5_flow_validate_item_vlan(items, item_flags, +- dev, error); ++ ret = flow_dv_validate_item_vlan(items, item_flags, ++ dev, error); + if (ret < 0) + return ret; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : +@@ -4303,6 +4530,9 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + } else { + ether_type = 0; + } ++ /* Store outer VLAN mask for of_push_vlan action. 
*/ ++ if (!tunnel) ++ vlan_m = items->mask; + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + mlx5_flow_tunnel_ip_check(items, next_protocol, +@@ -4343,6 +4573,9 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + if (items->mask != NULL && + ((const struct rte_flow_item_ipv6 *) + items->mask)->hdr.proto) { ++ item_ipv6_proto = ++ ((const struct rte_flow_item_ipv6 *) ++ items->spec)->hdr.proto; + next_protocol = + ((const struct rte_flow_item_ipv6 *) + items->spec)->hdr.proto; +@@ -4418,7 +4651,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + error); + if (ret < 0) + return ret; +- last_item = MLX5_FLOW_LAYER_VXLAN_GPE; ++ last_item = MLX5_FLOW_LAYER_GENEVE; + break; + case RTE_FLOW_ITEM_TYPE_MPLS: + ret = mlx5_flow_validate_item_mpls(dev, items, +@@ -4457,6 +4690,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + error); + if (ret < 0) + return ret; ++ item_ipv6_proto = IPPROTO_ICMPV6; + last_item = MLX5_FLOW_LAYER_ICMP6; + break; + case RTE_FLOW_ITEM_TYPE_TAG: +@@ -4512,6 +4746,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + action_flags |= MLX5_FLOW_ACTION_FLAG; + ++actions_n; + } ++ rw_act_num += MLX5_ACT_NUM_SET_MARK; + break; + case RTE_FLOW_ACTION_TYPE_MARK: + ret = flow_dv_validate_action_mark(dev, actions, +@@ -4530,6 +4765,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + action_flags |= MLX5_FLOW_ACTION_MARK; + ++actions_n; + } ++ rw_act_num += MLX5_ACT_NUM_SET_MARK; + break; + case RTE_FLOW_ACTION_TYPE_SET_META: + ret = flow_dv_validate_action_set_meta(dev, actions, +@@ -4541,6 +4777,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + if (!(action_flags & MLX5_FLOW_MODIFY_HDR_ACTIONS)) + ++actions_n; + action_flags |= MLX5_FLOW_ACTION_SET_META; ++ rw_act_num += MLX5_ACT_NUM_SET_META; + break; + case RTE_FLOW_ACTION_TYPE_SET_TAG: + ret = flow_dv_validate_action_set_tag(dev, actions, +@@ -4552,6 +4789,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + if (!(action_flags & MLX5_FLOW_MODIFY_HDR_ACTIONS)) + ++actions_n; + action_flags |= MLX5_FLOW_ACTION_SET_TAG; ++ rw_act_num += MLX5_ACT_NUM_SET_TAG; + break; + case RTE_FLOW_ACTION_TYPE_DROP: + ret = mlx5_flow_validate_action_drop(action_flags, +@@ -4567,16 +4805,21 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + attr, error); + if (ret < 0) + return ret; ++ queue_index = ((const struct rte_flow_action_queue *) ++ (actions->conf))->index; + action_flags |= MLX5_FLOW_ACTION_QUEUE; + ++actions_n; + break; + case RTE_FLOW_ACTION_TYPE_RSS: ++ rss = actions->conf; + ret = mlx5_flow_validate_action_rss(actions, + action_flags, dev, + attr, item_flags, + error); + if (ret < 0) + return ret; ++ if (rss != NULL && rss->queue_num) ++ queue_index = rss->queue[0]; + action_flags |= MLX5_FLOW_ACTION_RSS; + ++actions_n; + break; +@@ -4598,8 +4841,9 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + ++actions_n; + break; + case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: +- ret = flow_dv_validate_action_push_vlan(action_flags, +- item_flags, ++ ret = flow_dv_validate_action_push_vlan(dev, ++ action_flags, ++ vlan_m, + actions, attr, + error); + if (ret < 0) +@@ -4623,49 +4867,52 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + return ret; + /* Count VID with push_vlan command. 
*/ + action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID; ++ rw_act_num += MLX5_ACT_NUM_MDF_VID; + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: + case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: +- ret = flow_dv_validate_action_l2_encap(action_flags, ++ ret = flow_dv_validate_action_l2_encap(dev, ++ action_flags, + actions, attr, + error); + if (ret < 0) + return ret; +- action_flags |= actions->type == +- RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP ? +- MLX5_FLOW_ACTION_VXLAN_ENCAP : +- MLX5_FLOW_ACTION_NVGRE_ENCAP; ++ action_flags |= MLX5_FLOW_ACTION_ENCAP; + ++actions_n; + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: + case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: +- ret = flow_dv_validate_action_l2_decap(action_flags, +- attr, error); ++ ret = flow_dv_validate_action_decap(dev, action_flags, ++ attr, error); + if (ret < 0) + return ret; +- action_flags |= actions->type == +- RTE_FLOW_ACTION_TYPE_VXLAN_DECAP ? +- MLX5_FLOW_ACTION_VXLAN_DECAP : +- MLX5_FLOW_ACTION_NVGRE_DECAP; ++ action_flags |= MLX5_FLOW_ACTION_DECAP; + ++actions_n; + break; + case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: +- ret = flow_dv_validate_action_raw_encap(action_flags, +- actions, attr, +- error); ++ ret = flow_dv_validate_action_raw_encap_decap ++ (dev, NULL, actions->conf, attr, &action_flags, ++ &actions_n, error); + if (ret < 0) + return ret; +- action_flags |= MLX5_FLOW_ACTION_RAW_ENCAP; +- ++actions_n; + break; + case RTE_FLOW_ACTION_TYPE_RAW_DECAP: +- ret = flow_dv_validate_action_raw_decap(action_flags, +- actions, attr, +- error); ++ decap = actions->conf; ++ while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID) ++ ; ++ if (actions->type != RTE_FLOW_ACTION_TYPE_RAW_ENCAP) { ++ encap = NULL; ++ actions--; ++ } else { ++ encap = actions->conf; ++ } ++ ret = flow_dv_validate_action_raw_encap_decap ++ (dev, ++ decap ? decap : &empty_decap, encap, ++ attr, &action_flags, &actions_n, ++ error); + if (ret < 0) + return ret; +- action_flags |= MLX5_FLOW_ACTION_RAW_DECAP; +- ++actions_n; + break; + case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC: + case RTE_FLOW_ACTION_TYPE_SET_MAC_DST: +@@ -4682,8 +4929,15 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ? + MLX5_FLOW_ACTION_SET_MAC_SRC : + MLX5_FLOW_ACTION_SET_MAC_DST; ++ /* ++ * Even if the source and destination MAC addresses have ++ * overlap in the header with 4B alignment, the convert ++ * function will handle them separately and 4 SW actions ++ * will be created. And 2 actions will be added each ++ * time no matter how many bytes of address will be set. ++ */ ++ rw_act_num += MLX5_ACT_NUM_MDF_MAC; + break; +- + case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC: + case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST: + ret = flow_dv_validate_action_modify_ipv4(action_flags, +@@ -4699,6 +4953,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? + MLX5_FLOW_ACTION_SET_IPV4_SRC : + MLX5_FLOW_ACTION_SET_IPV4_DST; ++ rw_act_num += MLX5_ACT_NUM_MDF_IPV4; + break; + case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC: + case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST: +@@ -4708,6 +4963,12 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + error); + if (ret < 0) + return ret; ++ if (item_ipv6_proto == IPPROTO_ICMPV6) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ actions, ++ "Can't change header " ++ "with ICMPv6 proto"); + /* Count all modify-header actions as one action. 
*/ + if (!(action_flags & MLX5_FLOW_MODIFY_HDR_ACTIONS)) + ++actions_n; +@@ -4715,6 +4976,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? + MLX5_FLOW_ACTION_SET_IPV6_SRC : + MLX5_FLOW_ACTION_SET_IPV6_DST; ++ rw_act_num += MLX5_ACT_NUM_MDF_IPV6; + break; + case RTE_FLOW_ACTION_TYPE_SET_TP_SRC: + case RTE_FLOW_ACTION_TYPE_SET_TP_DST: +@@ -4731,6 +4993,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + RTE_FLOW_ACTION_TYPE_SET_TP_SRC ? + MLX5_FLOW_ACTION_SET_TP_SRC : + MLX5_FLOW_ACTION_SET_TP_DST; ++ rw_act_num += MLX5_ACT_NUM_MDF_PORT; + break; + case RTE_FLOW_ACTION_TYPE_DEC_TTL: + case RTE_FLOW_ACTION_TYPE_SET_TTL: +@@ -4747,6 +5010,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + RTE_FLOW_ACTION_TYPE_SET_TTL ? + MLX5_FLOW_ACTION_SET_TTL : + MLX5_FLOW_ACTION_DEC_TTL; ++ rw_act_num += MLX5_ACT_NUM_MDF_TTL; + break; + case RTE_FLOW_ACTION_TYPE_JUMP: + ret = flow_dv_validate_action_jump(actions, +@@ -4774,6 +5038,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ ? + MLX5_FLOW_ACTION_INC_TCP_SEQ : + MLX5_FLOW_ACTION_DEC_TCP_SEQ; ++ rw_act_num += MLX5_ACT_NUM_MDF_TCPSEQ; + break; + case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK: + case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK: +@@ -4791,10 +5056,13 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + RTE_FLOW_ACTION_TYPE_INC_TCP_ACK ? + MLX5_FLOW_ACTION_INC_TCP_ACK : + MLX5_FLOW_ACTION_DEC_TCP_ACK; ++ rw_act_num += MLX5_ACT_NUM_MDF_TCPACK; + break; +- case MLX5_RTE_FLOW_ACTION_TYPE_TAG: + case MLX5_RTE_FLOW_ACTION_TYPE_MARK: ++ break; ++ case MLX5_RTE_FLOW_ACTION_TYPE_TAG: + case MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG: ++ rw_act_num += MLX5_ACT_NUM_SET_TAG; + break; + case RTE_FLOW_ACTION_TYPE_METER: + ret = mlx5_flow_validate_action_meter(dev, +@@ -4805,6 +5073,8 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + return ret; + action_flags |= MLX5_FLOW_ACTION_METER; + ++actions_n; ++ /* Meter action will add one more TAG action. */ ++ rw_act_num += MLX5_ACT_NUM_SET_TAG; + break; + default: + return rte_flow_error_set(error, ENOTSUP, +@@ -4813,13 +5083,18 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + "action not supported"); + } + } +- if ((action_flags & MLX5_FLOW_LAYER_TUNNEL) && +- (action_flags & MLX5_FLOW_VLAN_ACTIONS)) +- return rte_flow_error_set(error, ENOTSUP, +- RTE_FLOW_ERROR_TYPE_ACTION, +- actions, +- "can't have vxlan and vlan" +- " actions in the same rule"); ++ /* ++ * Validate the drop action mutual exclusion with other actions. ++ * Drop action is mutually-exclusive with any other action, except for ++ * Count action. 
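[Annotation] The RAW_DECAP branch a few lines up peeks ahead in the action list, skipping VOID entries, to see whether a RAW_ENCAP immediately follows; only then are the two validated together as a pair. A standalone sketch of that lookahead; the enum values are illustrative, not the rte_flow action type numbers.

#include <stdio.h>

enum act_type { ACT_VOID, ACT_RAW_DECAP, ACT_RAW_ENCAP, ACT_QUEUE, ACT_END };

/* Given a pointer to a RAW_DECAP action, return 1 if the next
 * non-VOID action is a RAW_ENCAP, i.e. decap and encap form a pair. */
static int decap_followed_by_encap(const enum act_type *act)
{
    while (*++act == ACT_VOID)
        ;
    return *act == ACT_RAW_ENCAP;
}

int main(void)
{
    enum act_type flow1[] = { ACT_RAW_DECAP, ACT_VOID, ACT_RAW_ENCAP, ACT_END };
    enum act_type flow2[] = { ACT_RAW_DECAP, ACT_QUEUE, ACT_END };

    printf("%d %d\n",
           decap_followed_by_encap(&flow1[0]),   /* 1 */
           decap_followed_by_encap(&flow2[0]));  /* 0 */
    return 0;
}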
++ */ ++ if ((action_flags & MLX5_FLOW_ACTION_DROP) && ++ (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT))) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ACTION, NULL, ++ "Drop action is mutually-exclusive " ++ "with any other action, except for " ++ "Count action"); + /* Eswitch has few restrictions on using items and actions */ + if (attr->transfer) { + if (!mlx5_flow_ext_mreg_supported(dev) && +@@ -4856,6 +5131,37 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + actions, + "no fate action is found"); + } ++ /* Continue validation for Xcap actions.*/ ++ if ((action_flags & MLX5_FLOW_XCAP_ACTIONS) && (queue_index == 0xFFFF || ++ mlx5_rxq_get_type(dev, queue_index) != MLX5_RXQ_TYPE_HAIRPIN)) { ++ if ((action_flags & MLX5_FLOW_XCAP_ACTIONS) == ++ MLX5_FLOW_XCAP_ACTIONS) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ NULL, "encap and decap " ++ "combination aren't supported"); ++ if (!attr->transfer && attr->ingress && (action_flags & ++ MLX5_FLOW_ACTION_ENCAP)) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ NULL, "encap is not supported" ++ " for ingress traffic"); ++ } ++ /* Hairpin flow will add one more TAG action. */ ++ if (hairpin > 0) ++ rw_act_num += MLX5_ACT_NUM_SET_TAG; ++ /* extra metadata enabled: one more TAG action will be add. */ ++ if (dev_conf->dv_flow_en && ++ dev_conf->dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && ++ mlx5_flow_ext_mreg_supported(dev)) ++ rw_act_num += MLX5_ACT_NUM_SET_TAG; ++ if ((uint32_t)rw_act_num > ++ flow_dv_modify_hdr_action_max(dev, is_root)) { ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ NULL, "too many header modify" ++ " actions to support"); ++ } + return 0; + } + +@@ -4984,6 +5290,23 @@ flow_dv_translate_item_eth(void *matcher, void *key, + /* The value must be in the range of the mask. */ + for (i = 0; i < sizeof(eth_m->dst); ++i) + l24_v[i] = eth_m->src.addr_bytes[i] & eth_v->src.addr_bytes[i]; ++ if (eth_v->type) { ++ /* When ethertype is present set mask for tagged VLAN. */ ++ MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1); ++ /* Set value for tagged VLAN if ethertype is 802.1Q. */ ++ if (eth_v->type == RTE_BE16(RTE_ETHER_TYPE_VLAN) || ++ eth_v->type == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { ++ MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, ++ 1); ++ /* Return here to avoid setting match on ethertype. */ ++ return; ++ } ++ } ++ /* ++ * HW supports match on one Ethertype, the Ethertype following the last ++ * VLAN tag of the packet (see PRM). ++ * Set match on ethertype only if ETH header is not followed by VLAN. ++ */ + MLX5_SET(fte_match_set_lyr_2_4, headers_m, ethertype, + rte_be_to_cpu_16(eth_m->type)); + l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, ethertype); +@@ -5017,10 +5340,6 @@ flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow, + uint16_t tci_m; + uint16_t tci_v; + +- if (!vlan_v) +- return; +- if (!vlan_m) +- vlan_m = &rte_flow_item_vlan_mask; + if (inner) { + headers_m = MLX5_ADDR_OF(fte_match_param, matcher, + inner_headers); +@@ -5033,13 +5352,22 @@ flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow, + * This is workaround, masks are not supported, + * and pre-validated. 
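[Annotation] The validation pass now keeps a running count (rw_act_num) of how many low-level header-rewrite entries a flow will need: TAG/META writes, MAC/IP/port/TTL/TCP rewrites, plus extra TAG writes for hairpin, meter and extended metadata, and rejects the flow when the count exceeds the limit for the target table (root tables support fewer rewrites than non-root ones). A toy version of that budgeting; the per-action costs and limits below are made up, the driver's MLX5_ACT_NUM_* and MLX5_MAX_MODIFY_NUM / MLX5_ROOT_TBL_MODIFY_NUM values differ.

#include <stdio.h>

#define COST_SET_TAG  1
#define COST_SET_MAC  2
#define COST_SET_IPV6 4
#define MAX_NON_ROOT  16
#define MAX_ROOT      8

static int validate_budget(int rw_act_num, int is_root)
{
    int max = is_root ? MAX_ROOT : MAX_NON_ROOT;

    return rw_act_num <= max ? 0 : -1;
}

int main(void)
{
    int rw_act_num = 0;

    rw_act_num += COST_SET_MAC;    /* set-mac-src           */
    rw_act_num += COST_SET_IPV6;   /* set-ipv6-dst          */
    rw_act_num += COST_SET_TAG;    /* hairpin adds one TAG  */
    printf("non-root: %d, root: %d\n",
           validate_budget(rw_act_num, 0),   /* 0: fits      */
           validate_budget(rw_act_num, 1));  /* 0: also fits */
    rw_act_num += 3 * COST_SET_IPV6;
    printf("root after more rewrites: %d\n",
           validate_budget(rw_act_num, 1));  /* -1: too many */
    return 0;
}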
+ */ +- dev_flow->dv.vf_vlan.tag = +- rte_be_to_cpu_16(vlan_v->tci) & 0x0fff; ++ if (vlan_v) ++ dev_flow->dv.vf_vlan.tag = ++ rte_be_to_cpu_16(vlan_v->tci) & 0x0fff; + } +- tci_m = rte_be_to_cpu_16(vlan_m->tci); +- tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci); ++ /* ++ * When VLAN item exists in flow, mark packet as tagged, ++ * even if TCI is not specified. ++ */ + MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); ++ if (!vlan_v) ++ return; ++ if (!vlan_m) ++ vlan_m = &rte_flow_item_vlan_mask; ++ tci_m = rte_be_to_cpu_16(vlan_m->tci); ++ tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci); + MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_vid, tci_m); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, tci_v); + MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_cfi, tci_m >> 12); +@@ -5061,6 +5389,8 @@ flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow, + * Flow matcher value. + * @param[in] item + * Flow pattern to translate. ++ * @param[in] item_flags ++ * Bit-fields that holds the items detected until now. + * @param[in] inner + * Item is inner pattern. + * @param[in] group +@@ -5069,6 +5399,7 @@ flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow, + static void + flow_dv_translate_item_ipv4(void *matcher, void *key, + const struct rte_flow_item *item, ++ const uint64_t item_flags, + int inner, uint32_t group) + { + const struct rte_flow_item_ipv4 *ipv4_m = item->mask; +@@ -5101,6 +5432,13 @@ flow_dv_translate_item_ipv4(void *matcher, void *key, + else + MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0x4); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 4); ++ /* ++ * On outer header (which must contains L2), or inner header with L2, ++ * set cvlan_tag mask bit to mark this packet as untagged. ++ * This should be done even if item->spec is empty. ++ */ ++ if (!inner || item_flags & MLX5_FLOW_LAYER_INNER_L2) ++ MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1); + if (!ipv4_v) + return; + if (!ipv4_m) +@@ -5139,6 +5477,8 @@ flow_dv_translate_item_ipv4(void *matcher, void *key, + * Flow matcher value. + * @param[in] item + * Flow pattern to translate. ++ * @param[in] item_flags ++ * Bit-fields that holds the items detected until now. + * @param[in] inner + * Item is inner pattern. + * @param[in] group +@@ -5147,6 +5487,7 @@ flow_dv_translate_item_ipv4(void *matcher, void *key, + static void + flow_dv_translate_item_ipv6(void *matcher, void *key, + const struct rte_flow_item *item, ++ const uint64_t item_flags, + int inner, uint32_t group) + { + const struct rte_flow_item_ipv6 *ipv6_m = item->mask; +@@ -5189,6 +5530,13 @@ flow_dv_translate_item_ipv6(void *matcher, void *key, + else + MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0x6); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 6); ++ /* ++ * On outer header (which must contains L2), or inner header with L2, ++ * set cvlan_tag mask bit to mark this packet as untagged. ++ * This should be done even if item->spec is empty. 
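[Annotation] The matcher-translation changes above encode VLAN presence explicitly: a VLAN item always sets the cvlan_tag bit in both mask and value (packet is tagged, even if no TCI is given), while an IPv4/IPv6 item sitting directly on an L2 header sets the bit only in the mask (packet must be untagged). A small sketch of that decision; the field and flag names are illustrative, not the PRM ones.

#include <stdint.h>
#include <stdio.h>

struct matcher { uint8_t cvlan_mask, cvlan_value; };

/* VLAN item seen: require the tag bit to be set in the packet. */
static void match_tagged(struct matcher *m)
{
    m->cvlan_mask = 1;
    m->cvlan_value = 1;
}

/* L3 item directly after L2 (no VLAN item): require the bit clear. */
static void match_untagged(struct matcher *m)
{
    m->cvlan_mask = 1;
    m->cvlan_value = 0;
}

int main(void)
{
    struct matcher tagged = {0}, untagged = {0};

    match_tagged(&tagged);       /* eth / vlan / ipv4 ... */
    match_untagged(&untagged);   /* eth / ipv4 ...        */
    printf("tagged:   mask=%u value=%u\n", tagged.cvlan_mask, tagged.cvlan_value);
    printf("untagged: mask=%u value=%u\n", untagged.cvlan_mask, untagged.cvlan_value);
    return 0;
}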
++ */ ++ if (!inner || item_flags & MLX5_FLOW_LAYER_INNER_L2) ++ MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1); + if (!ipv6_v) + return; + if (!ipv6_m) +@@ -5354,13 +5702,13 @@ flow_dv_translate_item_gre_key(void *matcher, void *key, + void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters); + rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); + ++ /* GRE K bit must be on and should already be validated */ ++ MLX5_SET(fte_match_set_misc, misc_m, gre_k_present, 1); ++ MLX5_SET(fte_match_set_misc, misc_v, gre_k_present, 1); + if (!key_v) + return; + if (!key_m) + key_m = &gre_key_default_mask; +- /* GRE K bit must be on and should already be validated */ +- MLX5_SET(fte_match_set_misc, misc_m, gre_k_present, 1); +- MLX5_SET(fte_match_set_misc, misc_v, gre_k_present, 1); + MLX5_SET(fte_match_set_misc, misc_m, gre_key_h, + rte_be_to_cpu_32(*key_m) >> 8); + MLX5_SET(fte_match_set_misc, misc_v, gre_key_h, +@@ -5558,6 +5906,76 @@ flow_dv_translate_item_vxlan(void *matcher, void *key, + vni_v[i] = vni_m[i] & vxlan_v->vni[i]; + } + ++/** ++ * Add VXLAN-GPE item to matcher and to the value. ++ * ++ * @param[in, out] matcher ++ * Flow matcher. ++ * @param[in, out] key ++ * Flow matcher value. ++ * @param[in] item ++ * Flow pattern to translate. ++ * @param[in] inner ++ * Item is inner pattern. ++ */ ++ ++static void ++flow_dv_translate_item_vxlan_gpe(void *matcher, void *key, ++ const struct rte_flow_item *item, int inner) ++{ ++ const struct rte_flow_item_vxlan_gpe *vxlan_m = item->mask; ++ const struct rte_flow_item_vxlan_gpe *vxlan_v = item->spec; ++ void *headers_m; ++ void *headers_v; ++ void *misc_m = ++ MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters_3); ++ void *misc_v = ++ MLX5_ADDR_OF(fte_match_param, key, misc_parameters_3); ++ char *vni_m; ++ char *vni_v; ++ uint16_t dport; ++ int size; ++ int i; ++ uint8_t flags_m = 0xff; ++ uint8_t flags_v = 0xc; ++ ++ if (inner) { ++ headers_m = MLX5_ADDR_OF(fte_match_param, matcher, ++ inner_headers); ++ headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers); ++ } else { ++ headers_m = MLX5_ADDR_OF(fte_match_param, matcher, ++ outer_headers); ++ headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers); ++ } ++ dport = item->type == RTE_FLOW_ITEM_TYPE_VXLAN ? ++ MLX5_UDP_PORT_VXLAN : MLX5_UDP_PORT_VXLAN_GPE; ++ if (!MLX5_GET16(fte_match_set_lyr_2_4, headers_v, udp_dport)) { ++ MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport, 0xFFFF); ++ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, dport); ++ } ++ if (!vxlan_v) ++ return; ++ if (!vxlan_m) ++ vxlan_m = &rte_flow_item_vxlan_gpe_mask; ++ size = sizeof(vxlan_m->vni); ++ vni_m = MLX5_ADDR_OF(fte_match_set_misc3, misc_m, outer_vxlan_gpe_vni); ++ vni_v = MLX5_ADDR_OF(fte_match_set_misc3, misc_v, outer_vxlan_gpe_vni); ++ memcpy(vni_m, vxlan_m->vni, size); ++ for (i = 0; i < size; ++i) ++ vni_v[i] = vni_m[i] & vxlan_v->vni[i]; ++ if (vxlan_m->flags) { ++ flags_m = vxlan_m->flags; ++ flags_v = vxlan_v->flags; ++ } ++ MLX5_SET(fte_match_set_misc3, misc_m, outer_vxlan_gpe_flags, flags_m); ++ MLX5_SET(fte_match_set_misc3, misc_v, outer_vxlan_gpe_flags, flags_v); ++ MLX5_SET(fte_match_set_misc3, misc_m, outer_vxlan_gpe_next_protocol, ++ vxlan_m->protocol); ++ MLX5_SET(fte_match_set_misc3, misc_v, outer_vxlan_gpe_next_protocol, ++ vxlan_v->protocol); ++} ++ + /** + * Add Geneve item to matcher and to the value. 
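[Annotation] The new flow_dv_translate_item_vxlan_gpe() above pins the UDP destination port to the well-known value when the pattern does not constrain it, and builds the VNI match byte by byte as value & mask. A standalone sketch of those two steps; the 3-byte VNI layout and port 4790 follow the VXLAN-GPE convention, the struct and function names are illustrative.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define UDP_PORT_VXLAN_GPE 4790

struct gpe_match {
    uint16_t udp_dport_mask, udp_dport_value;
    uint8_t  vni_mask[3], vni_value[3];
};

static void
translate_vxlan_gpe(struct gpe_match *m, const uint8_t spec_vni[3],
                    const uint8_t mask_vni[3])
{
    /* If no earlier UDP item fixed the destination port, pin it to
     * the well-known VXLAN-GPE port. */
    if (!m->udp_dport_mask) {
        m->udp_dport_mask = 0xFFFF;
        m->udp_dport_value = UDP_PORT_VXLAN_GPE;
    }
    /* Keep the VNI value inside its mask, byte by byte. */
    memcpy(m->vni_mask, mask_vni, 3);
    for (int i = 0; i < 3; i++)
        m->vni_value[i] = mask_vni[i] & spec_vni[i];
}

int main(void)
{
    struct gpe_match m = {0};
    uint8_t vni[3]  = { 0x12, 0x34, 0x56 };
    uint8_t mask[3] = { 0xFF, 0xFF, 0x00 };

    translate_vxlan_gpe(&m, vni, mask);
    printf("dport=%u vni=%02x%02x%02x\n", m.udp_dport_value,
           m.vni_value[0], m.vni_value[1], m.vni_value[2]);
    return 0;
}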
+ * +@@ -5742,6 +6160,7 @@ flow_dv_match_meta_reg(void *matcher, void *key, + MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters_2); + void *misc2_v = + MLX5_ADDR_OF(fte_match_param, key, misc_parameters_2); ++ uint32_t temp; + + data &= mask; + switch (reg_type) { +@@ -5754,8 +6173,18 @@ flow_dv_match_meta_reg(void *matcher, void *key, + MLX5_SET(fte_match_set_misc2, misc2_v, metadata_reg_b, data); + break; + case REG_C_0: +- MLX5_SET(fte_match_set_misc2, misc2_m, metadata_reg_c_0, mask); +- MLX5_SET(fte_match_set_misc2, misc2_v, metadata_reg_c_0, data); ++ /* ++ * The metadata register C0 field might be divided into ++ * source vport index and META item value, we should set ++ * this field according to specified mask, not as whole one. ++ */ ++ temp = MLX5_GET(fte_match_set_misc2, misc2_m, metadata_reg_c_0); ++ temp |= mask; ++ MLX5_SET(fte_match_set_misc2, misc2_m, metadata_reg_c_0, temp); ++ temp = MLX5_GET(fte_match_set_misc2, misc2_v, metadata_reg_c_0); ++ temp &= ~mask; ++ temp |= data; ++ MLX5_SET(fte_match_set_misc2, misc2_v, metadata_reg_c_0, temp); + break; + case REG_C_1: + MLX5_SET(fte_match_set_misc2, misc2_m, metadata_reg_c_1, mask); +@@ -5825,6 +6254,15 @@ flow_dv_translate_item_mark(struct rte_eth_dev *dev, + /* Get the metadata register index for the mark. */ + reg = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, NULL); + assert(reg > 0); ++ if (reg == REG_C_0) { ++ struct mlx5_priv *priv = dev->data->dev_private; ++ uint32_t msk_c0 = priv->sh->dv_regc0_mask; ++ uint32_t shl_c0 = rte_bsf32(msk_c0); ++ ++ mask &= msk_c0; ++ mask <<= shl_c0; ++ value <<= shl_c0; ++ } + flow_dv_match_meta_reg(matcher, key, reg, value, mask); + } + } +@@ -5857,7 +6295,7 @@ flow_dv_translate_item_meta(struct rte_eth_dev *dev, + meta_m = &rte_flow_item_meta_mask; + meta_v = (const void *)item->spec; + if (meta_v) { +- enum modify_reg reg; ++ int reg; + uint32_t value = meta_v->data; + uint32_t mask = meta_m->data; + +@@ -5875,8 +6313,12 @@ flow_dv_translate_item_meta(struct rte_eth_dev *dev, + struct mlx5_priv *priv = dev->data->dev_private; + uint32_t msk_c0 = priv->sh->dv_regc0_mask; + uint32_t shl_c0 = rte_bsf32(msk_c0); ++#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN ++ uint32_t shr_c0 = __builtin_clz(priv->sh->dv_meta_mask); + +- msk_c0 = rte_cpu_to_be_32(msk_c0); ++ value >>= shr_c0; ++ mask >>= shr_c0; ++#endif + value <<= shl_c0; + mask <<= shl_c0; + assert(msk_c0); +@@ -5906,6 +6348,8 @@ flow_dv_translate_item_meta_vport(void *matcher, void *key, + /** + * Add tag item to matcher + * ++ * @param[in] dev ++ * The devich to configure through. + * @param[in, out] matcher + * Flow matcher. + * @param[in, out] key +@@ -5914,15 +6358,27 @@ flow_dv_translate_item_meta_vport(void *matcher, void *key, + * Flow pattern to translate. + */ + static void +-flow_dv_translate_mlx5_item_tag(void *matcher, void *key, ++flow_dv_translate_mlx5_item_tag(struct rte_eth_dev *dev, ++ void *matcher, void *key, + const struct rte_flow_item *item) + { + const struct mlx5_rte_flow_item_tag *tag_v = item->spec; + const struct mlx5_rte_flow_item_tag *tag_m = item->mask; ++ uint32_t mask, value; + + assert(tag_v); +- flow_dv_match_meta_reg(matcher, key, tag_v->id, tag_v->data, +- tag_m ? tag_m->data : UINT32_MAX); ++ value = tag_v->data; ++ mask = tag_m ? 
tag_m->data : UINT32_MAX; ++ if (tag_v->id == REG_C_0) { ++ struct mlx5_priv *priv = dev->data->dev_private; ++ uint32_t msk_c0 = priv->sh->dv_regc0_mask; ++ uint32_t shl_c0 = rte_bsf32(msk_c0); ++ ++ mask &= msk_c0; ++ mask <<= shl_c0; ++ value <<= shl_c0; ++ } ++ flow_dv_match_meta_reg(matcher, key, tag_v->id, value, mask); + } + + /** +@@ -6056,6 +6512,12 @@ flow_dv_translate_item_icmp6(void *matcher, void *key, + return; + if (!icmp6_m) + icmp6_m = &rte_flow_item_icmp6_mask; ++ /* ++ * Force flow only to match the non-fragmented IPv6 ICMPv6 packets. ++ * If only the protocol is specified, no need to match the frag. ++ */ ++ MLX5_SET(fte_match_set_lyr_2_4, headers_m, frag, 1); ++ MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + MLX5_SET(fte_match_set_misc3, misc3_m, icmpv6_type, icmp6_m->type); + MLX5_SET(fte_match_set_misc3, misc3_v, icmpv6_type, + icmp6_v->type & icmp6_m->type); +@@ -6103,6 +6565,12 @@ flow_dv_translate_item_icmp(void *matcher, void *key, + return; + if (!icmp_m) + icmp_m = &rte_flow_item_icmp_mask; ++ /* ++ * Force flow only to match the non-fragmented IPv4 ICMP packets. ++ * If only the protocol is specified, no need to match the frag. ++ */ ++ MLX5_SET(fte_match_set_lyr_2_4, headers_m, frag, 1); ++ MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + MLX5_SET(fte_match_set_misc3, misc3_m, icmp_type, + icmp_m->hdr.icmp_type); + MLX5_SET(fte_match_set_misc3, misc3_v, icmp_type, +@@ -6618,10 +7086,13 @@ __flow_dv_translate(struct rte_eth_dev *dev, + }; + int actions_n = 0; + bool actions_end = false; +- struct mlx5_flow_dv_modify_hdr_resource mhdr_res = { +- .ft_type = attr->egress ? MLX5DV_FLOW_TABLE_TYPE_NIC_TX : +- MLX5DV_FLOW_TABLE_TYPE_NIC_RX +- }; ++ union { ++ struct mlx5_flow_dv_modify_hdr_resource res; ++ uint8_t len[sizeof(struct mlx5_flow_dv_modify_hdr_resource) + ++ sizeof(struct mlx5_modification_cmd) * ++ (MLX5_MAX_MODIFY_NUM + 1)]; ++ } mhdr_dummy; ++ struct mlx5_flow_dv_modify_hdr_resource *mhdr_res = &mhdr_dummy.res; + union flow_dv_attr flow_attr = { .attr = 0 }; + uint32_t tag_be; + union mlx5_flow_tbl_key tbl_key; +@@ -6633,15 +7104,19 @@ __flow_dv_translate(struct rte_eth_dev *dev, + uint32_t table; + int ret = 0; + ++ mhdr_res->ft_type = attr->egress ? MLX5DV_FLOW_TABLE_TYPE_NIC_TX : ++ MLX5DV_FLOW_TABLE_TYPE_NIC_RX; + ret = mlx5_flow_group_to_table(attr, dev_flow->external, attr->group, +- &table, error); ++ !!priv->fdb_def_rule, &table, error); + if (ret) + return ret; + dev_flow->group = table; + if (attr->transfer) +- mhdr_res.ft_type = MLX5DV_FLOW_TABLE_TYPE_FDB; ++ mhdr_res->ft_type = MLX5DV_FLOW_TABLE_TYPE_FDB; + if (priority == MLX5_FLOW_PRIO_RSVD) + priority = dev_conf->flow_prio - 1; ++ /* number of actions must be set to 0 in case of dirty stack. 
*/ ++ mhdr_res->actions_num = 0; + for (; !actions_end ; actions++) { + const struct rte_flow_action_queue *queue; + const struct rte_flow_action_rss *rss; +@@ -6679,7 +7154,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + }; + + if (flow_dv_convert_action_mark(dev, &mark, +- &mhdr_res, ++ mhdr_res, + error)) + return -rte_errno; + action_flags |= MLX5_FLOW_ACTION_MARK_EXT; +@@ -6701,7 +7176,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + actions->conf; + + if (flow_dv_convert_action_mark(dev, mark, +- &mhdr_res, ++ mhdr_res, + error)) + return -rte_errno; + action_flags |= MLX5_FLOW_ACTION_MARK_EXT; +@@ -6722,7 +7197,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + break; + case RTE_FLOW_ACTION_TYPE_SET_META: + if (flow_dv_convert_action_set_meta +- (dev, &mhdr_res, attr, ++ (dev, mhdr_res, attr, + (const struct rte_flow_action_set_meta *) + actions->conf, error)) + return -rte_errno; +@@ -6730,7 +7205,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + break; + case RTE_FLOW_ACTION_TYPE_SET_TAG: + if (flow_dv_convert_action_set_tag +- (dev, &mhdr_res, ++ (dev, mhdr_res, + (const struct rte_flow_action_set_tag *) + actions->conf, error)) + return -rte_errno; +@@ -6798,7 +7273,9 @@ __flow_dv_translate(struct rte_eth_dev *dev, + action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN; + break; + case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: +- flow_dev_get_vlan_info_from_items(items, &vlan); ++ if (!(action_flags & ++ MLX5_FLOW_ACTION_OF_SET_VLAN_VID)) ++ flow_dev_get_vlan_info_from_items(items, &vlan); + vlan.eth_proto = rte_be_to_cpu_16 + ((((const struct rte_flow_action_of_push_vlan *) + actions->conf)->ethertype)); +@@ -6830,7 +7307,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + mlx5_update_vlan_vid_pcp(actions, &vlan); + /* If no VLAN push - this is a modify header action */ + if (flow_dv_convert_action_modify_vlan_vid +- (&mhdr_res, actions, error)) ++ (mhdr_res, actions, error)) + return -rte_errno; + action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID; + break; +@@ -6843,10 +7320,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + return -rte_errno; + dev_flow->dv.actions[actions_n++] = + dev_flow->dv.encap_decap->verbs_action; +- action_flags |= actions->type == +- RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP ? +- MLX5_FLOW_ACTION_VXLAN_ENCAP : +- MLX5_FLOW_ACTION_NVGRE_ENCAP; ++ action_flags |= MLX5_FLOW_ACTION_ENCAP; + break; + case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: + case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: +@@ -6856,14 +7330,11 @@ __flow_dv_translate(struct rte_eth_dev *dev, + return -rte_errno; + dev_flow->dv.actions[actions_n++] = + dev_flow->dv.encap_decap->verbs_action; +- action_flags |= actions->type == +- RTE_FLOW_ACTION_TYPE_VXLAN_DECAP ? +- MLX5_FLOW_ACTION_VXLAN_DECAP : +- MLX5_FLOW_ACTION_NVGRE_DECAP; ++ action_flags |= MLX5_FLOW_ACTION_DECAP; + break; + case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: + /* Handle encap with preceding decap. */ +- if (action_flags & MLX5_FLOW_ACTION_RAW_DECAP) { ++ if (action_flags & MLX5_FLOW_ACTION_DECAP) { + if (flow_dv_create_action_raw_encap + (dev, actions, dev_flow, attr, error)) + return -rte_errno; +@@ -6878,15 +7349,11 @@ __flow_dv_translate(struct rte_eth_dev *dev, + dev_flow->dv.actions[actions_n++] = + dev_flow->dv.encap_decap->verbs_action; + } +- action_flags |= MLX5_FLOW_ACTION_RAW_ENCAP; ++ action_flags |= MLX5_FLOW_ACTION_ENCAP; + break; + case RTE_FLOW_ACTION_TYPE_RAW_DECAP: +- /* Check if this decap is followed by encap. 
*/ +- for (; action->type != RTE_FLOW_ACTION_TYPE_END && +- action->type != RTE_FLOW_ACTION_TYPE_RAW_ENCAP; +- action++) { +- } +- /* Handle decap only if it isn't followed by encap. */ ++ while ((++action)->type == RTE_FLOW_ACTION_TYPE_VOID) ++ ; + if (action->type != RTE_FLOW_ACTION_TYPE_RAW_ENCAP) { + if (flow_dv_create_action_l2_decap + (dev, dev_flow, attr->transfer, error)) +@@ -6895,13 +7362,14 @@ __flow_dv_translate(struct rte_eth_dev *dev, + dev_flow->dv.encap_decap->verbs_action; + } + /* If decap is followed by encap, handle it at encap. */ +- action_flags |= MLX5_FLOW_ACTION_RAW_DECAP; ++ action_flags |= MLX5_FLOW_ACTION_DECAP; + break; + case RTE_FLOW_ACTION_TYPE_JUMP: + jump_data = action->conf; + ret = mlx5_flow_group_to_table(attr, dev_flow->external, +- jump_data->group, &table, +- error); ++ jump_data->group, ++ !!priv->fdb_def_rule, ++ &table, error); + if (ret) + return ret; + tbl = flow_dv_tbl_resource_get(dev, table, +@@ -6929,7 +7397,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC: + case RTE_FLOW_ACTION_TYPE_SET_MAC_DST: + if (flow_dv_convert_action_modify_mac +- (&mhdr_res, actions, error)) ++ (mhdr_res, actions, error)) + return -rte_errno; + action_flags |= actions->type == + RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ? +@@ -6939,7 +7407,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC: + case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST: + if (flow_dv_convert_action_modify_ipv4 +- (&mhdr_res, actions, error)) ++ (mhdr_res, actions, error)) + return -rte_errno; + action_flags |= actions->type == + RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? +@@ -6949,7 +7417,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC: + case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST: + if (flow_dv_convert_action_modify_ipv6 +- (&mhdr_res, actions, error)) ++ (mhdr_res, actions, error)) + return -rte_errno; + action_flags |= actions->type == + RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? +@@ -6959,8 +7427,9 @@ __flow_dv_translate(struct rte_eth_dev *dev, + case RTE_FLOW_ACTION_TYPE_SET_TP_SRC: + case RTE_FLOW_ACTION_TYPE_SET_TP_DST: + if (flow_dv_convert_action_modify_tp +- (&mhdr_res, actions, items, +- &flow_attr, error)) ++ (mhdr_res, actions, items, ++ &flow_attr, dev_flow, !!(action_flags & ++ MLX5_FLOW_ACTION_DECAP), error)) + return -rte_errno; + action_flags |= actions->type == + RTE_FLOW_ACTION_TYPE_SET_TP_SRC ? +@@ -6969,21 +7438,24 @@ __flow_dv_translate(struct rte_eth_dev *dev, + break; + case RTE_FLOW_ACTION_TYPE_DEC_TTL: + if (flow_dv_convert_action_modify_dec_ttl +- (&mhdr_res, items, &flow_attr, error)) ++ (mhdr_res, items, &flow_attr, dev_flow, ++ !!(action_flags & ++ MLX5_FLOW_ACTION_DECAP), error)) + return -rte_errno; + action_flags |= MLX5_FLOW_ACTION_DEC_TTL; + break; + case RTE_FLOW_ACTION_TYPE_SET_TTL: + if (flow_dv_convert_action_modify_ttl +- (&mhdr_res, actions, items, +- &flow_attr, error)) ++ (mhdr_res, actions, items, &flow_attr, ++ dev_flow, !!(action_flags & ++ MLX5_FLOW_ACTION_DECAP), error)) + return -rte_errno; + action_flags |= MLX5_FLOW_ACTION_SET_TTL; + break; + case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ: + case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ: + if (flow_dv_convert_action_modify_tcp_seq +- (&mhdr_res, actions, error)) ++ (mhdr_res, actions, error)) + return -rte_errno; + action_flags |= actions->type == + RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ ? 
+@@ -6994,7 +7466,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK: + case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK: + if (flow_dv_convert_action_modify_tcp_ack +- (&mhdr_res, actions, error)) ++ (mhdr_res, actions, error)) + return -rte_errno; + action_flags |= actions->type == + RTE_FLOW_ACTION_TYPE_INC_TCP_ACK ? +@@ -7003,13 +7475,13 @@ __flow_dv_translate(struct rte_eth_dev *dev, + break; + case MLX5_RTE_FLOW_ACTION_TYPE_TAG: + if (flow_dv_convert_action_set_reg +- (&mhdr_res, actions, error)) ++ (mhdr_res, actions, error)) + return -rte_errno; + action_flags |= MLX5_FLOW_ACTION_SET_TAG; + break; + case MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG: + if (flow_dv_convert_action_copy_mreg +- (dev, &mhdr_res, actions, error)) ++ (dev, mhdr_res, actions, error)) + return -rte_errno; + action_flags |= MLX5_FLOW_ACTION_SET_TAG; + break; +@@ -7034,10 +7506,10 @@ __flow_dv_translate(struct rte_eth_dev *dev, + break; + case RTE_FLOW_ACTION_TYPE_END: + actions_end = true; +- if (mhdr_res.actions_num) { ++ if (mhdr_res->actions_num) { + /* create modify action if needed. */ + if (flow_dv_modify_hdr_resource_register +- (dev, &mhdr_res, dev_flow, error)) ++ (dev, mhdr_res, dev_flow, error)) + return -rte_errno; + dev_flow->dv.actions[modify_action_position] = + dev_flow->dv.modify_hdr->verbs_action; +@@ -7046,7 +7518,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + default: + break; + } +- if (mhdr_res.actions_num && ++ if (mhdr_res->actions_num && + modify_action_position == UINT32_MAX) + modify_action_position = actions_n++; + } +@@ -7083,7 +7555,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + mlx5_flow_tunnel_ip_check(items, next_protocol, + &item_flags, &tunnel); + flow_dv_translate_item_ipv4(match_mask, match_value, +- items, tunnel, ++ items, item_flags, tunnel, + dev_flow->group); + matcher.priority = MLX5_PRIORITY_MAP_L3; + dev_flow->hash_fields |= +@@ -7111,7 +7583,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + mlx5_flow_tunnel_ip_check(items, next_protocol, + &item_flags, &tunnel); + flow_dv_translate_item_ipv6(match_mask, match_value, +- items, tunnel, ++ items, item_flags, tunnel, + dev_flow->group); + matcher.priority = MLX5_PRIORITY_MAP_L3; + dev_flow->hash_fields |= +@@ -7162,6 +7634,8 @@ __flow_dv_translate(struct rte_eth_dev *dev, + case RTE_FLOW_ITEM_TYPE_GRE: + flow_dv_translate_item_gre(match_mask, match_value, + items, tunnel); ++ matcher.priority = flow->rss.level >= 2 ? ++ MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4; + last_item = MLX5_FLOW_LAYER_GRE; + break; + case RTE_FLOW_ITEM_TYPE_GRE_KEY: +@@ -7172,26 +7646,37 @@ __flow_dv_translate(struct rte_eth_dev *dev, + case RTE_FLOW_ITEM_TYPE_NVGRE: + flow_dv_translate_item_nvgre(match_mask, match_value, + items, tunnel); ++ matcher.priority = flow->rss.level >= 2 ? ++ MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4; + last_item = MLX5_FLOW_LAYER_GRE; + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + flow_dv_translate_item_vxlan(match_mask, match_value, + items, tunnel); ++ matcher.priority = flow->rss.level >= 2 ? ++ MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4; + last_item = MLX5_FLOW_LAYER_VXLAN; + break; + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: +- flow_dv_translate_item_vxlan(match_mask, match_value, +- items, tunnel); ++ flow_dv_translate_item_vxlan_gpe(match_mask, ++ match_value, items, ++ tunnel); ++ matcher.priority = flow->rss.level >= 2 ? 
++ MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4; + last_item = MLX5_FLOW_LAYER_VXLAN_GPE; + break; + case RTE_FLOW_ITEM_TYPE_GENEVE: + flow_dv_translate_item_geneve(match_mask, match_value, + items, tunnel); ++ matcher.priority = flow->rss.level >= 2 ? ++ MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4; + last_item = MLX5_FLOW_LAYER_GENEVE; + break; + case RTE_FLOW_ITEM_TYPE_MPLS: + flow_dv_translate_item_mpls(match_mask, match_value, + items, last_item, tunnel); ++ matcher.priority = flow->rss.level >= 2 ? ++ MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4; + last_item = MLX5_FLOW_LAYER_MPLS; + break; + case RTE_FLOW_ITEM_TYPE_MARK: +@@ -7220,7 +7705,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, + last_item = MLX5_FLOW_ITEM_TAG; + break; + case MLX5_RTE_FLOW_ITEM_TYPE_TAG: +- flow_dv_translate_mlx5_item_tag(match_mask, ++ flow_dv_translate_mlx5_item_tag(dev, match_mask, + match_value, items); + last_item = MLX5_FLOW_ITEM_TAG; + break; +@@ -7236,13 +7721,13 @@ __flow_dv_translate(struct rte_eth_dev *dev, + item_flags |= last_item; + } + /* +- * In case of ingress traffic when E-Switch mode is enabled, +- * we have two cases where we need to set the source port manually. ++ * When E-Switch mode is enabled, we have two cases where we need to ++ * set the source port manually. + * The first one, is in case of Nic steering rule, and the second is + * E-Switch rule where no port_id item was found. In both cases + * the source port is set according the current port in use. + */ +- if ((attr->ingress && !(item_flags & MLX5_FLOW_ITEM_PORT_ID)) && ++ if (!(item_flags & MLX5_FLOW_ITEM_PORT_ID) && + (priv->representor || priv->master)) { + if (flow_dv_translate_item_port_id(dev, match_mask, + match_value, NULL)) +@@ -7250,7 +7735,11 @@ __flow_dv_translate(struct rte_eth_dev *dev, + } + assert(!flow_dv_check_valid_spec(matcher.mask.buf, + dev_flow->dv.value.buf)); +- dev_flow->layers = item_flags; ++ /* ++ * Layers may be already initialized from prefix flow if this dev_flow ++ * is the suffix flow. ++ */ ++ dev_flow->layers |= item_flags; + /* Register matcher. 
*/ + matcher.crc = rte_raw_cksum((const void *)matcher.mask.buf, + matcher.mask.size); +@@ -7779,8 +8268,9 @@ flow_dv_destroy_mtr_tbl(struct rte_eth_dev *dev, + claim_zero(mlx5_glue->dv_destroy_flow_matcher + (mtd->egress.any_matcher)); + if (mtd->egress.tbl) +- claim_zero(flow_dv_tbl_resource_release(dev, +- mtd->egress.tbl)); ++ flow_dv_tbl_resource_release(dev, mtd->egress.tbl); ++ if (mtd->egress.sfx_tbl) ++ flow_dv_tbl_resource_release(dev, mtd->egress.sfx_tbl); + if (mtd->ingress.color_matcher) + claim_zero(mlx5_glue->dv_destroy_flow_matcher + (mtd->ingress.color_matcher)); +@@ -7788,8 +8278,9 @@ flow_dv_destroy_mtr_tbl(struct rte_eth_dev *dev, + claim_zero(mlx5_glue->dv_destroy_flow_matcher + (mtd->ingress.any_matcher)); + if (mtd->ingress.tbl) +- claim_zero(flow_dv_tbl_resource_release(dev, +- mtd->ingress.tbl)); ++ flow_dv_tbl_resource_release(dev, mtd->ingress.tbl); ++ if (mtd->ingress.sfx_tbl) ++ flow_dv_tbl_resource_release(dev, mtd->ingress.sfx_tbl); + if (mtd->transfer.color_matcher) + claim_zero(mlx5_glue->dv_destroy_flow_matcher + (mtd->transfer.color_matcher)); +@@ -7797,8 +8288,9 @@ flow_dv_destroy_mtr_tbl(struct rte_eth_dev *dev, + claim_zero(mlx5_glue->dv_destroy_flow_matcher + (mtd->transfer.any_matcher)); + if (mtd->transfer.tbl) +- claim_zero(flow_dv_tbl_resource_release(dev, +- mtd->transfer.tbl)); ++ flow_dv_tbl_resource_release(dev, mtd->transfer.tbl); ++ if (mtd->transfer.sfx_tbl) ++ flow_dv_tbl_resource_release(dev, mtd->transfer.sfx_tbl); + if (mtd->drop_actn) + claim_zero(mlx5_glue->destroy_flow_action(mtd->drop_actn)); + rte_free(mtd); +@@ -7846,31 +8338,16 @@ flow_dv_prepare_mtr_tables(struct rte_eth_dev *dev, + .match_mask = (void *)&mask, + }; + void *actions[METER_ACTIONS]; +- struct mlx5_flow_tbl_resource **sfx_tbl; + struct mlx5_meter_domain_info *dtb; + struct rte_flow_error error; + int i = 0; + +- if (transfer) { +- sfx_tbl = &sh->fdb_mtr_sfx_tbl; ++ if (transfer) + dtb = &mtb->transfer; +- } else if (egress) { +- sfx_tbl = &sh->tx_mtr_sfx_tbl; ++ else if (egress) + dtb = &mtb->egress; +- } else { +- sfx_tbl = &sh->rx_mtr_sfx_tbl; ++ else + dtb = &mtb->ingress; +- } +- /* If the suffix table in missing, create it. */ +- if (!(*sfx_tbl)) { +- *sfx_tbl = flow_dv_tbl_resource_get(dev, +- MLX5_FLOW_TABLE_LEVEL_SUFFIX, +- egress, transfer, &error); +- if (!(*sfx_tbl)) { +- DRV_LOG(ERR, "Failed to create meter suffix table."); +- return -1; +- } +- } + /* Create the meter table with METER level. */ + dtb->tbl = flow_dv_tbl_resource_get(dev, MLX5_FLOW_TABLE_LEVEL_METER, + egress, transfer, &error); +@@ -7878,6 +8355,14 @@ flow_dv_prepare_mtr_tables(struct rte_eth_dev *dev, + DRV_LOG(ERR, "Failed to create meter policer table."); + return -1; + } ++ /* Create the meter suffix table with SUFFIX level. */ ++ dtb->sfx_tbl = flow_dv_tbl_resource_get(dev, ++ MLX5_FLOW_TABLE_LEVEL_SUFFIX, ++ egress, transfer, &error); ++ if (!dtb->sfx_tbl) { ++ DRV_LOG(ERR, "Failed to create meter suffix table."); ++ return -1; ++ } + /* Create matchers, Any and Color. 
*/ + dv_attr.priority = 3; + dv_attr.match_criteria_enable = 0; +@@ -7893,7 +8378,7 @@ flow_dv_prepare_mtr_tables(struct rte_eth_dev *dev, + dv_attr.match_criteria_enable = + 1 << MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT; + flow_dv_match_meta_reg(mask.buf, value.buf, color_reg_c_idx, +- rte_col_2_mlx5_col(RTE_COLORS), UINT32_MAX); ++ rte_col_2_mlx5_col(RTE_COLORS), UINT8_MAX); + dtb->color_matcher = mlx5_glue->dv_create_flow_matcher(sh->ctx, + &dv_attr, + dtb->tbl->obj); +@@ -8048,8 +8533,6 @@ flow_dv_destroy_policer_rules(struct rte_eth_dev *dev __rte_unused, + * Pointer to flow meter structure. + * @param[in] mtb + * Pointer to DV meter table set. +- * @param[in] sfx_tb +- * Pointer to suffix table. + * @param[in] mtr_reg_c + * Color match REG_C. + * +@@ -8059,7 +8542,6 @@ flow_dv_destroy_policer_rules(struct rte_eth_dev *dev __rte_unused, + static int + flow_dv_create_policer_forward_rule(struct mlx5_flow_meter *fm, + struct mlx5_meter_domain_info *dtb, +- struct mlx5_flow_tbl_resource *sfx_tb, + uint8_t mtr_reg_c) + { + struct mlx5_flow_dv_match_params matcher = { +@@ -8073,12 +8555,10 @@ flow_dv_create_policer_forward_rule(struct mlx5_flow_meter *fm, + int i; + + /* Create jump action. */ +- if (!sfx_tb) +- return -1; + if (!dtb->jump_actn) + dtb->jump_actn = + mlx5_glue->dr_create_flow_action_dest_flow_tbl +- (sfx_tb->obj); ++ (dtb->sfx_tbl->obj); + if (!dtb->jump_actn) { + DRV_LOG(ERR, "Failed to create policer jump action."); + goto error; +@@ -8087,7 +8567,7 @@ flow_dv_create_policer_forward_rule(struct mlx5_flow_meter *fm, + int j = 0; + + flow_dv_match_meta_reg(matcher.buf, value.buf, mtr_reg_c, +- rte_col_2_mlx5_col(i), UINT32_MAX); ++ rte_col_2_mlx5_col(i), UINT8_MAX); + if (mtb->count_actns[i]) + actions[j++] = mtb->count_actns[i]; + if (fm->params.action[i] == MTR_POLICER_ACTION_DROP) +@@ -8133,7 +8613,6 @@ flow_dv_create_policer_rules(struct rte_eth_dev *dev, + + if (attr->egress) { + ret = flow_dv_create_policer_forward_rule(fm, &mtb->egress, +- priv->sh->tx_mtr_sfx_tbl, + priv->mtr_color_reg); + if (ret) { + DRV_LOG(ERR, "Failed to create egress policer."); +@@ -8142,7 +8621,6 @@ flow_dv_create_policer_rules(struct rte_eth_dev *dev, + } + if (attr->ingress) { + ret = flow_dv_create_policer_forward_rule(fm, &mtb->ingress, +- priv->sh->rx_mtr_sfx_tbl, + priv->mtr_color_reg); + if (ret) { + DRV_LOG(ERR, "Failed to create ingress policer."); +@@ -8151,7 +8629,6 @@ flow_dv_create_policer_rules(struct rte_eth_dev *dev, + } + if (attr->transfer) { + ret = flow_dv_create_policer_forward_rule(fm, &mtb->transfer, +- priv->sh->fdb_mtr_sfx_tbl, + priv->mtr_color_reg); + if (ret) { + DRV_LOG(ERR, "Failed to create transfer policer."); +diff --git a/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c b/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c +index c787c9838d..7ac6a25e43 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c ++++ b/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c +@@ -493,14 +493,12 @@ flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow, + ipv6.val.traffic_class = (vtc_flow_val & RTE_IPV6_HDR_TC_MASK) >> + RTE_IPV6_HDR_TC_SHIFT; + ipv6.val.next_hdr = spec->hdr.proto; +- ipv6.val.hop_limit = spec->hdr.hop_limits; + ipv6.mask.flow_label = + rte_cpu_to_be_32((vtc_flow_mask & RTE_IPV6_HDR_FL_MASK) >> + RTE_IPV6_HDR_FL_SHIFT); + ipv6.mask.traffic_class = (vtc_flow_mask & RTE_IPV6_HDR_TC_MASK) >> + RTE_IPV6_HDR_TC_SHIFT; + ipv6.mask.next_hdr = mask->hdr.proto; +- ipv6.mask.hop_limit = mask->hdr.hop_limits; + /* Remove unwanted bits from values. 
*/ + for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) { + ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i]; +@@ -509,7 +507,6 @@ flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow, + ipv6.val.flow_label &= ipv6.mask.flow_label; + ipv6.val.traffic_class &= ipv6.mask.traffic_class; + ipv6.val.next_hdr &= ipv6.mask.next_hdr; +- ipv6.val.hop_limit &= ipv6.mask.hop_limit; + } + flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size); + } +@@ -589,6 +586,28 @@ flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow, + udp.val.src_port &= udp.mask.src_port; + udp.val.dst_port &= udp.mask.dst_port; + } ++ item++; ++ while (item->type == RTE_FLOW_ITEM_TYPE_VOID) ++ item++; ++ if (!(udp.val.dst_port & udp.mask.dst_port)) { ++ switch ((item)->type) { ++ case RTE_FLOW_ITEM_TYPE_VXLAN: ++ udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN); ++ udp.mask.dst_port = 0xffff; ++ break; ++ case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: ++ udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN_GPE); ++ udp.mask.dst_port = 0xffff; ++ break; ++ case RTE_FLOW_ITEM_TYPE_MPLS: ++ udp.val.dst_port = htons(MLX5_UDP_PORT_MPLS); ++ udp.mask.dst_port = 0xffff; ++ break; ++ default: ++ break; ++ } ++ } ++ + flow_verbs_spec_add(&dev_flow->verbs, &udp, size); + } + +@@ -1019,6 +1038,8 @@ flow_verbs_translate_action_count(struct mlx5_flow *dev_flow, + * Pointer to the list of actions. + * @param[in] external + * This flow rule is created by request external to PMD. ++ * @param[in] hairpin ++ * Number of hairpin TX actions, 0 means classic flow. + * @param[out] error + * Pointer to the error structure. + * +@@ -1031,6 +1052,7 @@ flow_verbs_validate(struct rte_eth_dev *dev, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + bool external __rte_unused, ++ int hairpin __rte_unused, + struct rte_flow_error *error) + { + int ret; +@@ -1255,6 +1277,18 @@ flow_verbs_validate(struct rte_eth_dev *dev, + "action not supported"); + } + } ++ /* ++ * Validate the drop action mutual exclusion with other actions. ++ * Drop action is mutually-exclusive with any other action, except for ++ * Count action. 
++ */ ++ if ((action_flags & MLX5_FLOW_ACTION_DROP) && ++ (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT))) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ACTION, NULL, ++ "Drop action is mutually-exclusive " ++ "with any other action, except for " ++ "Count action"); + if (!(action_flags & MLX5_FLOW_FATE_ACTIONS)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, actions, +diff --git a/dpdk/drivers/net/mlx5/mlx5_glue.c b/dpdk/drivers/net/mlx5/mlx5_glue.c +index 0917bf28d6..44f63116a8 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_glue.c ++++ b/dpdk/drivers/net/mlx5/mlx5_glue.c +@@ -1008,7 +1008,7 @@ mlx5_glue_devx_qp_query(struct ibv_qp *qp, + const void *in, size_t inlen, + void *out, size_t outlen) + { +-#ifdef HAVE_IBV_DEVX_OBJ ++#ifdef HAVE_IBV_DEVX_QP + return mlx5dv_devx_qp_query(qp, in, inlen, out, outlen); + #else + (void)qp; +diff --git a/dpdk/drivers/net/mlx5/mlx5_glue.h b/dpdk/drivers/net/mlx5/mlx5_glue.h +index 6442f1eba8..4e6465523a 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_glue.h ++++ b/dpdk/drivers/net/mlx5/mlx5_glue.h +@@ -258,6 +258,6 @@ struct mlx5_glue { + struct mlx5dv_devx_port *mlx5_devx_port); + }; + +-const struct mlx5_glue *mlx5_glue; ++extern const struct mlx5_glue *mlx5_glue; + + #endif /* MLX5_GLUE_H_ */ +diff --git a/dpdk/drivers/net/mlx5/mlx5_nl.c b/dpdk/drivers/net/mlx5/mlx5_nl.c +index e7ba03471d..64580b9e6a 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_nl.c ++++ b/dpdk/drivers/net/mlx5/mlx5_nl.c +@@ -269,10 +269,10 @@ mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), + void *arg) + { + struct sockaddr_nl sa; +- char buf[MLX5_RECV_BUF_SIZE]; ++ void *buf = malloc(MLX5_RECV_BUF_SIZE); + struct iovec iov = { + .iov_base = buf, +- .iov_len = sizeof(buf), ++ .iov_len = MLX5_RECV_BUF_SIZE, + }; + struct msghdr msg = { + .msg_name = &sa, +@@ -284,6 +284,10 @@ mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), + int multipart = 0; + int ret = 0; + ++ if (!buf) { ++ rte_errno = ENOMEM; ++ return -rte_errno; ++ } + do { + struct nlmsghdr *nh; + int recv_bytes = 0; +@@ -292,7 +296,8 @@ mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), + recv_bytes = recvmsg(nlsk_fd, &msg, 0); + if (recv_bytes == -1) { + rte_errno = errno; +- return -rte_errno; ++ ret = -rte_errno; ++ goto exit; + } + nh = (struct nlmsghdr *)buf; + } while (nh->nlmsg_seq != sn); +@@ -304,24 +309,30 @@ mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), + + if (err_data->error < 0) { + rte_errno = -err_data->error; +- return -rte_errno; ++ ret = -rte_errno; ++ goto exit; + } + /* Ack message. */ +- return 0; ++ ret = 0; ++ goto exit; + } + /* Multi-part msgs and their trailing DONE message. 
*/ + if (nh->nlmsg_flags & NLM_F_MULTI) { +- if (nh->nlmsg_type == NLMSG_DONE) +- return 0; ++ if (nh->nlmsg_type == NLMSG_DONE) { ++ ret = 0; ++ goto exit; ++ } + multipart = 1; + } + if (cb) { + ret = cb(nh, arg); + if (ret < 0) +- return ret; ++ goto exit; + } + } + } while (multipart); ++exit: ++ free(buf); + return ret; + } + +diff --git a/dpdk/drivers/net/mlx5/mlx5_prm.h b/dpdk/drivers/net/mlx5/mlx5_prm.h +index a805363757..4c86719769 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_prm.h ++++ b/dpdk/drivers/net/mlx5/mlx5_prm.h +@@ -18,6 +18,8 @@ + #pragma GCC diagnostic error "-Wpedantic" + #endif + ++#include <unistd.h> ++ + #include <rte_vect.h> + #include "mlx5_autoconf.h" + +@@ -100,7 +102,7 @@ + */ + #define MLX5_EMPW_MAX_PACKETS MLX5_TX_COMP_THRESH + #define MLX5_MPW_MAX_PACKETS 6 +-#define MLX5_MPW_INLINE_MAX_PACKETS 2 ++#define MLX5_MPW_INLINE_MAX_PACKETS 6 + + /* + * Default packet length threshold to be inlined with +@@ -251,7 +253,7 @@ + #define MLX5_MAX_LOG_RQ_SEGS 5u + + /* The alignment needed for WQ buffer. */ +-#define MLX5_WQE_BUF_ALIGNMENT 512 ++#define MLX5_WQE_BUF_ALIGNMENT sysconf(_SC_PAGESIZE) + + /* Completion mode. */ + enum mlx5_completion_mode { +@@ -1196,7 +1198,9 @@ struct mlx5_ifc_qos_cap_bits { + u8 reserved_at_8[0x8]; + u8 log_max_flow_meter[0x8]; + u8 flow_meter_reg_id[0x8]; +- u8 reserved_at_25[0x20]; ++ u8 reserved_at_25[0x8]; ++ u8 flow_meter_reg_share[0x1]; ++ u8 reserved_at_2e[0x17]; + u8 packet_pacing_max_rate[0x20]; + u8 packet_pacing_min_rate[0x20]; + u8 reserved_at_80[0x10]; +@@ -1816,6 +1820,9 @@ enum { + #define MLX5_SRTCM_CIR_MAX (8 * (1ULL << 30) * 0xFF) + #define MLX5_SRTCM_EBS_MAX 0 + ++/* The bits meter color use. */ ++#define MLX5_MTR_COLOR_BITS 8 ++ + /** + * Convert a user mark to flow mark. + * +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxq.c b/dpdk/drivers/net/mlx5/mlx5_rxq.c +index 986ec016df..2b6ab21b90 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxq.c ++++ b/dpdk/drivers/net/mlx5/mlx5_rxq.c +@@ -36,6 +36,7 @@ + #include "mlx5_autoconf.h" + #include "mlx5_defs.h" + #include "mlx5_glue.h" ++#include "mlx5_flow.h" + + /* Default RSS hash key also used for ConnectX-3. */ + uint8_t rss_hash_default_key[] = { +@@ -1260,6 +1261,7 @@ mlx5_rxq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx) + struct mlx5_devx_create_rq_attr attr = { 0 }; + struct mlx5_rxq_obj *tmpl = NULL; + int ret = 0; ++ uint32_t max_wq_data; + + assert(rxq_data); + assert(!rxq_ctrl->obj); +@@ -1275,11 +1277,15 @@ mlx5_rxq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx) + tmpl->type = MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN; + tmpl->rxq_ctrl = rxq_ctrl; + attr.hairpin = 1; +- /* Workaround for hairpin startup */ +- attr.wq_attr.log_hairpin_num_packets = log2above(32); +- /* Workaround for packets larger than 1KB */ ++ max_wq_data = priv->config.hca_attr.log_max_hairpin_wq_data_sz; ++ /* Jumbo frames > 9KB should be supported, and more packets. */ + attr.wq_attr.log_hairpin_data_sz = +- priv->config.hca_attr.log_max_hairpin_wq_data_sz; ++ (max_wq_data < MLX5_HAIRPIN_JUMBO_LOG_SIZE) ? ++ max_wq_data : MLX5_HAIRPIN_JUMBO_LOG_SIZE; ++ /* Set the packets number to the maximum value for performance. 
*/ ++ attr.wq_attr.log_hairpin_num_packets = ++ attr.wq_attr.log_hairpin_data_sz - ++ MLX5_HAIRPIN_QUEUE_STRIDE; + tmpl->rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &attr, + rxq_ctrl->socket); + if (!tmpl->rq) { +@@ -1762,9 +1768,10 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_rxq_ctrl *tmpl; + unsigned int mb_len = rte_pktmbuf_data_room_size(mp); ++ unsigned int mprq_stride_nums; + unsigned int mprq_stride_size; ++ unsigned int mprq_stride_cap; + struct mlx5_dev_config *config = &priv->config; +- unsigned int strd_headroom_en; + /* + * Always allocate extra slots, even if eventually + * the vector Rx will not be used. +@@ -1810,42 +1817,42 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + tmpl->socket = socket; + if (dev->data->dev_conf.intr_conf.rxq) + tmpl->irq = 1; +- /* +- * LRO packet may consume all the stride memory, hence we cannot +- * guaranty head-room near the packet memory in the stride. +- * In this case scatter is, for sure, enabled and an empty mbuf may be +- * added in the start for the head-room. +- */ +- if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 && +- non_scatter_min_mbuf_size > mb_len) { +- strd_headroom_en = 0; +- mprq_stride_size = RTE_MIN(max_rx_pkt_len, +- 1u << config->mprq.max_stride_size_n); +- } else { +- strd_headroom_en = 1; +- mprq_stride_size = non_scatter_min_mbuf_size; +- } ++ mprq_stride_nums = config->mprq.stride_num_n ? ++ config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N; ++ mprq_stride_size = non_scatter_min_mbuf_size <= ++ (1U << config->mprq.max_stride_size_n) ? ++ log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N; ++ mprq_stride_cap = (config->mprq.stride_num_n ? ++ (1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) * ++ (config->mprq.stride_size_n ? ++ (1U << config->mprq.stride_size_n) : (1U << mprq_stride_size)); + /* + * This Rx queue can be configured as a Multi-Packet RQ if all of the + * following conditions are met: + * - MPRQ is enabled. + * - The number of descs is more than the number of strides. +- * - max_rx_pkt_len plus overhead is less than the max size of a +- * stride. ++ * - max_rx_pkt_len plus overhead is less than the max size ++ * of a stride or mprq_stride_size is specified by a user. ++ * Need to nake sure that there are enough stides to encap ++ * the maximum packet size in case mprq_stride_size is set. + * Otherwise, enable Rx scatter if necessary. + */ +- if (mprq_en && +- desc > (1U << config->mprq.stride_num_n) && +- mprq_stride_size <= (1U << config->mprq.max_stride_size_n)) { ++ if (mprq_en && desc > (1U << mprq_stride_nums) && ++ (non_scatter_min_mbuf_size <= ++ (1U << config->mprq.max_stride_size_n) || ++ (config->mprq.stride_size_n && ++ non_scatter_min_mbuf_size <= mprq_stride_cap))) { + /* TODO: Rx scatter isn't supported yet. */ + tmpl->rxq.sges_n = 0; + /* Trim the number of descs needed. */ +- desc >>= config->mprq.stride_num_n; +- tmpl->rxq.strd_num_n = config->mprq.stride_num_n; +- tmpl->rxq.strd_sz_n = RTE_MAX(log2above(mprq_stride_size), +- config->mprq.min_stride_size_n); ++ desc >>= mprq_stride_nums; ++ tmpl->rxq.strd_num_n = config->mprq.stride_num_n ? ++ config->mprq.stride_num_n : mprq_stride_nums; ++ tmpl->rxq.strd_sz_n = config->mprq.stride_size_n ? 
++ config->mprq.stride_size_n : mprq_stride_size; + tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT; +- tmpl->rxq.strd_headroom_en = strd_headroom_en; ++ tmpl->rxq.strd_scatter_en = ++ !!(offloads & DEV_RX_OFFLOAD_SCATTER); + tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size, + config->mprq.max_memcpy_len); + max_lro_size = RTE_MIN(max_rx_pkt_len, +@@ -1889,14 +1896,24 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + tmpl->rxq.sges_n = sges_n; + max_lro_size = max_rx_pkt_len; + } +- if (mprq_en && !mlx5_rxq_mprq_enabled(&tmpl->rxq)) ++ if (config->mprq.enabled && !mlx5_rxq_mprq_enabled(&tmpl->rxq)) + DRV_LOG(WARNING, +- "port %u MPRQ is requested but cannot be enabled" +- " (requested: desc = %u, stride_sz = %u," +- " supported: min_stride_num = %u, max_stride_sz = %u).", +- dev->data->port_id, desc, mprq_stride_size, +- (1 << config->mprq.stride_num_n), +- (1 << config->mprq.max_stride_size_n)); ++ "port %u MPRQ is requested but cannot be enabled\n" ++ " (requested: pkt_sz = %u, desc_num = %u," ++ " rxq_num = %u, stride_sz = %u, stride_num = %u\n" ++ " supported: min_rxqs_num = %u," ++ " min_stride_sz = %u, max_stride_sz = %u).", ++ dev->data->port_id, non_scatter_min_mbuf_size, ++ desc, priv->rxqs_n, ++ config->mprq.stride_size_n ? ++ (1U << config->mprq.stride_size_n) : ++ (1U << mprq_stride_size), ++ config->mprq.stride_num_n ? ++ (1U << config->mprq.stride_num_n) : ++ (1U << mprq_stride_nums), ++ config->mprq.min_rxqs_num, ++ (1U << config->mprq.min_stride_size_n), ++ (1U << config->mprq.max_stride_size_n)); + DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", + dev->data->port_id, 1 << tmpl->rxq.sges_n); + if (desc % (1 << tmpl->rxq.sges_n)) { +@@ -2465,13 +2482,42 @@ mlx5_hrxq_new(struct rte_eth_dev *dev, + memset(&tir_attr, 0, sizeof(tir_attr)); + tir_attr.disp_type = MLX5_TIRC_DISP_TYPE_INDIRECT; + tir_attr.rx_hash_fn = MLX5_RX_HASH_FN_TOEPLITZ; +- memcpy(&tir_attr.rx_hash_field_selector_outer, &hash_fields, +- sizeof(uint64_t)); ++ tir_attr.tunneled_offload_en = !!tunnel; ++ /* If needed, translate hash_fields bitmap to PRM format. */ ++ if (hash_fields) { ++#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT ++ struct mlx5_rx_hash_field_select *rx_hash_field_select = ++ hash_fields & IBV_RX_HASH_INNER ? ++ &tir_attr.rx_hash_field_selector_inner : ++ &tir_attr.rx_hash_field_selector_outer; ++#else ++ struct mlx5_rx_hash_field_select *rx_hash_field_select = ++ &tir_attr.rx_hash_field_selector_outer; ++#endif ++ ++ /* 1 bit: 0: IPv4, 1: IPv6. */ ++ rx_hash_field_select->l3_prot_type = ++ !!(hash_fields & MLX5_IPV6_IBV_RX_HASH); ++ /* 1 bit: 0: TCP, 1: UDP. */ ++ rx_hash_field_select->l4_prot_type = ++ !!(hash_fields & MLX5_UDP_IBV_RX_HASH); ++ /* Bitmask which sets which fields to use in RX Hash. 
*/ ++ rx_hash_field_select->selected_fields = ++ ((!!(hash_fields & MLX5_L3_SRC_IBV_RX_HASH)) << ++ MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_SRC_IP) | ++ (!!(hash_fields & MLX5_L3_DST_IBV_RX_HASH)) << ++ MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_DST_IP | ++ (!!(hash_fields & MLX5_L4_SRC_IBV_RX_HASH)) << ++ MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_SPORT | ++ (!!(hash_fields & MLX5_L4_DST_IBV_RX_HASH)) << ++ MLX5_RX_HASH_FIELD_SELECT_SELECTED_FIELDS_L4_DPORT; ++ } + if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN) + tir_attr.transport_domain = priv->sh->td->id; + else + tir_attr.transport_domain = priv->sh->tdn; +- memcpy(tir_attr.rx_hash_toeplitz_key, rss_key, rss_key_len); ++ memcpy(tir_attr.rx_hash_toeplitz_key, rss_key, ++ MLX5_RSS_HASH_KEY_LEN); + tir_attr.indirect_table = ind_tbl->rqt->id; + if (dev->data->dev_conf.lpbk_mode) + tir_attr.self_lb_block = +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx.c b/dpdk/drivers/net/mlx5/mlx5_rxtx.c +index acf0fd794b..488a87f593 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx.c ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx.c +@@ -654,10 +654,10 @@ check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) + * Pointer to the error CQE. + * + * @return +- * Negative value if queue recovery failed, +- * the last Tx buffer element to free otherwise. ++ * Negative value if queue recovery failed, otherwise ++ * the error completion entry is handled successfully. + */ +-int ++static int + mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq, + volatile struct mlx5_err_cqe *err_cqe) + { +@@ -701,18 +701,14 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq, + */ + txq->stats.oerrors += ((txq->wqe_ci & wqe_m) - + new_wqe_pi) & wqe_m; +- if (tx_recover_qp(txq_ctrl) == 0) { +- txq->cq_ci++; +- /* Release all the remaining buffers. */ +- return txq->elts_head; ++ if (tx_recover_qp(txq_ctrl)) { ++ /* Recovering failed - retry later on the same WQE. */ ++ return -1; + } +- /* Recovering failed - try again later on the same WQE. */ +- return -1; +- } else { +- txq->cq_ci++; ++ /* Release all the remaining buffers. */ ++ txq_free_elts(txq_ctrl); + } +- /* Do not release buffers. */ +- return txq->elts_tail; ++ return 0; + } + + /** +@@ -1253,9 +1249,10 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, + pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); + } + } +- if (rte_flow_dynf_metadata_avail() && cqe->flow_table_metadata) { +- pkt->ol_flags |= PKT_RX_DYNF_METADATA; +- *RTE_FLOW_DYNF_METADATA(pkt) = cqe->flow_table_metadata; ++ if (rxq->dynf_meta && cqe->flow_table_metadata) { ++ pkt->ol_flags |= rxq->flow_meta_mask; ++ *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) = ++ cqe->flow_table_metadata; + } + if (rxq->csum) + pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); +@@ -1574,21 +1571,20 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) + unsigned int i = 0; + uint32_t rq_ci = rxq->rq_ci; + uint16_t consumed_strd = rxq->consumed_strd; +- uint16_t headroom_sz = rxq->strd_headroom_en * RTE_PKTMBUF_HEADROOM; + struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; + + while (i < pkts_n) { + struct rte_mbuf *pkt; + void *addr; + int ret; +- unsigned int len; ++ uint32_t len; + uint16_t strd_cnt; + uint16_t strd_idx; + uint32_t offset; + uint32_t byte_cnt; ++ int32_t hdrm_overlap; + volatile struct mlx5_mini_cqe8 *mcqe = NULL; + uint32_t rss_hash_res = 0; +- uint8_t lro_num_seg; + + if (consumed_strd == strd_n) { + /* Replace WQE only if the buffer is still in use. 
*/ +@@ -1634,18 +1630,6 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) + } + assert(strd_idx < strd_n); + assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask)); +- lro_num_seg = cqe->lro_num_seg; +- /* +- * Currently configured to receive a packet per a stride. But if +- * MTU is adjusted through kernel interface, device could +- * consume multiple strides without raising an error. In this +- * case, the packet should be dropped because it is bigger than +- * the max_rx_pkt_len. +- */ +- if (unlikely(!lro_num_seg && strd_cnt > 1)) { +- ++rxq->stats.idropped; +- continue; +- } + pkt = rte_pktmbuf_alloc(rxq->mp); + if (unlikely(pkt == NULL)) { + ++rxq->stats.rx_nombuf; +@@ -1657,23 +1641,57 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) + len -= RTE_ETHER_CRC_LEN; + offset = strd_idx * strd_sz + strd_shift; + addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset); ++ hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz; + /* + * Memcpy packets to the target mbuf if: + * - The size of packet is smaller than mprq_max_memcpy_len. + * - Out of buffer in the Mempool for Multi-Packet RQ. ++ * - The packet's stride overlaps a headroom and scatter is off. + */ +- if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) { +- /* +- * When memcpy'ing packet due to out-of-buffer, the +- * packet must be smaller than the target mbuf. +- */ +- if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) { ++ if (len <= rxq->mprq_max_memcpy_len || ++ rxq->mprq_repl == NULL || ++ (hdrm_overlap > 0 && !rxq->strd_scatter_en)) { ++ if (likely(rte_pktmbuf_tailroom(pkt) >= len)) { ++ rte_memcpy(rte_pktmbuf_mtod(pkt, void *), ++ addr, len); ++ DATA_LEN(pkt) = len; ++ } else if (rxq->strd_scatter_en) { ++ struct rte_mbuf *prev = pkt; ++ uint32_t seg_len = ++ RTE_MIN(rte_pktmbuf_tailroom(pkt), len); ++ uint32_t rem_len = len - seg_len; ++ ++ rte_memcpy(rte_pktmbuf_mtod(pkt, void *), ++ addr, seg_len); ++ DATA_LEN(pkt) = seg_len; ++ while (rem_len) { ++ struct rte_mbuf *next = ++ rte_pktmbuf_alloc(rxq->mp); ++ ++ if (unlikely(next == NULL)) { ++ rte_pktmbuf_free(pkt); ++ ++rxq->stats.rx_nombuf; ++ goto out; ++ } ++ NEXT(prev) = next; ++ SET_DATA_OFF(next, 0); ++ addr = RTE_PTR_ADD(addr, seg_len); ++ seg_len = RTE_MIN ++ (rte_pktmbuf_tailroom(next), ++ rem_len); ++ rte_memcpy ++ (rte_pktmbuf_mtod(next, void *), ++ addr, seg_len); ++ DATA_LEN(next) = seg_len; ++ rem_len -= seg_len; ++ prev = next; ++ ++NB_SEGS(pkt); ++ } ++ } else { + rte_pktmbuf_free_seg(pkt); + ++rxq->stats.idropped; + continue; + } +- rte_memcpy(rte_pktmbuf_mtod(pkt, void *), addr, len); +- DATA_LEN(pkt) = len; + } else { + rte_iova_t buf_iova; + struct rte_mbuf_ext_shared_info *shinfo; +@@ -1684,7 +1702,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) + rte_atomic16_add_return(&buf->refcnt, 1); + assert((uint16_t)rte_atomic16_read(&buf->refcnt) <= + strd_n + 1); +- buf_addr = RTE_PTR_SUB(addr, headroom_sz); ++ buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM); + /* + * MLX5 device doesn't use iova but it is necessary in a + * case where the Rx packet is transmitted via a +@@ -1703,43 +1721,42 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) + rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova, + buf_len, shinfo); + /* Set mbuf head-room. 
*/ +- pkt->data_off = headroom_sz; ++ SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM); + assert(pkt->ol_flags == EXT_ATTACHED_MBUF); +- /* +- * Prevent potential overflow due to MTU change through +- * kernel interface. +- */ +- if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) { +- rte_pktmbuf_free_seg(pkt); +- ++rxq->stats.idropped; +- continue; +- } ++ assert(rte_pktmbuf_tailroom(pkt) >= ++ len - (hdrm_overlap > 0 ? hdrm_overlap : 0)); + DATA_LEN(pkt) = len; + /* +- * LRO packet may consume all the stride memory, in this +- * case packet head-room space is not guaranteed so must +- * to add an empty mbuf for the head-room. ++ * Copy the last fragment of a packet (up to headroom ++ * size bytes) in case there is a stride overlap with ++ * a next packet's headroom. Allocate a separate mbuf ++ * to store this fragment and link it. Scatter is on. + */ +- if (!rxq->strd_headroom_en) { +- struct rte_mbuf *headroom_mbuf = +- rte_pktmbuf_alloc(rxq->mp); ++ if (hdrm_overlap > 0) { ++ assert(rxq->strd_scatter_en); ++ struct rte_mbuf *seg = ++ rte_pktmbuf_alloc(rxq->mp); + +- if (unlikely(headroom_mbuf == NULL)) { ++ if (unlikely(seg == NULL)) { + rte_pktmbuf_free_seg(pkt); + ++rxq->stats.rx_nombuf; + break; + } +- PORT(pkt) = rxq->port_id; +- NEXT(headroom_mbuf) = pkt; +- pkt = headroom_mbuf; ++ SET_DATA_OFF(seg, 0); ++ rte_memcpy(rte_pktmbuf_mtod(seg, void *), ++ RTE_PTR_ADD(addr, len - hdrm_overlap), ++ hdrm_overlap); ++ DATA_LEN(seg) = hdrm_overlap; ++ DATA_LEN(pkt) = len - hdrm_overlap; ++ NEXT(pkt) = seg; + NB_SEGS(pkt) = 2; + } + } + rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); +- if (lro_num_seg > 1) { ++ if (cqe->lro_num_seg > 1) { + mlx5_lro_update_hdr(addr, cqe, len); + pkt->ol_flags |= PKT_RX_LRO; +- pkt->tso_segsz = strd_sz; ++ pkt->tso_segsz = len / cqe->lro_num_seg; + } + PKT_LEN(pkt) = len; + PORT(pkt) = rxq->port_id; +@@ -1751,6 +1768,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) + *(pkts++) = pkt; + ++i; + } ++out: + /* Update the consumer indexes. */ + rxq->consumed_strd = consumed_strd; + rte_cio_wmb(); +@@ -2034,8 +2052,6 @@ mlx5_tx_copy_elts(struct mlx5_txq_data *restrict txq, + * Pointer to TX queue structure. + * @param valid CQE pointer + * if not NULL update txq->wqe_pi and flush the buffers +- * @param itail +- * if not negative - flush the buffers till this index. + * @param olx + * Configured Tx offloads mask. It is fully defined at + * compile time and may be used for optimization. 
+@@ -2043,25 +2059,17 @@ mlx5_tx_copy_elts(struct mlx5_txq_data *restrict txq, + static __rte_always_inline void + mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq, + volatile struct mlx5_cqe *last_cqe, +- int itail, + unsigned int olx __rte_unused) + { +- uint16_t tail; +- + if (likely(last_cqe != NULL)) { ++ uint16_t tail; ++ + txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter); +- tail = ((volatile struct mlx5_wqe_cseg *) +- (txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc; +- } else if (itail >= 0) { +- tail = (uint16_t)itail; +- } else { +- return; +- } +- rte_compiler_barrier(); +- *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); +- if (likely(tail != txq->elts_tail)) { +- mlx5_tx_free_elts(txq, tail, olx); +- assert(tail == txq->elts_tail); ++ tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m]; ++ if (likely(tail != txq->elts_tail)) { ++ mlx5_tx_free_elts(txq, tail, olx); ++ assert(tail == txq->elts_tail); ++ } + } + } + +@@ -2085,6 +2093,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, + { + unsigned int count = MLX5_TX_COMP_MAX_CQE; + volatile struct mlx5_cqe *last_cqe = NULL; ++ bool ring_doorbell = false; + int ret; + + static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); +@@ -2109,31 +2118,49 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, + rte_wmb(); + ret = mlx5_tx_error_cqe_handle + (txq, (volatile struct mlx5_err_cqe *)cqe); ++ if (unlikely(ret < 0)) { ++ /* ++ * Some error occurred on queue error ++ * handling, we do not advance the index ++ * here, allowing to retry on next call. ++ */ ++ return; ++ } + /* +- * Flush buffers, update consuming index +- * if recovery succeeded. Otherwise +- * just try to recover later. ++ * We are going to fetch all entries with ++ * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. ++ * The send queue is supposed to be empty. + */ ++ ring_doorbell = true; ++ ++txq->cq_ci; ++ txq->cq_pi = txq->cq_ci; + last_cqe = NULL; +- break; ++ continue; + } + /* Normal transmit completion. */ ++ assert(txq->cq_ci != txq->cq_pi); ++ assert((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == ++ cqe->wqe_counter); ++ ring_doorbell = true; + ++txq->cq_ci; + last_cqe = cqe; +-#ifndef NDEBUG +- if (txq->cq_pi) +- --txq->cq_pi; +-#endif +- /* +- * We have to restrict the amount of processed CQEs +- * in one tx_burst routine call. The CQ may be large +- * and many CQEs may be updated by the NIC in one +- * transaction. Buffers freeing is time consuming, +- * multiple iterations may introduce significant +- * latency. +- */ +- } while (--count); +- mlx5_tx_comp_flush(txq, last_cqe, ret, olx); ++ /* ++ * We have to restrict the amount of processed CQEs ++ * in one tx_burst routine call. The CQ may be large ++ * and many CQEs may be updated by the NIC in one ++ * transaction. Buffers freeing is time consuming, ++ * multiple iterations may introduce significant ++ * latency. ++ */ ++ if (likely(--count == 0)) ++ break; ++ } while (true); ++ if (likely(ring_doorbell)) { ++ /* Ring doorbell to notify hardware. */ ++ rte_compiler_barrier(); ++ *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); ++ mlx5_tx_comp_flush(txq, last_cqe, olx); ++ } + } + + /** +@@ -2145,9 +2172,6 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, + * Pointer to TX queue structure. + * @param loc + * Pointer to burst routine local context. +- * @param multi, +- * Routine is called from multi-segment sending loop, +- * do not correct the elts_head according to the pkts_copy. + * @param olx + * Configured Tx offloads mask. 
It is fully defined at + * compile time and may be used for optimization. +@@ -2155,13 +2179,12 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, + static __rte_always_inline void + mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq, + struct mlx5_txq_local *restrict loc, +- bool multi, + unsigned int olx) + { + uint16_t head = txq->elts_head; + unsigned int part; + +- part = (MLX5_TXOFF_CONFIG(INLINE) || multi) ? ++ part = MLX5_TXOFF_CONFIG(INLINE) ? + 0 : loc->pkts_sent - loc->pkts_copy; + head += part; + if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || +@@ -2175,15 +2198,15 @@ mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq, + /* Request unconditional completion on last WQE. */ + last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << + MLX5_COMP_MODE_OFFSET); +- /* Save elts_head in unused "immediate" field of WQE. */ +- last->cseg.misc = head; +- /* +- * A CQE slot must always be available. Count the +- * issued CEQ "always" request instead of production +- * index due to here can be CQE with errors and +- * difference with ci may become inconsistent. +- */ +- assert(txq->cqe_s > ++txq->cq_pi); ++ /* Save elts_head in dedicated free on completion queue. */ ++#ifdef NDEBUG ++ txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; ++#else ++ txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | ++ (last->cseg.opcode >> 8) << 16; ++#endif ++ /* A CQE slot must always be available. */ ++ assert((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); + } + } + +@@ -2818,8 +2841,14 @@ mlx5_tx_dseg_empw(struct mlx5_txq_data *restrict txq, + unsigned int part; + uint8_t *pdst; + +- dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); +- pdst = &dseg->inline_data[0]; ++ if (!MLX5_TXOFF_CONFIG(MPW)) { ++ /* Store the descriptor byte counter for eMPW sessions. */ ++ dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); ++ pdst = &dseg->inline_data[0]; ++ } else { ++ /* The entire legacy MPW session counter is stored on close. */ ++ pdst = (uint8_t *)dseg; ++ } + /* + * The WQEBB space availability is checked by caller. + * Here we should be aware of WQE ring buffer wraparound only. +@@ -2831,7 +2860,8 @@ mlx5_tx_dseg_empw(struct mlx5_txq_data *restrict txq, + len -= part; + if (likely(!len)) { + pdst += part; +- pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); ++ if (!MLX5_TXOFF_CONFIG(MPW)) ++ pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); + /* Note: no final wraparound check here. */ + return (struct mlx5_wqe_dseg *)pdst; + } +@@ -2879,9 +2909,16 @@ mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, + static_assert(MLX5_DSEG_MIN_INLINE_SIZE == + (2 * RTE_ETHER_ADDR_LEN), + "invalid Data Segment data size"); +- dseg->bcount = rte_cpu_to_be_32((len + sizeof(struct rte_vlan_hdr)) | +- MLX5_ETH_WQE_DATA_INLINE); +- pdst = &dseg->inline_data[0]; ++ if (!MLX5_TXOFF_CONFIG(MPW)) { ++ /* Store the descriptor byte counter for eMPW sessions. */ ++ dseg->bcount = rte_cpu_to_be_32 ++ ((len + sizeof(struct rte_vlan_hdr)) | ++ MLX5_ETH_WQE_DATA_INLINE); ++ pdst = &dseg->inline_data[0]; ++ } else { ++ /* The entire legacy MPW session counter is stored on close. 
*/ ++ pdst = (uint8_t *)dseg; ++ } + memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); + buf += MLX5_DSEG_MIN_INLINE_SIZE; + pdst += MLX5_DSEG_MIN_INLINE_SIZE; +@@ -2904,7 +2941,8 @@ mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, + len -= part; + if (likely(!len)) { + pdst += part; +- pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); ++ if (!MLX5_TXOFF_CONFIG(MPW)) ++ pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); + /* Note: no final wraparound check here. */ + return (struct mlx5_wqe_dseg *)pdst; + } +@@ -3120,8 +3158,6 @@ mlx5_tx_packet_multi_tso(struct mlx5_txq_data *restrict txq, + wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); + txq->wqe_ci += (ds + 3) / 4; + loc->wqe_free -= (ds + 3) / 4; +- /* Request CQE generation if limits are reached. */ +- mlx5_tx_request_completion(txq, loc, true, olx); + return MLX5_TXCMP_CODE_MULTI; + } + +@@ -3230,8 +3266,6 @@ mlx5_tx_packet_multi_send(struct mlx5_txq_data *restrict txq, + } while (true); + txq->wqe_ci += (ds + 3) / 4; + loc->wqe_free -= (ds + 3) / 4; +- /* Request CQE generation if limits are reached. */ +- mlx5_tx_request_completion(txq, loc, true, olx); + return MLX5_TXCMP_CODE_MULTI; + } + +@@ -3388,8 +3422,6 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq, + wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); + txq->wqe_ci += (ds + 3) / 4; + loc->wqe_free -= (ds + 3) / 4; +- /* Request CQE generation if limits are reached. */ +- mlx5_tx_request_completion(txq, loc, true, olx); + return MLX5_TXCMP_CODE_MULTI; + } + +@@ -3599,8 +3631,6 @@ mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, + --loc->elts_free; + ++loc->pkts_sent; + --pkts_n; +- /* Request CQE generation if limits are reached. */ +- mlx5_tx_request_completion(txq, loc, false, olx); + if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) + return MLX5_TXCMP_CODE_EXIT; + loc->mbuf = *pkts++; +@@ -3750,7 +3780,7 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq, + struct mlx5_txq_local *restrict loc, + unsigned int ds, + unsigned int slen, +- unsigned int olx) ++ unsigned int olx __rte_unused) + { + assert(!MLX5_TXOFF_CONFIG(INLINE)); + #ifdef MLX5_PMD_SOFT_COUNTERS +@@ -3765,8 +3795,6 @@ mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq, + loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); + txq->wqe_ci += (ds + 3) / 4; + loc->wqe_free -= (ds + 3) / 4; +- /* Request CQE generation if limits are reached. */ +- mlx5_tx_request_completion(txq, loc, false, olx); + } + + /* +@@ -3797,20 +3825,36 @@ mlx5_tx_idone_empw(struct mlx5_txq_data *restrict txq, + unsigned int slen, + unsigned int olx __rte_unused) + { ++ struct mlx5_wqe_dseg *dseg = &loc->wqe_last->dseg[0]; ++ + assert(MLX5_TXOFF_CONFIG(INLINE)); +- assert((len % MLX5_WSEG_SIZE) == 0); + #ifdef MLX5_PMD_SOFT_COUNTERS + /* Update sent data bytes counter. */ + txq->stats.obytes += slen; + #else + (void)slen; + #endif +- len = len / MLX5_WSEG_SIZE + 2; ++ if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { ++ /* ++ * If the legacy MPW session contains the inline packets ++ * we should set the only inline data segment length ++ * and align the total length to the segment size. ++ */ ++ assert(len > sizeof(dseg->bcount)); ++ dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | ++ MLX5_ETH_WQE_DATA_INLINE); ++ len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; ++ } else { ++ /* ++ * The session is not legacy MPW or contains the ++ * data buffer pointer segments. 
++ */ ++ assert((len % MLX5_WSEG_SIZE) == 0); ++ len = len / MLX5_WSEG_SIZE + 2; ++ } + loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); + txq->wqe_ci += (len + 3) / 4; + loc->wqe_free -= (len + 3) / 4; +- /* Request CQE generation if limits are reached. */ +- mlx5_tx_request_completion(txq, loc, false, olx); + } + + /** +@@ -4011,8 +4055,6 @@ mlx5_tx_burst_empw_simple(struct mlx5_txq_data *restrict txq, + txq->wqe_ci += (2 + part + 3) / 4; + loc->wqe_free -= (2 + part + 3) / 4; + pkts_n -= part; +- /* Request CQE generation if limits are reached. */ +- mlx5_tx_request_completion(txq, loc, false, olx); + if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) + return MLX5_TXCMP_CODE_EXIT; + loc->mbuf = *pkts++; +@@ -4088,6 +4130,15 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, + loc->wqe_free) * MLX5_WQE_SIZE - + MLX5_WQE_CSEG_SIZE - + MLX5_WQE_ESEG_SIZE; ++ /* Limit the room for legacy MPW sessions for performance. */ ++ if (MLX5_TXOFF_CONFIG(MPW)) ++ room = RTE_MIN(room, ++ RTE_MAX(txq->inlen_empw + ++ sizeof(dseg->bcount) + ++ (MLX5_TXOFF_CONFIG(VLAN) ? ++ sizeof(struct rte_vlan_hdr) : 0), ++ MLX5_MPW_INLINE_MAX_PACKETS * ++ MLX5_WQE_DSEG_SIZE)); + /* Build WQE till we have space, packets and resources. */ + part = room; + for (;;) { +@@ -4117,8 +4168,28 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, + /* Inline or not inline - that's the Question. */ + if (dlen > txq->inlen_empw) + goto pointer_empw; ++ if (MLX5_TXOFF_CONFIG(MPW)) { ++ if (dlen > txq->inlen_send) ++ goto pointer_empw; ++ tlen = dlen; ++ if (part == room) { ++ /* Open new inline MPW session. */ ++ tlen += sizeof(dseg->bcount); ++ dseg->bcount = RTE_BE32(0); ++ dseg = RTE_PTR_ADD ++ (dseg, sizeof(dseg->bcount)); ++ } else { ++ /* ++ * No pointer and inline descriptor ++ * intermix for legacy MPW sessions. ++ */ ++ if (loc->wqe_last->dseg[0].bcount) ++ break; ++ } ++ } else { ++ tlen = sizeof(dseg->bcount) + dlen; ++ } + /* Inline entire packet, optional VLAN insertion. */ +- tlen = sizeof(dseg->bcount) + dlen; + if (MLX5_TXOFF_CONFIG(VLAN) && + loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { + /* +@@ -4143,7 +4214,8 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, + dseg = mlx5_tx_dseg_empw(txq, loc, dseg, + dptr, dlen, olx); + } +- tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); ++ if (!MLX5_TXOFF_CONFIG(MPW)) ++ tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); + assert(room >= tlen); + room -= tlen; + /* +@@ -4153,6 +4225,14 @@ mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, + rte_pktmbuf_free_seg(loc->mbuf); + goto next_mbuf; + pointer_empw: ++ /* ++ * No pointer and inline descriptor ++ * intermix for legacy MPW sessions. ++ */ ++ if (MLX5_TXOFF_CONFIG(MPW) && ++ part != room && ++ loc->wqe_last->dseg[0].bcount == RTE_BE32(0)) ++ break; + /* + * Not inlinable VLAN packets are + * proceeded outside of this routine. +@@ -4496,8 +4576,6 @@ mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, + } + ++loc->pkts_sent; + --pkts_n; +- /* Request CQE generation if limits are reached. */ +- mlx5_tx_request_completion(txq, loc, false, olx); + if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) + return MLX5_TXCMP_CODE_EXIT; + loc->mbuf = *pkts++; +@@ -4596,7 +4674,7 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, + /* + * Calculate the number of available resources - elts and WQEs. 
+ * There are two possible different scenarios: +- * - no data inlining into WQEs, one WQEBB may contains upto ++ * - no data inlining into WQEs, one WQEBB may contains up to + * four packets, in this case elts become scarce resource + * - data inlining into WQEs, one packet may require multiple + * WQEBBs, the WQEs become the limiting factor. +@@ -4776,6 +4854,8 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, + /* Take a shortcut if nothing is sent. */ + if (unlikely(loc.pkts_sent == loc.pkts_loop)) + goto burst_exit; ++ /* Request CQE generation if limits are reached. */ ++ mlx5_tx_request_completion(txq, &loc, olx); + /* + * Ring QP doorbell immediately after WQE building completion + * to improve latencies. The pure software related data treatment +@@ -4977,7 +5057,7 @@ MLX5_TXOFF_DECL(iv, + + /* + * Generate routines with Legacy Multi-Packet Write support. +- * This mode is supported by ConnectX-4LX only and imposes ++ * This mode is supported by ConnectX-4 Lx only and imposes + * offload limitations, not supported: + * - ACL/Flows (metadata are becoming meaningless) + * - WQE Inline headers +@@ -4995,6 +5075,10 @@ MLX5_TXOFF_DECL(mci_mpw, + MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | + MLX5_TXOFF_CONFIG_MPW) + ++MLX5_TXOFF_DECL(mc_mpw, ++ MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | ++ MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW) ++ + MLX5_TXOFF_DECL(i_mpw, + MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | + MLX5_TXOFF_CONFIG_MPW) +@@ -5151,6 +5235,10 @@ MLX5_TXOFF_INFO(mci_mpw, + MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | + MLX5_TXOFF_CONFIG_MPW) + ++MLX5_TXOFF_INFO(mc_mpw, ++ MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | ++ MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW) ++ + MLX5_TXOFF_INFO(i_mpw, + MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | + MLX5_TXOFF_CONFIG_MPW) +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx.h b/dpdk/drivers/net/mlx5/mlx5_rxtx.h +index e927343f7d..a50f057c1e 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx.h ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx.h +@@ -114,9 +114,9 @@ struct mlx5_rxq_data { + unsigned int strd_sz_n:4; /* Log 2 of stride size. */ + unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */ + unsigned int err_state:2; /* enum mlx5_rxq_err_state. */ +- unsigned int strd_headroom_en:1; /* Enable mbuf headroom in MPRQ. */ ++ unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */ + unsigned int lro:1; /* Enable LRO. */ +- unsigned int :1; /* Remaining bits. */ ++ unsigned int dynf_meta:1; /* Dynamic metadata is configured. */ + volatile uint32_t *rq_db; + volatile uint32_t *cq_db; + uint16_t port_id; +@@ -154,6 +154,8 @@ struct mlx5_rxq_data { + /* CQ (UAR) access lock required for 32bit implementations */ + #endif + uint32_t tunnel; /* Tunnel information. */ ++ uint64_t flow_meta_mask; ++ int32_t flow_meta_offset; + } __rte_cache_aligned; + + enum mlx5_rxq_obj_type { +@@ -273,9 +275,7 @@ struct mlx5_txq_data { + uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */ + /* WQ related fields. */ + uint16_t cq_ci; /* Consumer index for completion queue. */ +-#ifndef NDEBUG +- uint16_t cq_pi; /* Counter of issued CQE "always" requests. */ +-#endif ++ uint16_t cq_pi; /* Production index for completion queue. */ + uint16_t cqe_s; /* Number of CQ elements. */ + uint16_t cqe_m; /* Mask for CQ indices. */ + /* CQ related fields. */ +@@ -297,6 +297,11 @@ struct mlx5_txq_data { + struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. 
*/ + struct mlx5_wqe *wqes; /* Work queue. */ + struct mlx5_wqe *wqes_end; /* Work queue array limit. */ ++#ifdef NDEBUG ++ uint16_t *fcqs; /* Free completion queue. */ ++#else ++ uint32_t *fcqs; /* Free completion queue (debug extended). */ ++#endif + volatile struct mlx5_cqe *cqes; /* Completion queue. */ + volatile uint32_t *qp_db; /* Work queue doorbell. */ + volatile uint32_t *cq_db; /* Completion queue doorbell. */ +@@ -440,6 +445,7 @@ int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); + int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); + int mlx5_txq_verify(struct rte_eth_dev *dev); + void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); ++void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl); + uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); + + /* mlx5_rxtx.c */ +@@ -451,9 +457,6 @@ extern uint8_t mlx5_swp_types_table[]; + void mlx5_set_ptype_table(void); + void mlx5_set_cksum_table(void); + void mlx5_set_swp_types_table(void); +-__rte_noinline int mlx5_tx_error_cqe_handle +- (struct mlx5_txq_data *restrict txq, +- volatile struct mlx5_err_cqe *err_cqe); + uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n); + void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq); + __rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec); +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h +index 8e79883dfe..feb17fe1ce 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h +@@ -11,7 +11,7 @@ + #include <string.h> + #include <stdlib.h> + +-#include <altivec.h> ++#include <rte_altivec.h> + + #include <rte_mbuf.h> + #include <rte_mempool.h> +@@ -263,6 +263,25 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + elts[pos + 2]->hash.fdir.hi = flow_tag; + elts[pos + 3]->hash.fdir.hi = flow_tag; + } ++ if (rxq->dynf_meta) { ++ int32_t offs = rxq->flow_meta_offset; ++ const uint32_t meta = ++ *RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *); ++ ++ /* Check if title packet has valid metadata. */ ++ if (meta) { ++ assert(t_pkt->ol_flags & ++ rxq->flow_meta_mask); ++ *RTE_MBUF_DYNFIELD(elts[pos], offs, ++ uint32_t *) = meta; ++ *RTE_MBUF_DYNFIELD(elts[pos + 1], offs, ++ uint32_t *) = meta; ++ *RTE_MBUF_DYNFIELD(elts[pos + 2], offs, ++ uint32_t *) = meta; ++ *RTE_MBUF_DYNFIELD(elts[pos + 3], offs, ++ uint32_t *) = meta; ++ } ++ } + + pos += MLX5_VPMD_DESCS_PER_LOOP; + /* Move to next CQE and invalidate consumed CQEs. */ +@@ -1010,9 +1029,9 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, + pkts[pos + 3]->timestamp = + rte_be_to_cpu_64(cq[pos + p3].timestamp); + } +- if (rte_flow_dynf_metadata_avail()) { +- uint64_t flag = rte_flow_dynf_metadata_mask; +- int offs = rte_flow_dynf_metadata_offs; ++ if (rxq->dynf_meta) { ++ uint64_t flag = rxq->flow_meta_mask; ++ int32_t offs = rxq->flow_meta_offset; + uint32_t metadata; + + /* This code is subject for futher optimization. 
*/ +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +index 86785c7496..f92ece4299 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_neon.h ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +@@ -205,6 +205,25 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + elts[pos + 2]->hash.fdir.hi = flow_tag; + elts[pos + 3]->hash.fdir.hi = flow_tag; + } ++ if (rxq->dynf_meta) { ++ int32_t offs = rxq->flow_meta_offset; ++ const uint32_t meta = ++ *RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *); ++ ++ /* Check if title packet has valid metadata. */ ++ if (meta) { ++ assert(t_pkt->ol_flags & ++ rxq->flow_meta_mask); ++ *RTE_MBUF_DYNFIELD(elts[pos], offs, ++ uint32_t *) = meta; ++ *RTE_MBUF_DYNFIELD(elts[pos + 1], offs, ++ uint32_t *) = meta; ++ *RTE_MBUF_DYNFIELD(elts[pos + 2], offs, ++ uint32_t *) = meta; ++ *RTE_MBUF_DYNFIELD(elts[pos + 3], offs, ++ uint32_t *) = meta; ++ } ++ } + pos += MLX5_VPMD_DESCS_PER_LOOP; + /* Move to next CQE and invalidate consumed CQEs. */ + if (!(pos & 0x7) && pos < mcqe_n) { +@@ -687,28 +706,30 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, + container_of(p3, struct mlx5_cqe, + pkt_info)->timestamp); + } +- if (rte_flow_dynf_metadata_avail()) { ++ if (!!rxq->flow_meta_mask) { + /* This code is subject for futher optimization. */ +- *RTE_FLOW_DYNF_METADATA(elts[pos]) = ++ int32_t offs = rxq->flow_meta_offset; ++ ++ *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = + container_of(p0, struct mlx5_cqe, + pkt_info)->flow_table_metadata; +- *RTE_FLOW_DYNF_METADATA(elts[pos + 1]) = ++ *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = + container_of(p1, struct mlx5_cqe, + pkt_info)->flow_table_metadata; +- *RTE_FLOW_DYNF_METADATA(elts[pos + 2]) = ++ *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = + container_of(p2, struct mlx5_cqe, + pkt_info)->flow_table_metadata; +- *RTE_FLOW_DYNF_METADATA(elts[pos + 3]) = ++ *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = + container_of(p3, struct mlx5_cqe, + pkt_info)->flow_table_metadata; +- if (*RTE_FLOW_DYNF_METADATA(elts[pos])) +- elts[pos]->ol_flags |= PKT_RX_DYNF_METADATA; +- if (*RTE_FLOW_DYNF_METADATA(elts[pos + 1])) +- elts[pos + 1]->ol_flags |= PKT_RX_DYNF_METADATA; +- if (*RTE_FLOW_DYNF_METADATA(elts[pos + 2])) +- elts[pos + 2]->ol_flags |= PKT_RX_DYNF_METADATA; +- if (*RTE_FLOW_DYNF_METADATA(elts[pos + 3])) +- elts[pos + 3]->ol_flags |= PKT_RX_DYNF_METADATA; ++ if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *)) ++ elts[pos]->ol_flags |= rxq->flow_meta_mask; ++ if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *)) ++ elts[pos + 1]->ol_flags |= rxq->flow_meta_mask; ++ if (*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *)) ++ elts[pos + 2]->ol_flags |= rxq->flow_meta_mask; ++ if (*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *)) ++ elts[pos + 3]->ol_flags |= rxq->flow_meta_mask; + } + #ifdef MLX5_PMD_SOFT_COUNTERS + /* Add up received bytes count. */ +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +index 35b7761007..bb59163a26 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +@@ -118,7 +118,6 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + 14, 15, 6, 7, + 10, 11, 2, 3); + #endif +- + /* + * A. load mCQEs into a 128bit register. + * B. store rearm data to mbuf. 
+@@ -191,6 +190,25 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + elts[pos + 2]->hash.fdir.hi = flow_tag; + elts[pos + 3]->hash.fdir.hi = flow_tag; + } ++ if (rxq->dynf_meta) { ++ int32_t offs = rxq->flow_meta_offset; ++ const uint32_t meta = ++ *RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *); ++ ++ /* Check if title packet has valid metadata. */ ++ if (meta) { ++ assert(t_pkt->ol_flags & ++ rxq->flow_meta_mask); ++ *RTE_MBUF_DYNFIELD(elts[pos], offs, ++ uint32_t *) = meta; ++ *RTE_MBUF_DYNFIELD(elts[pos + 1], offs, ++ uint32_t *) = meta; ++ *RTE_MBUF_DYNFIELD(elts[pos + 2], offs, ++ uint32_t *) = meta; ++ *RTE_MBUF_DYNFIELD(elts[pos + 3], offs, ++ uint32_t *) = meta; ++ } ++ } + pos += MLX5_VPMD_DESCS_PER_LOOP; + /* Move to next CQE and invalidate consumed CQEs. */ + if (!(pos & 0x7) && pos < mcqe_n) { +@@ -640,24 +658,26 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, + pkts[pos + 3]->timestamp = + rte_be_to_cpu_64(cq[pos + p3].timestamp); + } +- if (rte_flow_dynf_metadata_avail()) { ++ if (rxq->dynf_meta) { + /* This code is subject for futher optimization. */ +- *RTE_FLOW_DYNF_METADATA(pkts[pos]) = ++ int32_t offs = rxq->flow_meta_offset; ++ ++ *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = + cq[pos].flow_table_metadata; +- *RTE_FLOW_DYNF_METADATA(pkts[pos + 1]) = ++ *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) = + cq[pos + p1].flow_table_metadata; +- *RTE_FLOW_DYNF_METADATA(pkts[pos + 2]) = ++ *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) = + cq[pos + p2].flow_table_metadata; +- *RTE_FLOW_DYNF_METADATA(pkts[pos + 3]) = ++ *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) = + cq[pos + p3].flow_table_metadata; +- if (*RTE_FLOW_DYNF_METADATA(pkts[pos])) +- pkts[pos]->ol_flags |= PKT_RX_DYNF_METADATA; +- if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 1])) +- pkts[pos + 1]->ol_flags |= PKT_RX_DYNF_METADATA; +- if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 2])) +- pkts[pos + 2]->ol_flags |= PKT_RX_DYNF_METADATA; +- if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 3])) +- pkts[pos + 3]->ol_flags |= PKT_RX_DYNF_METADATA; ++ if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *)) ++ pkts[pos]->ol_flags |= rxq->flow_meta_mask; ++ if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *)) ++ pkts[pos + 1]->ol_flags |= rxq->flow_meta_mask; ++ if (*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *)) ++ pkts[pos + 2]->ol_flags |= rxq->flow_meta_mask; ++ if (*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *)) ++ pkts[pos + 3]->ol_flags |= rxq->flow_meta_mask; + } + #ifdef MLX5_PMD_SOFT_COUNTERS + /* Add up received bytes count. 
*/ +diff --git a/dpdk/drivers/net/mlx5/mlx5_stats.c b/dpdk/drivers/net/mlx5/mlx5_stats.c +index 205e4fec78..636fc80c7c 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_stats.c ++++ b/dpdk/drivers/net/mlx5/mlx5_stats.c +@@ -3,11 +3,13 @@ + * Copyright 2015 Mellanox Technologies, Ltd + */ + ++#include <fcntl.h> + #include <inttypes.h> + #include <linux/sockios.h> + #include <linux/ethtool.h> + #include <stdint.h> + #include <stdio.h> ++#include <unistd.h> + + #include <rte_ethdev_driver.h> + #include <rte_common.h> +@@ -136,26 +138,30 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = { + + static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init); + +-static inline void ++static inline int + mlx5_read_ib_stat(struct mlx5_priv *priv, const char *ctr_name, uint64_t *stat) + { +- FILE *file; ++ int fd; ++ + if (priv->sh) { + MKSTR(path, "%s/ports/%d/hw_counters/%s", + priv->sh->ibdev_path, + priv->ibv_port, + ctr_name); +- +- file = fopen(path, "rb"); +- if (file) { +- int n = fscanf(file, "%" SCNu64, stat); +- +- fclose(file); +- if (n == 1) +- return; ++ fd = open(path, O_RDONLY); ++ if (fd != -1) { ++ char buf[21] = {'\0'}; ++ ssize_t n = read(fd, buf, sizeof(buf)); ++ ++ close(fd); ++ if (n != -1) { ++ *stat = strtoull(buf, NULL, 10); ++ return 0; ++ } + } + } + *stat = 0; ++ return 1; + } + + /** +@@ -194,8 +200,14 @@ mlx5_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats) + } + for (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) { + if (xstats_ctrl->info[i].ib) { +- mlx5_read_ib_stat(priv, xstats_ctrl->info[i].ctr_name, +- &stats[i]); ++ ret = mlx5_read_ib_stat(priv, ++ xstats_ctrl->info[i].ctr_name, ++ &stats[i]); ++ /* return last xstats counter if fail to read. */ ++ if (ret == 0) ++ xstats_ctrl->xstats[i] = stats[i]; ++ else ++ stats[i] = xstats_ctrl->xstats[i]; + } else { + stats[i] = (uint64_t) + et_stats->data[xstats_ctrl->dev_table_idx[i]]; +@@ -301,6 +313,7 @@ mlx5_stats_init(struct rte_eth_dev *dev) + unsigned int idx = xstats_ctrl->mlx5_stats_n++; + + xstats_ctrl->info[idx] = mlx5_counters_init[i]; ++ xstats_ctrl->hw_stats[idx] = 0; + } + } + assert(xstats_ctrl->mlx5_stats_n <= MLX5_MAX_XSTATS); +@@ -311,6 +324,7 @@ mlx5_stats_init(struct rte_eth_dev *dev) + DRV_LOG(ERR, "port %u cannot read device counters: %s", + dev->data->port_id, strerror(rte_errno)); + mlx5_read_ib_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base); ++ stats_ctrl->imissed = 0; + free: + rte_free(strings); + } +@@ -353,7 +367,23 @@ mlx5_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats, + return ret; + for (i = 0; i != mlx5_stats_n; ++i) { + stats[i].id = i; +- stats[i].value = (counters[i] - xstats_ctrl->base[i]); ++ if (xstats_ctrl->info[i].ib) { ++ uint64_t wrap_n; ++ uint64_t hw_stat = xstats_ctrl->hw_stats[i]; ++ ++ stats[i].value = (counters[i] - ++ xstats_ctrl->base[i]) & ++ (uint64_t)UINT32_MAX; ++ wrap_n = hw_stat >> 32; ++ if (stats[i].value < ++ (hw_stat & (uint64_t)UINT32_MAX)) ++ wrap_n++; ++ stats[i].value |= (wrap_n) << 32; ++ xstats_ctrl->hw_stats[i] = stats[i].value; ++ } else { ++ stats[i].value = ++ (counters[i] - xstats_ctrl->base[i]); ++ } + } + } + return mlx5_stats_n; +@@ -375,9 +405,12 @@ int + mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + { + struct mlx5_priv *priv = dev->data->dev_private; ++ struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl; + struct rte_eth_stats tmp; + unsigned int i; + unsigned int idx; ++ uint64_t wrap_n; ++ int ret; + + memset(&tmp, 0, sizeof(tmp)); + /* Add software counters. 
*/ +@@ -420,8 +453,18 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + #endif + tmp.oerrors += txq->stats.oerrors; + } +- mlx5_read_ib_stat(priv, "out_of_buffer", &tmp.imissed); +- tmp.imissed -= priv->stats_ctrl.imissed_base; ++ ret = mlx5_read_ib_stat(priv, "out_of_buffer", &tmp.imissed); ++ if (ret == 0) { ++ tmp.imissed = (tmp.imissed - stats_ctrl->imissed_base) & ++ (uint64_t)UINT32_MAX; ++ wrap_n = stats_ctrl->imissed >> 32; ++ if (tmp.imissed < (stats_ctrl->imissed & (uint64_t)UINT32_MAX)) ++ wrap_n++; ++ tmp.imissed |= (wrap_n) << 32; ++ stats_ctrl->imissed = tmp.imissed; ++ } else { ++ tmp.imissed = stats_ctrl->imissed; ++ } + #ifndef MLX5_PMD_SOFT_COUNTERS + /* FIXME: retrieve and add hardware counters. */ + #endif +@@ -458,6 +501,7 @@ mlx5_stats_reset(struct rte_eth_dev *dev) + sizeof(struct mlx5_txq_stats)); + } + mlx5_read_ib_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base); ++ stats_ctrl->imissed = 0; + #ifndef MLX5_PMD_SOFT_COUNTERS + /* FIXME: reset hardware counters. */ + #endif +@@ -500,8 +544,10 @@ mlx5_xstats_reset(struct rte_eth_dev *dev) + dev->data->port_id, strerror(rte_errno)); + return ret; + } +- for (i = 0; i != n; ++i) ++ for (i = 0; i != n; ++i) { + xstats_ctrl->base[i] = counters[i]; ++ xstats_ctrl->hw_stats[i] = 0; ++ } + + return 0; + } +diff --git a/dpdk/drivers/net/mlx5/mlx5_trigger.c b/dpdk/drivers/net/mlx5/mlx5_trigger.c +index cafab25c67..6fc4190f4e 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_trigger.c ++++ b/dpdk/drivers/net/mlx5/mlx5_trigger.c +@@ -106,9 +106,12 @@ mlx5_rxq_start(struct rte_eth_dev *dev) + unsigned int i; + int ret = 0; + enum mlx5_rxq_obj_type obj_type = MLX5_RXQ_OBJ_TYPE_IBV; ++ struct mlx5_rxq_data *rxq = NULL; + + for (i = 0; i < priv->rxqs_n; ++i) { +- if ((*priv->rxqs)[i]->lro) { ++ rxq = (*priv->rxqs)[i]; ++ ++ if (rxq && rxq->lro) { + obj_type = MLX5_RXQ_OBJ_TYPE_DEVX_RQ; + break; + } +@@ -269,11 +272,13 @@ mlx5_dev_start(struct rte_eth_dev *dev) + int ret; + + DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id); +- ret = mlx5_dev_configure_rss_reta(dev); +- if (ret) { +- DRV_LOG(ERR, "port %u reta config failed: %s", +- dev->data->port_id, strerror(rte_errno)); +- return -rte_errno; ++ if (dev->data->nb_rx_queues > 0) { ++ ret = mlx5_dev_configure_rss_reta(dev); ++ if (ret) { ++ DRV_LOG(ERR, "port %u reta config failed: %s", ++ dev->data->port_id, strerror(rte_errno)); ++ return -rte_errno; ++ } + } + ret = mlx5_txq_start(dev); + if (ret) { +@@ -309,6 +314,8 @@ mlx5_dev_start(struct rte_eth_dev *dev) + dev->data->port_id); + goto error; + } ++ /* Set a mask and offset of dynamic metadata flows into Rx queues*/ ++ mlx5_flow_rxq_dynf_metadata_set(dev); + ret = mlx5_flow_start(dev, &priv->flows); + if (ret) { + DRV_LOG(DEBUG, "port %u failed to set flows", +@@ -420,9 +427,14 @@ mlx5_traffic_enable(struct rte_eth_dev *dev) + } + mlx5_txq_release(dev, i); + } +- if (priv->config.dv_esw_en && !priv->config.vf) +- if (!mlx5_flow_create_esw_table_zero_flow(dev)) +- goto error; ++ if (priv->config.dv_esw_en && !priv->config.vf) { ++ if (mlx5_flow_create_esw_table_zero_flow(dev)) ++ priv->fdb_def_rule = 1; ++ else ++ DRV_LOG(INFO, "port %u FDB default rule cannot be" ++ " configured - only Eswitch group 0 flows are" ++ " supported.", dev->data->port_id); ++ } + if (priv->isolated) + return 0; + if (dev->data->promiscuous) { +diff --git a/dpdk/drivers/net/mlx5/mlx5_txq.c b/dpdk/drivers/net/mlx5/mlx5_txq.c +index bac4f71c24..c7751e83c0 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_txq.c ++++ 
b/dpdk/drivers/net/mlx5/mlx5_txq.c +@@ -62,7 +62,7 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl) + * @param txq_ctrl + * Pointer to TX queue structure. + */ +-static void ++void + txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl) + { + const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n; +@@ -272,7 +272,6 @@ mlx5_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx, + DRV_LOG(DEBUG, "port %u adding Tx queue %u to list", + dev->data->port_id, idx); + (*priv->txqs)[idx] = &txq_ctrl->txq; +- txq_ctrl->type = MLX5_TXQ_TYPE_HAIRPIN; + return 0; + } + +@@ -296,9 +295,9 @@ mlx5_tx_queue_release(void *dpdk_txq) + priv = txq_ctrl->priv; + for (i = 0; (i != priv->txqs_n); ++i) + if ((*priv->txqs)[i] == txq) { +- mlx5_txq_release(ETH_DEV(priv), i); + DRV_LOG(DEBUG, "port %u removing Tx queue %u from list", + PORT_ID(priv), txq->idx); ++ mlx5_txq_release(ETH_DEV(priv), i); + break; + } + } +@@ -315,7 +314,7 @@ static void + txq_uar_ncattr_init(struct mlx5_txq_ctrl *txq_ctrl, size_t page_size) + { + struct mlx5_priv *priv = txq_ctrl->priv; +- unsigned int cmd; ++ off_t cmd; + + txq_ctrl->txq.db_heu = priv->config.dbnc == MLX5_TXDB_HEURISTIC; + txq_ctrl->txq.db_nc = 0; +@@ -492,6 +491,7 @@ mlx5_txq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx) + struct mlx5_devx_create_sq_attr attr = { 0 }; + struct mlx5_txq_obj *tmpl = NULL; + int ret = 0; ++ uint32_t max_wq_data; + + assert(txq_data); + assert(!txq_ctrl->obj); +@@ -508,11 +508,15 @@ mlx5_txq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx) + tmpl->txq_ctrl = txq_ctrl; + attr.hairpin = 1; + attr.tis_lst_sz = 1; +- /* Workaround for hairpin startup */ +- attr.wq_attr.log_hairpin_num_packets = log2above(32); +- /* Workaround for packets larger than 1KB */ ++ max_wq_data = priv->config.hca_attr.log_max_hairpin_wq_data_sz; ++ /* Jumbo frames > 9KB should be supported, and more packets. */ + attr.wq_attr.log_hairpin_data_sz = +- priv->config.hca_attr.log_max_hairpin_wq_data_sz; ++ (max_wq_data < MLX5_HAIRPIN_JUMBO_LOG_SIZE) ? ++ max_wq_data : MLX5_HAIRPIN_JUMBO_LOG_SIZE; ++ /* Set the packets number to the maximum value for performance. */ ++ attr.wq_attr.log_hairpin_num_packets = ++ attr.wq_attr.log_hairpin_data_sz - ++ MLX5_HAIRPIN_QUEUE_STRIDE; + attr.tis_num = priv->sh->tis->id; + tmpl->sq = mlx5_devx_cmd_create_sq(priv->sh->ctx, &attr); + if (!tmpl->sq) { +@@ -718,13 +722,22 @@ mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx, + txq_data->cq_db = cq_info.dbrec; + txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf; + txq_data->cq_ci = 0; +-#ifndef NDEBUG + txq_data->cq_pi = 0; +-#endif + txq_data->wqe_ci = 0; + txq_data->wqe_pi = 0; + txq_data->wqe_comp = 0; + txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV; ++ txq_data->fcqs = rte_calloc_socket(__func__, ++ txq_data->cqe_s, ++ sizeof(*txq_data->fcqs), ++ RTE_CACHE_LINE_SIZE, ++ txq_ctrl->socket); ++ if (!txq_data->fcqs) { ++ DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory (FCQ)", ++ dev->data->port_id, idx); ++ rte_errno = ENOMEM; ++ goto error; ++ } + #ifdef HAVE_IBV_FLOW_DV_SUPPORT + /* + * If using DevX need to query and store TIS transport domain value. 
+@@ -773,6 +786,8 @@ mlx5_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx, + claim_zero(mlx5_glue->destroy_cq(tmpl.cq)); + if (tmpl.qp) + claim_zero(mlx5_glue->destroy_qp(tmpl.qp)); ++ if (txq_data && txq_data->fcqs) ++ rte_free(txq_data->fcqs); + if (txq_obj) + rte_free(txq_obj); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; +@@ -827,6 +842,8 @@ mlx5_txq_obj_release(struct mlx5_txq_obj *txq_obj) + } else { + claim_zero(mlx5_glue->destroy_qp(txq_obj->qp)); + claim_zero(mlx5_glue->destroy_cq(txq_obj->cq)); ++ if (txq_obj->txq_ctrl->txq.fcqs) ++ rte_free(txq_obj->txq_ctrl->txq.fcqs); + } + LIST_REMOVE(txq_obj, next); + rte_free(txq_obj); +@@ -964,7 +981,7 @@ txq_set_params(struct mlx5_txq_ctrl *txq_ctrl) + * If there is requested minimal amount of data to inline + * we MUST enable inlining. This is a case for ConnectX-4 + * which usually requires L2 inlined for correct operating +- * and ConnectX-4LX which requires L2-L4 inlined to ++ * and ConnectX-4 Lx which requires L2-L4 inlined to + * support E-Switch Flows. + */ + if (inlen_mode) { +diff --git a/dpdk/drivers/net/mlx5/mlx5_utils.h b/dpdk/drivers/net/mlx5/mlx5_utils.h +index b4ed8c6dad..fdf1379866 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_utils.h ++++ b/dpdk/drivers/net/mlx5/mlx5_utils.h +@@ -15,16 +15,6 @@ + + #include "mlx5_defs.h" + +-/* +- * Compilation workaround for PPC64 when AltiVec is fully enabled, e.g. std=c11. +- * Otherwise there would be a type conflict between stdbool and altivec. +- */ +-#if defined(__PPC64__) && !defined(__APPLE_ALTIVEC__) +-#undef bool +-/* redefine as in stdbool.h */ +-#define bool _Bool +-#endif +- + /* Bit-field manipulation. */ + #define BITFIELD_DECLARE(bf, type, size) \ + type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \ +@@ -146,9 +136,10 @@ extern int mlx5_logtype; + + /* Allocate a buffer on the stack and fill it with a printf format string. */ + #define MKSTR(name, ...) \ +- char name[snprintf(NULL, 0, __VA_ARGS__) + 1]; \ ++ int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \ ++ char name[mkstr_size_##name + 1]; \ + \ +- snprintf(name, sizeof(name), __VA_ARGS__) ++ snprintf(name, sizeof(name), "" __VA_ARGS__) + + /** + * Return logarithm of the nearest power of two above input value. 
+diff --git a/dpdk/drivers/net/mvneta/mvneta_ethdev.c b/dpdk/drivers/net/mvneta/mvneta_ethdev.c +index 865ad61aed..4aea876488 100644 +--- a/dpdk/drivers/net/mvneta/mvneta_ethdev.c ++++ b/dpdk/drivers/net/mvneta/mvneta_ethdev.c +@@ -751,7 +751,7 @@ mvneta_stats_reset(struct rte_eth_dev *dev) + + ret = mvneta_stats_get(dev, &priv->prev_stats); + if (unlikely(ret)) +- RTE_LOG(ERR, PMD, "Failed to reset port statistics"); ++ MVNETA_LOG(ERR, "Failed to reset port statistics"); + + return ret; + } +diff --git a/dpdk/drivers/net/mvpp2/mrvl_flow.c b/dpdk/drivers/net/mvpp2/mrvl_flow.c +index 381b54e291..ea43255284 100644 +--- a/dpdk/drivers/net/mvpp2/mrvl_flow.c ++++ b/dpdk/drivers/net/mvpp2/mrvl_flow.c +@@ -2511,14 +2511,14 @@ mrvl_create_cls_table(struct rte_eth_dev *dev, struct rte_flow *first_flow) + + if (first_flow->pattern & F_UDP_SPORT) { + key->proto_field[key->num_fields].proto = MV_NET_PROTO_UDP; +- key->proto_field[key->num_fields].field.tcp = MV_NET_TCP_F_SP; ++ key->proto_field[key->num_fields].field.udp = MV_NET_UDP_F_SP; + key->key_size += 2; + key->num_fields += 1; + } + + if (first_flow->pattern & F_UDP_DPORT) { + key->proto_field[key->num_fields].proto = MV_NET_PROTO_UDP; +- key->proto_field[key->num_fields].field.udp = MV_NET_TCP_F_DP; ++ key->proto_field[key->num_fields].field.udp = MV_NET_UDP_F_DP; + key->key_size += 2; + key->num_fields += 1; + } +diff --git a/dpdk/drivers/net/netvsc/hn_ethdev.c b/dpdk/drivers/net/netvsc/hn_ethdev.c +index 164e9ad174..6950682a94 100644 +--- a/dpdk/drivers/net/netvsc/hn_ethdev.c ++++ b/dpdk/drivers/net/netvsc/hn_ethdev.c +@@ -42,7 +42,8 @@ + DEV_TX_OFFLOAD_VLAN_INSERT) + + #define HN_RX_OFFLOAD_CAPS (DEV_RX_OFFLOAD_CHECKSUM | \ +- DEV_RX_OFFLOAD_VLAN_STRIP) ++ DEV_RX_OFFLOAD_VLAN_STRIP | \ ++ DEV_RX_OFFLOAD_RSS_HASH) + + int hn_logtype_init; + int hn_logtype_driver; +@@ -71,7 +72,7 @@ static const struct hn_xstats_name_off hn_stat_strings[] = { + + /* The default RSS key. + * This value is the same as MLX5 so that flows will be +- * received on same path for both VF ans synthetic NIC. ++ * received on same path for both VF and synthetic NIC. 
+ */ + static const uint8_t rss_default_key[NDIS_HASH_KEYSIZE_TOEPLITZ] = { + 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, +@@ -133,8 +134,6 @@ eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size) + static void + eth_dev_vmbus_release(struct rte_eth_dev *eth_dev) + { +- /* mac_addrs must not be freed alone because part of dev_private */ +- eth_dev->data->mac_addrs = NULL; + /* free ether device */ + rte_eth_dev_release_port(eth_dev); + +@@ -256,15 +255,19 @@ static int hn_dev_info_get(struct rte_eth_dev *dev, + dev_info->max_rx_queues = hv->max_queues; + dev_info->max_tx_queues = hv->max_queues; + +- rc = hn_rndis_get_offload(hv, dev_info); +- if (rc != 0) +- return rc; ++ dev_info->tx_desc_lim.nb_min = 1; ++ dev_info->tx_desc_lim.nb_max = 4096; ++ ++ if (rte_eal_process_type() != RTE_PROC_PRIMARY) ++ return 0; + +- rc = hn_vf_info_get(hv, dev_info); ++ /* fills in rx and tx offload capability */ ++ rc = hn_rndis_get_offload(hv, dev_info); + if (rc != 0) + return rc; + +- return 0; ++ /* merges the offload and queues of vf */ ++ return hn_vf_info_get(hv, dev_info); + } + + static int hn_rss_reta_update(struct rte_eth_dev *dev, +@@ -291,6 +294,13 @@ static int hn_rss_reta_update(struct rte_eth_dev *dev, + hv->rss_ind[i] = reta_conf[idx].reta[shift]; + } + ++ err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE); ++ if (err) { ++ PMD_DRV_LOG(NOTICE, ++ "rss disable failed"); ++ return err; ++ } ++ + err = hn_rndis_conf_rss(hv, 0); + if (err) { + PMD_DRV_LOG(NOTICE, +@@ -366,14 +376,15 @@ static int hn_rss_hash_update(struct rte_eth_dev *dev, + + hn_rss_hash_init(hv, rss_conf); + +- err = hn_rndis_conf_rss(hv, 0); +- if (err) { +- PMD_DRV_LOG(NOTICE, +- "rss reconfig failed (RSS disabled)"); +- return err; ++ if (rss_conf->rss_hf != 0) { ++ err = hn_rndis_conf_rss(hv, 0); ++ if (err) { ++ PMD_DRV_LOG(NOTICE, ++ "rss reconfig failed (RSS disabled)"); ++ return err; ++ } + } + +- + return hn_vf_rss_hash_update(dev, rss_conf); + } + +@@ -565,7 +576,7 @@ static int hn_dev_configure(struct rte_eth_dev *dev) + dev->data->nb_tx_queues); + + for (i = 0; i < NDIS_HASH_INDCNT; i++) +- hv->rss_ind[i] = i % hv->num_queues; ++ hv->rss_ind[i] = i % dev->data->nb_rx_queues; + + hn_rss_hash_init(hv, rss_conf); + +@@ -578,12 +589,21 @@ static int hn_dev_configure(struct rte_eth_dev *dev) + return err; + } + +- err = hn_rndis_conf_rss(hv, 0); ++ err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE); + if (err) { + PMD_DRV_LOG(NOTICE, +- "initial RSS config failed"); ++ "rss disable failed"); + return err; + } ++ ++ if (rss_conf->rss_hf != 0) { ++ err = hn_rndis_conf_rss(hv, 0); ++ if (err) { ++ PMD_DRV_LOG(NOTICE, ++ "initial RSS config failed"); ++ return err; ++ } ++ } + } + + return hn_vf_configure(dev, dev_conf); +@@ -807,6 +827,10 @@ hn_dev_start(struct rte_eth_dev *dev) + if (error) + hn_rndis_set_rxfilter(hv, 0); + ++ /* Initialize Link state */ ++ if (error == 0) ++ hn_dev_link_update(dev, 0); ++ + return error; + } + +@@ -921,8 +945,14 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + +- /* Since Hyper-V only supports one MAC address, just use local data */ +- eth_dev->data->mac_addrs = &hv->mac_addr; ++ /* Since Hyper-V only supports one MAC address */ ++ eth_dev->data->mac_addrs = rte_calloc("hv_mac", HN_MAX_MAC_ADDRS, ++ sizeof(struct rte_ether_addr), 0); ++ if (eth_dev->data->mac_addrs == NULL) { ++ PMD_INIT_LOG(ERR, ++ "Failed to allocate memory store MAC addresses"); ++ return -ENOMEM; ++ } + + hv->vmbus = 
vmbus; + hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP]; +@@ -962,11 +992,11 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) + if (err) + goto failed; + +- err = hn_tx_pool_init(eth_dev); ++ err = hn_chim_init(eth_dev); + if (err) + goto failed; + +- err = hn_rndis_get_eaddr(hv, hv->mac_addr.addr_bytes); ++ err = hn_rndis_get_eaddr(hv, eth_dev->data->mac_addrs->addr_bytes); + if (err) + goto failed; + +@@ -998,7 +1028,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) + failed: + PMD_INIT_LOG(NOTICE, "device init failed"); + +- hn_tx_pool_uninit(eth_dev); ++ hn_chim_uninit(eth_dev); + hn_detach(hv); + return err; + } +@@ -1022,7 +1052,7 @@ eth_hn_dev_uninit(struct rte_eth_dev *eth_dev) + eth_dev->rx_pkt_burst = NULL; + + hn_detach(hv); +- hn_tx_pool_uninit(eth_dev); ++ hn_chim_uninit(eth_dev); + rte_vmbus_chan_close(hv->primary->chan); + rte_free(hv->primary); + ret = rte_eth_dev_owner_delete(hv->owner.id); +diff --git a/dpdk/drivers/net/netvsc/hn_nvs.c b/dpdk/drivers/net/netvsc/hn_nvs.c +index 6b518685ab..477202b2a0 100644 +--- a/dpdk/drivers/net/netvsc/hn_nvs.c ++++ b/dpdk/drivers/net/netvsc/hn_nvs.c +@@ -54,7 +54,7 @@ static int hn_nvs_req_send(struct hn_data *hv, + } + + static int +-hn_nvs_execute(struct hn_data *hv, ++__hn_nvs_execute(struct hn_data *hv, + void *req, uint32_t reqlen, + void *resp, uint32_t resplen, + uint32_t type) +@@ -62,6 +62,7 @@ hn_nvs_execute(struct hn_data *hv, + struct vmbus_channel *chan = hn_primary_chan(hv); + char buffer[NVS_RESPSIZE_MAX]; + const struct hn_nvs_hdr *hdr; ++ uint64_t xactid; + uint32_t len; + int ret; + +@@ -77,7 +78,7 @@ hn_nvs_execute(struct hn_data *hv, + + retry: + len = sizeof(buffer); +- ret = rte_vmbus_chan_recv(chan, buffer, &len, NULL); ++ ret = rte_vmbus_chan_recv(chan, buffer, &len, &xactid); + if (ret == -EAGAIN) { + rte_delay_us(HN_CHAN_INTERVAL_US); + goto retry; +@@ -88,7 +89,20 @@ hn_nvs_execute(struct hn_data *hv, + return ret; + } + ++ if (len < sizeof(*hdr)) { ++ PMD_DRV_LOG(ERR, "response missing NVS header"); ++ return -EINVAL; ++ } ++ + hdr = (struct hn_nvs_hdr *)buffer; ++ ++ /* Silently drop received packets while waiting for response */ ++ if (hdr->type == NVS_TYPE_RNDIS) { ++ hn_nvs_ack_rxbuf(chan, xactid); ++ --hv->rxbuf_outstanding; ++ goto retry; ++ } ++ + if (hdr->type != type) { + PMD_DRV_LOG(ERR, "unexpected NVS resp %#x, expect %#x", + hdr->type, type); +@@ -108,6 +122,29 @@ hn_nvs_execute(struct hn_data *hv, + return 0; + } + ++ ++/* ++ * Execute one control command and get the response. ++ * Only one command can be active on a channel at once ++ * Unlike BSD, DPDK does not have an interrupt context ++ * so the polling is required to wait for response. ++ */ ++static int ++hn_nvs_execute(struct hn_data *hv, ++ void *req, uint32_t reqlen, ++ void *resp, uint32_t resplen, ++ uint32_t type) ++{ ++ struct hn_rx_queue *rxq = hv->primary; ++ int ret; ++ ++ rte_spinlock_lock(&rxq->ring_lock); ++ ret = __hn_nvs_execute(hv, req, reqlen, resp, resplen, type); ++ rte_spinlock_unlock(&rxq->ring_lock); ++ ++ return ret; ++} ++ + static int + hn_nvs_doinit(struct hn_data *hv, uint32_t nvs_ver) + { +diff --git a/dpdk/drivers/net/netvsc/hn_nvs.h b/dpdk/drivers/net/netvsc/hn_nvs.h +index 2563fd8d86..015839e364 100644 +--- a/dpdk/drivers/net/netvsc/hn_nvs.h ++++ b/dpdk/drivers/net/netvsc/hn_nvs.h +@@ -37,7 +37,7 @@ + #define NVS_RNDIS_MTYPE_CTRL 1 + + /* +- * NVS message transacion status codes. ++ * NVS message transaction status codes. 
+ */ + #define NVS_STATUS_OK 1 + #define NVS_STATUS_FAILED 2 +diff --git a/dpdk/drivers/net/netvsc/hn_rxtx.c b/dpdk/drivers/net/netvsc/hn_rxtx.c +index 7212780c15..19f00a0528 100644 +--- a/dpdk/drivers/net/netvsc/hn_rxtx.c ++++ b/dpdk/drivers/net/netvsc/hn_rxtx.c +@@ -18,6 +18,7 @@ + #include <rte_memzone.h> + #include <rte_malloc.h> + #include <rte_atomic.h> ++#include <rte_bitmap.h> + #include <rte_branch_prediction.h> + #include <rte_ether.h> + #include <rte_common.h> +@@ -83,7 +84,7 @@ struct hn_txdesc { + struct rte_mbuf *m; + + uint16_t queue_id; +- uint16_t chim_index; ++ uint32_t chim_index; + uint32_t chim_size; + uint32_t data_size; + uint32_t packets; +@@ -98,11 +99,13 @@ struct hn_txdesc { + RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ + RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) + ++#define HN_RNDIS_PKT_ALIGNED RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE) ++ + /* Minimum space required for a packet */ + #define HN_PKTSIZE_MIN(align) \ + RTE_ALIGN(RTE_ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align) + +-#define DEFAULT_TX_FREE_THRESH 32U ++#define DEFAULT_TX_FREE_THRESH 32 + + static void + hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m) +@@ -150,63 +153,77 @@ hn_rndis_pktmsg_offset(uint32_t ofs) + static void hn_txd_init(struct rte_mempool *mp __rte_unused, + void *opaque, void *obj, unsigned int idx) + { ++ struct hn_tx_queue *txq = opaque; + struct hn_txdesc *txd = obj; +- struct rte_eth_dev *dev = opaque; +- struct rndis_packet_msg *pkt; + + memset(txd, 0, sizeof(*txd)); +- txd->chim_index = idx; + +- pkt = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN, +- rte_align32pow2(HN_RNDIS_PKT_LEN), +- dev->device->numa_node); +- if (!pkt) +- rte_exit(EXIT_FAILURE, "can not allocate RNDIS header"); +- +- txd->rndis_pkt = pkt; ++ txd->queue_id = txq->queue_id; ++ txd->chim_index = NVS_CHIM_IDX_INVALID; ++ txd->rndis_pkt = (struct rndis_packet_msg *)(char *)txq->tx_rndis ++ + idx * HN_RNDIS_PKT_ALIGNED; + } + +-/* +- * Unlike Linux and FreeBSD, this driver uses a mempool +- * to limit outstanding transmits and reserve buffers +- */ + int +-hn_tx_pool_init(struct rte_eth_dev *dev) ++hn_chim_init(struct rte_eth_dev *dev) + { + struct hn_data *hv = dev->data->dev_private; +- char name[RTE_MEMPOOL_NAMESIZE]; +- struct rte_mempool *mp; ++ uint32_t i, chim_bmp_size; ++ ++ rte_spinlock_init(&hv->chim_lock); ++ chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt); ++ hv->chim_bmem = rte_zmalloc("hn_chim_bitmap", chim_bmp_size, ++ RTE_CACHE_LINE_SIZE); ++ if (hv->chim_bmem == NULL) { ++ PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u", ++ chim_bmp_size); ++ return -1; ++ } + +- snprintf(name, sizeof(name), +- "hn_txd_%u", dev->data->port_id); +- +- PMD_INIT_LOG(DEBUG, "create a TX send pool %s n=%u size=%zu socket=%d", +- name, hv->chim_cnt, sizeof(struct hn_txdesc), +- dev->device->numa_node); +- +- mp = rte_mempool_create(name, hv->chim_cnt, sizeof(struct hn_txdesc), +- HN_TXD_CACHE_SIZE, 0, +- NULL, NULL, +- hn_txd_init, dev, +- dev->device->numa_node, 0); +- if (!mp) { +- PMD_DRV_LOG(ERR, +- "mempool %s create failed: %d", name, rte_errno); +- return -rte_errno; ++ hv->chim_bmap = rte_bitmap_init(hv->chim_cnt, ++ hv->chim_bmem, chim_bmp_size); ++ if (hv->chim_bmap == NULL) { ++ PMD_INIT_LOG(ERR, "failed to init chim bitmap"); ++ return -1; + } + +- hv->tx_pool = mp; ++ for (i = 0; i < hv->chim_cnt; i++) ++ rte_bitmap_set(hv->chim_bmap, i); ++ + return 0; + } + + void +-hn_tx_pool_uninit(struct rte_eth_dev *dev) ++hn_chim_uninit(struct rte_eth_dev *dev) + 
{ + struct hn_data *hv = dev->data->dev_private; + +- if (hv->tx_pool) { +- rte_mempool_free(hv->tx_pool); +- hv->tx_pool = NULL; ++ rte_bitmap_free(hv->chim_bmap); ++ rte_free(hv->chim_bmem); ++ hv->chim_bmem = NULL; ++} ++ ++static uint32_t hn_chim_alloc(struct hn_data *hv) ++{ ++ uint32_t index = NVS_CHIM_IDX_INVALID; ++ uint64_t slab; ++ ++ rte_spinlock_lock(&hv->chim_lock); ++ if (rte_bitmap_scan(hv->chim_bmap, &index, &slab)) ++ rte_bitmap_clear(hv->chim_bmap, index); ++ rte_spinlock_unlock(&hv->chim_lock); ++ ++ return index; ++} ++ ++static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx) ++{ ++ if (chim_idx >= hv->chim_cnt) { ++ PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx); ++ } else { ++ rte_spinlock_lock(&hv->chim_lock); ++ rte_bitmap_set(hv->chim_bmap, chim_idx); ++ rte_spinlock_unlock(&hv->chim_lock); + } + } + +@@ -220,15 +237,16 @@ static void hn_reset_txagg(struct hn_tx_queue *txq) + + int + hn_dev_tx_queue_setup(struct rte_eth_dev *dev, +- uint16_t queue_idx, uint16_t nb_desc __rte_unused, ++ uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) + + { + struct hn_data *hv = dev->data->dev_private; + struct hn_tx_queue *txq; ++ char name[RTE_MEMPOOL_NAMESIZE]; + uint32_t tx_free_thresh; +- int err; ++ int err = -ENOMEM; + + PMD_INIT_FUNC_TRACE(); + +@@ -244,14 +262,42 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev, + + tx_free_thresh = tx_conf->tx_free_thresh; + if (tx_free_thresh == 0) +- tx_free_thresh = RTE_MIN(hv->chim_cnt / 4, ++ tx_free_thresh = RTE_MIN(nb_desc / 4, + DEFAULT_TX_FREE_THRESH); + +- if (tx_free_thresh >= hv->chim_cnt - 3) +- tx_free_thresh = hv->chim_cnt - 3; ++ if (tx_free_thresh + 3 >= nb_desc) { ++ PMD_INIT_LOG(ERR, ++ "tx_free_thresh must be less than the number of TX entries minus 3(%u)." 
++ " (tx_free_thresh=%u port=%u queue=%u)\n", ++ nb_desc - 3, ++ tx_free_thresh, dev->data->port_id, queue_idx); ++ return -EINVAL; ++ } + + txq->free_thresh = tx_free_thresh; + ++ snprintf(name, sizeof(name), ++ "hn_txd_%u_%u", dev->data->port_id, queue_idx); ++ ++ PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu", ++ name, nb_desc, sizeof(struct hn_txdesc)); ++ ++ txq->tx_rndis = rte_calloc("hn_txq_rndis", nb_desc, ++ HN_RNDIS_PKT_ALIGNED, RTE_CACHE_LINE_SIZE); ++ if (txq->tx_rndis == NULL) ++ goto error; ++ ++ txq->txdesc_pool = rte_mempool_create(name, nb_desc, ++ sizeof(struct hn_txdesc), ++ 0, 0, NULL, NULL, ++ hn_txd_init, txq, ++ dev->device->numa_node, 0); ++ if (txq->txdesc_pool == NULL) { ++ PMD_DRV_LOG(ERR, ++ "mempool %s create failed: %d", name, rte_errno); ++ goto error; ++ } ++ + txq->agg_szmax = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size); + txq->agg_pktmax = hv->rndis_agg_pkts; + txq->agg_align = hv->rndis_agg_align; +@@ -260,31 +306,57 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev, + + err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc, + socket_id, tx_conf); +- if (err) { +- rte_free(txq); +- return err; ++ if (err == 0) { ++ dev->data->tx_queues[queue_idx] = txq; ++ return 0; + } + +- dev->data->tx_queues[queue_idx] = txq; +- return 0; ++error: ++ if (txq->txdesc_pool) ++ rte_mempool_free(txq->txdesc_pool); ++ rte_free(txq->tx_rndis); ++ rte_free(txq); ++ return err; ++} ++ ++ ++static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq) ++{ ++ struct hn_txdesc *txd; ++ ++ if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) { ++ ++txq->stats.ring_full; ++ PMD_TX_LOG(DEBUG, "tx pool exhausted!"); ++ return NULL; ++ } ++ ++ txd->m = NULL; ++ txd->packets = 0; ++ txd->data_size = 0; ++ txd->chim_size = 0; ++ ++ return txd; ++} ++ ++static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd) ++{ ++ rte_mempool_put(txq->txdesc_pool, txd); + } + + void + hn_dev_tx_queue_release(void *arg) + { + struct hn_tx_queue *txq = arg; +- struct hn_txdesc *txd; + + PMD_INIT_FUNC_TRACE(); + + if (!txq) + return; + +- /* If any pending data is still present just drop it */ +- txd = txq->agg_txd; +- if (txd) +- rte_mempool_put(txq->hv->tx_pool, txd); ++ if (txq->txdesc_pool) ++ rte_mempool_free(txq->txdesc_pool); + ++ rte_free(txq->tx_rndis); + rte_free(txq); + } + +@@ -292,6 +364,7 @@ static void + hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id, + unsigned long xactid, const struct hn_nvs_rndis_ack *ack) + { ++ struct hn_data *hv = dev->data->dev_private; + struct hn_txdesc *txd = (struct hn_txdesc *)xactid; + struct hn_tx_queue *txq; + +@@ -312,9 +385,11 @@ hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id, + ++txq->stats.errors; + } + +- rte_pktmbuf_free(txd->m); ++ if (txd->chim_index != NVS_CHIM_IDX_INVALID) ++ hn_chim_free(hv, txd->chim_index); + +- rte_mempool_put(txq->hv->tx_pool, txd); ++ rte_pktmbuf_free(txd->m); ++ hn_txd_put(txq, txd); + } + + /* Handle transmit completion events */ +@@ -894,10 +969,6 @@ uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id, + + rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id]; + +- /* If no pending data then nothing to do */ +- if (rte_vmbus_chan_rx_empty(rxq->chan)) +- return 0; +- + /* + * Since channel is shared between Rx and TX queue need to have a lock + * since DPDK does not force same CPU to be used for Rx/Tx. 
+@@ -961,9 +1032,6 @@ uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id, + + if (tx_limit && tx_done >= tx_limit) + break; +- +- if (rxq->rx_ring && rte_ring_full(rxq->rx_ring)) +- break; + } + + if (bytes_read > 0) +@@ -1036,28 +1104,15 @@ static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig) + return ret; + } + +-static struct hn_txdesc *hn_new_txd(struct hn_data *hv, +- struct hn_tx_queue *txq) +-{ +- struct hn_txdesc *txd; +- +- if (rte_mempool_get(hv->tx_pool, (void **)&txd)) { +- ++txq->stats.ring_full; +- PMD_TX_LOG(DEBUG, "tx pool exhausted!"); +- return NULL; +- } +- +- txd->m = NULL; +- txd->queue_id = txq->queue_id; +- txd->packets = 0; +- txd->data_size = 0; +- txd->chim_size = 0; +- +- return txd; +-} +- ++/* ++ * Try and find a place in a send chimney buffer to put ++ * the small packet. If space is available, this routine ++ * returns a pointer of where to place the data. ++ * If no space, caller should try direct transmit. ++ */ + static void * +-hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize) ++hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, ++ struct hn_txdesc *txd, uint32_t pktsize) + { + struct hn_txdesc *agg_txd = txq->agg_txd; + struct rndis_packet_msg *pkt; +@@ -1085,7 +1140,7 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize) + } + + chim = (uint8_t *)pkt + pkt->len; +- ++ txq->agg_prevpkt = chim; + txq->agg_pktleft--; + txq->agg_szleft -= pktsize; + if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) { +@@ -1095,18 +1150,21 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize) + */ + txq->agg_pktleft = 0; + } +- } else { +- agg_txd = hn_new_txd(hv, txq); +- if (!agg_txd) +- return NULL; +- +- chim = (uint8_t *)hv->chim_res->addr +- + agg_txd->chim_index * hv->chim_szmax; + +- txq->agg_txd = agg_txd; +- txq->agg_pktleft = txq->agg_pktmax - 1; +- txq->agg_szleft = txq->agg_szmax - pktsize; ++ hn_txd_put(txq, txd); ++ return chim; + } ++ ++ txd->chim_index = hn_chim_alloc(hv); ++ if (txd->chim_index == NVS_CHIM_IDX_INVALID) ++ return NULL; ++ ++ chim = (uint8_t *)hv->chim_res->addr ++ + txd->chim_index * hv->chim_szmax; ++ ++ txq->agg_txd = txd; ++ txq->agg_pktleft = txq->agg_pktmax - 1; ++ txq->agg_szleft = txq->agg_szmax - pktsize; + txq->agg_prevpkt = chim; + + return chim; +@@ -1314,7 +1372,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + struct hn_data *hv = txq->hv; + struct rte_eth_dev *vf_dev; + bool need_sig = false; +- uint16_t nb_tx; ++ uint16_t nb_tx, avail; + int ret; + + if (unlikely(hv->closed)) +@@ -1329,13 +1387,19 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts); + } + +- if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh) ++ avail = rte_mempool_avail_count(txq->txdesc_pool); ++ if (nb_pkts > avail || avail <= txq->free_thresh) + hn_process_events(hv, txq->queue_id, 0); + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + struct rte_mbuf *m = tx_pkts[nb_tx]; + uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN; + struct rndis_packet_msg *pkt; ++ struct hn_txdesc *txd; ++ ++ txd = hn_txd_get(txq); ++ if (txd == NULL) ++ break; + + /* For small packets aggregate them in chimney buffer */ + if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) { +@@ -1346,7 +1410,8 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + goto fail; + } + +- pkt = hn_try_txagg(hv, txq, pkt_size); ++ ++ pkt 
= hn_try_txagg(hv, txq, txd, pkt_size); + if (unlikely(!pkt)) + break; + +@@ -1360,21 +1425,13 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + hn_flush_txagg(txq, &need_sig)) + goto fail; + } else { +- struct hn_txdesc *txd; +- +- /* can send chimney data and large packet at once */ +- txd = txq->agg_txd; +- if (txd) { +- hn_reset_txagg(txq); +- } else { +- txd = hn_new_txd(hv, txq); +- if (unlikely(!txd)) +- break; +- } ++ /* Send any outstanding packets in buffer */ ++ if (txq->agg_txd && hn_flush_txagg(txq, &need_sig)) ++ goto fail; + + pkt = txd->rndis_pkt; + txd->m = m; +- txd->data_size += m->pkt_len; ++ txd->data_size = m->pkt_len; + ++txd->packets; + + hn_encap(pkt, queue_id, m); +@@ -1383,7 +1440,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + if (unlikely(ret != 0)) { + PMD_TX_LOG(NOTICE, "sg send failed: %d", ret); + ++txq->stats.errors; +- rte_mempool_put(hv->tx_pool, txd); ++ hn_txd_put(txq, txd); + goto fail; + } + } +diff --git a/dpdk/drivers/net/netvsc/hn_var.h b/dpdk/drivers/net/netvsc/hn_var.h +index 05bc492511..b4c6171737 100644 +--- a/dpdk/drivers/net/netvsc/hn_var.h ++++ b/dpdk/drivers/net/netvsc/hn_var.h +@@ -52,6 +52,8 @@ struct hn_tx_queue { + uint16_t port_id; + uint16_t queue_id; + uint32_t free_thresh; ++ struct rte_mempool *txdesc_pool; ++ void *tx_rndis; + + /* Applied packet transmission aggregation limits. */ + uint32_t agg_szmax; +@@ -115,8 +117,10 @@ struct hn_data { + uint16_t num_queues; + uint64_t rss_offloads; + ++ rte_spinlock_t chim_lock; + struct rte_mem_resource *chim_res; /* UIO resource for Tx */ +- struct rte_mempool *tx_pool; /* Tx descriptors */ ++ struct rte_bitmap *chim_bmap; /* Send buffer map */ ++ void *chim_bmem; + uint32_t chim_szmax; /* Max size per buffer */ + uint32_t chim_cnt; /* Max packets per buffer */ + +@@ -135,8 +139,6 @@ struct hn_data { + uint8_t rss_key[40]; + uint16_t rss_ind[128]; + +- struct rte_ether_addr mac_addr; +- + struct rte_eth_dev_owner owner; + struct rte_intr_handle vf_intr; + +@@ -157,8 +159,8 @@ uint16_t hn_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +-int hn_tx_pool_init(struct rte_eth_dev *dev); +-void hn_tx_pool_uninit(struct rte_eth_dev *dev); ++int hn_chim_init(struct rte_eth_dev *dev); ++void hn_chim_uninit(struct rte_eth_dev *dev); + int hn_dev_link_update(struct rte_eth_dev *dev, int wait); + int hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + uint16_t nb_desc, unsigned int socket_id, +diff --git a/dpdk/drivers/net/netvsc/hn_vf.c b/dpdk/drivers/net/netvsc/hn_vf.c +index 7a3734cadf..1261b2e2ef 100644 +--- a/dpdk/drivers/net/netvsc/hn_vf.c ++++ b/dpdk/drivers/net/netvsc/hn_vf.c +@@ -167,6 +167,17 @@ hn_nvs_handle_vfassoc(struct rte_eth_dev *dev, + hn_vf_remove(hv); + } + ++static void ++hn_vf_merge_desc_lim(struct rte_eth_desc_lim *lim, ++ const struct rte_eth_desc_lim *vf_lim) ++{ ++ lim->nb_max = RTE_MIN(vf_lim->nb_max, lim->nb_max); ++ lim->nb_min = RTE_MAX(vf_lim->nb_min, lim->nb_min); ++ lim->nb_align = RTE_MAX(vf_lim->nb_align, lim->nb_align); ++ lim->nb_seg_max = RTE_MIN(vf_lim->nb_seg_max, lim->nb_seg_max); ++ lim->nb_mtu_seg_max = RTE_MIN(vf_lim->nb_seg_max, lim->nb_seg_max); ++} ++ + /* + * Merge the info from the VF and synthetic path. 
+ * use the default config of the VF +@@ -196,11 +207,13 @@ static int hn_vf_info_merge(struct rte_eth_dev *vf_dev, + info->max_tx_queues); + info->tx_offload_capa &= vf_info.tx_offload_capa; + info->tx_queue_offload_capa &= vf_info.tx_queue_offload_capa; ++ hn_vf_merge_desc_lim(&info->tx_desc_lim, &vf_info.tx_desc_lim); + + info->min_rx_bufsize = RTE_MAX(vf_info.min_rx_bufsize, + info->min_rx_bufsize); + info->max_rx_pktlen = RTE_MAX(vf_info.max_rx_pktlen, + info->max_rx_pktlen); ++ hn_vf_merge_desc_lim(&info->rx_desc_lim, &vf_info.rx_desc_lim); + + return 0; + } +diff --git a/dpdk/drivers/net/nfp/nfp_net.c b/dpdk/drivers/net/nfp/nfp_net.c +index 3aafa7f80f..b6ff5ecd7d 100644 +--- a/dpdk/drivers/net/nfp/nfp_net.c ++++ b/dpdk/drivers/net/nfp/nfp_net.c +@@ -3014,7 +3014,7 @@ nfp_cpp_bridge_serve_write(int sockfd, struct nfp_cpp *cpp) + size_t count, curlen, totlen = 0; + int err = 0; + +- PMD_CPP_LOG(DEBUG, "%s: offset size %lu, count_size: %lu\n", __func__, ++ PMD_CPP_LOG(DEBUG, "%s: offset size %zu, count_size: %zu\n", __func__, + sizeof(off_t), sizeof(size_t)); + + /* Reading the count param */ +@@ -3033,9 +3033,9 @@ nfp_cpp_bridge_serve_write(int sockfd, struct nfp_cpp *cpp) + cpp_id = (offset >> 40) << 8; + nfp_offset = offset & ((1ull << 40) - 1); + +- PMD_CPP_LOG(DEBUG, "%s: count %lu and offset %ld\n", __func__, count, ++ PMD_CPP_LOG(DEBUG, "%s: count %zu and offset %jd\n", __func__, count, + offset); +- PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %ld\n", __func__, ++ PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %jd\n", __func__, + cpp_id, nfp_offset); + + /* Adjust length if not aligned */ +@@ -3067,12 +3067,12 @@ nfp_cpp_bridge_serve_write(int sockfd, struct nfp_cpp *cpp) + if (len > sizeof(tmpbuf)) + len = sizeof(tmpbuf); + +- PMD_CPP_LOG(DEBUG, "%s: Receive %u of %lu\n", __func__, ++ PMD_CPP_LOG(DEBUG, "%s: Receive %u of %zu\n", __func__, + len, count); + err = recv(sockfd, tmpbuf, len, MSG_WAITALL); + if (err != (int)len) { + RTE_LOG(ERR, PMD, +- "%s: error when receiving, %d of %lu\n", ++ "%s: error when receiving, %d of %zu\n", + __func__, err, count); + nfp_cpp_area_release(area); + nfp_cpp_area_free(area); +@@ -3116,7 +3116,7 @@ nfp_cpp_bridge_serve_read(int sockfd, struct nfp_cpp *cpp) + size_t count, curlen, totlen = 0; + int err = 0; + +- PMD_CPP_LOG(DEBUG, "%s: offset size %lu, count_size: %lu\n", __func__, ++ PMD_CPP_LOG(DEBUG, "%s: offset size %zu, count_size: %zu\n", __func__, + sizeof(off_t), sizeof(size_t)); + + /* Reading the count param */ +@@ -3135,9 +3135,9 @@ nfp_cpp_bridge_serve_read(int sockfd, struct nfp_cpp *cpp) + cpp_id = (offset >> 40) << 8; + nfp_offset = offset & ((1ull << 40) - 1); + +- PMD_CPP_LOG(DEBUG, "%s: count %lu and offset %ld\n", __func__, count, ++ PMD_CPP_LOG(DEBUG, "%s: count %zu and offset %jd\n", __func__, count, + offset); +- PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %ld\n", __func__, ++ PMD_CPP_LOG(DEBUG, "%s: cpp_id %08x and nfp_offset %jd\n", __func__, + cpp_id, nfp_offset); + + /* Adjust length if not aligned */ +@@ -3174,13 +3174,13 @@ nfp_cpp_bridge_serve_read(int sockfd, struct nfp_cpp *cpp) + nfp_cpp_area_free(area); + return -EIO; + } +- PMD_CPP_LOG(DEBUG, "%s: sending %u of %lu\n", __func__, ++ PMD_CPP_LOG(DEBUG, "%s: sending %u of %zu\n", __func__, + len, count); + + err = send(sockfd, tmpbuf, len, 0); + if (err != (int)len) { + RTE_LOG(ERR, PMD, +- "%s: error when sending: %d of %lu\n", ++ "%s: error when sending: %d of %zu\n", + __func__, err, count); + nfp_cpp_area_release(area); + 
nfp_cpp_area_free(area); +@@ -3451,9 +3451,10 @@ nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports, + probe_failed: + rte_free(port_name); + /* free ports private data if primary process */ +- if (rte_eal_process_type() == RTE_PROC_PRIMARY) ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_free(eth_dev->data->dev_private); +- ++ eth_dev->data->dev_private = NULL; ++ } + rte_eth_dev_release_port(eth_dev); + + return retval; +diff --git a/dpdk/drivers/net/null/rte_eth_null.c b/dpdk/drivers/net/null/rte_eth_null.c +index 025b73acb3..beedd5f4b2 100644 +--- a/dpdk/drivers/net/null/rte_eth_null.c ++++ b/dpdk/drivers/net/null/rte_eth_null.c +@@ -584,6 +584,7 @@ rte_pmd_null_probe(struct rte_vdev_device *dev) + PMD_LOG(INFO, "Initializing pmd_null for %s", name); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { ++ struct pmd_internals *internals; + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + PMD_LOG(ERR, "Failed to probe %s", name); +@@ -592,7 +593,8 @@ rte_pmd_null_probe(struct rte_vdev_device *dev) + /* TODO: request info from primary to set up Rx and Tx */ + eth_dev->dev_ops = &ops; + eth_dev->device = &dev->device; +- if (packet_copy) { ++ internals = eth_dev->data->dev_private; ++ if (internals->packet_copy) { + eth_dev->rx_pkt_burst = eth_null_copy_rx; + eth_dev->tx_pkt_burst = eth_null_copy_tx; + } else { +@@ -608,23 +610,18 @@ rte_pmd_null_probe(struct rte_vdev_device *dev) + if (kvlist == NULL) + return -1; + +- if (rte_kvargs_count(kvlist, ETH_NULL_PACKET_SIZE_ARG) == 1) { ++ ret = rte_kvargs_process(kvlist, ++ ETH_NULL_PACKET_SIZE_ARG, ++ &get_packet_size_arg, &packet_size); ++ if (ret < 0) ++ goto free_kvlist; + +- ret = rte_kvargs_process(kvlist, +- ETH_NULL_PACKET_SIZE_ARG, +- &get_packet_size_arg, &packet_size); +- if (ret < 0) +- goto free_kvlist; +- } +- +- if (rte_kvargs_count(kvlist, ETH_NULL_PACKET_COPY_ARG) == 1) { + +- ret = rte_kvargs_process(kvlist, +- ETH_NULL_PACKET_COPY_ARG, +- &get_packet_copy_arg, &packet_copy); +- if (ret < 0) +- goto free_kvlist; +- } ++ ret = rte_kvargs_process(kvlist, ++ ETH_NULL_PACKET_COPY_ARG, ++ &get_packet_copy_arg, &packet_copy); ++ if (ret < 0) ++ goto free_kvlist; + } + + PMD_LOG(INFO, "Configure pmd_null: packet size is %d, " +diff --git a/dpdk/drivers/net/octeontx/base/meson.build b/dpdk/drivers/net/octeontx/base/meson.build +index a06a2c89c9..e1060fc4ec 100644 +--- a/dpdk/drivers/net/octeontx/base/meson.build ++++ b/dpdk/drivers/net/octeontx/base/meson.build +@@ -10,7 +10,10 @@ sources = [ + depends = ['ethdev', 'mempool_octeontx'] + static_objs = [] + foreach d: depends +- static_objs += [get_variable('static_rte_' + d)] ++ if not is_variable('shared_rte_' + d) ++ subdir_done() ++ endif ++ static_objs += get_variable('static_rte_' + d) + endforeach + + c_args = cflags +diff --git a/dpdk/drivers/net/octeontx/octeontx_ethdev.c b/dpdk/drivers/net/octeontx/octeontx_ethdev.c +index 679803dd4c..e85acdde0a 100644 +--- a/dpdk/drivers/net/octeontx/octeontx_ethdev.c ++++ b/dpdk/drivers/net/octeontx/octeontx_ethdev.c +@@ -351,6 +351,10 @@ octeontx_dev_close(struct rte_eth_dev *dev) + rte_free(txq); + } + ++ /* Free MAC address table */ ++ rte_free(dev->data->mac_addrs); ++ dev->data->mac_addrs = NULL; ++ + dev->tx_pkt_burst = NULL; + dev->rx_pkt_burst = NULL; + } +@@ -1099,7 +1103,7 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev, + octeontx_log_err("eth_dev->port_id (%d) is diff to orig (%d)", + data->port_id, nic->port_id); + res = -EINVAL; +- goto err; ++ goto 
free_mac_addrs; + } + + /* Update port_id mac to eth_dev */ +@@ -1118,6 +1122,9 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev, + rte_eth_dev_probing_finish(eth_dev); + return data->port_id; + ++free_mac_addrs: ++ rte_free(data->mac_addrs); ++ data->mac_addrs = NULL; + err: + if (nic) + octeontx_port_close(nic); +diff --git a/dpdk/drivers/net/octeontx2/otx2_ethdev.c b/dpdk/drivers/net/octeontx2/otx2_ethdev.c +index ed329273dc..102d06b39b 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_ethdev.c ++++ b/dpdk/drivers/net/octeontx2/otx2_ethdev.c +@@ -18,7 +18,8 @@ nix_get_rx_offload_capa(struct otx2_eth_dev *dev) + { + uint64_t capa = NIX_RX_OFFLOAD_CAPA; + +- if (otx2_dev_is_vf(dev)) ++ if (otx2_dev_is_vf(dev) || ++ dev->npc_flow.switch_header_type == OTX2_PRIV_FLAGS_HIGIG) + capa &= ~DEV_RX_OFFLOAD_TIMESTAMP; + + return capa; +@@ -204,7 +205,7 @@ cgx_intlbk_enable(struct otx2_eth_dev *dev, bool en) + { + struct otx2_mbox *mbox = dev->mbox; + +- if (otx2_dev_is_vf_or_sdp(dev)) ++ if (en && otx2_dev_is_vf_or_sdp(dev)) + return -ENOTSUP; + + if (en) +@@ -349,10 +350,7 @@ nix_cq_rq_init(struct rte_eth_dev *eth_dev, struct otx2_eth_dev *dev, + aq->rq.first_skip = first_skip; + aq->rq.later_skip = (sizeof(struct rte_mbuf) / 8); + aq->rq.flow_tagw = 32; /* 32-bits */ +- aq->rq.lpb_sizem1 = rte_pktmbuf_data_room_size(mp); +- aq->rq.lpb_sizem1 += rte_pktmbuf_priv_size(mp); +- aq->rq.lpb_sizem1 += sizeof(struct rte_mbuf); +- aq->rq.lpb_sizem1 /= 8; ++ aq->rq.lpb_sizem1 = mp->elt_size / 8; + aq->rq.lpb_sizem1 -= 1; /* Expressed in size minus one */ + aq->rq.ena = 1; + aq->rq.pb_caching = 0x2; /* First cache aligned block to LLC */ +@@ -1114,10 +1112,12 @@ nix_store_queue_cfg_and_then_release(struct rte_eth_dev *eth_dev) + txq = (struct otx2_eth_txq **)eth_dev->data->tx_queues; + for (i = 0; i < nb_txq; i++) { + if (txq[i] == NULL) { +- otx2_err("txq[%d] is already released", i); +- goto fail; ++ tx_qconf[i].valid = false; ++ otx2_info("txq[%d] is already released", i); ++ continue; + } + memcpy(&tx_qconf[i], &txq[i]->qconf, sizeof(*tx_qconf)); ++ tx_qconf[i].valid = true; + otx2_nix_tx_queue_release(txq[i]); + eth_dev->data->tx_queues[i] = NULL; + } +@@ -1125,10 +1125,12 @@ nix_store_queue_cfg_and_then_release(struct rte_eth_dev *eth_dev) + rxq = (struct otx2_eth_rxq **)eth_dev->data->rx_queues; + for (i = 0; i < nb_rxq; i++) { + if (rxq[i] == NULL) { +- otx2_err("rxq[%d] is already released", i); +- goto fail; ++ rx_qconf[i].valid = false; ++ otx2_info("rxq[%d] is already released", i); ++ continue; + } + memcpy(&rx_qconf[i], &rxq[i]->qconf, sizeof(*rx_qconf)); ++ rx_qconf[i].valid = true; + otx2_nix_rx_queue_release(rxq[i]); + eth_dev->data->rx_queues[i] = NULL; + } +@@ -1183,6 +1185,8 @@ nix_restore_queue_cfg(struct rte_eth_dev *eth_dev) + * queues are already setup in port_configure(). 
+ */ + for (i = 0; i < nb_txq; i++) { ++ if (!tx_qconf[i].valid) ++ continue; + rc = otx2_nix_tx_queue_setup(eth_dev, i, tx_qconf[i].nb_desc, + tx_qconf[i].socket_id, + &tx_qconf[i].conf.tx); +@@ -1198,6 +1202,8 @@ nix_restore_queue_cfg(struct rte_eth_dev *eth_dev) + free(tx_qconf); tx_qconf = NULL; + + for (i = 0; i < nb_rxq; i++) { ++ if (!rx_qconf[i].valid) ++ continue; + rc = otx2_nix_rx_queue_setup(eth_dev, i, rx_qconf[i].nb_desc, + rx_qconf[i].socket_id, + &rx_qconf[i].conf.rx, +@@ -1641,6 +1647,15 @@ otx2_nix_configure(struct rte_eth_dev *eth_dev) + goto fail_offloads; + } + ++ otx2_nix_err_intr_enb_dis(eth_dev, true); ++ otx2_nix_ras_intr_enb_dis(eth_dev, true); ++ ++ if (dev->ptp_en && ++ dev->npc_flow.switch_header_type == OTX2_PRIV_FLAGS_HIGIG) { ++ otx2_err("Both PTP and switch header enabled"); ++ goto free_nix_lf; ++ } ++ + rc = nix_lf_switch_header_type_enable(dev); + if (rc) { + otx2_err("Failed to enable switch type nix_lf rc=%d", rc); +@@ -1714,6 +1729,12 @@ otx2_nix_configure(struct rte_eth_dev *eth_dev) + goto cq_fini; + } + ++ rc = otx2_nix_flow_ctrl_init(eth_dev); ++ if (rc) { ++ otx2_err("Failed to init flow ctrl mode %d", rc); ++ goto cq_fini; ++ } ++ + rc = otx2_nix_mc_addr_list_install(eth_dev); + if (rc < 0) { + otx2_err("Failed to install mc address list rc=%d", rc); +diff --git a/dpdk/drivers/net/octeontx2/otx2_ethdev.h b/dpdk/drivers/net/octeontx2/otx2_ethdev.h +index 987e7607c4..864356e36c 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_ethdev.h ++++ b/dpdk/drivers/net/octeontx2/otx2_ethdev.h +@@ -192,6 +192,7 @@ struct otx2_eth_qconf { + void *mempool; + uint32_t socket_id; + uint16_t nb_desc; ++ uint8_t valid; + }; + + struct otx2_fc_info { +@@ -438,6 +439,8 @@ int oxt2_nix_register_cq_irqs(struct rte_eth_dev *eth_dev); + void otx2_nix_unregister_irqs(struct rte_eth_dev *eth_dev); + void oxt2_nix_unregister_queue_irqs(struct rte_eth_dev *eth_dev); + void oxt2_nix_unregister_cq_irqs(struct rte_eth_dev *eth_dev); ++void otx2_nix_err_intr_enb_dis(struct rte_eth_dev *eth_dev, bool enb); ++void otx2_nix_ras_intr_enb_dis(struct rte_eth_dev *eth_dev, bool enb); + + int otx2_nix_rx_queue_intr_enable(struct rte_eth_dev *eth_dev, + uint16_t rx_queue_id); +@@ -504,6 +507,8 @@ int otx2_cgx_mac_addr_set(struct rte_eth_dev *eth_dev, + struct rte_ether_addr *addr); + + /* Flow Control */ ++int otx2_nix_flow_ctrl_init(struct rte_eth_dev *eth_dev); ++ + int otx2_nix_flow_ctrl_get(struct rte_eth_dev *eth_dev, + struct rte_eth_fc_conf *fc_conf); + +diff --git a/dpdk/drivers/net/octeontx2/otx2_ethdev_irq.c b/dpdk/drivers/net/octeontx2/otx2_ethdev_irq.c +index 2256e40b6f..b121488faf 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_ethdev_irq.c ++++ b/dpdk/drivers/net/octeontx2/otx2_ethdev_irq.c +@@ -41,11 +41,11 @@ nix_lf_register_err_irq(struct rte_eth_dev *eth_dev) + vec = dev->nix_msixoff + NIX_LF_INT_VEC_ERR_INT; + + /* Clear err interrupt */ +- otx2_write64(~0ull, dev->base + NIX_LF_ERR_INT_ENA_W1C); ++ otx2_nix_err_intr_enb_dis(eth_dev, false); + /* Set used interrupt vectors */ + rc = otx2_register_irq(handle, nix_lf_err_irq, eth_dev, vec); + /* Enable all dev interrupt except for RQ_DISABLED */ +- otx2_write64(~BIT_ULL(11), dev->base + NIX_LF_ERR_INT_ENA_W1S); ++ otx2_nix_err_intr_enb_dis(eth_dev, true); + + return rc; + } +@@ -61,7 +61,7 @@ nix_lf_unregister_err_irq(struct rte_eth_dev *eth_dev) + vec = dev->nix_msixoff + NIX_LF_INT_VEC_ERR_INT; + + /* Clear err interrupt */ +- otx2_write64(~0ull, dev->base + NIX_LF_ERR_INT_ENA_W1C); ++ otx2_nix_err_intr_enb_dis(eth_dev, 
false); + otx2_unregister_irq(handle, nix_lf_err_irq, eth_dev, vec); + } + +@@ -97,11 +97,11 @@ nix_lf_register_ras_irq(struct rte_eth_dev *eth_dev) + vec = dev->nix_msixoff + NIX_LF_INT_VEC_POISON; + + /* Clear err interrupt */ +- otx2_write64(~0ull, dev->base + NIX_LF_RAS_ENA_W1C); ++ otx2_nix_ras_intr_enb_dis(eth_dev, false); + /* Set used interrupt vectors */ + rc = otx2_register_irq(handle, nix_lf_ras_irq, eth_dev, vec); + /* Enable dev interrupt */ +- otx2_write64(~0ull, dev->base + NIX_LF_RAS_ENA_W1S); ++ otx2_nix_ras_intr_enb_dis(eth_dev, true); + + return rc; + } +@@ -117,7 +117,7 @@ nix_lf_unregister_ras_irq(struct rte_eth_dev *eth_dev) + vec = dev->nix_msixoff + NIX_LF_INT_VEC_POISON; + + /* Clear err interrupt */ +- otx2_write64(~0ull, dev->base + NIX_LF_RAS_ENA_W1C); ++ otx2_nix_ras_intr_enb_dis(eth_dev, false); + otx2_unregister_irq(handle, nix_lf_ras_irq, eth_dev, vec); + } + +@@ -466,3 +466,29 @@ otx2_nix_rx_queue_intr_disable(struct rte_eth_dev *eth_dev, + + return 0; + } ++ ++void ++otx2_nix_err_intr_enb_dis(struct rte_eth_dev *eth_dev, bool enb) ++{ ++ struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev); ++ ++ /* Enable all nix lf error interrupts except ++ * RQ_DISABLED and CQ_DISABLED. ++ */ ++ if (enb) ++ otx2_write64(~(BIT_ULL(11) | BIT_ULL(24)), ++ dev->base + NIX_LF_ERR_INT_ENA_W1S); ++ else ++ otx2_write64(~0ull, dev->base + NIX_LF_ERR_INT_ENA_W1C); ++} ++ ++void ++otx2_nix_ras_intr_enb_dis(struct rte_eth_dev *eth_dev, bool enb) ++{ ++ struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev); ++ ++ if (enb) ++ otx2_write64(~0ull, dev->base + NIX_LF_RAS_ENA_W1S); ++ else ++ otx2_write64(~0ull, dev->base + NIX_LF_RAS_ENA_W1C); ++} +diff --git a/dpdk/drivers/net/octeontx2/otx2_flow_ctrl.c b/dpdk/drivers/net/octeontx2/otx2_flow_ctrl.c +index c6d7b1971a..76bf481001 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_flow_ctrl.c ++++ b/dpdk/drivers/net/octeontx2/otx2_flow_ctrl.c +@@ -200,19 +200,18 @@ int + otx2_nix_update_flow_ctrl_mode(struct rte_eth_dev *eth_dev) + { + struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev); ++ struct otx2_fc_info *fc = &dev->fc_info; + struct rte_eth_fc_conf fc_conf; + + if (otx2_dev_is_lbk(dev) || otx2_dev_is_sdp(dev)) + return 0; + + memset(&fc_conf, 0, sizeof(struct rte_eth_fc_conf)); +- /* Both Rx & Tx flow ctrl get enabled(RTE_FC_FULL) in HW +- * by AF driver, update those info in PMD structure. +- */ +- otx2_nix_flow_ctrl_get(eth_dev, &fc_conf); ++ fc_conf.mode = fc->mode; + + /* To avoid Link credit deadlock on Ax, disable Tx FC if it's enabled */ + if (otx2_dev_is_Ax(dev) && ++ (dev->npc_flow.switch_header_type != OTX2_PRIV_FLAGS_HIGIG) && + (fc_conf.mode == RTE_FC_FULL || fc_conf.mode == RTE_FC_RX_PAUSE)) { + fc_conf.mode = + (fc_conf.mode == RTE_FC_FULL || +@@ -222,3 +221,32 @@ otx2_nix_update_flow_ctrl_mode(struct rte_eth_dev *eth_dev) + + return otx2_nix_flow_ctrl_set(eth_dev, &fc_conf); + } ++ ++int ++otx2_nix_flow_ctrl_init(struct rte_eth_dev *eth_dev) ++{ ++ struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev); ++ struct otx2_fc_info *fc = &dev->fc_info; ++ struct rte_eth_fc_conf fc_conf; ++ int rc; ++ ++ if (otx2_dev_is_lbk(dev) || otx2_dev_is_sdp(dev)) ++ return 0; ++ ++ memset(&fc_conf, 0, sizeof(struct rte_eth_fc_conf)); ++ /* Both Rx & Tx flow ctrl get enabled(RTE_FC_FULL) in HW ++ * by AF driver, update those info in PMD structure. 
++ */ ++ rc = otx2_nix_flow_ctrl_get(eth_dev, &fc_conf); ++ if (rc) ++ goto exit; ++ ++ fc->mode = fc_conf.mode; ++ fc->rx_pause = (fc_conf.mode == RTE_FC_FULL) || ++ (fc_conf.mode == RTE_FC_RX_PAUSE); ++ fc->tx_pause = (fc_conf.mode == RTE_FC_FULL) || ++ (fc_conf.mode == RTE_FC_TX_PAUSE); ++ ++exit: ++ return rc; ++} +diff --git a/dpdk/drivers/net/octeontx2/otx2_link.c b/dpdk/drivers/net/octeontx2/otx2_link.c +index f5679b06e7..4128f56d90 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_link.c ++++ b/dpdk/drivers/net/octeontx2/otx2_link.c +@@ -82,32 +82,57 @@ otx2_eth_dev_link_status_update(struct otx2_dev *dev, + _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL); + } + ++static int ++lbk_link_update(struct rte_eth_link *link) ++{ ++ link->link_status = ETH_LINK_UP; ++ link->link_speed = ETH_SPEED_NUM_100G; ++ link->link_autoneg = ETH_LINK_FIXED; ++ link->link_duplex = ETH_LINK_FULL_DUPLEX; ++ return 0; ++} ++ ++static int ++cgx_link_update(struct otx2_eth_dev *dev, struct rte_eth_link *link) ++{ ++ struct otx2_mbox *mbox = dev->mbox; ++ struct cgx_link_info_msg *rsp; ++ int rc; ++ otx2_mbox_alloc_msg_cgx_get_linkinfo(mbox); ++ rc = otx2_mbox_process_msg(mbox, (void *)&rsp); ++ if (rc) ++ return rc; ++ ++ link->link_status = rsp->link_info.link_up; ++ link->link_speed = rsp->link_info.speed; ++ link->link_autoneg = ETH_LINK_AUTONEG; ++ ++ if (rsp->link_info.full_duplex) ++ link->link_duplex = rsp->link_info.full_duplex; ++ return 0; ++} ++ + int + otx2_nix_link_update(struct rte_eth_dev *eth_dev, int wait_to_complete) + { + struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev); +- struct otx2_mbox *mbox = dev->mbox; +- struct cgx_link_info_msg *rsp; + struct rte_eth_link link; + int rc; + + RTE_SET_USED(wait_to_complete); ++ memset(&link, 0, sizeof(struct rte_eth_link)); + +- if (otx2_dev_is_lbk(dev) || otx2_dev_is_sdp(dev)) ++ if (otx2_dev_is_sdp(dev)) + return 0; + +- otx2_mbox_alloc_msg_cgx_get_linkinfo(mbox); +- rc = otx2_mbox_process_msg(mbox, (void *)&rsp); ++ if (otx2_dev_is_lbk(dev)) ++ rc = lbk_link_update(&link); ++ else ++ rc = cgx_link_update(dev, &link); ++ + if (rc) + return rc; + +- link.link_status = rsp->link_info.link_up; +- link.link_speed = rsp->link_info.speed; +- link.link_autoneg = ETH_LINK_AUTONEG; +- +- if (rsp->link_info.full_duplex) +- link.link_duplex = rsp->link_info.full_duplex; +- + return rte_eth_linkstatus_set(eth_dev, &link); + } + +diff --git a/dpdk/drivers/net/octeontx2/otx2_lookup.c b/dpdk/drivers/net/octeontx2/otx2_lookup.c +index bcf2ff4e8f..5685571166 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_lookup.c ++++ b/dpdk/drivers/net/octeontx2/otx2_lookup.c +@@ -17,7 +17,7 @@ + const uint32_t * + otx2_nix_supported_ptypes_get(struct rte_eth_dev *eth_dev) + { +- struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev); ++ RTE_SET_USED(eth_dev); + + static const uint32_t ptypes[] = { + RTE_PTYPE_L2_ETHER_QINQ, /* LB */ +@@ -56,10 +56,7 @@ otx2_nix_supported_ptypes_get(struct rte_eth_dev *eth_dev) + RTE_PTYPE_UNKNOWN, + }; + +- if (dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F) +- return ptypes; +- else +- return NULL; ++ return ptypes; + } + + int +diff --git a/dpdk/drivers/net/octeontx2/otx2_ptp.c b/dpdk/drivers/net/octeontx2/otx2_ptp.c +index f34b9339c4..ae5a2b7cd1 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_ptp.c ++++ b/dpdk/drivers/net/octeontx2/otx2_ptp.c +@@ -221,6 +221,11 @@ otx2_nix_timesync_enable(struct rte_eth_dev *eth_dev) + return -EINVAL; + } + ++ if (dev->npc_flow.switch_header_type == OTX2_PRIV_FLAGS_HIGIG) { ++ 
otx2_err("Both PTP and switch header enabled"); ++ return -EINVAL; ++ } ++ + /* Allocating a iova address for tx tstamp */ + const struct rte_memzone *ts; + ts = rte_eth_dma_zone_reserve(eth_dev, "otx2_ts", +diff --git a/dpdk/drivers/net/octeontx2/otx2_rss.c b/dpdk/drivers/net/octeontx2/otx2_rss.c +index bc7b64387a..d80579725a 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_rss.c ++++ b/dpdk/drivers/net/octeontx2/otx2_rss.c +@@ -341,7 +341,7 @@ otx2_nix_rss_config(struct rte_eth_dev *eth_dev) + int rc; + + /* Skip further configuration if selected mode is not RSS */ +- if (eth_dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS) ++ if (eth_dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS || !qcnt) + return 0; + + /* Update default RSS key and cfg */ +diff --git a/dpdk/drivers/net/pfe/pfe_ethdev.c b/dpdk/drivers/net/pfe/pfe_ethdev.c +index 9403478198..b1de866d34 100644 +--- a/dpdk/drivers/net/pfe/pfe_ethdev.c ++++ b/dpdk/drivers/net/pfe/pfe_ethdev.c +@@ -13,7 +13,7 @@ + #include "pfe_logs.h" + #include "pfe_mod.h" + +-#define PFE_MAX_MACS 1 /*we can support upto 4 MACs per IF*/ ++#define PFE_MAX_MACS 1 /* we can support up to 4 MACs per IF */ + #define PFE_VDEV_GEM_ID_ARG "intf" + + struct pfe_vdev_init_params { +@@ -396,7 +396,6 @@ pfe_eth_exit(struct rte_eth_dev *dev, struct pfe *pfe) + /* Close the device file for link status */ + pfe_eth_close_cdev(dev->data->dev_private); + +- rte_free(dev->data->mac_addrs); + rte_eth_dev_release_port(dev); + pfe->nb_devs--; + } +@@ -990,7 +989,7 @@ pmd_pfe_probe(struct rte_vdev_device *vdev) + if (rc < 0) + return -EINVAL; + +- RTE_LOG(INFO, PMD, "Initializing pmd_pfe for %s Given gem-id %d\n", ++ PFE_PMD_LOG(INFO, "Initializing pmd_pfe for %s Given gem-id %d", + name, init_params.gem_id); + + if (g_pfe) { +@@ -1118,7 +1117,7 @@ pmd_pfe_probe(struct rte_vdev_device *vdev) + else + gem_id = init_params.gem_id; + +- RTE_LOG(INFO, PMD, "Init pmd_pfe for %s gem-id %d(given =%d)\n", ++ PFE_PMD_LOG(INFO, "Init pmd_pfe for %s gem-id %d(given =%d)", + name, gem_id, init_params.gem_id); + + rc = pfe_eth_init(vdev, g_pfe, gem_id); +diff --git a/dpdk/drivers/net/qede/base/ecore_dev.c b/dpdk/drivers/net/qede/base/ecore_dev.c +index 9d1db14590..86ecfb2690 100644 +--- a/dpdk/drivers/net/qede/base/ecore_dev.c ++++ b/dpdk/drivers/net/qede/base/ecore_dev.c +@@ -5253,7 +5253,6 @@ static void ecore_emul_hw_info_port_num(struct ecore_hwfn *p_hwfn, + + /* MISCS_REG_ECO_RESERVED[15:12]: num of ports in an engine */ + eco_reserved = ecore_rd(p_hwfn, p_ptt, MISCS_REG_ECO_RESERVED); +- + switch ((eco_reserved & 0xf000) >> 12) { + case 1: + p_dev->num_ports_in_engine = 1; +@@ -5268,7 +5267,7 @@ static void ecore_emul_hw_info_port_num(struct ecore_hwfn *p_hwfn, + DP_NOTICE(p_hwfn, false, + "Emulation: Unknown port mode [ECO_RESERVED 0x%08x]\n", + eco_reserved); +- p_dev->num_ports_in_engine = 2; /* Default to something */ ++ p_dev->num_ports_in_engine = 1; /* Default to something */ + break; + } + +@@ -5281,8 +5280,8 @@ static void ecore_emul_hw_info_port_num(struct ecore_hwfn *p_hwfn, + static void ecore_hw_info_port_num(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt) + { ++ u32 addr, global_offsize, global_addr, port_mode; + struct ecore_dev *p_dev = p_hwfn->p_dev; +- u32 addr, global_offsize, global_addr; + + #ifndef ASIC_ONLY + if (CHIP_REV_IS_TEDIBEAR(p_dev)) { +@@ -5304,15 +5303,32 @@ static void ecore_hw_info_port_num(struct ecore_hwfn *p_hwfn, + return; + } + +- addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, +- PUBLIC_GLOBAL); +- global_offsize = 
ecore_rd(p_hwfn, p_ptt, addr); +- global_addr = SECTION_ADDR(global_offsize, 0); +- addr = global_addr + OFFSETOF(struct public_global, max_ports); +- p_dev->num_ports = (u8)ecore_rd(p_hwfn, p_ptt, addr); ++ /* Determine the number of ports per engine */ ++ port_mode = ecore_rd(p_hwfn, p_ptt, MISC_REG_PORT_MODE); ++ switch (port_mode) { ++ case 0x0: ++ p_dev->num_ports_in_engine = 1; ++ break; ++ case 0x1: ++ p_dev->num_ports_in_engine = 2; ++ break; ++ case 0x2: ++ p_dev->num_ports_in_engine = 4; ++ break; ++ default: ++ DP_NOTICE(p_hwfn, false, "Unknown port mode 0x%08x\n", ++ port_mode); ++ p_dev->num_ports_in_engine = 1; /* Default to something */ ++ break; ++ } + +- p_dev->num_ports_in_engine = p_dev->num_ports >> +- (ecore_device_num_engines(p_dev) - 1); ++ /* Get the total number of ports of the device */ ++ addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, ++ PUBLIC_GLOBAL); ++ global_offsize = ecore_rd(p_hwfn, p_ptt, addr); ++ global_addr = SECTION_ADDR(global_offsize, 0); ++ addr = global_addr + OFFSETOF(struct public_global, max_ports); ++ p_dev->num_ports = (u8)ecore_rd(p_hwfn, p_ptt, addr); + } + + static void ecore_mcp_get_eee_caps(struct ecore_hwfn *p_hwfn, +@@ -5601,7 +5617,7 @@ ecore_hw_prepare_single(struct ecore_hwfn *p_hwfn, void OSAL_IOMEM *p_regview, + p_hwfn->db_phys_addr = db_phys_addr; + + if (IS_VF(p_dev)) +- return ecore_vf_hw_prepare(p_hwfn); ++ return ecore_vf_hw_prepare(p_hwfn, p_params); + + /* Validate that chip access is feasible */ + if (REG_RD(p_hwfn, PXP_PF_ME_OPAQUE_ADDR) == 0xffffffff) { +diff --git a/dpdk/drivers/net/qede/base/ecore_dev_api.h b/dpdk/drivers/net/qede/base/ecore_dev_api.h +index 4d5cc1a0fa..5ea8427a07 100644 +--- a/dpdk/drivers/net/qede/base/ecore_dev_api.h ++++ b/dpdk/drivers/net/qede/base/ecore_dev_api.h +@@ -277,6 +277,9 @@ struct ecore_hw_prepare_params { + + /* Indicates whether this PF serves a storage target */ + bool b_is_target; ++ ++ /* retry count for VF acquire on channel timeout */ ++ u8 acquire_retry_cnt; + }; + + /** +diff --git a/dpdk/drivers/net/qede/base/ecore_iov_api.h b/dpdk/drivers/net/qede/base/ecore_iov_api.h +index c998dbf8d5..5450018121 100644 +--- a/dpdk/drivers/net/qede/base/ecore_iov_api.h ++++ b/dpdk/drivers/net/qede/base/ecore_iov_api.h +@@ -51,6 +51,7 @@ enum ecore_iov_pf_to_vf_status { + PFVF_STATUS_NO_RESOURCE, + PFVF_STATUS_FORCED, + PFVF_STATUS_MALICIOUS, ++ PFVF_STATUS_ACQUIRED, + }; + + struct ecore_mcp_link_params; +diff --git a/dpdk/drivers/net/qede/base/ecore_sriov.c b/dpdk/drivers/net/qede/base/ecore_sriov.c +index deee04ac4b..e60257e190 100644 +--- a/dpdk/drivers/net/qede/base/ecore_sriov.c ++++ b/dpdk/drivers/net/qede/base/ecore_sriov.c +@@ -61,6 +61,39 @@ const char *qede_ecore_channel_tlvs_string[] = { + "CHANNEL_TLV_COALESCE_READ", + "CHANNEL_TLV_BULLETIN_UPDATE_MAC", + "CHANNEL_TLV_UPDATE_MTU", ++ "CHANNEL_TLV_RDMA_ACQUIRE", ++ "CHANNEL_TLV_RDMA_START", ++ "CHANNEL_TLV_RDMA_STOP", ++ "CHANNEL_TLV_RDMA_ADD_USER", ++ "CHANNEL_TLV_RDMA_REMOVE_USER", ++ "CHANNEL_TLV_RDMA_QUERY_COUNTERS", ++ "CHANNEL_TLV_RDMA_ALLOC_TID", ++ "CHANNEL_TLV_RDMA_REGISTER_TID", ++ "CHANNEL_TLV_RDMA_DEREGISTER_TID", ++ "CHANNEL_TLV_RDMA_FREE_TID", ++ "CHANNEL_TLV_RDMA_CREATE_CQ", ++ "CHANNEL_TLV_RDMA_RESIZE_CQ", ++ "CHANNEL_TLV_RDMA_DESTROY_CQ", ++ "CHANNEL_TLV_RDMA_CREATE_QP", ++ "CHANNEL_TLV_RDMA_MODIFY_QP", ++ "CHANNEL_TLV_RDMA_QUERY_QP", ++ "CHANNEL_TLV_RDMA_DESTROY_QP", ++ "CHANNEL_TLV_RDMA_CREATE_SRQ", ++ "CHANNEL_TLV_RDMA_MODIFY_SRQ", ++ "CHANNEL_TLV_RDMA_DESTROY_SRQ", ++ "CHANNEL_TLV_RDMA_QUERY_PORT", 
++ "CHANNEL_TLV_RDMA_QUERY_DEVICE", ++ "CHANNEL_TLV_RDMA_IWARP_CONNECT", ++ "CHANNEL_TLV_RDMA_IWARP_ACCEPT", ++ "CHANNEL_TLV_RDMA_IWARP_CREATE_LISTEN", ++ "CHANNEL_TLV_RDMA_IWARP_DESTROY_LISTEN", ++ "CHANNEL_TLV_RDMA_IWARP_PAUSE_LISTEN", ++ "CHANNEL_TLV_RDMA_IWARP_REJECT", ++ "CHANNEL_TLV_RDMA_IWARP_SEND_RTR", ++ "CHANNEL_TLV_ESTABLISH_LL2_CONN", ++ "CHANNEL_TLV_TERMINATE_LL2_CONN", ++ "CHANNEL_TLV_ASYNC_EVENT", ++ "CHANNEL_TLV_SOFT_FLR", + "CHANNEL_TLV_MAX" + }; + +diff --git a/dpdk/drivers/net/qede/base/ecore_vf.c b/dpdk/drivers/net/qede/base/ecore_vf.c +index 24846cfb51..0e5b7d5eb3 100644 +--- a/dpdk/drivers/net/qede/base/ecore_vf.c ++++ b/dpdk/drivers/net/qede/base/ecore_vf.c +@@ -226,7 +226,6 @@ enum _ecore_status_t ecore_vf_pf_release(struct ecore_hwfn *p_hwfn) + return _ecore_vf_pf_release(p_hwfn, true); + } + +-#define VF_ACQUIRE_THRESH 3 + static void ecore_vf_pf_acquire_reduce_resc(struct ecore_hwfn *p_hwfn, + struct vf_pf_resc_request *p_req, + struct pf_vf_resc *p_resp) +@@ -251,13 +250,47 @@ static void ecore_vf_pf_acquire_reduce_resc(struct ecore_hwfn *p_hwfn, + p_req->num_cids = p_resp->num_cids; + } + +-static enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn) ++static enum _ecore_status_t ++ecore_vf_pf_soft_flr_acquire(struct ecore_hwfn *p_hwfn) ++{ ++ struct ecore_vf_iov *p_iov = p_hwfn->vf_iov_info; ++ struct pfvf_def_resp_tlv *resp; ++ struct vfpf_soft_flr_tlv *req; ++ enum _ecore_status_t rc; ++ ++ req = ecore_vf_pf_prep(p_hwfn, CHANNEL_TLV_SOFT_FLR, sizeof(*req)); ++ ++ /* add list termination tlv */ ++ ecore_add_tlv(&p_iov->offset, ++ CHANNEL_TLV_LIST_END, ++ sizeof(struct channel_list_end_tlv)); ++ ++ resp = &p_iov->pf2vf_reply->default_resp; ++ rc = ecore_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); ++ ++ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV, "rc=0x%x\n", rc); ++ ++ /* to release the mutex as ecore_vf_pf_acquire() take the mutex */ ++ ecore_vf_pf_req_end(p_hwfn, ECORE_AGAIN); ++ ++ /* As of today, there is no mechanism in place for VF to know the FLR ++ * status, so sufficiently (worst case time) wait for FLR to complete, ++ * as mailbox request to MFW by the PF for initiating VF flr and PF ++ * processing VF FLR could take time. 
++ */ ++ OSAL_MSLEEP(3000); ++ ++ return ecore_vf_pf_acquire(p_hwfn); ++} ++ ++enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn) + { + struct ecore_vf_iov *p_iov = p_hwfn->vf_iov_info; + struct pfvf_acquire_resp_tlv *resp = &p_iov->pf2vf_reply->acquire_resp; + struct pf_vf_pfdev_info *pfdev_info = &resp->pfdev_info; + struct ecore_vf_acquire_sw_info vf_sw_info; + struct ecore_dev *p_dev = p_hwfn->p_dev; ++ u8 retry_cnt = p_iov->acquire_retry_cnt; + struct vf_pf_resc_request *p_resc; + bool resources_acquired = false; + struct vfpf_acquire_tlv *req; +@@ -318,6 +351,14 @@ static enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn) + /* send acquire request */ + rc = ecore_send_msg2pf(p_hwfn, + &resp->hdr.status, sizeof(*resp)); ++ ++ if (retry_cnt && rc == ECORE_TIMEOUT) { ++ DP_VERBOSE(p_hwfn, ECORE_MSG_IOV, ++ "VF retrying to acquire due to VPC timeout\n"); ++ retry_cnt--; ++ continue; ++ } ++ + if (rc != ECORE_SUCCESS) + goto exit; + +@@ -343,7 +384,7 @@ static enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn) + resources_acquired = true; + } /* PF refuses to allocate our resources */ + else if (resp->hdr.status == PFVF_STATUS_NO_RESOURCE && +- attempts < VF_ACQUIRE_THRESH) { ++ attempts < ECORE_VF_ACQUIRE_THRESH) { + ecore_vf_pf_acquire_reduce_resc(p_hwfn, p_resc, + &resp->resc); + +@@ -391,6 +432,9 @@ static enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn) + "PF rejected acquisition by VF\n"); + rc = ECORE_INVAL; + goto exit; ++ } else if (resp->hdr.status == PFVF_STATUS_ACQUIRED) { ++ ecore_vf_pf_req_end(p_hwfn, ECORE_AGAIN); ++ return ecore_vf_pf_soft_flr_acquire(p_hwfn); + } else { + DP_ERR(p_hwfn, + "PF returned err %d to VF acquisition request\n", +@@ -477,7 +521,9 @@ u32 ecore_vf_hw_bar_size(struct ecore_hwfn *p_hwfn, + return 0; + } + +-enum _ecore_status_t ecore_vf_hw_prepare(struct ecore_hwfn *p_hwfn) ++enum _ecore_status_t ++ecore_vf_hw_prepare(struct ecore_hwfn *p_hwfn, ++ struct ecore_hw_prepare_params *p_params) + { + struct ecore_hwfn *p_lead = ECORE_LEADING_HWFN(p_hwfn->p_dev); + struct ecore_vf_iov *p_iov; +@@ -583,6 +629,7 @@ enum _ecore_status_t ecore_vf_hw_prepare(struct ecore_hwfn *p_hwfn) + #endif + OSAL_MUTEX_INIT(&p_iov->mutex); + ++ p_iov->acquire_retry_cnt = p_params->acquire_retry_cnt; + p_hwfn->vf_iov_info = p_iov; + + p_hwfn->hw_info.personality = ECORE_PCI_ETH; +diff --git a/dpdk/drivers/net/qede/base/ecore_vf.h b/dpdk/drivers/net/qede/base/ecore_vf.h +index a07f82ebd9..f027eba3ea 100644 +--- a/dpdk/drivers/net/qede/base/ecore_vf.h ++++ b/dpdk/drivers/net/qede/base/ecore_vf.h +@@ -11,6 +11,7 @@ + #include "ecore_vf_api.h" + #include "ecore_l2_api.h" + #include "ecore_vfpf_if.h" ++#include "ecore_dev_api.h" + + /* Default number of CIDs [total of both Rx and Tx] to be requested + * by default. +@@ -59,6 +60,9 @@ struct ecore_vf_iov { + * bar or via the doorbell bar. + */ + bool b_doorbell_bar; ++ ++ /* retry count for VF acquire on channel timeout */ ++ u8 acquire_retry_cnt; + }; + + /** +@@ -72,6 +76,8 @@ struct ecore_vf_iov { + enum _ecore_status_t ecore_vf_pf_get_coalesce(struct ecore_hwfn *p_hwfn, + u16 *p_coal, + struct ecore_queue_cid *p_cid); ++ ++enum _ecore_status_t ecore_vf_pf_acquire(struct ecore_hwfn *p_hwfn); + /** + * @brief VF - Set Rx/Tx coalesce per VF's relative queue. + * Coalesce value '0' will omit the configuration. 
+@@ -92,10 +98,13 @@ enum _ecore_status_t ecore_vf_pf_set_coalesce(struct ecore_hwfn *p_hwfn, + * sends ACQUIRE message + * + * @param p_hwfn ++ * @param p_params + * + * @return enum _ecore_status_t + */ +-enum _ecore_status_t ecore_vf_hw_prepare(struct ecore_hwfn *p_hwfn); ++enum _ecore_status_t ++ecore_vf_hw_prepare(struct ecore_hwfn *p_hwfn, ++ struct ecore_hw_prepare_params *p_params); + + /** + * @brief VF - start the RX Queue by sending a message to the PF +diff --git a/dpdk/drivers/net/qede/base/ecore_vf_api.h b/dpdk/drivers/net/qede/base/ecore_vf_api.h +index 1a9fb3b1f2..43951a9a34 100644 +--- a/dpdk/drivers/net/qede/base/ecore_vf_api.h ++++ b/dpdk/drivers/net/qede/base/ecore_vf_api.h +@@ -11,6 +11,9 @@ + #include "ecore_mcp_api.h" + + #ifdef CONFIG_ECORE_SRIOV ++ ++#define ECORE_VF_ACQUIRE_THRESH 3 ++ + /** + * @brief Read the VF bulletin and act on it if needed + * +diff --git a/dpdk/drivers/net/qede/base/ecore_vfpf_if.h b/dpdk/drivers/net/qede/base/ecore_vfpf_if.h +index c7ecb01c28..f92dc428af 100644 +--- a/dpdk/drivers/net/qede/base/ecore_vfpf_if.h ++++ b/dpdk/drivers/net/qede/base/ecore_vfpf_if.h +@@ -251,6 +251,13 @@ struct vfpf_qid_tlv { + u8 padding[3]; + }; + ++/* Soft FLR req */ ++struct vfpf_soft_flr_tlv { ++ struct vfpf_first_tlv first_tlv; ++ u32 reserved1; ++ u32 reserved2; ++}; ++ + /* Setup Queue */ + struct vfpf_start_rxq_tlv { + struct vfpf_first_tlv first_tlv; +@@ -557,6 +564,7 @@ union vfpf_tlvs { + struct vfpf_read_coal_req_tlv read_coal_req; + struct vfpf_bulletin_update_mac_tlv bulletin_update_mac; + struct vfpf_update_mtu_tlv update_mtu; ++ struct vfpf_soft_flr_tlv soft_flr; + struct tlv_buffer_size tlv_buf_size; + }; + +@@ -689,6 +697,39 @@ enum { + CHANNEL_TLV_COALESCE_READ, + CHANNEL_TLV_BULLETIN_UPDATE_MAC, + CHANNEL_TLV_UPDATE_MTU, ++ CHANNEL_TLV_RDMA_ACQUIRE, ++ CHANNEL_TLV_RDMA_START, ++ CHANNEL_TLV_RDMA_STOP, ++ CHANNEL_TLV_RDMA_ADD_USER, ++ CHANNEL_TLV_RDMA_REMOVE_USER, ++ CHANNEL_TLV_RDMA_QUERY_COUNTERS, ++ CHANNEL_TLV_RDMA_ALLOC_TID, ++ CHANNEL_TLV_RDMA_REGISTER_TID, ++ CHANNEL_TLV_RDMA_DEREGISTER_TID, ++ CHANNEL_TLV_RDMA_FREE_TID, ++ CHANNEL_TLV_RDMA_CREATE_CQ, ++ CHANNEL_TLV_RDMA_RESIZE_CQ, ++ CHANNEL_TLV_RDMA_DESTROY_CQ, ++ CHANNEL_TLV_RDMA_CREATE_QP, ++ CHANNEL_TLV_RDMA_MODIFY_QP, ++ CHANNEL_TLV_RDMA_QUERY_QP, ++ CHANNEL_TLV_RDMA_DESTROY_QP, ++ CHANNEL_TLV_RDMA_QUERY_PORT, ++ CHANNEL_TLV_RDMA_QUERY_DEVICE, ++ CHANNEL_TLV_RDMA_IWARP_CONNECT, ++ CHANNEL_TLV_RDMA_IWARP_ACCEPT, ++ CHANNEL_TLV_RDMA_IWARP_CREATE_LISTEN, ++ CHANNEL_TLV_RDMA_IWARP_DESTROY_LISTEN, ++ CHANNEL_TLV_RDMA_IWARP_PAUSE_LISTEN, ++ CHANNEL_TLV_RDMA_IWARP_REJECT, ++ CHANNEL_TLV_RDMA_IWARP_SEND_RTR, ++ CHANNEL_TLV_ESTABLISH_LL2_CONN, ++ CHANNEL_TLV_TERMINATE_LL2_CONN, ++ CHANNEL_TLV_ASYNC_EVENT, ++ CHANNEL_TLV_RDMA_CREATE_SRQ, ++ CHANNEL_TLV_RDMA_MODIFY_SRQ, ++ CHANNEL_TLV_RDMA_DESTROY_SRQ, ++ CHANNEL_TLV_SOFT_FLR, + CHANNEL_TLV_MAX, + + /* Required for iterating over vport-update tlvs. 
+diff --git a/dpdk/drivers/net/qede/base/mcp_public.h b/dpdk/drivers/net/qede/base/mcp_public.h
+index 98b9723dd4..6667c2d7ab 100644
+--- a/dpdk/drivers/net/qede/base/mcp_public.h
++++ b/dpdk/drivers/net/qede/base/mcp_public.h
+@@ -1290,6 +1290,7 @@ struct public_drv_mb {
+ /*deprecated don't use*/
+ #define DRV_MSG_CODE_INITIATE_FLR_DEPRECATED 0x02000000
+ #define DRV_MSG_CODE_INITIATE_PF_FLR 0x02010000
++#define DRV_MSG_CODE_INITIATE_VF_FLR 0x02020000
+ #define DRV_MSG_CODE_VF_DISABLED_DONE 0xc0000000
+ #define DRV_MSG_CODE_CFG_VF_MSIX 0xc0010000
+ #define DRV_MSG_CODE_CFG_PF_VFS_MSIX 0xc0020000
+@@ -1749,6 +1750,7 @@ struct public_drv_mb {
+ #define FW_MSG_CODE_NIG_DRAIN_DONE 0x30000000
+ #define FW_MSG_CODE_VF_DISABLED_DONE 0xb0000000
+ #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE 0xb0010000
++#define FW_MSG_CODE_INITIATE_VF_FLR_OK 0xb0030000
+ #define FW_MSG_CODE_ERR_RESOURCE_TEMPORARY_UNAVAILABLE 0x008b0000
+ #define FW_MSG_CODE_ERR_RESOURCE_ALREADY_ALLOCATED 0x008c0000
+ #define FW_MSG_CODE_ERR_RESOURCE_NOT_ALLOCATED 0x008d0000
+diff --git a/dpdk/drivers/net/qede/qede_ethdev.c b/dpdk/drivers/net/qede/qede_ethdev.c
+index 19d2e96191..2a1c82ac9a 100644
+--- a/dpdk/drivers/net/qede/qede_ethdev.c
++++ b/dpdk/drivers/net/qede/qede_ethdev.c
+@@ -1064,7 +1064,7 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev)
+ qede_reset_queue_stats(qdev, true);
+
+ /* Newer SR-IOV PF driver expects RX/TX queues to be started before
+- * enabling RSS. Hence RSS configuration is deferred upto this point.
++ * enabling RSS. Hence RSS configuration is deferred up to this point.
+ * Also, we would like to retain similar behavior in PF case, so we
+ * don't do PF/VF specific check here.
+ */
+@@ -1076,6 +1076,9 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev)
+ if (qede_activate_vport(eth_dev, true))
+ goto err;
+
++ /* Bring-up the link */
++ qede_dev_set_link_state(eth_dev, true);
++
+ /* Update link status */
+ qede_link_update(eth_dev, 0);
+
+@@ -1097,6 +1100,12 @@ static void qede_dev_stop(struct rte_eth_dev *eth_dev)
+
+ PMD_INIT_FUNC_TRACE(edev);
+
++ /* Bring the link down */
++ qede_dev_set_link_state(eth_dev, false);
++
++ /* Update link status */
++ qede_link_update(eth_dev, 0);
++
+ /* Disable vport */
+ if (qede_activate_vport(eth_dev, false))
+ return;
+@@ -1182,6 +1191,8 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev)
+ struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+ struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+ struct rte_eth_rxmode *rxmode = &eth_dev->data->dev_conf.rxmode;
++ uint8_t num_rxqs;
++ uint8_t num_txqs;
+ int ret;
+
+ PMD_INIT_FUNC_TRACE(edev);
+@@ -1214,12 +1225,17 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev)
+ if (qede_check_fdir_support(eth_dev))
+ return -ENOTSUP;
+
+- qede_dealloc_fp_resc(eth_dev);
+- qdev->num_tx_queues = eth_dev->data->nb_tx_queues * edev->num_hwfns;
+- qdev->num_rx_queues = eth_dev->data->nb_rx_queues * edev->num_hwfns;
+-
+- if (qede_alloc_fp_resc(qdev))
+- return -ENOMEM;
++ /* Allocate/reallocate fastpath resources only for new queue config */
++ num_txqs = eth_dev->data->nb_tx_queues * edev->num_hwfns;
++ num_rxqs = eth_dev->data->nb_rx_queues * edev->num_hwfns;
++ if (qdev->num_tx_queues != num_txqs ||
++ qdev->num_rx_queues != num_rxqs) {
++ qede_dealloc_fp_resc(eth_dev);
++ qdev->num_tx_queues = num_txqs;
++ qdev->num_rx_queues = num_rxqs;
++ if (qede_alloc_fp_resc(qdev))
++ return -ENOMEM;
++ }
+
+ /* If jumbo enabled adjust MTU */
+ if (rxmode->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
+@@ -1472,7 
+1488,8 @@ static void qede_dev_close(struct rte_eth_dev *eth_dev) + if (eth_dev->data->dev_started) + qede_dev_stop(eth_dev); + +- qede_stop_vport(edev); ++ if (qdev->vport_started) ++ qede_stop_vport(edev); + qdev->vport_started = false; + qede_fdir_dealloc_resc(eth_dev); + qede_dealloc_fp_resc(eth_dev); +@@ -1480,8 +1497,6 @@ static void qede_dev_close(struct rte_eth_dev *eth_dev) + eth_dev->data->nb_rx_queues = 0; + eth_dev->data->nb_tx_queues = 0; + +- /* Bring the link down */ +- qede_dev_set_link_state(eth_dev, false); + qdev->ops->common->slowpath_stop(edev); + qdev->ops->common->remove(edev); + rte_intr_disable(&pci_dev->intr_handle); +@@ -2604,9 +2619,6 @@ static int qede_common_dev_init(struct rte_eth_dev *eth_dev, bool is_vf) + + eth_dev->dev_ops = (is_vf) ? &qede_eth_vf_dev_ops : &qede_eth_dev_ops; + +- /* Bring-up the link */ +- qede_dev_set_link_state(eth_dev, true); +- + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + SLIST_INIT(&adapter->arfs_info.arfs_list_head); +diff --git a/dpdk/drivers/net/qede/qede_main.c b/dpdk/drivers/net/qede/qede_main.c +index 4eb79d0fbb..8580cbcd7f 100644 +--- a/dpdk/drivers/net/qede/qede_main.c ++++ b/dpdk/drivers/net/qede/qede_main.c +@@ -56,6 +56,10 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device *pci_dev, + qed_init_pci(edev, pci_dev); + + memset(&hw_prepare_params, 0, sizeof(hw_prepare_params)); ++ ++ if (is_vf) ++ hw_prepare_params.acquire_retry_cnt = ECORE_VF_ACQUIRE_THRESH; ++ + hw_prepare_params.personality = ECORE_PCI_ETH; + hw_prepare_params.drv_resc_alloc = false; + hw_prepare_params.chk_reg_fifo = false; +diff --git a/dpdk/drivers/net/qede/qede_rxtx.c b/dpdk/drivers/net/qede/qede_rxtx.c +index a28dd0a07f..3c55c0efdf 100644 +--- a/dpdk/drivers/net/qede/qede_rxtx.c ++++ b/dpdk/drivers/net/qede/qede_rxtx.c +@@ -593,12 +593,14 @@ qede_alloc_mem_sb(struct qede_dev *qdev, struct ecore_sb_info *sb_info, + + int qede_alloc_fp_resc(struct qede_dev *qdev) + { +- struct ecore_dev *edev = &qdev->edev; ++ struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); + struct qede_fastpath *fp; + uint32_t num_sbs; + uint16_t sb_idx; + int i; + ++ PMD_INIT_FUNC_TRACE(edev); ++ + if (IS_VF(edev)) + ecore_vf_get_num_sbs(ECORE_LEADING_HWFN(edev), &num_sbs); + else +diff --git a/dpdk/drivers/net/ring/rte_eth_ring.c b/dpdk/drivers/net/ring/rte_eth_ring.c +index 41acbc513d..f0fafa0c0d 100644 +--- a/dpdk/drivers/net/ring/rte_eth_ring.c ++++ b/dpdk/drivers/net/ring/rte_eth_ring.c +@@ -246,6 +246,7 @@ static const struct eth_dev_ops ops = { + + static int + do_eth_dev_ring_create(const char *name, ++ struct rte_vdev_device *vdev, + struct rte_ring * const rx_queues[], + const unsigned int nb_rx_queues, + struct rte_ring *const tx_queues[], +@@ -291,12 +292,15 @@ do_eth_dev_ring_create(const char *name, + } + + /* now put it all together ++ * - store EAL device in eth_dev, + * - store queue data in internals, + * - store numa_node info in eth_dev_data + * - point eth_dev_data to internals + * - and point eth_dev structure to new eth_dev_data structure + */ + ++ eth_dev->device = &vdev->device; ++ + data = eth_dev->data; + data->rx_queues = rx_queues_local; + data->tx_queues = tx_queues_local; +@@ -408,7 +412,9 @@ rte_eth_from_ring(struct rte_ring *r) + } + + static int +-eth_dev_ring_create(const char *name, const unsigned int numa_node, ++eth_dev_ring_create(const char *name, ++ struct rte_vdev_device *vdev, ++ const unsigned int numa_node, + enum dev_action action, struct rte_eth_dev **eth_dev) + { + /* rx and tx are so-called from point of view 
of first port.
+@@ -438,7 +444,7 @@ eth_dev_ring_create(const char *name,
+ return -1;
+ }
+
+- if (do_eth_dev_ring_create(name, rxtx, num_rings, rxtx, num_rings,
++ if (do_eth_dev_ring_create(name, vdev, rxtx, num_rings, rxtx, num_rings,
+ numa_node, action, eth_dev) < 0)
+ return -1;
+
+@@ -560,12 +566,12 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+ PMD_LOG(INFO, "Initializing pmd_ring for %s", name);
+
+ if (params == NULL || params[0] == '\0') {
+- ret = eth_dev_ring_create(name, rte_socket_id(), DEV_CREATE,
++ ret = eth_dev_ring_create(name, dev, rte_socket_id(), DEV_CREATE,
+ &eth_dev);
+ if (ret == -1) {
+ PMD_LOG(INFO,
+ "Attach to pmd_ring for %s", name);
+- ret = eth_dev_ring_create(name, rte_socket_id(),
++ ret = eth_dev_ring_create(name, dev, rte_socket_id(),
+ DEV_ATTACH, &eth_dev);
+ }
+ } else {
+@@ -574,19 +580,16 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+ if (!kvlist) {
+ PMD_LOG(INFO,
+ "Ignoring unsupported parameters when creatingrings-backed ethernet device");
+- ret = eth_dev_ring_create(name, rte_socket_id(),
++ ret = eth_dev_ring_create(name, dev, rte_socket_id(),
+ DEV_CREATE, &eth_dev);
+ if (ret == -1) {
+ PMD_LOG(INFO,
+ "Attach to pmd_ring for %s",
+ name);
+- ret = eth_dev_ring_create(name, rte_socket_id(),
++ ret = eth_dev_ring_create(name, dev, rte_socket_id(),
+ DEV_ATTACH, &eth_dev);
+ }
+
+- if (eth_dev)
+- eth_dev->device = &dev->device;
+-
+ return ret;
+ }
+
+@@ -597,7 +600,7 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+ if (ret < 0)
+ goto out_free;
+
+- ret = do_eth_dev_ring_create(name,
++ ret = do_eth_dev_ring_create(name, dev,
+ internal_args->rx_queues,
+ internal_args->nb_rx_queues,
+ internal_args->tx_queues,
+@@ -627,6 +630,7 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+
+ for (info->count = 0; info->count < info->total; info->count++) {
+ ret = eth_dev_ring_create(info->list[info->count].name,
++ dev,
+ info->list[info->count].node,
+ info->list[info->count].action,
+ &eth_dev);
+@@ -635,7 +639,7 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+ PMD_LOG(INFO,
+ "Attach to pmd_ring for %s",
+ name);
+- ret = eth_dev_ring_create(name,
++ ret = eth_dev_ring_create(name, dev,
+ info->list[info->count].node,
+ DEV_ATTACH,
+ &eth_dev);
+@@ -644,9 +648,6 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+ }
+ }
+
+- if (eth_dev)
+- eth_dev->device = &dev->device;
+-
+ out_free:
+ rte_kvargs_free(kvlist);
+ rte_free(info);
+diff --git a/dpdk/drivers/net/sfc/base/ef10_evb.c b/dpdk/drivers/net/sfc/base/ef10_evb.c
+index 1788a2c96a..9b33e89fc1 100644
+--- a/dpdk/drivers/net/sfc/base/ef10_evb.c
++++ b/dpdk/drivers/net/sfc/base/ef10_evb.c
+@@ -9,15 +9,13 @@
+
+ #if EFSYS_OPT_EVB
+
+-#if EFSYS_OPT_HUNTINGTON || EFSYS_OPT_MEDFORD || EFSYS_OPT_MEDFORD2
++#if EFX_OPTS_EF10()
+
+ __checkReturn efx_rc_t
+ ef10_evb_init(
+ __in efx_nic_t *enp)
+ {
+- EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON ||
+- enp->en_family == EFX_FAMILY_MEDFORD ||
+- enp->en_family == EFX_FAMILY_MEDFORD2);
++ EFSYS_ASSERT(EFX_FAMILY_IS_EF10(enp));
+
+ return (0);
+ }
+@@ -26,12 +24,10 @@ ef10_evb_init(
+ ef10_evb_fini(
+ __in efx_nic_t *enp)
+ {
+- EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON ||
+- enp->en_family == EFX_FAMILY_MEDFORD ||
+- enp->en_family == EFX_FAMILY_MEDFORD2);
++ EFSYS_ASSERT(EFX_FAMILY_IS_EF10(enp));
+ }
+
+- __checkReturn efx_rc_t
++static __checkReturn efx_rc_t
+ efx_mcdi_vswitch_alloc(
+ __in efx_nic_t *enp,
+ __in efx_vport_id_t vport_id,
+@@ -98,7 +94,7 @@ efx_mcdi_vswitch_alloc(
+ return (rc);
+ }
+ +- __checkReturn efx_rc_t ++static __checkReturn efx_rc_t + efx_mcdi_vswitch_free( + __in efx_nic_t *enp) + { +@@ -129,7 +125,7 @@ efx_mcdi_vswitch_free( + return (rc); + } + +- __checkReturn efx_rc_t ++static __checkReturn efx_rc_t + efx_mcdi_vport_alloc( + __in efx_nic_t *enp, + __in efx_vport_type_t vport_type, +@@ -192,7 +188,7 @@ efx_mcdi_vport_alloc( + return (rc); + } + +- __checkReturn efx_rc_t ++static __checkReturn efx_rc_t + efx_mcdi_vport_free( + __in efx_nic_t *enp, + __in efx_vport_id_t vport_id) +@@ -223,7 +219,7 @@ efx_mcdi_vport_free( + return (rc); + } + +- __checkReturn efx_rc_t ++static __checkReturn efx_rc_t + efx_mcdi_vport_mac_addr_add( + __in efx_nic_t *enp, + __in efx_vport_id_t vport_id, +@@ -258,7 +254,7 @@ efx_mcdi_vport_mac_addr_add( + return (rc); + } + +- __checkReturn efx_rc_t ++static __checkReturn efx_rc_t + efx_mcdi_vport_mac_addr_del( + __in efx_nic_t *enp, + __in efx_vport_id_t vport_id, +@@ -293,7 +289,7 @@ efx_mcdi_vport_mac_addr_del( + return (rc); + } + +- __checkReturn efx_rc_t ++static __checkReturn efx_rc_t + efx_mcdi_port_assign( + __in efx_nic_t *enp, + __in efx_vport_id_t vport_id, +@@ -330,7 +326,7 @@ efx_mcdi_port_assign( + return (rc); + } + +- __checkReturn efx_rc_t ++static __checkReturn efx_rc_t + efx_mcdi_vport_reconfigure( + __in efx_nic_t *enp, + __in efx_vport_id_t vport_id, +@@ -549,5 +545,5 @@ ef10_evb_vport_stats( + EFX_STATS_UPLOAD, 0)); + } + +-#endif /* EFSYS_OPT_HUNTINGTON || EFSYS_OPT_MEDFORD || EFSYS_OPT_MEDFORD2 */ ++#endif /* EFX_OPTS_EF10() */ + #endif /* EFSYS_OPT_EVB */ +diff --git a/dpdk/drivers/net/sfc/base/ef10_filter.c b/dpdk/drivers/net/sfc/base/ef10_filter.c +index e4f8de51c0..158e77e3bb 100644 +--- a/dpdk/drivers/net/sfc/base/ef10_filter.c ++++ b/dpdk/drivers/net/sfc/base/ef10_filter.c +@@ -590,6 +590,231 @@ ef10_filter_restore( + return (rc); + } + ++enum ef10_filter_add_action_e { ++ /* Insert a new filter */ ++ EF10_FILTER_ADD_NEW, ++ /* ++ * Replace old filter with a new, overriding the old one ++ * if it has lower priority. ++ */ ++ EF10_FILTER_ADD_REPLACE, ++ /* Store new, lower priority filter as overridden by old filter */ ++ EF10_FILTER_ADD_STORE, ++ /* Special case for AUTO filters, remove AUTO_OLD flag */ ++ EF10_FILTER_ADD_REFRESH, ++}; ++ ++static __checkReturn efx_rc_t ++ef10_filter_add_lookup_equal_spec( ++ __in efx_filter_spec_t *spec, ++ __in efx_filter_spec_t *probe_spec, ++ __in efx_filter_replacement_policy_t policy, ++ __out boolean_t *found) ++{ ++ efx_rc_t rc; ++ ++ /* Refreshing AUTO filter */ ++ if (spec->efs_priority == EFX_FILTER_PRI_AUTO && ++ probe_spec->efs_priority == EFX_FILTER_PRI_AUTO) { ++ *found = B_TRUE; ++ return (0); ++ } ++ ++ /* ++ * With exclusive filters, higher priority ones ++ * override lower priority ones, and lower priority ++ * ones are stored in case the higher priority one ++ * is removed. ++ */ ++ if (ef10_filter_is_exclusive(spec)) { ++ switch (policy) { ++ case EFX_FILTER_REPLACEMENT_HIGHER_OR_EQUAL_PRIORITY: ++ if (spec->efs_priority == probe_spec->efs_priority) { ++ *found = B_TRUE; ++ break; ++ } ++ /* Fall-through */ ++ case EFX_FILTER_REPLACEMENT_HIGHER_PRIORITY: ++ if (spec->efs_priority > probe_spec->efs_priority) { ++ *found = B_TRUE; ++ break; ++ } ++ /* Fall-through */ ++ case EFX_FILTER_REPLACEMENT_NEVER: ++ /* ++ * Lower priority filter needs to be ++ * stored. It does *not* replace the ++ * old one. That is why EEXIST is not ++ * returned in that case. 
++ */ ++ if (spec->efs_priority < probe_spec->efs_priority) { ++ *found = B_TRUE; ++ break; ++ } else { ++ rc = EEXIST; ++ goto fail1; ++ } ++ default: ++ EFSYS_ASSERT(0); ++ rc = EEXIST; ++ goto fail2; ++ } ++ } else { ++ *found = B_FALSE; ++ } ++ ++ return (0); ++ ++fail2: ++ EFSYS_PROBE(fail2); ++ ++fail1: ++ EFSYS_PROBE1(fail1, efx_rc_t, rc); ++ ++ return (rc); ++} ++ ++ ++static void ++ef10_filter_add_select_action( ++ __in efx_filter_spec_t *saved_spec, ++ __in efx_filter_spec_t *spec, ++ __out enum ef10_filter_add_action_e *action, ++ __out efx_filter_spec_t **overridden_spec) ++{ ++ efx_filter_spec_t *overridden = NULL; ++ ++ if (saved_spec == NULL) { ++ *action = EF10_FILTER_ADD_NEW; ++ } else if (ef10_filter_is_exclusive(spec) == B_FALSE) { ++ /* ++ * Non-exclusive filters are always stored in separate entries ++ * in the table. The only case involving a saved spec is ++ * refreshing an AUTO filter. ++ */ ++ EFSYS_ASSERT(saved_spec->efs_overridden_spec == NULL); ++ EFSYS_ASSERT(spec->efs_priority == EFX_FILTER_PRI_AUTO); ++ EFSYS_ASSERT(saved_spec->efs_priority == EFX_FILTER_PRI_AUTO); ++ *action = EF10_FILTER_ADD_REFRESH; ++ } else { ++ /* Exclusive filters stored in the same entry */ ++ if (spec->efs_priority > saved_spec->efs_priority) { ++ /* ++ * Insert a high priority filter over a lower priority ++ * one. Only two priority levels are implemented, so ++ * there must not already be an overridden filter. ++ */ ++ EFX_STATIC_ASSERT(EFX_FILTER_NPRI == 2); ++ EFSYS_ASSERT(saved_spec->efs_overridden_spec == NULL); ++ overridden = saved_spec; ++ *action = EF10_FILTER_ADD_REPLACE; ++ } else if (spec->efs_priority == saved_spec->efs_priority) { ++ /* Replace in-place or refresh an existing filter */ ++ if (spec->efs_priority == EFX_FILTER_PRI_AUTO) ++ *action = EF10_FILTER_ADD_REFRESH; ++ else ++ *action = EF10_FILTER_ADD_REPLACE; ++ } else { ++ /* ++ * Insert a lower priority filter, storing it in case ++ * the higher priority filter is removed. ++ * ++ * Currently there are only two priority levels, so this ++ * must be an AUTO filter. ++ */ ++ EFX_STATIC_ASSERT(EFX_FILTER_NPRI == 2); ++ EFSYS_ASSERT(spec->efs_priority == EFX_FILTER_PRI_AUTO); ++ if (saved_spec->efs_overridden_spec != NULL) { ++ *action = EF10_FILTER_ADD_REFRESH; ++ } else { ++ overridden = spec; ++ *action = EF10_FILTER_ADD_STORE; ++ } ++ } ++ } ++ ++ *overridden_spec = overridden; ++} ++ ++static __checkReturn efx_rc_t ++ef10_filter_add_execute_action( ++ __in efx_nic_t *enp, ++ __in efx_filter_spec_t *saved_spec, ++ __in efx_filter_spec_t *spec, ++ __in efx_filter_spec_t *overridden_spec, ++ __in enum ef10_filter_add_action_e action, ++ __in int ins_index) ++{ ++ ef10_filter_table_t *eftp = enp->en_filter.ef_ef10_filter_table; ++ efsys_lock_state_t state; ++ efx_rc_t rc; ++ ++ EFSYS_LOCK(enp->en_eslp, state); ++ ++ if (action == EF10_FILTER_ADD_REFRESH) { ++ ef10_filter_set_entry_not_auto_old(eftp, ins_index); ++ goto out_unlock; ++ } else if (action == EF10_FILTER_ADD_STORE) { ++ EFSYS_ASSERT(overridden_spec != NULL); ++ saved_spec->efs_overridden_spec = overridden_spec; ++ goto out_unlock; ++ } ++ ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ ++ switch (action) { ++ case EF10_FILTER_ADD_REPLACE: ++ /* ++ * On replacing the filter handle may change after a ++ * successful replace operation. 
++ */ ++ rc = efx_mcdi_filter_op_add(enp, spec, ++ MC_CMD_FILTER_OP_IN_OP_REPLACE, ++ &eftp->eft_entry[ins_index].efe_handle); ++ break; ++ case EF10_FILTER_ADD_NEW: ++ if (ef10_filter_is_exclusive(spec)) { ++ rc = efx_mcdi_filter_op_add(enp, spec, ++ MC_CMD_FILTER_OP_IN_OP_INSERT, ++ &eftp->eft_entry[ins_index].efe_handle); ++ } else { ++ rc = efx_mcdi_filter_op_add(enp, spec, ++ MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE, ++ &eftp->eft_entry[ins_index].efe_handle); ++ } ++ break; ++ default: ++ rc = EINVAL; ++ EFSYS_ASSERT(0); ++ break; ++ } ++ if (rc != 0) ++ goto fail1; ++ ++ EFSYS_LOCK(enp->en_eslp, state); ++ ++ if (action == EF10_FILTER_ADD_REPLACE) { ++ /* Update the fields that may differ */ ++ saved_spec->efs_priority = spec->efs_priority; ++ saved_spec->efs_flags = spec->efs_flags; ++ saved_spec->efs_rss_context = spec->efs_rss_context; ++ saved_spec->efs_dmaq_id = spec->efs_dmaq_id; ++ ++ if (overridden_spec != NULL) ++ saved_spec->efs_overridden_spec = overridden_spec; ++ } ++ ++out_unlock: ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ ++ return (0); ++ ++fail1: ++ EFSYS_PROBE1(fail1, efx_rc_t, rc); ++ ++ return (rc); ++} ++ + /* + * An arbitrary search limit for the software hash table. As per the linux net + * driver. +@@ -600,22 +825,24 @@ static __checkReturn efx_rc_t + ef10_filter_add_internal( + __in efx_nic_t *enp, + __inout efx_filter_spec_t *spec, +- __in boolean_t may_replace, ++ __in efx_filter_replacement_policy_t policy, + __out_opt uint32_t *filter_id) + { + efx_rc_t rc; + ef10_filter_table_t *eftp = enp->en_filter.ef_ef10_filter_table; ++ enum ef10_filter_add_action_e action; ++ efx_filter_spec_t *overridden_spec = NULL; + efx_filter_spec_t *saved_spec; + uint32_t hash; + unsigned int depth; + int ins_index; +- boolean_t replacing = B_FALSE; +- unsigned int i; + efsys_lock_state_t state; + boolean_t locked = B_FALSE; + + EFSYS_ASSERT(EFX_FAMILY_IS_EF10(enp)); + ++ EFSYS_ASSERT(spec->efs_overridden_spec == NULL); ++ + hash = ef10_filter_hash(spec); + + /* +@@ -628,145 +855,136 @@ ef10_filter_add_internal( + * else a free slot to insert at. If any of them are busy, + * we have to wait and retry. 
+ */ +- for (;;) { +- ins_index = -1; +- depth = 1; +- EFSYS_LOCK(enp->en_eslp, state); +- locked = B_TRUE; ++retry: ++ EFSYS_LOCK(enp->en_eslp, state); ++ locked = B_TRUE; ++ ++ ins_index = -1; ++ ++ for (depth = 1; depth <= EF10_FILTER_SEARCH_LIMIT; depth++) { ++ unsigned int probe_index; ++ efx_filter_spec_t *probe_spec; + +- for (;;) { +- i = (hash + depth) & (EFX_EF10_FILTER_TBL_ROWS - 1); +- saved_spec = ef10_filter_entry_spec(eftp, i); +- +- if (!saved_spec) { +- if (ins_index < 0) { +- ins_index = i; +- } +- } else if (ef10_filter_equal(spec, saved_spec)) { +- if (ef10_filter_entry_is_busy(eftp, i)) +- break; +- if (saved_spec->efs_priority +- == EFX_FILTER_PRI_AUTO) { +- ins_index = i; +- goto found; +- } else if (ef10_filter_is_exclusive(spec)) { +- if (may_replace) { +- ins_index = i; +- goto found; +- } else { +- rc = EEXIST; +- goto fail1; +- } +- } +- +- /* Leave existing */ ++ probe_index = (hash + depth) & (EFX_EF10_FILTER_TBL_ROWS - 1); ++ probe_spec = ef10_filter_entry_spec(eftp, probe_index); ++ ++ if (probe_spec == NULL) { ++ if (ins_index < 0) ++ ins_index = probe_index; ++ } else if (ef10_filter_equal(spec, probe_spec)) { ++ boolean_t found; ++ ++ if (ef10_filter_entry_is_busy(eftp, probe_index)) { ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ locked = B_FALSE; ++ goto retry; + } + +- /* +- * Once we reach the maximum search depth, use +- * the first suitable slot or return EBUSY if +- * there was none. +- */ +- if (depth == EF10_FILTER_SEARCH_LIMIT) { +- if (ins_index < 0) { +- rc = EBUSY; +- goto fail2; +- } +- goto found; ++ rc = ef10_filter_add_lookup_equal_spec(spec, ++ probe_spec, policy, &found); ++ if (rc != 0) ++ goto fail1; ++ ++ if (found != B_FALSE) { ++ ins_index = probe_index; ++ break; + } +- depth++; + } +- EFSYS_UNLOCK(enp->en_eslp, state); +- locked = B_FALSE; + } + +-found: + /* +- * Create a software table entry if necessary, and mark it +- * busy. We might yet fail to insert, but any attempt to +- * insert a conflicting filter while we're waiting for the +- * firmware must find the busy entry. ++ * Once we reach the maximum search depth, use the first suitable slot ++ * or return EBUSY if there was none. + */ +- saved_spec = ef10_filter_entry_spec(eftp, ins_index); +- if (saved_spec) { +- if (saved_spec->efs_priority == EFX_FILTER_PRI_AUTO) { +- /* This is a filter we are refreshing */ +- ef10_filter_set_entry_not_auto_old(eftp, ins_index); +- goto out_unlock; +- +- } +- replacing = B_TRUE; +- } else { +- EFSYS_KMEM_ALLOC(enp->en_esip, sizeof (*spec), saved_spec); +- if (!saved_spec) { +- rc = ENOMEM; +- goto fail3; +- } +- *saved_spec = *spec; +- ef10_filter_set_entry(eftp, ins_index, saved_spec); ++ if (ins_index < 0) { ++ rc = EBUSY; ++ goto fail2; + } ++ ++ /* ++ * Mark software table entry busy. We might yet fail to insert, ++ * but any attempt to insert a conflicting filter while we're ++ * waiting for the firmware must find the busy entry. ++ */ + ef10_filter_set_entry_busy(eftp, ins_index); + +- EFSYS_UNLOCK(enp->en_eslp, state); +- locked = B_FALSE; ++ saved_spec = ef10_filter_entry_spec(eftp, ins_index); ++ ef10_filter_add_select_action(saved_spec, spec, &action, ++ &overridden_spec); + + /* +- * On replacing the filter handle may change after after a successful +- * replace operation. ++ * Allocate a new filter if found entry is empty or ++ * a filter should be overridden. 
+ */ +- if (replacing) { +- rc = efx_mcdi_filter_op_add(enp, spec, +- MC_CMD_FILTER_OP_IN_OP_REPLACE, +- &eftp->eft_entry[ins_index].efe_handle); +- } else if (ef10_filter_is_exclusive(spec)) { +- rc = efx_mcdi_filter_op_add(enp, spec, +- MC_CMD_FILTER_OP_IN_OP_INSERT, +- &eftp->eft_entry[ins_index].efe_handle); +- } else { +- rc = efx_mcdi_filter_op_add(enp, spec, +- MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE, +- &eftp->eft_entry[ins_index].efe_handle); +- } +- +- if (rc != 0) +- goto fail4; ++ if (overridden_spec != NULL || saved_spec == NULL) { ++ efx_filter_spec_t *new_spec; + +- EFSYS_LOCK(enp->en_eslp, state); +- locked = B_TRUE; ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ locked = B_FALSE; + +- if (replacing) { +- /* Update the fields that may differ */ +- saved_spec->efs_priority = spec->efs_priority; +- saved_spec->efs_flags = spec->efs_flags; +- saved_spec->efs_rss_context = spec->efs_rss_context; +- saved_spec->efs_dmaq_id = spec->efs_dmaq_id; +- } ++ EFSYS_KMEM_ALLOC(enp->en_esip, sizeof (*new_spec), new_spec); ++ if (new_spec == NULL) { ++ rc = ENOMEM; ++ overridden_spec = NULL; ++ goto fail3; ++ } + +- ef10_filter_set_entry_not_busy(eftp, ins_index); ++ EFSYS_LOCK(enp->en_eslp, state); ++ locked = B_TRUE; + +-out_unlock: ++ if (saved_spec == NULL) { ++ *new_spec = *spec; ++ ef10_filter_set_entry(eftp, ins_index, new_spec); ++ } else { ++ *new_spec = *overridden_spec; ++ overridden_spec = new_spec; ++ } ++ } + + EFSYS_UNLOCK(enp->en_eslp, state); + locked = B_FALSE; + ++ rc = ef10_filter_add_execute_action(enp, saved_spec, spec, ++ overridden_spec, action, ins_index); ++ if (rc != 0) ++ goto fail4; ++ + if (filter_id) + *filter_id = ins_index; + ++ EFSYS_LOCK(enp->en_eslp, state); ++ ef10_filter_set_entry_not_busy(eftp, ins_index); ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ + return (0); + + fail4: + EFSYS_PROBE(fail4); + +- if (!replacing) { +- EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), saved_spec); +- saved_spec = NULL; ++ EFSYS_ASSERT(locked == B_FALSE); ++ EFSYS_LOCK(enp->en_eslp, state); ++ ++ if (action == EF10_FILTER_ADD_NEW) { ++ EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), ++ ef10_filter_entry_spec(eftp, ins_index)); ++ ef10_filter_set_entry(eftp, ins_index, NULL); + } +- ef10_filter_set_entry_not_busy(eftp, ins_index); +- ef10_filter_set_entry(eftp, ins_index, NULL); ++ ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ ++ if (overridden_spec != NULL) ++ EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), overridden_spec); + + fail3: + EFSYS_PROBE(fail3); + ++ EFSYS_ASSERT(locked == B_FALSE); ++ EFSYS_LOCK(enp->en_eslp, state); ++ ++ ef10_filter_set_entry_not_busy(eftp, ins_index); ++ ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ + fail2: + EFSYS_PROBE(fail2); + +@@ -783,11 +1001,11 @@ ef10_filter_add_internal( + ef10_filter_add( + __in efx_nic_t *enp, + __inout efx_filter_spec_t *spec, +- __in boolean_t may_replace) ++ __in enum efx_filter_replacement_policy_e policy) + { + efx_rc_t rc; + +- rc = ef10_filter_add_internal(enp, spec, may_replace, NULL); ++ rc = ef10_filter_add_internal(enp, spec, policy, NULL); + if (rc != 0) + goto fail1; + +@@ -799,11 +1017,15 @@ ef10_filter_add( + return (rc); + } + +- ++/* ++ * Delete a filter by index from the filter table with priority ++ * that is not higher than specified. 
++ */ + static __checkReturn efx_rc_t + ef10_filter_delete_internal( + __in efx_nic_t *enp, +- __in uint32_t filter_id) ++ __in uint32_t filter_id, ++ __in efx_filter_priority_t priority) + { + efx_rc_t rc; + ef10_filter_table_t *table = enp->en_filter.ef_ef10_filter_table; +@@ -825,7 +1047,8 @@ ef10_filter_delete_internal( + EFSYS_LOCK(enp->en_eslp, state); + } + if ((spec = ef10_filter_entry_spec(table, filter_idx)) != NULL) { +- ef10_filter_set_entry_busy(table, filter_idx); ++ if (spec->efs_priority <= priority) ++ ef10_filter_set_entry_busy(table, filter_idx); + } + EFSYS_UNLOCK(enp->en_eslp, state); + +@@ -834,31 +1057,53 @@ ef10_filter_delete_internal( + goto fail1; + } + +- /* +- * Try to remove the hardware filter. This may fail if the MC has +- * rebooted (which frees all hardware filter resources). +- */ +- if (ef10_filter_is_exclusive(spec)) { +- rc = efx_mcdi_filter_op_delete(enp, +- MC_CMD_FILTER_OP_IN_OP_REMOVE, +- &table->eft_entry[filter_idx].efe_handle); ++ if (spec->efs_priority > priority) { ++ /* ++ * Applied filter stays, but overridden filter is removed since ++ * next user request to delete the applied filter should not ++ * restore outdated filter. ++ */ ++ if (spec->efs_overridden_spec != NULL) { ++ EFSYS_ASSERT(spec->efs_overridden_spec->efs_overridden_spec == ++ NULL); ++ EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), ++ spec->efs_overridden_spec); ++ spec->efs_overridden_spec = NULL; ++ } + } else { +- rc = efx_mcdi_filter_op_delete(enp, +- MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE, +- &table->eft_entry[filter_idx].efe_handle); +- } ++ /* ++ * Try to remove the hardware filter or replace it with the ++ * saved automatic filter. This may fail if the MC has ++ * rebooted (which frees all hardware filter resources). ++ */ ++ if (spec->efs_overridden_spec != NULL) { ++ rc = efx_mcdi_filter_op_add(enp, ++ spec->efs_overridden_spec, ++ MC_CMD_FILTER_OP_IN_OP_REPLACE, ++ &table->eft_entry[filter_idx].efe_handle); ++ } else if (ef10_filter_is_exclusive(spec)) { ++ rc = efx_mcdi_filter_op_delete(enp, ++ MC_CMD_FILTER_OP_IN_OP_REMOVE, ++ &table->eft_entry[filter_idx].efe_handle); ++ } else { ++ rc = efx_mcdi_filter_op_delete(enp, ++ MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE, ++ &table->eft_entry[filter_idx].efe_handle); ++ } + +- /* Free the software table entry */ +- EFSYS_LOCK(enp->en_eslp, state); +- ef10_filter_set_entry_not_busy(table, filter_idx); +- ef10_filter_set_entry(table, filter_idx, NULL); +- EFSYS_UNLOCK(enp->en_eslp, state); ++ /* Free the software table entry */ ++ EFSYS_LOCK(enp->en_eslp, state); ++ ef10_filter_set_entry_not_busy(table, filter_idx); ++ ef10_filter_set_entry(table, filter_idx, ++ spec->efs_overridden_spec); ++ EFSYS_UNLOCK(enp->en_eslp, state); + +- EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), spec); ++ EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), spec); + +- /* Check result of hardware filter removal */ +- if (rc != 0) +- goto fail2; ++ /* Check result of hardware filter removal */ ++ if (rc != 0) ++ goto fail2; ++ } + + return (0); + +@@ -871,6 +1116,25 @@ ef10_filter_delete_internal( + return (rc); + } + ++static void ++ef10_filter_delete_auto( ++ __in efx_nic_t *enp, ++ __in uint32_t filter_id) ++{ ++ ef10_filter_table_t *table = enp->en_filter.ef_ef10_filter_table; ++ uint32_t filter_idx = filter_id % EFX_EF10_FILTER_TBL_ROWS; ++ ++ /* ++ * AUTO_OLD flag is cleared since the auto filter that is to be removed ++ * may not be the filter at the specified index itself, but the filter ++ * that is overridden by it. 
++ */ ++ ef10_filter_set_entry_not_auto_old(table, filter_idx); ++ ++ (void) ef10_filter_delete_internal(enp, filter_idx, ++ EFX_FILTER_PRI_AUTO); ++} ++ + __checkReturn efx_rc_t + ef10_filter_delete( + __in efx_nic_t *enp, +@@ -897,7 +1161,8 @@ ef10_filter_delete( + i = (hash + depth) & (EFX_EF10_FILTER_TBL_ROWS - 1); + saved_spec = ef10_filter_entry_spec(table, i); + if (saved_spec && ef10_filter_equal(spec, saved_spec) && +- ef10_filter_same_dest(spec, saved_spec)) { ++ ef10_filter_same_dest(spec, saved_spec) && ++ saved_spec->efs_priority == EFX_FILTER_PRI_MANUAL) { + break; + } + if (depth == EF10_FILTER_SEARCH_LIMIT) { +@@ -910,7 +1175,7 @@ ef10_filter_delete( + EFSYS_UNLOCK(enp->en_eslp, state); + locked = B_FALSE; + +- rc = ef10_filter_delete_internal(enp, i); ++ rc = ef10_filter_delete_internal(enp, i, EFX_FILTER_PRI_MANUAL); + if (rc != 0) + goto fail2; + +@@ -1135,7 +1400,7 @@ ef10_filter_insert_unicast( + if (rc != 0) + goto fail1; + +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, ++ rc = ef10_filter_add_internal(enp, &spec, EFX_FILTER_REPLACEMENT_NEVER, + &eftp->eft_unicst_filter_indexes[eftp->eft_unicst_filter_count]); + if (rc != 0) + goto fail2; +@@ -1169,7 +1434,7 @@ ef10_filter_insert_all_unicast( + rc = efx_filter_spec_set_uc_def(&spec); + if (rc != 0) + goto fail1; +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, ++ rc = ef10_filter_add_internal(enp, &spec, EFX_FILTER_REPLACEMENT_NEVER, + &eftp->eft_unicst_filter_indexes[eftp->eft_unicst_filter_count]); + if (rc != 0) + goto fail2; +@@ -1239,8 +1504,8 @@ ef10_filter_insert_multicast_list( + } + } + +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, +- &filter_index); ++ rc = ef10_filter_add_internal(enp, &spec, ++ EFX_FILTER_REPLACEMENT_NEVER, &filter_index); + + if (rc == 0) { + eftp->eft_mulcst_filter_indexes[filter_count] = +@@ -1267,8 +1532,8 @@ ef10_filter_insert_multicast_list( + goto rollback; + } + +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, +- &filter_index); ++ rc = ef10_filter_add_internal(enp, &spec, ++ EFX_FILTER_REPLACEMENT_NEVER, &filter_index); + + if (rc == 0) { + eftp->eft_mulcst_filter_indexes[filter_count] = +@@ -1289,7 +1554,7 @@ ef10_filter_insert_multicast_list( + /* Remove any filters we have inserted */ + i = filter_count; + while (i--) { +- (void) ef10_filter_delete_internal(enp, ++ ef10_filter_delete_auto(enp, + eftp->eft_mulcst_filter_indexes[i]); + } + eftp->eft_mulcst_filter_count = 0; +@@ -1317,7 +1582,7 @@ ef10_filter_insert_all_multicast( + if (rc != 0) + goto fail1; + +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, ++ rc = ef10_filter_add_internal(enp, &spec, EFX_FILTER_REPLACEMENT_NEVER, + &eftp->eft_mulcst_filter_indexes[0]); + if (rc != 0) + goto fail2; +@@ -1420,8 +1685,9 @@ ef10_filter_insert_encap_filters( + if (rc != 0) + goto fail1; + +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, +- &table->eft_encap_filter_indexes[ ++ rc = ef10_filter_add_internal(enp, &spec, ++ EFX_FILTER_REPLACEMENT_NEVER, ++ &table->eft_encap_filter_indexes[ + table->eft_encap_filter_count]); + if (rc != 0) { + if (rc != EACCES) +@@ -1450,7 +1716,7 @@ ef10_filter_remove_old( + + for (i = 0; i < EFX_ARRAY_SIZE(table->eft_entry); i++) { + if (ef10_filter_entry_is_auto_old(table, i)) { +- (void) ef10_filter_delete_internal(enp, i); ++ ef10_filter_delete_auto(enp, i); + } + } + } +@@ -1525,19 +1791,19 @@ ef10_filter_reconfigure( + * has rebooted, which removes hardware filters). 
+ */ + for (i = 0; i < table->eft_unicst_filter_count; i++) { +- (void) ef10_filter_delete_internal(enp, ++ ef10_filter_delete_auto(enp, + table->eft_unicst_filter_indexes[i]); + } + table->eft_unicst_filter_count = 0; + + for (i = 0; i < table->eft_mulcst_filter_count; i++) { +- (void) ef10_filter_delete_internal(enp, ++ ef10_filter_delete_auto(enp, + table->eft_mulcst_filter_indexes[i]); + } + table->eft_mulcst_filter_count = 0; + + for (i = 0; i < table->eft_encap_filter_count; i++) { +- (void) ef10_filter_delete_internal(enp, ++ ef10_filter_delete_auto(enp, + table->eft_encap_filter_indexes[i]); + } + table->eft_encap_filter_count = 0; +diff --git a/dpdk/drivers/net/sfc/base/ef10_impl.h b/dpdk/drivers/net/sfc/base/ef10_impl.h +index 7a00047829..67abf3b853 100644 +--- a/dpdk/drivers/net/sfc/base/ef10_impl.h ++++ b/dpdk/drivers/net/sfc/base/ef10_impl.h +@@ -1079,6 +1079,8 @@ ef10_rx_fini( + + #if EFSYS_OPT_FILTER + ++enum efx_filter_replacement_policy_e; ++ + typedef struct ef10_filter_handle_s { + uint32_t efh_lo; + uint32_t efh_hi; +@@ -1148,7 +1150,7 @@ ef10_filter_restore( + ef10_filter_add( + __in efx_nic_t *enp, + __inout efx_filter_spec_t *spec, +- __in boolean_t may_replace); ++ __in enum efx_filter_replacement_policy_e policy); + + __checkReturn efx_rc_t + ef10_filter_delete( +diff --git a/dpdk/drivers/net/sfc/base/ef10_nic.c b/dpdk/drivers/net/sfc/base/ef10_nic.c +index b25ce1908e..3eb4674c5e 100644 +--- a/dpdk/drivers/net/sfc/base/ef10_nic.c ++++ b/dpdk/drivers/net/sfc/base/ef10_nic.c +@@ -2288,9 +2288,7 @@ ef10_nic_init( + efx_rc_t rc; + boolean_t alloc_vadaptor = B_TRUE; + +- EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON || +- enp->en_family == EFX_FAMILY_MEDFORD || +- enp->en_family == EFX_FAMILY_MEDFORD2); ++ EFSYS_ASSERT(EFX_FAMILY_IS_EF10(enp)); + + /* Enable reporting of some events (e.g. link change) */ + if ((rc = efx_mcdi_log_ctrl(enp)) != 0) +diff --git a/dpdk/drivers/net/sfc/base/ef10_proxy.c b/dpdk/drivers/net/sfc/base/ef10_proxy.c +index 059b2f5f4d..619d98e472 100644 +--- a/dpdk/drivers/net/sfc/base/ef10_proxy.c ++++ b/dpdk/drivers/net/sfc/base/ef10_proxy.c +@@ -13,9 +13,7 @@ + ef10_proxy_auth_init( + __in efx_nic_t *enp) + { +- EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON || +- enp->en_family == EFX_FAMILY_MEDFORD || +- enp->en_family == EFX_FAMILY_MEDFORD2); ++ EFSYS_ASSERT(EFX_FAMILY_IS_EF10(enp)); + + return (0); + } +@@ -24,9 +22,7 @@ ef10_proxy_auth_init( + ef10_proxy_auth_fini( + __in efx_nic_t *enp) + { +- EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON || +- enp->en_family == EFX_FAMILY_MEDFORD || +- enp->en_family == EFX_FAMILY_MEDFORD2); ++ EFSYS_ASSERT(EFX_FAMILY_IS_EF10(enp)); + } + + static __checkReturn efx_rc_t +diff --git a/dpdk/drivers/net/sfc/base/efx.h b/dpdk/drivers/net/sfc/base/efx.h +index 53ddaa987c..d94d3c02f7 100644 +--- a/dpdk/drivers/net/sfc/base/efx.h ++++ b/dpdk/drivers/net/sfc/base/efx.h +@@ -2949,17 +2949,15 @@ typedef uint8_t efx_filter_flags_t; + + typedef uint32_t efx_filter_match_flags_t; + ++/* Filter priority from lowest to highest */ + typedef enum efx_filter_priority_s { +- EFX_FILTER_PRI_HINT = 0, /* Performance hint */ +- EFX_FILTER_PRI_AUTO, /* Automatic filter based on device ++ EFX_FILTER_PRI_AUTO = 0, /* Automatic filter based on device + * address list or hardware + * requirements. This may only be used + * by the filter implementation for + * each NIC type. 
*/ + EFX_FILTER_PRI_MANUAL, /* Manually configured filter */ +- EFX_FILTER_PRI_REQUIRED, /* Required for correct behaviour of the +- * client (e.g. SR-IOV, HyperV VMQ etc.) +- */ ++ EFX_FILTER_NPRI, + } efx_filter_priority_t; + + /* +@@ -2974,6 +2972,11 @@ typedef struct efx_filter_spec_s { + uint16_t efs_dmaq_id; + uint32_t efs_rss_context; + uint32_t efs_mark; ++ /* ++ * Saved lower-priority filter. If it is set, it is restored on ++ * filter delete operation. ++ */ ++ struct efx_filter_spec_s *efs_overridden_spec; + /* Fields below here are hashed for software filter lookup */ + uint16_t efs_outer_vid; + uint16_t efs_inner_vid; +diff --git a/dpdk/drivers/net/sfc/base/efx_evb.c b/dpdk/drivers/net/sfc/base/efx_evb.c +index dd64bc7854..5fa0d99809 100644 +--- a/dpdk/drivers/net/sfc/base/efx_evb.c ++++ b/dpdk/drivers/net/sfc/base/efx_evb.c +@@ -28,7 +28,7 @@ static const efx_evb_ops_t __efx_evb_dummy_ops = { + }; + #endif /* EFSYS_OPT_SIENA */ + +-#if EFSYS_OPT_HUNTINGTON || EFSYS_OPT_MEDFORD || EFSYS_OPT_MEDFORD2 ++#if EFX_OPTS_EF10() + static const efx_evb_ops_t __efx_evb_ef10_ops = { + ef10_evb_init, /* eeo_init */ + ef10_evb_fini, /* eeo_fini */ +@@ -44,7 +44,7 @@ static const efx_evb_ops_t __efx_evb_ef10_ops = { + ef10_evb_vport_reconfigure, /* eeo_vport_reconfigure */ + ef10_evb_vport_stats, /* eeo_vport_stats */ + }; +-#endif /* EFSYS_OPT_HUNTINGTON || EFSYS_OPT_MEDFORD || EFSYS_OPT_MEDFORD2 */ ++#endif /* EFX_OPTS_EF10() */ + + __checkReturn efx_rc_t + efx_evb_init( +diff --git a/dpdk/drivers/net/sfc/base/efx_filter.c b/dpdk/drivers/net/sfc/base/efx_filter.c +index 7efb380641..9949d05bb3 100644 +--- a/dpdk/drivers/net/sfc/base/efx_filter.c ++++ b/dpdk/drivers/net/sfc/base/efx_filter.c +@@ -28,7 +28,7 @@ static __checkReturn efx_rc_t + siena_filter_add( + __in efx_nic_t *enp, + __inout efx_filter_spec_t *spec, +- __in boolean_t may_replace); ++ __in efx_filter_replacement_policy_t policy); + + static __checkReturn efx_rc_t + siena_filter_delete( +@@ -93,8 +93,16 @@ efx_filter_insert( + goto fail2; + } + +- return (efop->efo_add(enp, spec, B_FALSE)); ++ if (spec->efs_priority == EFX_FILTER_PRI_AUTO) { ++ rc = EINVAL; ++ goto fail3; ++ } + ++ return (efop->efo_add(enp, spec, ++ EFX_FILTER_REPLACEMENT_HIGHER_PRIORITY)); ++ ++fail3: ++ EFSYS_PROBE(fail3); + fail2: + EFSYS_PROBE(fail2); + fail1: +@@ -314,7 +322,7 @@ efx_filter_spec_init_tx( + EFSYS_ASSERT3P(etp, !=, NULL); + + memset(spec, 0, sizeof (*spec)); +- spec->efs_priority = EFX_FILTER_PRI_REQUIRED; ++ spec->efs_priority = EFX_FILTER_PRI_MANUAL; + spec->efs_flags = EFX_FILTER_FLAG_TX; + spec->efs_dmaq_id = (uint16_t)etp->et_index; + } +@@ -1437,7 +1445,7 @@ static __checkReturn efx_rc_t + siena_filter_add( + __in efx_nic_t *enp, + __inout efx_filter_spec_t *spec, +- __in boolean_t may_replace) ++ __in efx_filter_replacement_policy_t policy) + { + efx_rc_t rc; + siena_filter_spec_t sf_spec; +@@ -1478,9 +1486,17 @@ siena_filter_add( + saved_sf_spec = &sftp->sft_spec[filter_idx]; + + if (siena_filter_test_used(sftp, filter_idx)) { +- if (may_replace == B_FALSE) { ++ /* All Siena filter are considered the same priority */ ++ switch (policy) { ++ case EFX_FILTER_REPLACEMENT_NEVER: ++ case EFX_FILTER_REPLACEMENT_HIGHER_PRIORITY: + rc = EEXIST; + goto fail4; ++ case EFX_FILTER_REPLACEMENT_HIGHER_OR_EQUAL_PRIORITY: ++ break; ++ default: ++ EFSYS_ASSERT(0); ++ break; + } + } + siena_filter_set_used(sftp, filter_idx); +diff --git a/dpdk/drivers/net/sfc/base/efx_impl.h b/dpdk/drivers/net/sfc/base/efx_impl.h +index 85d984f651..9755f4dfd2 
100644 +--- a/dpdk/drivers/net/sfc/base/efx_impl.h ++++ b/dpdk/drivers/net/sfc/base/efx_impl.h +@@ -246,12 +246,31 @@ typedef struct efx_phy_ops_s { + } efx_phy_ops_t; + + #if EFSYS_OPT_FILTER ++ ++/* ++ * Policy for replacing existing filter when inserting a new one. ++ * Note that all policies allow for storing the new lower priority ++ * filters as overridden by existing higher priority ones. It is needed ++ * to restore the lower priority filters on higher priority ones removal. ++ */ ++typedef enum efx_filter_replacement_policy_e { ++ /* Cannot replace existing filter */ ++ EFX_FILTER_REPLACEMENT_NEVER, ++ /* Higher priority filters can replace lower priotiry ones */ ++ EFX_FILTER_REPLACEMENT_HIGHER_PRIORITY, ++ /* ++ * Higher priority filters can replace lower priority ones and ++ * equal priority filters can replace each other. ++ */ ++ EFX_FILTER_REPLACEMENT_HIGHER_OR_EQUAL_PRIORITY, ++} efx_filter_replacement_policy_t; ++ + typedef struct efx_filter_ops_s { + efx_rc_t (*efo_init)(efx_nic_t *); + void (*efo_fini)(efx_nic_t *); + efx_rc_t (*efo_restore)(efx_nic_t *); + efx_rc_t (*efo_add)(efx_nic_t *, efx_filter_spec_t *, +- boolean_t may_replace); ++ efx_filter_replacement_policy_t policy); + efx_rc_t (*efo_delete)(efx_nic_t *, efx_filter_spec_t *); + efx_rc_t (*efo_supported_filters)(efx_nic_t *, uint32_t *, + size_t, size_t *); +diff --git a/dpdk/drivers/net/sfc/base/efx_proxy.c b/dpdk/drivers/net/sfc/base/efx_proxy.c +index 791105a5a0..ecf703b03d 100644 +--- a/dpdk/drivers/net/sfc/base/efx_proxy.c ++++ b/dpdk/drivers/net/sfc/base/efx_proxy.c +@@ -23,7 +23,7 @@ static const efx_proxy_ops_t __efx_proxy_dummy_ops = { + }; + #endif /* EFSYS_OPT_SIENA */ + +-#if EFSYS_OPT_HUNTINGTON || EFSYS_OPT_MEDFORD || EFSYS_OPT_MEDFORD2 ++#if EFX_OPTS_EF10() + static const efx_proxy_ops_t __efx_proxy_ef10_ops = { + ef10_proxy_auth_init, /* epo_init */ + ef10_proxy_auth_fini, /* epo_fini */ +@@ -35,7 +35,7 @@ static const efx_proxy_ops_t __efx_proxy_ef10_ops = { + ef10_proxy_auth_exec_cmd, /* epo_exec_cmd */ + ef10_proxy_auth_get_privilege_mask, /* epo_get_privilege_mask */ + }; +-#endif /* EFSYS_OPT_HUNTINGTON || EFSYS_OPT_MEDFORD || EFSYS_OPT_MEDFORD2 */ ++#endif /* EFX_OPTS_EF10() */ + + __checkReturn efx_rc_t + efx_proxy_auth_init( +diff --git a/dpdk/drivers/net/sfc/sfc.c b/dpdk/drivers/net/sfc/sfc.c +index 141c767f09..3f5cd7758b 100644 +--- a/dpdk/drivers/net/sfc/sfc.c ++++ b/dpdk/drivers/net/sfc/sfc.c +@@ -30,7 +30,7 @@ sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id, + { + const struct rte_memzone *mz; + +- sfc_log_init(sa, "name=%s id=%u len=%lu socket_id=%d", ++ sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d", + name, id, len, socket_id); + + mz = rte_eth_dma_zone_reserve(sa->eth_dev, name, id, len, +@@ -241,8 +241,8 @@ sfc_estimate_resource_limits(struct sfc_adapter *sa) + return 0; + + fail_get_vi_pool: +-fail_nic_init: + efx_nic_fini(sa->nic); ++fail_nic_init: + return rc; + } + +diff --git a/dpdk/drivers/net/sfc/sfc_ethdev.c b/dpdk/drivers/net/sfc/sfc_ethdev.c +index 454b8956a2..f8867b0ec0 100644 +--- a/dpdk/drivers/net/sfc/sfc_ethdev.c ++++ b/dpdk/drivers/net/sfc/sfc_ethdev.c +@@ -405,25 +405,37 @@ sfc_dev_filter_set(struct rte_eth_dev *dev, enum sfc_dev_filter_mode mode, + static int + sfc_dev_promisc_enable(struct rte_eth_dev *dev) + { +- return sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_PROMISC, B_TRUE); ++ int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_PROMISC, B_TRUE); ++ ++ SFC_ASSERT(rc >= 0); ++ return -rc; + } + + static int + 
sfc_dev_promisc_disable(struct rte_eth_dev *dev) + { +- return sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_PROMISC, B_FALSE); ++ int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_PROMISC, B_FALSE); ++ ++ SFC_ASSERT(rc >= 0); ++ return -rc; + } + + static int + sfc_dev_allmulti_enable(struct rte_eth_dev *dev) + { +- return sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_ALLMULTI, B_TRUE); ++ int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_ALLMULTI, B_TRUE); ++ ++ SFC_ASSERT(rc >= 0); ++ return -rc; + } + + static int + sfc_dev_allmulti_disable(struct rte_eth_dev *dev) + { +- return sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_ALLMULTI, B_FALSE); ++ int rc = sfc_dev_filter_set(dev, SFC_DEV_FILTER_MODE_ALLMULTI, B_FALSE); ++ ++ SFC_ASSERT(rc >= 0); ++ return -rc; + } + + static int +@@ -1520,7 +1532,7 @@ sfc_dev_rss_hash_update(struct rte_eth_dev *dev, + + if ((rss_conf->rss_key != NULL) && + (rss_conf->rss_key_len != sizeof(rss->key))) { +- sfc_err(sa, "RSS key size is wrong (should be %lu)", ++ sfc_err(sa, "RSS key size is wrong (should be %zu)", + sizeof(rss->key)); + return -EINVAL; + } +diff --git a/dpdk/drivers/net/sfc/sfc_flow.c b/dpdk/drivers/net/sfc/sfc_flow.c +index 8d636f6923..023e55d951 100644 +--- a/dpdk/drivers/net/sfc/sfc_flow.c ++++ b/dpdk/drivers/net/sfc/sfc_flow.c +@@ -1132,6 +1132,7 @@ sfc_flow_parse_attr(const struct rte_flow_attr *attr, + + flow->spec.template.efs_flags |= EFX_FILTER_FLAG_RX; + flow->spec.template.efs_rss_context = EFX_RSS_CONTEXT_DEFAULT; ++ flow->spec.template.efs_priority = EFX_FILTER_PRI_MANUAL; + + return 0; + } +diff --git a/dpdk/drivers/net/sfc/sfc_rx.c b/dpdk/drivers/net/sfc/sfc_rx.c +index 74218296cd..891709fd04 100644 +--- a/dpdk/drivers/net/sfc/sfc_rx.c ++++ b/dpdk/drivers/net/sfc/sfc_rx.c +@@ -719,6 +719,7 @@ sfc_rx_default_rxq_set_filter(struct sfc_adapter *sa, struct sfc_rxq *rxq) + sfc_warn(sa, "promiscuous mode will be disabled"); + + port->promisc = B_FALSE; ++ sa->eth_dev->data->promiscuous = 0; + rc = sfc_set_rx_mode(sa); + if (rc != 0) + return rc; +@@ -732,6 +733,7 @@ sfc_rx_default_rxq_set_filter(struct sfc_adapter *sa, struct sfc_rxq *rxq) + sfc_warn(sa, "all-multicast mode will be disabled"); + + port->allmulti = B_FALSE; ++ sa->eth_dev->data->all_multicast = 0; + rc = sfc_set_rx_mode(sa); + if (rc != 0) + return rc; +@@ -820,10 +822,12 @@ sfc_rx_qstart(struct sfc_adapter *sa, unsigned int sw_index) + return 0; + + fail_mac_filter_default_rxq_set: ++ sfc_rx_qflush(sa, sw_index); + sa->priv.dp_rx->qstop(rxq_info->dp, &rxq->evq->read_ptr); ++ rxq_info->state = SFC_RXQ_INITIALIZED; + + fail_dp_qstart: +- sfc_rx_qflush(sa, sw_index); ++ efx_rx_qdestroy(rxq->common); + + fail_rx_qcreate: + fail_bad_contig_block_size: +@@ -1403,7 +1407,7 @@ sfc_rx_process_adv_conf_rss(struct sfc_adapter *sa, + + if (conf->rss_key != NULL) { + if (conf->rss_key_len != sizeof(rss->key)) { +- sfc_err(sa, "RSS key size is wrong (should be %lu)", ++ sfc_err(sa, "RSS key size is wrong (should be %zu)", + sizeof(rss->key)); + return EINVAL; + } +diff --git a/dpdk/drivers/net/softnic/rte_eth_softnic_thread.c b/dpdk/drivers/net/softnic/rte_eth_softnic_thread.c +index d610b1617e..dcfb5eb82c 100644 +--- a/dpdk/drivers/net/softnic/rte_eth_softnic_thread.c ++++ b/dpdk/drivers/net/softnic/rte_eth_softnic_thread.c +@@ -359,8 +359,6 @@ softnic_thread_pipeline_enable(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = thread_msg_send_recv(softnic, thread_id, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = 
rsp->status; +@@ -444,8 +442,6 @@ softnic_thread_pipeline_disable(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = thread_msg_send_recv(softnic, thread_id, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -839,8 +835,6 @@ softnic_pipeline_port_in_stats_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -888,8 +882,6 @@ softnic_pipeline_port_in_enable(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -935,8 +927,6 @@ softnic_pipeline_port_in_disable(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -990,8 +980,6 @@ softnic_pipeline_port_out_stats_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1047,8 +1035,6 @@ softnic_pipeline_table_stats_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1327,8 +1313,6 @@ softnic_pipeline_table_rule_add(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1411,8 +1395,6 @@ softnic_pipeline_table_rule_add_default(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1569,8 +1551,6 @@ softnic_pipeline_table_rule_add_bulk(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1634,8 +1614,6 @@ softnic_pipeline_table_rule_delete(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1684,8 +1662,6 @@ softnic_pipeline_table_rule_delete_default(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1744,8 +1720,6 @@ softnic_pipeline_table_rule_stats_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1825,10 +1799,6 @@ softnic_pipeline_table_mtr_profile_add(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) { +- free(mp); +- return -1; +- } + + /* Read response */ + status = rsp->status; +@@ -1884,8 +1854,6 @@ softnic_pipeline_table_mtr_profile_delete(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = 
pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1947,8 +1915,6 @@ softnic_pipeline_table_rule_mtr_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -2012,8 +1978,6 @@ softnic_pipeline_table_dscp_table_update(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -2077,8 +2041,6 @@ softnic_pipeline_table_rule_ttl_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +diff --git a/dpdk/drivers/net/tap/rte_eth_tap.c b/dpdk/drivers/net/tap/rte_eth_tap.c +index a13d8d50d7..7081ae23e9 100644 +--- a/dpdk/drivers/net/tap/rte_eth_tap.c ++++ b/dpdk/drivers/net/tap/rte_eth_tap.c +@@ -18,8 +18,8 @@ + #include <rte_string_fns.h> + #include <rte_ethdev.h> + #include <rte_errno.h> ++#include <rte_cycles.h> + +-#include <assert.h> + #include <sys/types.h> + #include <sys/stat.h> + #include <sys/socket.h> +@@ -339,6 +339,23 @@ tap_rx_offload_get_queue_capa(void) + DEV_RX_OFFLOAD_TCP_CKSUM; + } + ++static void ++tap_rxq_pool_free(struct rte_mbuf *pool) ++{ ++ struct rte_mbuf *mbuf = pool; ++ uint16_t nb_segs = 1; ++ ++ if (mbuf == NULL) ++ return; ++ ++ while (mbuf->next) { ++ mbuf = mbuf->next; ++ nb_segs++; ++ } ++ pool->nb_segs = nb_segs; ++ rte_pktmbuf_free(pool); ++} ++ + /* Callback to handle the rx burst of packets to the correct interface and + * file descriptor(s) in a multi-queue setup. 
+ */ +@@ -389,7 +406,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + goto end; + + seg->next = NULL; +- rte_pktmbuf_free(mbuf); ++ tap_rxq_pool_free(mbuf); + + goto end; + } +@@ -521,7 +538,7 @@ tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len, + } + } + +-static inline void ++static inline int + tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, + struct rte_mbuf **pmbufs, + uint16_t *num_packets, unsigned long *num_tx_bytes) +@@ -588,7 +605,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, + seg_len = rte_pktmbuf_data_len(mbuf); + l234_hlen = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; + if (seg_len < l234_hlen) +- break; ++ return -1; + + /* To change checksums, work on a * copy of l2, l3 + * headers + l4 pseudo header +@@ -634,10 +651,12 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, + /* copy the tx frame data */ + n = writev(process_private->txq_fds[txq->queue_id], iovecs, j); + if (n <= 0) +- break; ++ return -1; ++ + (*num_packets)++; + (*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf); + } ++ return 0; + } + + /* Callback to handle sending packets from the tap interface +@@ -663,16 +682,14 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + uint16_t num_mbufs = 0; + uint16_t tso_segsz = 0; + int ret; ++ int num_tso_mbufs; + uint16_t hdrs_len; +- int j; + uint64_t tso; + + tso = mbuf_in->ol_flags & PKT_TX_TCP_SEG; + if (tso) { + struct rte_gso_ctx *gso_ctx = &txq->gso_ctx; + +- assert(gso_ctx != NULL); +- + /* TCP segmentation implies TCP checksum offload */ + mbuf_in->ol_flags |= PKT_TX_TCP_CKSUM; + +@@ -686,43 +703,51 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + break; + } + gso_ctx->gso_size = tso_segsz; +- ret = rte_gso_segment(mbuf_in, /* packet to segment */ ++ /* 'mbuf_in' packet to segment */ ++ num_tso_mbufs = rte_gso_segment(mbuf_in, + gso_ctx, /* gso control block */ + (struct rte_mbuf **)&gso_mbufs, /* out mbufs */ + RTE_DIM(gso_mbufs)); /* max tso mbufs */ + + /* ret contains the number of new created mbufs */ +- if (ret < 0) ++ if (num_tso_mbufs < 0) + break; + + mbuf = gso_mbufs; +- num_mbufs = ret; ++ num_mbufs = num_tso_mbufs; + } else { + /* stats.errs will be incremented */ + if (rte_pktmbuf_pkt_len(mbuf_in) > max_size) + break; + + /* ret 0 indicates no new mbufs were created */ +- ret = 0; ++ num_tso_mbufs = 0; + mbuf = &mbuf_in; + num_mbufs = 1; + } + +- tap_write_mbufs(txq, num_mbufs, mbuf, ++ ret = tap_write_mbufs(txq, num_mbufs, mbuf, + &num_packets, &num_tx_bytes); ++ if (ret == -1) { ++ txq->stats.errs++; ++ /* free tso mbufs */ ++ if (num_tso_mbufs > 0) ++ rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs); ++ break; ++ } + num_tx++; + /* free original mbuf */ + rte_pktmbuf_free(mbuf_in); + /* free tso mbufs */ +- for (j = 0; j < ret; j++) +- rte_pktmbuf_free(mbuf[j]); ++ if (num_tso_mbufs > 0) ++ rte_pktmbuf_free_bulk(mbuf, num_tso_mbufs); + } + + txq->stats.opackets += num_packets; + txq->stats.errs += nb_pkts - num_tx; + txq->stats.obytes += num_tx_bytes; + +- return num_packets; ++ return num_tx; + } + + static const char * +@@ -778,7 +803,7 @@ tap_ioctl(struct pmd_internals *pmd, unsigned long request, + case SIOCSIFMTU: + break; + default: +- RTE_LOG(WARNING, PMD, "%s: ioctl() called with wrong arg\n", ++ TAP_LOG(WARNING, "%s: ioctl() called with wrong arg", + pmd->name); + return -EINVAL; + } +@@ -1013,15 +1038,25 @@ tap_dev_close(struct rte_eth_dev *dev) + int i; + struct pmd_internals *internals = dev->data->dev_private; + struct 
pmd_process_private *process_private = dev->process_private; ++ struct rx_queue *rxq; + + tap_link_set_down(dev); +- tap_flow_flush(dev, NULL); +- tap_flow_implicit_flush(internals, NULL); ++ if (internals->nlsk_fd != -1) { ++ tap_flow_flush(dev, NULL); ++ tap_flow_implicit_flush(internals, NULL); ++ tap_nl_final(internals->nlsk_fd); ++ internals->nlsk_fd = -1; ++ } + + for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { + if (process_private->rxq_fds[i] != -1) { ++ rxq = &internals->rxq[i]; + close(process_private->rxq_fds[i]); + process_private->rxq_fds[i] = -1; ++ tap_rxq_pool_free(rxq->pool); ++ rte_free(rxq->iovecs); ++ rxq->pool = NULL; ++ rxq->iovecs = NULL; + } + if (process_private->txq_fds[i] != -1) { + close(process_private->txq_fds[i]); +@@ -1054,10 +1089,10 @@ tap_rx_queue_release(void *queue) + if (!rxq) + return; + process_private = rte_eth_devices[rxq->in_port].process_private; +- if (process_private->rxq_fds[rxq->queue_id] > 0) { ++ if (process_private->rxq_fds[rxq->queue_id] != -1) { + close(process_private->rxq_fds[rxq->queue_id]); + process_private->rxq_fds[rxq->queue_id] = -1; +- rte_pktmbuf_free(rxq->pool); ++ tap_rxq_pool_free(rxq->pool); + rte_free(rxq->iovecs); + rxq->pool = NULL; + rxq->iovecs = NULL; +@@ -1074,7 +1109,7 @@ tap_tx_queue_release(void *queue) + return; + process_private = rte_eth_devices[txq->out_port].process_private; + +- if (process_private->txq_fds[txq->queue_id] > 0) { ++ if (process_private->txq_fds[txq->queue_id] != -1) { + close(process_private->txq_fds[txq->queue_id]); + process_private->txq_fds[txq->queue_id] = -1; + } +@@ -1301,7 +1336,9 @@ tap_gso_ctx_setup(struct rte_gso_ctx *gso_ctx, struct rte_eth_dev *dev) + SOCKET_ID_ANY); + if (!mp) { + struct pmd_internals *pmd = dev->data->dev_private; +- RTE_LOG(DEBUG, PMD, "%s: failed to create mbuf pool for device %s\n", ++ ++ TAP_LOG(ERR, ++ "%s: failed to create mbuf pool for device %s\n", + pmd->name, dev->device->name); + return -1; + } +@@ -1465,7 +1502,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev, + return 0; + + error: +- rte_pktmbuf_free(rxq->pool); ++ tap_rxq_pool_free(rxq->pool); + rxq->pool = NULL; + rte_free(rxq->iovecs); + rxq->iovecs = NULL; +@@ -1563,13 +1600,12 @@ static int + tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set) + { + struct pmd_internals *pmd = dev->data->dev_private; ++ int ret; + + /* In any case, disable interrupt if the conf is no longer there. 
*/ + if (!dev->data->dev_conf.intr_conf.lsc) { + if (pmd->intr_handle.fd != -1) { +- tap_nl_final(pmd->intr_handle.fd); +- rte_intr_callback_unregister(&pmd->intr_handle, +- tap_dev_intr_handler, dev); ++ goto clean; + } + return 0; + } +@@ -1580,9 +1616,26 @@ tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set) + return rte_intr_callback_register( + &pmd->intr_handle, tap_dev_intr_handler, dev); + } ++ ++clean: ++ do { ++ ret = rte_intr_callback_unregister(&pmd->intr_handle, ++ tap_dev_intr_handler, dev); ++ if (ret >= 0) { ++ break; ++ } else if (ret == -EAGAIN) { ++ rte_delay_ms(100); ++ } else { ++ TAP_LOG(ERR, "intr callback unregister failed: %d", ++ ret); ++ break; ++ } ++ } while (true); ++ + tap_nl_final(pmd->intr_handle.fd); +- return rte_intr_callback_unregister(&pmd->intr_handle, +- tap_dev_intr_handler, dev); ++ pmd->intr_handle.fd = -1; ++ ++ return 0; + } + + static int +@@ -1591,8 +1644,11 @@ tap_intr_handle_set(struct rte_eth_dev *dev, int set) + int err; + + err = tap_lsc_intr_handle_set(dev, set); +- if (err) ++ if (err < 0) { ++ if (!set) ++ tap_rx_intr_vec_set(dev, 0); + return err; ++ } + err = tap_rx_intr_vec_set(dev, set); + if (err && set) + tap_lsc_intr_handle_set(dev, 0); +@@ -1784,6 +1840,8 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name, + pmd->dev = dev; + strlcpy(pmd->name, tap_name, sizeof(pmd->name)); + pmd->type = type; ++ pmd->ka_fd = -1; ++ pmd->nlsk_fd = -1; + + pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0); + if (pmd->ioctl_sock == -1) { +@@ -1814,7 +1872,6 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name, + dev->intr_handle = &pmd->intr_handle; + + /* Presetup the fds to -1 as being not valid */ +- pmd->ka_fd = -1; + for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { + process_private->rxq_fds[i] = -1; + process_private->txq_fds[i] = -1; +@@ -1954,7 +2011,11 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name, + tap_flow_implicit_flush(pmd, NULL); + + error_exit: +- if (pmd->ioctl_sock > 0) ++ if (pmd->nlsk_fd != -1) ++ close(pmd->nlsk_fd); ++ if (pmd->ka_fd != -1) ++ close(pmd->ka_fd); ++ if (pmd->ioctl_sock != -1) + close(pmd->ioctl_sock); + /* mac_addrs must not be freed alone because part of dev_private */ + dev->data->mac_addrs = NULL; +@@ -2386,8 +2447,6 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev) + { + struct rte_eth_dev *eth_dev = NULL; + struct pmd_internals *internals; +- struct pmd_process_private *process_private; +- int i; + + /* find the ethdev entry */ + eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev)); +@@ -2400,28 +2459,12 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev) + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return rte_eth_dev_release_port(eth_dev); + +- internals = eth_dev->data->dev_private; +- process_private = eth_dev->process_private; ++ tap_dev_close(eth_dev); + ++ internals = eth_dev->data->dev_private; + TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u", + tuntap_types[internals->type], rte_socket_id()); + +- if (internals->nlsk_fd) { +- tap_flow_flush(eth_dev, NULL); +- tap_flow_implicit_flush(internals, NULL); +- tap_nl_final(internals->nlsk_fd); +- } +- for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { +- if (process_private->rxq_fds[i] != -1) { +- close(process_private->rxq_fds[i]); +- process_private->rxq_fds[i] = -1; +- } +- if (process_private->txq_fds[i] != -1) { +- close(process_private->txq_fds[i]); +- process_private->txq_fds[i] = -1; +- } +- } +- + close(internals->ioctl_sock); + 
rte_free(eth_dev->process_private); + if (tap_devices_count == 1) +@@ -2429,10 +2472,6 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev) + tap_devices_count--; + rte_eth_dev_release_port(eth_dev); + +- if (internals->ka_fd != -1) { +- close(internals->ka_fd); +- internals->ka_fd = -1; +- } + return 0; + } + +diff --git a/dpdk/drivers/net/tap/tap_flow.c b/dpdk/drivers/net/tap/tap_flow.c +index 9d90361d99..1538349e9c 100644 +--- a/dpdk/drivers/net/tap/tap_flow.c ++++ b/dpdk/drivers/net/tap/tap_flow.c +@@ -1380,7 +1380,7 @@ tap_flow_create(struct rte_eth_dev *dev, + NULL, "priority value too big"); + goto fail; + } +- flow = rte_malloc(__func__, sizeof(struct rte_flow), 0); ++ flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0); + if (!flow) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate memory for rte_flow"); +@@ -1416,7 +1416,7 @@ tap_flow_create(struct rte_eth_dev *dev, + * to the local pmd->if_index. + */ + if (pmd->remote_if_index) { +- remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0); ++ remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0); + if (!remote_flow) { + rte_flow_error_set( + error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, +@@ -1693,7 +1693,7 @@ int tap_flow_implicit_create(struct pmd_internals *pmd, + } + }; + +- remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0); ++ remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0); + if (!remote_flow) { + TAP_LOG(ERR, "Cannot allocate memory for rte_flow"); + goto fail; +@@ -1896,7 +1896,7 @@ static int rss_enable(struct pmd_internals *pmd, + return -ENOTSUP; + } + +- rss_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0); ++ rss_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0); + if (!rss_flow) { + TAP_LOG(ERR, + "Cannot allocate memory for rte_flow"); +diff --git a/dpdk/drivers/net/tap/tap_intr.c b/dpdk/drivers/net/tap/tap_intr.c +index 7af0010e37..5cf4f173a0 100644 +--- a/dpdk/drivers/net/tap/tap_intr.c ++++ b/dpdk/drivers/net/tap/tap_intr.c +@@ -7,7 +7,6 @@ + * Interrupts handling for tap driver. + */ + +-#include <assert.h> + #include <errno.h> + #include <fcntl.h> + #include <signal.h> +@@ -72,7 +71,7 @@ tap_rx_intr_vec_install(struct rte_eth_dev *dev) + struct rx_queue *rxq = pmd->dev->data->rx_queues[i]; + + /* Skip queues that cannot request interrupts. */ +- if (!rxq || process_private->rxq_fds[i] <= 0) { ++ if (!rxq || process_private->rxq_fds[i] == -1) { + /* Use invalid intr_vec[] index to disable entry. 
*/ + intr_handle->intr_vec[i] = + RTE_INTR_VEC_RXTX_OFFSET + +diff --git a/dpdk/drivers/net/thunderx/nicvf_ethdev.c b/dpdk/drivers/net/thunderx/nicvf_ethdev.c +index 2cf0ffe13b..26191586f7 100644 +--- a/dpdk/drivers/net/thunderx/nicvf_ethdev.c ++++ b/dpdk/drivers/net/thunderx/nicvf_ethdev.c +@@ -496,9 +496,10 @@ nicvf_dev_reta_query(struct rte_eth_dev *dev, + int ret, i, j; + + if (reta_size != NIC_MAX_RSS_IDR_TBL_SIZE) { +- RTE_LOG(ERR, PMD, "The size of hash lookup table configured " +- "(%d) doesn't match the number hardware can supported " +- "(%d)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE); ++ PMD_DRV_LOG(ERR, ++ "The size of hash lookup table configured " ++ "(%u) doesn't match the number hardware can supported " ++ "(%u)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE); + return -EINVAL; + } + +@@ -526,9 +527,9 @@ nicvf_dev_reta_update(struct rte_eth_dev *dev, + int ret, i, j; + + if (reta_size != NIC_MAX_RSS_IDR_TBL_SIZE) { +- RTE_LOG(ERR, PMD, "The size of hash lookup table configured " +- "(%d) doesn't match the number hardware can supported " +- "(%d)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE); ++ PMD_DRV_LOG(ERR, "The size of hash lookup table configured " ++ "(%u) doesn't match the number hardware can supported " ++ "(%u)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE); + return -EINVAL; + } + +@@ -569,8 +570,8 @@ nicvf_dev_rss_hash_update(struct rte_eth_dev *dev, + + if (rss_conf->rss_key && + rss_conf->rss_key_len != RSS_HASH_KEY_BYTE_SIZE) { +- RTE_LOG(ERR, PMD, "Hash key size mismatch %d", +- rss_conf->rss_key_len); ++ PMD_DRV_LOG(ERR, "Hash key size mismatch %u", ++ rss_conf->rss_key_len); + return -EINVAL; + } + +diff --git a/dpdk/drivers/net/vhost/rte_eth_vhost.c b/dpdk/drivers/net/vhost/rte_eth_vhost.c +index 46f01a7f46..85f91f0b9d 100644 +--- a/dpdk/drivers/net/vhost/rte_eth_vhost.c ++++ b/dpdk/drivers/net/vhost/rte_eth_vhost.c +@@ -97,6 +97,8 @@ struct pmd_internal { + rte_atomic32_t dev_attached; + char *dev_name; + char *iface_name; ++ uint64_t flags; ++ uint64_t disable_flags; + uint16_t max_queues; + int vid; + rte_atomic32_t started; +@@ -491,17 +493,6 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) + return nb_tx; + } + +-static int +-eth_dev_configure(struct rte_eth_dev *dev __rte_unused) +-{ +- struct pmd_internal *internal = dev->data->dev_private; +- const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; +- +- internal->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP); +- +- return 0; +-} +- + static inline struct internal_list * + find_internal_resource(char *ifname) + { +@@ -877,6 +868,74 @@ static struct vhost_device_ops vhost_ops = { + .vring_state_changed = vring_state_changed, + }; + ++static int ++vhost_driver_setup(struct rte_eth_dev *eth_dev) ++{ ++ struct pmd_internal *internal = eth_dev->data->dev_private; ++ struct internal_list *list = NULL; ++ struct rte_vhost_vring_state *vring_state = NULL; ++ unsigned int numa_node = eth_dev->device->numa_node; ++ const char *name = eth_dev->device->name; ++ ++ /* Don't try to setup again if it has already been done. 
*/ ++ list = find_internal_resource(internal->iface_name); ++ if (list) ++ return 0; ++ ++ list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node); ++ if (list == NULL) ++ return -1; ++ ++ vring_state = rte_zmalloc_socket(name, sizeof(*vring_state), ++ 0, numa_node); ++ if (vring_state == NULL) ++ goto free_list; ++ ++ list->eth_dev = eth_dev; ++ pthread_mutex_lock(&internal_list_lock); ++ TAILQ_INSERT_TAIL(&internal_list, list, next); ++ pthread_mutex_unlock(&internal_list_lock); ++ ++ rte_spinlock_init(&vring_state->lock); ++ vring_states[eth_dev->data->port_id] = vring_state; ++ ++ if (rte_vhost_driver_register(internal->iface_name, internal->flags)) ++ goto list_remove; ++ ++ if (internal->disable_flags) { ++ if (rte_vhost_driver_disable_features(internal->iface_name, ++ internal->disable_flags)) ++ goto drv_unreg; ++ } ++ ++ if (rte_vhost_driver_callback_register(internal->iface_name, ++ &vhost_ops) < 0) { ++ VHOST_LOG(ERR, "Can't register callbacks\n"); ++ goto drv_unreg; ++ } ++ ++ if (rte_vhost_driver_start(internal->iface_name) < 0) { ++ VHOST_LOG(ERR, "Failed to start driver for %s\n", ++ internal->iface_name); ++ goto drv_unreg; ++ } ++ ++ return 0; ++ ++drv_unreg: ++ rte_vhost_driver_unregister(internal->iface_name); ++list_remove: ++ vring_states[eth_dev->data->port_id] = NULL; ++ pthread_mutex_lock(&internal_list_lock); ++ TAILQ_REMOVE(&internal_list, list, next); ++ pthread_mutex_unlock(&internal_list_lock); ++ rte_free(vring_state); ++free_list: ++ rte_free(list); ++ ++ return -1; ++} ++ + int + rte_eth_vhost_get_queue_event(uint16_t port_id, + struct rte_eth_vhost_queue_event *event) +@@ -943,6 +1002,24 @@ rte_eth_vhost_get_vid_from_port_id(uint16_t port_id) + return vid; + } + ++static int ++eth_dev_configure(struct rte_eth_dev *dev) ++{ ++ struct pmd_internal *internal = dev->data->dev_private; ++ const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; ++ ++ /* NOTE: the same process has to operate a vhost interface ++ * from beginning to end (from eth_dev configure to eth_dev close). ++ * It is user's responsibility at the moment. 
++ */ ++ if (vhost_driver_setup(dev) < 0) ++ return -1; ++ ++ internal->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP); ++ ++ return 0; ++} ++ + static int + eth_dev_start(struct rte_eth_dev *eth_dev) + { +@@ -989,16 +1066,14 @@ eth_dev_close(struct rte_eth_dev *dev) + + eth_dev_stop(dev); + +- rte_vhost_driver_unregister(internal->iface_name); +- + list = find_internal_resource(internal->iface_name); +- if (!list) +- return; +- +- pthread_mutex_lock(&internal_list_lock); +- TAILQ_REMOVE(&internal_list, list, next); +- pthread_mutex_unlock(&internal_list_lock); +- rte_free(list); ++ if (list) { ++ rte_vhost_driver_unregister(internal->iface_name); ++ pthread_mutex_lock(&internal_list_lock); ++ TAILQ_REMOVE(&internal_list, list, next); ++ pthread_mutex_unlock(&internal_list_lock); ++ rte_free(list); ++ } + + if (dev->data->rx_queues) + for (i = 0; i < dev->data->nb_rx_queues; i++) +@@ -1009,7 +1084,7 @@ eth_dev_close(struct rte_eth_dev *dev) + rte_free(dev->data->tx_queues[i]); + + free(internal->dev_name); +- free(internal->iface_name); ++ rte_free(internal->iface_name); + rte_free(internal); + + dev->data->dev_private = NULL; +@@ -1219,16 +1294,10 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, + struct pmd_internal *internal = NULL; + struct rte_eth_dev *eth_dev = NULL; + struct rte_ether_addr *eth_addr = NULL; +- struct rte_vhost_vring_state *vring_state = NULL; +- struct internal_list *list = NULL; + + VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n", + numa_node); + +- list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node); +- if (list == NULL) +- goto error; +- + /* reserve an ethdev entry */ + eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal)); + if (eth_dev == NULL) +@@ -1242,11 +1311,6 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, + *eth_addr = base_eth_addr; + eth_addr->addr_bytes[5] = eth_dev->data->port_id; + +- vring_state = rte_zmalloc_socket(name, +- sizeof(*vring_state), 0, numa_node); +- if (vring_state == NULL) +- goto error; +- + /* now put it all together + * - store queue data in internal, + * - point eth_dev_data to internals +@@ -1256,22 +1320,18 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, + internal->dev_name = strdup(name); + if (internal->dev_name == NULL) + goto error; +- internal->iface_name = strdup(iface_name); ++ internal->iface_name = rte_malloc_socket(name, strlen(iface_name) + 1, ++ 0, numa_node); + if (internal->iface_name == NULL) + goto error; +- +- list->eth_dev = eth_dev; +- pthread_mutex_lock(&internal_list_lock); +- TAILQ_INSERT_TAIL(&internal_list, list, next); +- pthread_mutex_unlock(&internal_list_lock); +- +- rte_spinlock_init(&vring_state->lock); +- vring_states[eth_dev->data->port_id] = vring_state; ++ strcpy(internal->iface_name, iface_name); + + data->nb_rx_queues = queues; + data->nb_tx_queues = queues; + internal->max_queues = queues; + internal->vid = -1; ++ internal->flags = flags; ++ internal->disable_flags = disable_flags; + data->dev_link = pmd_link; + data->dev_flags = RTE_ETH_DEV_INTR_LSC | RTE_ETH_DEV_CLOSE_REMOVE; + +@@ -1281,37 +1341,15 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, + eth_dev->rx_pkt_burst = eth_vhost_rx; + eth_dev->tx_pkt_burst = eth_vhost_tx; + +- if (rte_vhost_driver_register(iface_name, flags)) +- goto error; +- +- if (disable_flags) { +- if (rte_vhost_driver_disable_features(iface_name, +- disable_flags)) +- goto error; +- } +- +- if 
(rte_vhost_driver_callback_register(iface_name, &vhost_ops) < 0) { +- VHOST_LOG(ERR, "Can't register callbacks\n"); +- goto error; +- } +- +- if (rte_vhost_driver_start(iface_name) < 0) { +- VHOST_LOG(ERR, "Failed to start driver for %s\n", +- iface_name); +- goto error; +- } +- + rte_eth_dev_probing_finish(eth_dev); +- return data->port_id; ++ return 0; + + error: + if (internal) { +- free(internal->iface_name); ++ rte_free(internal->iface_name); + free(internal->dev_name); + } +- rte_free(vring_state); + rte_eth_dev_release_port(eth_dev); +- rte_free(list); + + return -1; + } +@@ -1369,8 +1407,11 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev) + VHOST_LOG(ERR, "Failed to probe %s\n", name); + return -1; + } +- /* TODO: request info from primary to set up Rx and Tx */ ++ eth_dev->rx_pkt_burst = eth_vhost_rx; ++ eth_dev->tx_pkt_burst = eth_vhost_tx; + eth_dev->dev_ops = &ops; ++ if (dev->device.numa_node == SOCKET_ID_ANY) ++ dev->device.numa_node = rte_socket_id(); + eth_dev->device = &dev->device; + rte_eth_dev_probing_finish(eth_dev); + return 0; +@@ -1455,8 +1496,10 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev) + if (dev->device.numa_node == SOCKET_ID_ANY) + dev->device.numa_node = rte_socket_id(); + +- eth_dev_vhost_create(dev, iface_name, queues, dev->device.numa_node, +- flags, disable_flags); ++ ret = eth_dev_vhost_create(dev, iface_name, queues, ++ dev->device.numa_node, flags, disable_flags); ++ if (ret == -1) ++ VHOST_LOG(ERR, "Failed to create %s\n", name); + + out_free: + rte_kvargs_free(kvlist); +diff --git a/dpdk/drivers/net/virtio/virtio_ethdev.c b/dpdk/drivers/net/virtio/virtio_ethdev.c +index 044eb10a70..35203940a7 100644 +--- a/dpdk/drivers/net/virtio/virtio_ethdev.c ++++ b/dpdk/drivers/net/virtio/virtio_ethdev.c +@@ -466,7 +466,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx) + } + + if (!vtpci_packed_queue(hw) && !rte_is_power_of_2(vq_size)) { +- PMD_INIT_LOG(ERR, "split virtqueue size is not powerof 2"); ++ PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2"); + return -EINVAL; + } + +@@ -588,8 +588,8 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx) + hw->cvq = cvq; + } + +- /* For virtio_user case (that is when hw->dev is NULL), we use +- * virtual address. And we need properly set _offset_, please see ++ /* For virtio_user case (that is when hw->virtio_user_dev is not NULL), ++ * we use virtual address. And we need properly set _offset_, please see + * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information. 
+ */ + if (!hw->virtio_user_dev) +@@ -1913,6 +1913,8 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) + goto err_vtpci_init; + } + ++ rte_spinlock_init(&hw->state_lock); ++ + /* reset device and negotiate default features */ + ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES); + if (ret < 0) +@@ -2155,8 +2157,6 @@ virtio_dev_configure(struct rte_eth_dev *dev) + return -EBUSY; + } + +- rte_spinlock_init(&hw->state_lock); +- + hw->use_simple_rx = 1; + + if (vtpci_with_feature(hw, VIRTIO_F_IN_ORDER)) { +diff --git a/dpdk/drivers/net/virtio/virtio_rxtx.c b/dpdk/drivers/net/virtio/virtio_rxtx.c +index 752faa0f6e..060410577a 100644 +--- a/dpdk/drivers/net/virtio/virtio_rxtx.c ++++ b/dpdk/drivers/net/virtio/virtio_rxtx.c +@@ -1085,7 +1085,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, + RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH); + + if (tx_free_thresh >= (vq->vq_nentries - 3)) { +- RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the " ++ PMD_DRV_LOG(ERR, "tx_free_thresh must be less than the " + "number of TX entries minus 3 (%u)." + " (tx_free_thresh=%u port=%u queue=%u)\n", + vq->vq_nentries - 3, +@@ -1133,7 +1133,7 @@ virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m) + error = virtqueue_enqueue_recv_refill(vq, &m, 1); + + if (unlikely(error)) { +- RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf"); ++ PMD_DRV_LOG(ERR, "cannot requeue discarded mbuf"); + rte_pktmbuf_free(m); + } + } +@@ -1145,7 +1145,7 @@ virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m) + + error = virtqueue_enqueue_refill_inorder(vq, &m, 1); + if (unlikely(error)) { +- RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf"); ++ PMD_DRV_LOG(ERR, "cannot requeue discarded mbuf"); + rte_pktmbuf_free(m); + } + } +diff --git a/dpdk/drivers/net/virtio/virtio_rxtx_simple_altivec.c b/dpdk/drivers/net/virtio/virtio_rxtx_simple_altivec.c +index 47225f4121..003b6ec3f6 100644 +--- a/dpdk/drivers/net/virtio/virtio_rxtx_simple_altivec.c ++++ b/dpdk/drivers/net/virtio/virtio_rxtx_simple_altivec.c +@@ -9,8 +9,7 @@ + #include <string.h> + #include <errno.h> + +-#include <altivec.h> +- ++#include <rte_altivec.h> + #include <rte_byteorder.h> + #include <rte_branch_prediction.h> + #include <rte_cycles.h> +diff --git a/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c b/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c +index 5c81e8dd9f..2c805077af 100644 +--- a/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c ++++ b/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c +@@ -330,16 +330,34 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, + + vhostfd = dev->vhostfds[pair_idx]; + ++ if (dev->qp_enabled[pair_idx] == enable) ++ return 0; ++ + if (!enable) { +- if (dev->tapfds[pair_idx] >= 0) { +- close(dev->tapfds[pair_idx]); +- dev->tapfds[pair_idx] = -1; ++ tapfd = dev->tapfds[pair_idx]; ++ if (vhost_kernel_set_backend(vhostfd, -1) < 0) { ++ PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); ++ return -1; + } +- return vhost_kernel_set_backend(vhostfd, -1); +- } else if (dev->tapfds[pair_idx] >= 0) { ++ if (req_mq && vhost_kernel_tap_set_queue(tapfd, false) < 0) { ++ PMD_DRV_LOG(ERR, "fail to disable tap for vhost kernel"); ++ return -1; ++ } ++ dev->qp_enabled[pair_idx] = false; + return 0; + } + ++ if (dev->tapfds[pair_idx] >= 0) { ++ tapfd = dev->tapfds[pair_idx]; ++ if (vhost_kernel_tap_set_offload(tapfd, dev->features) == -1) ++ return -1; ++ if (req_mq && vhost_kernel_tap_set_queue(tapfd, true) < 0) { ++ PMD_DRV_LOG(ERR, "fail to enable tap for 
vhost kernel"); ++ return -1; ++ } ++ goto set_backend; ++ } ++ + if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || + (dev->features & (1ULL << VIRTIO_F_VERSION_1))) + hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); +@@ -353,13 +371,15 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, + return -1; + } + ++ dev->tapfds[pair_idx] = tapfd; ++ ++set_backend: + if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { + PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); +- close(tapfd); + return -1; + } + +- dev->tapfds[pair_idx] = tapfd; ++ dev->qp_enabled[pair_idx] = true; + return 0; + } + +diff --git a/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c b/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c +index 76bf75423e..2fa4f0d661 100644 +--- a/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c ++++ b/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c +@@ -18,7 +18,7 @@ + #include "../virtio_logs.h" + #include "../virtio_pci.h" + +-static int ++int + vhost_kernel_tap_set_offload(int fd, uint64_t features) + { + unsigned int offload = 0; +@@ -37,26 +37,34 @@ vhost_kernel_tap_set_offload(int fd, uint64_t features) + offload |= TUN_F_UFO; + } + +- if (offload != 0) { +- /* Check if our kernel supports TUNSETOFFLOAD */ +- if (ioctl(fd, TUNSETOFFLOAD, 0) != 0 && errno == EINVAL) { +- PMD_DRV_LOG(ERR, "Kernel does't support TUNSETOFFLOAD\n"); +- return -ENOTSUP; +- } ++ /* Check if our kernel supports TUNSETOFFLOAD */ ++ if (ioctl(fd, TUNSETOFFLOAD, 0) != 0 && errno == EINVAL) { ++ PMD_DRV_LOG(ERR, "Kernel does't support TUNSETOFFLOAD\n"); ++ return -ENOTSUP; ++ } + ++ if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) { ++ offload &= ~TUN_F_UFO; + if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) { +- offload &= ~TUN_F_UFO; +- if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) { +- PMD_DRV_LOG(ERR, "TUNSETOFFLOAD ioctl() failed: %s\n", +- strerror(errno)); +- return -1; +- } ++ PMD_DRV_LOG(ERR, "TUNSETOFFLOAD ioctl() failed: %s\n", ++ strerror(errno)); ++ return -1; + } + } + + return 0; + } + ++int ++vhost_kernel_tap_set_queue(int fd, bool attach) ++{ ++ struct ifreq ifr = { ++ .ifr_flags = attach ? IFF_ATTACH_QUEUE : IFF_DETACH_QUEUE, ++ }; ++ ++ return ioctl(fd, TUNSETQUEUE, &ifr); ++} ++ + int + vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq, + const char *mac, uint64_t features) +@@ -66,6 +74,7 @@ vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq, + int sndbuf = INT_MAX; + struct ifreq ifr; + int tapfd; ++ int ret; + + /* TODO: + * 1. 
verify we can get/set vnet_hdr_len, tap_probe_vnet_hdr_len +@@ -131,7 +140,9 @@ vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq, + goto error; + } + +- vhost_kernel_tap_set_offload(tapfd, features); ++ ret = vhost_kernel_tap_set_offload(tapfd, features); ++ if (ret < 0 && ret != -ENOTSUP) ++ goto error; + + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; +diff --git a/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h b/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h +index e0e95b4f59..5c4447b296 100644 +--- a/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h ++++ b/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h +@@ -2,6 +2,10 @@ + * Copyright(c) 2016 Intel Corporation + */ + ++#ifndef _VHOST_KERNEL_TAP_H ++#define _VHOST_KERNEL_TAP_H ++ ++#include <stdbool.h> + #include <sys/ioctl.h> + + /* TUN ioctls */ +@@ -37,3 +41,7 @@ + + int vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq, + const char *mac, uint64_t features); ++int vhost_kernel_tap_set_offload(int fd, uint64_t features); ++int vhost_kernel_tap_set_queue(int fd, bool attach); ++ ++#endif +diff --git a/dpdk/drivers/net/virtio/virtio_user/vhost_user.c b/dpdk/drivers/net/virtio/virtio_user/vhost_user.c +index a4b5c25cd3..d8e083ba8b 100644 +--- a/dpdk/drivers/net/virtio/virtio_user/vhost_user.c ++++ b/dpdk/drivers/net/virtio/virtio_user/vhost_user.c +@@ -456,6 +456,9 @@ vhost_user_enable_queue_pair(struct virtio_user_dev *dev, + { + int i; + ++ if (dev->qp_enabled[pair_idx] == enable) ++ return 0; ++ + for (i = 0; i < 2; ++i) { + struct vhost_vring_state state = { + .index = pair_idx * 2 + i, +@@ -466,6 +469,7 @@ vhost_user_enable_queue_pair(struct virtio_user_dev *dev, + return -1; + } + ++ dev->qp_enabled[pair_idx] = enable; + return 0; + } + +diff --git a/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c b/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c +index ea016e85d8..1c6b26f8d3 100644 +--- a/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c ++++ b/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c +@@ -537,7 +537,8 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev) + close(dev->kickfds[i]); + } + +- close(dev->vhostfd); ++ if (dev->vhostfd >= 0) ++ close(dev->vhostfd); + + if (dev->is_server && dev->listenfd >= 0) { + close(dev->listenfd); +@@ -545,8 +546,11 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev) + } + + if (dev->vhostfds) { +- for (i = 0; i < dev->max_queue_pairs; ++i) ++ for (i = 0; i < dev->max_queue_pairs; ++i) { + close(dev->vhostfds[i]); ++ if (dev->tapfds[i] >= 0) ++ close(dev->tapfds[i]); ++ } + free(dev->vhostfds); + free(dev->tapfds); + } +diff --git a/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h b/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h +index ad86837717..3b6b6065a5 100644 +--- a/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h ++++ b/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h +@@ -49,6 +49,7 @@ struct virtio_user_dev { + struct vring_packed packed_vrings[VIRTIO_MAX_VIRTQUEUES]; + }; + struct virtio_user_queue packed_queues[VIRTIO_MAX_VIRTQUEUES]; ++ bool qp_enabled[VIRTIO_MAX_VIRTQUEUE_PAIRS]; + + struct virtio_user_backend_ops *ops; + pthread_mutex_t mutex; +diff --git a/dpdk/drivers/net/virtio/virtio_user_ethdev.c b/dpdk/drivers/net/virtio/virtio_user_ethdev.c +index 3fc1725736..e2cbd2478d 100644 +--- a/dpdk/drivers/net/virtio/virtio_user_ethdev.c ++++ b/dpdk/drivers/net/virtio/virtio_user_ethdev.c +@@ -13,6 +13,7 @@ + #include <rte_ethdev_vdev.h> + #include 
<rte_bus_vdev.h> + #include <rte_alarm.h> ++#include <rte_cycles.h> + + #include "virtio_ethdev.h" + #include "virtio_logs.h" +@@ -25,12 +26,48 @@ + #define virtio_user_get_dev(hw) \ + ((struct virtio_user_dev *)(hw)->virtio_user_dev) + ++static void ++virtio_user_reset_queues_packed(struct rte_eth_dev *dev) ++{ ++ struct virtio_hw *hw = dev->data->dev_private; ++ struct virtnet_rx *rxvq; ++ struct virtnet_tx *txvq; ++ uint16_t i; ++ ++ /* Add lock to avoid queue contention. */ ++ rte_spinlock_lock(&hw->state_lock); ++ hw->started = 0; ++ ++ /* ++ * Waitting for datapath to complete before resetting queues. ++ * 1 ms should be enough for the ongoing Tx/Rx function to finish. ++ */ ++ rte_delay_ms(1); ++ ++ /* Vring reset for each Tx queue and Rx queue. */ ++ for (i = 0; i < dev->data->nb_rx_queues; i++) { ++ rxvq = dev->data->rx_queues[i]; ++ virtqueue_rxvq_reset_packed(rxvq->vq); ++ virtio_dev_rx_queue_setup_finish(dev, i); ++ } ++ ++ for (i = 0; i < dev->data->nb_tx_queues; i++) { ++ txvq = dev->data->tx_queues[i]; ++ virtqueue_txvq_reset_packed(txvq->vq); ++ } ++ ++ hw->started = 1; ++ rte_spinlock_unlock(&hw->state_lock); ++} ++ ++ + static int + virtio_user_server_reconnect(struct virtio_user_dev *dev) + { + int ret; + int connectfd; + struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id]; ++ struct virtio_hw *hw = eth_dev->data->dev_private; + + connectfd = accept(dev->listenfd, NULL, NULL); + if (connectfd < 0) +@@ -51,6 +88,14 @@ virtio_user_server_reconnect(struct virtio_user_dev *dev) + + dev->features &= dev->device_features; + ++ /* For packed ring, resetting queues is required in reconnection. */ ++ if (vtpci_packed_queue(hw) && ++ (vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_DRIVER_OK)) { ++ PMD_INIT_LOG(NOTICE, "Packets on the fly will be dropped" ++ " when packed ring reconnecting."); ++ virtio_user_reset_queues_packed(eth_dev); ++ } ++ + ret = virtio_user_start_device(dev); + if (ret < 0) + return -1; +@@ -433,12 +478,17 @@ static int + get_integer_arg(const char *key __rte_unused, + const char *value, void *extra_args) + { ++ uint64_t integer = 0; + if (!value || !extra_args) + return -EINVAL; +- +- *(uint64_t *)extra_args = strtoull(value, NULL, 0); +- +- return 0; ++ errno = 0; ++ integer = strtoull(value, NULL, 0); ++ /* extra_args keeps default value, it should be replaced ++ * only in case of successful parsing of the 'value' arg ++ */ ++ if (errno == 0) ++ *(uint64_t *)extra_args = integer; ++ return -errno; + } + + static struct rte_eth_dev * +@@ -517,7 +567,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev) + const char *name = rte_vdev_device_name(dev); + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { +- RTE_LOG(ERR, PMD, "Failed to probe %s\n", name); ++ PMD_INIT_LOG(ERR, "Failed to probe %s", name); + return -1; + } + +@@ -669,7 +719,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev) + goto end; + } + +- /* previously called by rte_pci_probe() for physical dev */ ++ /* previously called by pci probing for physical dev */ + if (eth_virtio_dev_init(eth_dev) < 0) { + PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails"); + virtio_user_eth_dev_free(eth_dev); +diff --git a/dpdk/drivers/net/virtio/virtqueue.c b/dpdk/drivers/net/virtio/virtqueue.c +index 5ff1e3587e..02c8b9fc54 100644 +--- a/dpdk/drivers/net/virtio/virtqueue.c ++++ b/dpdk/drivers/net/virtio/virtqueue.c +@@ -141,3 +141,76 @@ virtqueue_rxvq_flush(struct virtqueue *vq) + else + virtqueue_rxvq_flush_split(vq); + } ++ ++int ++virtqueue_rxvq_reset_packed(struct virtqueue *vq) ++{ 
++ int size = vq->vq_nentries; ++ struct vq_desc_extra *dxp; ++ struct virtnet_rx *rxvq; ++ uint16_t desc_idx; ++ ++ vq->vq_used_cons_idx = 0; ++ vq->vq_desc_head_idx = 0; ++ vq->vq_avail_idx = 0; ++ vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); ++ vq->vq_free_cnt = vq->vq_nentries; ++ ++ vq->vq_packed.used_wrap_counter = 1; ++ vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL; ++ vq->vq_packed.event_flags_shadow = 0; ++ vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE; ++ ++ rxvq = &vq->rxq; ++ memset(rxvq->mz->addr, 0, rxvq->mz->len); ++ ++ for (desc_idx = 0; desc_idx < vq->vq_nentries; desc_idx++) { ++ dxp = &vq->vq_descx[desc_idx]; ++ if (dxp->cookie != NULL) { ++ rte_pktmbuf_free(dxp->cookie); ++ dxp->cookie = NULL; ++ } ++ } ++ ++ vring_desc_init_packed(vq, size); ++ ++ virtqueue_disable_intr(vq); ++ return 0; ++} ++ ++int ++virtqueue_txvq_reset_packed(struct virtqueue *vq) ++{ ++ int size = vq->vq_nentries; ++ struct vq_desc_extra *dxp; ++ struct virtnet_tx *txvq; ++ uint16_t desc_idx; ++ ++ vq->vq_used_cons_idx = 0; ++ vq->vq_desc_head_idx = 0; ++ vq->vq_avail_idx = 0; ++ vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); ++ vq->vq_free_cnt = vq->vq_nentries; ++ ++ vq->vq_packed.used_wrap_counter = 1; ++ vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL; ++ vq->vq_packed.event_flags_shadow = 0; ++ ++ txvq = &vq->txq; ++ memset(txvq->mz->addr, 0, txvq->mz->len); ++ memset(txvq->virtio_net_hdr_mz->addr, 0, ++ txvq->virtio_net_hdr_mz->len); ++ ++ for (desc_idx = 0; desc_idx < vq->vq_nentries; desc_idx++) { ++ dxp = &vq->vq_descx[desc_idx]; ++ if (dxp->cookie != NULL) { ++ rte_pktmbuf_free(dxp->cookie); ++ dxp->cookie = NULL; ++ } ++ } ++ ++ vring_desc_init_packed(vq, size); ++ ++ virtqueue_disable_intr(vq); ++ return 0; ++} +diff --git a/dpdk/drivers/net/virtio/virtqueue.h b/dpdk/drivers/net/virtio/virtqueue.h +index 8d7f197b13..58ad7309ae 100644 +--- a/dpdk/drivers/net/virtio/virtqueue.h ++++ b/dpdk/drivers/net/virtio/virtqueue.h +@@ -443,6 +443,10 @@ struct rte_mbuf *virtqueue_detach_unused(struct virtqueue *vq); + /* Flush the elements in the used ring. 
*/ + void virtqueue_rxvq_flush(struct virtqueue *vq); + ++int virtqueue_rxvq_reset_packed(struct virtqueue *vq); ++ ++int virtqueue_txvq_reset_packed(struct virtqueue *vq); ++ + static inline int + virtqueue_full(const struct virtqueue *vq) + { +diff --git a/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c b/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c +index 6e6efa9603..705e9760f4 100644 +--- a/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c ++++ b/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c +@@ -771,7 +771,8 @@ vmxnet3_dev_start(struct rte_eth_dev *dev) + PMD_INIT_LOG(DEBUG, "Failed to setup memory region\n"); + } + +- if (VMXNET3_VERSION_GE_4(hw)) { ++ if (VMXNET3_VERSION_GE_4(hw) && ++ dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) { + /* Check for additional RSS */ + ret = vmxnet3_v4_rss_configure(dev); + if (ret != VMXNET3_SUCCESS) { +diff --git a/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h b/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h +index 8c2b6f8771..dd685b02b7 100644 +--- a/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h ++++ b/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h +@@ -38,6 +38,10 @@ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_NONFRAG_IPV6_UDP) + ++#define VMXNET3_MANDATORY_V4_RSS ( \ ++ ETH_RSS_NONFRAG_IPV4_TCP | \ ++ ETH_RSS_NONFRAG_IPV6_TCP) ++ + /* RSS configuration structure - shared with device through GPA */ + typedef struct VMXNET3_RSSConf { + uint16_t hashType; +diff --git a/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c b/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c +index 7794d74214..73e270f30f 100644 +--- a/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c ++++ b/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c +@@ -950,13 +950,17 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + + RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY); + +- if (rxm->data_len) { ++ if (likely(start && rxm->data_len > 0)) { + start->pkt_len += rxm->data_len; + start->nb_segs++; + + rxq->last_seg->next = rxm; + rxq->last_seg = rxm; + } else { ++ PMD_RX_LOG(ERR, "Error received empty or out of order frame."); ++ rxq->stats.drop_total++; ++ rxq->stats.drop_err++; ++ + rte_pktmbuf_free_seg(rxm); + } + } +@@ -1311,6 +1315,14 @@ vmxnet3_v4_rss_configure(struct rte_eth_dev *dev) + + cmdInfo->setRSSFields = 0; + port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf; ++ ++ if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) != ++ VMXNET3_MANDATORY_V4_RSS) { ++ PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS," ++ "automatically setting it"); ++ port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS; ++ } ++ + rss_hf = port_rss_conf->rss_hf & + (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL); + +diff --git a/dpdk/drivers/raw/ifpga/meson.build b/dpdk/drivers/raw/ifpga/meson.build +index 206136ff48..d4027068d6 100644 +--- a/dpdk/drivers/raw/ifpga/meson.build ++++ b/dpdk/drivers/raw/ifpga/meson.build +@@ -15,7 +15,7 @@ if build + objs = [base_objs] + + deps += ['ethdev', 'rawdev', 'pci', 'bus_pci', 'kvargs', +- 'bus_vdev', 'bus_ifpga', 'net', 'i40e', 'ipn3ke'] ++ 'bus_vdev', 'bus_ifpga', 'net', 'pmd_i40e', 'pmd_ipn3ke'] + ext_deps += dep + + sources = files('ifpga_rawdev.c') +diff --git a/dpdk/drivers/raw/ntb/ntb.c b/dpdk/drivers/raw/ntb/ntb.c +index ad7f6abfd3..dd0b72f8c5 100644 +--- a/dpdk/drivers/raw/ntb/ntb.c ++++ b/dpdk/drivers/raw/ntb/ntb.c +@@ -683,8 +683,8 @@ ntb_enqueue_bufs(struct rte_rawdev *dev, + sizeof(struct ntb_used) * nb1); + rte_memcpy(txq->tx_used_ring, tx_used + nb1, + sizeof(struct ntb_used) * nb2); +- *txq->used_cnt = txq->last_used; + rte_wmb(); ++ *txq->used_cnt = 
txq->last_used; + + /* update queue stats */ + hw->ntb_xstats[NTB_TX_BYTES_ID + off] += bytes; +@@ -789,8 +789,8 @@ ntb_dequeue_bufs(struct rte_rawdev *dev, + sizeof(struct ntb_desc) * nb1); + rte_memcpy(rxq->rx_desc_ring, rx_desc + nb1, + sizeof(struct ntb_desc) * nb2); +- *rxq->avail_cnt = rxq->last_avail; + rte_wmb(); ++ *rxq->avail_cnt = rxq->last_avail; + + /* update queue stats */ + off = NTB_XSTATS_NUM * ((size_t)context + 1); +diff --git a/dpdk/examples/ethtool/lib/rte_ethtool.c b/dpdk/examples/ethtool/lib/rte_ethtool.c +index 667d7eaf27..db8150efd5 100644 +--- a/dpdk/examples/ethtool/lib/rte_ethtool.c ++++ b/dpdk/examples/ethtool/lib/rte_ethtool.c +@@ -402,7 +402,9 @@ rte_ethtool_net_set_rx_mode(uint16_t port_id) + } + + /* Enable Rx vlan filter, VF unspport status is discard */ +- rte_eth_dev_set_vlan_offload(port_id, ETH_VLAN_FILTER_MASK); ++ ret = rte_eth_dev_set_vlan_offload(port_id, ETH_VLAN_FILTER_MASK); ++ if (ret != 0) ++ return ret; + + return 0; + } +diff --git a/dpdk/examples/eventdev_pipeline/main.c b/dpdk/examples/eventdev_pipeline/main.c +index d3ff1bbe4f..21958269f7 100644 +--- a/dpdk/examples/eventdev_pipeline/main.c ++++ b/dpdk/examples/eventdev_pipeline/main.c +@@ -10,6 +10,8 @@ + + #include "pipeline_common.h" + ++struct fastpath_data *fdata; ++ + struct config_data cdata = { + .num_packets = (1L << 25), /* do ~32M packets */ + .num_fids = 512, +@@ -299,12 +301,6 @@ signal_handler(int signum) + + rte_eal_mp_wait_lcore(); + +- RTE_ETH_FOREACH_DEV(portid) { +- rte_eth_dev_close(portid); +- } +- +- rte_event_dev_stop(0); +- rte_event_dev_close(0); + } + if (signum == SIGTSTP) + rte_event_dev_dump(0, stdout); +@@ -467,5 +463,14 @@ main(int argc, char **argv) + + } + ++ RTE_ETH_FOREACH_DEV(portid) { ++ rte_eth_dev_close(portid); ++ } ++ ++ rte_event_dev_stop(0); ++ rte_event_dev_close(0); ++ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/eventdev_pipeline/pipeline_common.h b/dpdk/examples/eventdev_pipeline/pipeline_common.h +index 8e30393d09..c7245f7f0f 100644 +--- a/dpdk/examples/eventdev_pipeline/pipeline_common.h ++++ b/dpdk/examples/eventdev_pipeline/pipeline_common.h +@@ -93,8 +93,8 @@ struct port_link { + uint8_t priority; + }; + +-struct fastpath_data *fdata; +-struct config_data cdata; ++extern struct fastpath_data *fdata; ++extern struct config_data cdata; + + static __rte_always_inline void + exchange_mac(struct rte_mbuf *m) +diff --git a/dpdk/examples/fips_validation/fips_validation.c b/dpdk/examples/fips_validation/fips_validation.c +index 07ffa62e9e..b79a095aca 100644 +--- a/dpdk/examples/fips_validation/fips_validation.c ++++ b/dpdk/examples/fips_validation/fips_validation.c +@@ -144,6 +144,24 @@ fips_test_parse_header(void) + ret = parse_test_tdes_init(); + if (ret < 0) + return 0; ++ } else if (strstr(info.vec[i], "PERMUTATION")) { ++ algo_parsed = 1; ++ info.algo = FIPS_TEST_ALGO_TDES; ++ ret = parse_test_tdes_init(); ++ if (ret < 0) ++ return 0; ++ } else if (strstr(info.vec[i], "VARIABLE")) { ++ algo_parsed = 1; ++ info.algo = FIPS_TEST_ALGO_TDES; ++ ret = parse_test_tdes_init(); ++ if (ret < 0) ++ return 0; ++ } else if (strstr(info.vec[i], "SUBSTITUTION")) { ++ algo_parsed = 1; ++ info.algo = FIPS_TEST_ALGO_TDES; ++ ret = parse_test_tdes_init(); ++ if (ret < 0) ++ return 0; + } else if (strstr(info.vec[i], "SHA-")) { + algo_parsed = 1; + info.algo = FIPS_TEST_ALGO_SHA; +diff --git a/dpdk/examples/fips_validation/fips_validation_gcm.c b/dpdk/examples/fips_validation/fips_validation_gcm.c +index ea48ddf707..47576e9a38 100644 
+--- a/dpdk/examples/fips_validation/fips_validation_gcm.c ++++ b/dpdk/examples/fips_validation/fips_validation_gcm.c +@@ -46,6 +46,10 @@ struct fips_test_callback gcm_interim_vectors[] = { + {KEYLEN_STR, parser_read_uint32_bit_val, &vec.aead.key}, + {IVLEN_STR, parser_read_uint32_bit_val, &vec.iv}, + {PTLEN_STR, parser_read_uint32_bit_val, &vec.pt}, ++ {PTLEN_STR, parser_read_uint32_bit_val, &vec.ct}, ++ /**< The NIST test vectors use 'PTlen' to denote input text ++ * length in case of decrypt & encrypt operations. ++ */ + {AADLEN_STR, parser_read_uint32_bit_val, &vec.aead.aad}, + {TAGLEN_STR, parser_read_uint32_bit_val, + &vec.aead.digest}, +diff --git a/dpdk/examples/ioat/ioatfwd.c b/dpdk/examples/ioat/ioatfwd.c +index e9117718fe..53de231795 100644 +--- a/dpdk/examples/ioat/ioatfwd.c ++++ b/dpdk/examples/ioat/ioatfwd.c +@@ -460,7 +460,7 @@ ioat_tx_port(struct rxtx_port_config *tx_config) + MAX_PKT_BURST, NULL); + } + +- if (nb_dq <= 0) ++ if ((int32_t) nb_dq <= 0) + return; + + if (copy_mode == COPY_MODE_IOAT_NUM) +@@ -697,7 +697,7 @@ check_link_status(uint32_t port_mask) + { + uint16_t portid; + struct rte_eth_link link; +- int retval = 0; ++ int ret, link_status = 0; + + printf("\nChecking link status\n"); + RTE_ETH_FOREACH_DEV(portid) { +@@ -705,7 +705,12 @@ check_link_status(uint32_t port_mask) + continue; + + memset(&link, 0, sizeof(link)); +- rte_eth_link_get(portid, &link); ++ ret = rte_eth_link_get(portid, &link); ++ if (ret < 0) { ++ printf("Port %u link get failed: err=%d\n", ++ portid, ret); ++ continue; ++ } + + /* Print link status */ + if (link.link_status) { +@@ -713,12 +718,12 @@ check_link_status(uint32_t port_mask) + "Port %d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); +- retval = 1; ++ ("full-duplex") : ("half-duplex")); ++ link_status = 1; + } else + printf("Port %d Link Down\n", portid); + } +- return retval; ++ return link_status; + } + + static void +@@ -824,7 +829,11 @@ port_init(uint16_t portid, struct rte_mempool *mbuf_pool, uint16_t nb_queues) + /* Init port */ + printf("Initializing port %u... ", portid); + fflush(stdout); +- rte_eth_dev_info_get(portid, &dev_info); ++ ret = rte_eth_dev_info_get(portid, &dev_info); ++ if (ret < 0) ++ rte_exit(EXIT_FAILURE, "Cannot get device info: %s, port=%u\n", ++ rte_strerror(-ret), portid); ++ + local_port_conf.rx_adv_conf.rss_conf.rss_hf &= + dev_info.flow_type_rss_offloads; + if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) +diff --git a/dpdk/examples/ip_fragmentation/main.c b/dpdk/examples/ip_fragmentation/main.c +index 104612339c..90e4d1ea4a 100644 +--- a/dpdk/examples/ip_fragmentation/main.c ++++ b/dpdk/examples/ip_fragmentation/main.c +@@ -617,7 +617,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up .Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +@@ -646,7 +646,7 @@ check_all_ports_link_status(uint32_t port_mask) + } + } + +-/* Check L3 packet type detection capablity of the NIC port */ ++/* Check L3 packet type detection capability of the NIC port */ + static int + check_ptype(int portid) + { +diff --git a/dpdk/examples/ip_pipeline/thread.c b/dpdk/examples/ip_pipeline/thread.c +index 272fbbeed1..adb83167cd 100644 +--- a/dpdk/examples/ip_pipeline/thread.c ++++ b/dpdk/examples/ip_pipeline/thread.c +@@ -325,8 +325,6 @@ thread_pipeline_enable(uint32_t thread_id, + + /* Send request and wait for response */ + rsp = thread_msg_send_recv(thread_id, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -412,8 +410,6 @@ thread_pipeline_disable(uint32_t thread_id, + + /* Send request and wait for response */ + rsp = thread_msg_send_recv(thread_id, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -815,8 +811,6 @@ pipeline_port_in_stats_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -863,8 +857,6 @@ pipeline_port_in_enable(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -909,8 +901,6 @@ pipeline_port_in_disable(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -963,8 +953,6 @@ pipeline_port_out_stats_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1019,8 +1007,6 @@ pipeline_table_stats_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1436,10 +1422,6 @@ pipeline_table_rule_add(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) { +- free(rule); +- return -1; +- } + + /* Read response */ + status = rsp->status; +@@ -1538,10 +1520,6 @@ pipeline_table_rule_add_default(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) { +- free(rule); +- return -1; +- } + + /* Read response */ + status = rsp->status; +@@ -1655,10 +1633,6 @@ pipeline_table_rule_add_bulk(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) { +- table_rule_list_free(list); +- return -ENOMEM; +- } + + /* Read response */ + status = rsp->status; +@@ -1733,8 +1707,6 @@ pipeline_table_rule_delete(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1790,8 +1762,6 @@ pipeline_table_rule_delete_default(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return 
-1; + + /* Read response */ + status = rsp->status; +@@ -1857,8 +1827,6 @@ pipeline_table_rule_stats_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1915,8 +1883,6 @@ pipeline_table_mtr_profile_add(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1967,8 +1933,6 @@ pipeline_table_mtr_profile_delete(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -2037,8 +2001,6 @@ pipeline_table_rule_mtr_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -2096,8 +2058,6 @@ pipeline_table_dscp_table_update(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -2164,8 +2124,6 @@ pipeline_table_rule_ttl_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -2229,8 +2187,6 @@ pipeline_table_rule_time_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +diff --git a/dpdk/examples/ip_reassembly/main.c b/dpdk/examples/ip_reassembly/main.c +index d59e6d02ff..29b34d0710 100644 +--- a/dpdk/examples/ip_reassembly/main.c ++++ b/dpdk/examples/ip_reassembly/main.c +@@ -736,7 +736,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/ipsec-secgw/ipsec-secgw.c b/dpdk/examples/ipsec-secgw/ipsec-secgw.c +index 3b5aaf6832..1493be9025 100644 +--- a/dpdk/examples/ipsec-secgw/ipsec-secgw.c ++++ b/dpdk/examples/ipsec-secgw/ipsec-secgw.c +@@ -1668,7 +1668,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up - speed %u Mbps -%s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/ipsec-secgw/ipsec_process.c b/dpdk/examples/ipsec-secgw/ipsec_process.c +index 2eb5c8b345..37f406d46c 100644 +--- a/dpdk/examples/ipsec-secgw/ipsec_process.c ++++ b/dpdk/examples/ipsec-secgw/ipsec_process.c +@@ -125,6 +125,7 @@ sa_group(void *sa_ptr[], struct rte_mbuf *pkts[], + void * const nosa = &spi; + + sa = nosa; ++ grp[0].m = pkts; + for (i = 0, n = 0; i != num; i++) { + + if (sa != sa_ptr[i]) { +diff --git a/dpdk/examples/ipsec-secgw/sa.c b/dpdk/examples/ipsec-secgw/sa.c +index 7f046e3ed7..fcc6695388 100644 +--- a/dpdk/examples/ipsec-secgw/sa.c ++++ b/dpdk/examples/ipsec-secgw/sa.c +@@ -314,6 +314,9 @@ parse_sa_tokens(char **tokens, uint32_t n_tokens, + APP_CHECK(algo != NULL, status, "unrecognized " + "input \"%s\"", tokens[ti]); + ++ if (status->status < 0) ++ return; ++ + rule->cipher_algo = algo->algo; + rule->block_size = algo->block_size; + rule->iv_len = algo->iv_len; +@@ -378,6 +381,9 @@ parse_sa_tokens(char **tokens, uint32_t n_tokens, + APP_CHECK(algo != NULL, status, "unrecognized " + "input \"%s\"", tokens[ti]); + ++ if (status->status < 0) ++ return; ++ + rule->auth_algo = algo->algo; + rule->auth_key_len = algo->key_len; + rule->digest_len = algo->digest_len; +@@ -433,6 +439,9 @@ parse_sa_tokens(char **tokens, uint32_t n_tokens, + APP_CHECK(algo != NULL, status, "unrecognized " + "input \"%s\"", tokens[ti]); + ++ if (status->status < 0) ++ return; ++ + rule->aead_algo = algo->algo; + rule->cipher_key_len = algo->key_len; + rule->digest_len = algo->digest_len; +@@ -984,7 +993,6 @@ sa_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[], + } + + if (sa->aead_algo == RTE_CRYPTO_AEAD_AES_GCM) { +- struct rte_ipsec_session *ips; + iv_length = 12; + + sa_ctx->xf[idx].a.type = RTE_CRYPTO_SYM_XFORM_AEAD; +@@ -1004,20 +1012,6 @@ sa_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[], + sa->digest_len; + + sa->xforms = &sa_ctx->xf[idx].a; +- +- ips = ipsec_get_primary_session(sa); +- if (ips->type == +- RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL || +- ips->type == +- RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO) { +- rc = create_inline_session(skt_ctx, sa, ips); +- if (rc != 0) { +- RTE_LOG(ERR, IPSEC_ESP, +- "create_inline_session() failed\n"); +- return -EINVAL; +- } +- } +- print_one_sa_rule(sa, inbound); + } else { + switch (sa->cipher_algo) { + case RTE_CRYPTO_CIPHER_NULL: +@@ -1082,9 +1076,21 @@ sa_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[], + sa_ctx->xf[idx].a.next = &sa_ctx->xf[idx].b; + sa_ctx->xf[idx].b.next = NULL; + sa->xforms = &sa_ctx->xf[idx].a; ++ } + +- print_one_sa_rule(sa, inbound); ++ if (ips->type == ++ RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL || ++ ips->type == ++ RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO) { ++ rc = create_inline_session(skt_ctx, sa, ips); ++ if (rc != 0) { ++ RTE_LOG(ERR, IPSEC_ESP, ++ "create_inline_session() failed\n"); ++ return -EINVAL; ++ } + } ++ ++ print_one_sa_rule(sa, inbound); + } + + return 0; +diff --git a/dpdk/examples/ipv4_multicast/main.c b/dpdk/examples/ipv4_multicast/main.c +index 63333b5b69..09d9270aff 100644 +--- a/dpdk/examples/ipv4_multicast/main.c ++++ b/dpdk/examples/ipv4_multicast/main.c +@@ -600,7 +600,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/kni/main.c b/dpdk/examples/kni/main.c +index 5f713e6b22..d48a59fcb1 100644 +--- a/dpdk/examples/kni/main.c ++++ b/dpdk/examples/kni/main.c +@@ -679,7 +679,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up - speed %uMbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +@@ -764,15 +764,16 @@ monitor_all_ports_link_status(void *arg) + return NULL; + } + +-/* Callback for request of changing MTU */ + static int +-kni_change_mtu(uint16_t port_id, unsigned int new_mtu) ++kni_change_mtu_(uint16_t port_id, unsigned int new_mtu) + { + int ret; + uint16_t nb_rxd = NB_RXD; ++ uint16_t nb_txd = NB_TXD; + struct rte_eth_conf conf; + struct rte_eth_dev_info dev_info; + struct rte_eth_rxconf rxq_conf; ++ struct rte_eth_txconf txq_conf; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id); +@@ -800,7 +801,7 @@ kni_change_mtu(uint16_t port_id, unsigned int new_mtu) + return ret; + } + +- ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, NULL); ++ ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Could not adjust number of descriptors " + "for port%u (%d)\n", (unsigned int)port_id, +@@ -825,6 +826,16 @@ kni_change_mtu(uint16_t port_id, unsigned int new_mtu) + return ret; + } + ++ txq_conf = dev_info.default_txconf; ++ txq_conf.offloads = conf.txmode.offloads; ++ ret = rte_eth_tx_queue_setup(port_id, 0, nb_txd, ++ rte_eth_dev_socket_id(port_id), &txq_conf); ++ if (ret < 0) { ++ RTE_LOG(ERR, APP, "Fail to setup Tx queue of port %d\n", ++ port_id); ++ return ret; ++ } ++ + /* Restart specific port */ + ret = rte_eth_dev_start(port_id); + if (ret < 0) { +@@ -835,6 +846,19 @@ kni_change_mtu(uint16_t port_id, unsigned int new_mtu) + return 0; + } + ++/* Callback for request of changing MTU */ ++static int ++kni_change_mtu(uint16_t port_id, unsigned int new_mtu) ++{ ++ int ret; ++ ++ rte_atomic32_inc(&kni_pause); ++ ret = kni_change_mtu_(port_id, new_mtu); ++ rte_atomic32_dec(&kni_pause); ++ ++ return ret; ++} ++ + /* Callback for request of configuring network interface up/down */ + static int + kni_config_network_interface(uint16_t port_id, uint8_t if_up) +diff --git a/dpdk/examples/l2fwd-crypto/main.c b/dpdk/examples/l2fwd-crypto/main.c +index 61d78295d4..fcb55c370a 100644 +--- a/dpdk/examples/l2fwd-crypto/main.c ++++ b/dpdk/examples/l2fwd-crypto/main.c +@@ -1756,7 +1756,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/l2fwd-event/l2fwd_common.c b/dpdk/examples/l2fwd-event/l2fwd_common.c +index 181301fe6b..ab341e55b2 100644 +--- a/dpdk/examples/l2fwd-event/l2fwd_common.c ++++ b/dpdk/examples/l2fwd-event/l2fwd_common.c +@@ -50,6 +50,17 @@ l2fwd_event_init_ports(struct l2fwd_resources *rsrc) + if (ret != 0) + rte_panic("Error during getting device (port %u) info: %s\n", + port_id, strerror(-ret)); ++ local_port_conf.rx_adv_conf.rss_conf.rss_hf &= ++ dev_info.flow_type_rss_offloads; ++ if (local_port_conf.rx_adv_conf.rss_conf.rss_hf != ++ port_conf.rx_adv_conf.rss_conf.rss_hf) { ++ printf("Port %u modified RSS hash function based on hardware support," ++ "requested:%#"PRIx64" configured:%#"PRIx64"", ++ port_id, ++ port_conf.rx_adv_conf.rss_conf.rss_hf, ++ local_port_conf.rx_adv_conf.rss_conf.rss_hf); ++ } ++ + if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) + local_port_conf.txmode.offloads |= + DEV_TX_OFFLOAD_MBUF_FAST_FREE; +diff --git a/dpdk/examples/l2fwd-event/l2fwd_event.c b/dpdk/examples/l2fwd-event/l2fwd_event.c +index 0379c580d6..38d590c14c 100644 +--- a/dpdk/examples/l2fwd-event/l2fwd_event.c ++++ b/dpdk/examples/l2fwd-event/l2fwd_event.c +@@ -67,7 +67,7 @@ l2fwd_event_service_setup(struct l2fwd_resources *rsrc) + int ret, i; + + rte_event_dev_info_get(evt_rsrc->event_d_id, &evdev_info); +- if (evdev_info.event_dev_cap & RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED) { ++ if (!(evdev_info.event_dev_cap & RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED)) { + ret = rte_event_dev_service_id_get(evt_rsrc->event_d_id, + &service_id); + if (ret != -ESRCH && ret != 0) +diff --git a/dpdk/examples/l2fwd-event/l2fwd_event_generic.c b/dpdk/examples/l2fwd-event/l2fwd_event_generic.c +index b7e467c1e1..2dc95e5f7d 100644 +--- a/dpdk/examples/l2fwd-event/l2fwd_event_generic.c ++++ b/dpdk/examples/l2fwd-event/l2fwd_event_generic.c +@@ -42,8 +42,10 @@ l2fwd_event_device_setup_generic(struct l2fwd_resources *rsrc) + + /* Event device configurtion */ + rte_event_dev_info_get(event_d_id, &dev_info); +- evt_rsrc->disable_implicit_release = !!(dev_info.event_dev_cap & +- RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE); ++ ++ /* Enable implicit release */ ++ if (dev_info.event_dev_cap & RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE) ++ evt_rsrc->disable_implicit_release = 0; + + if (dev_info.event_dev_cap & RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES) + event_queue_cfg |= RTE_EVENT_QUEUE_CFG_ALL_TYPES; +@@ -70,7 +72,8 @@ l2fwd_event_device_setup_generic(struct l2fwd_resources *rsrc) + event_d_conf.nb_event_port_enqueue_depth = + dev_info.max_event_port_enqueue_depth; + +- num_workers = rte_lcore_count() - rte_service_lcore_count(); ++ /* Ignore Master core and service cores. 
*/ ++ num_workers = rte_lcore_count() - 1 - rte_service_lcore_count(); + if (dev_info.max_event_ports < num_workers) + num_workers = dev_info.max_event_ports; + +@@ -109,7 +112,9 @@ l2fwd_event_port_setup_generic(struct l2fwd_resources *rsrc) + rte_panic("No space is available\n"); + + memset(&def_p_conf, 0, sizeof(struct rte_event_port_conf)); +- rte_event_port_default_conf_get(event_d_id, 0, &def_p_conf); ++ ret = rte_event_port_default_conf_get(event_d_id, 0, &def_p_conf); ++ if (ret < 0) ++ rte_panic("Error to get default configuration of event port\n"); + + if (def_p_conf.new_event_threshold < event_p_conf.new_event_threshold) + event_p_conf.new_event_threshold = +@@ -170,7 +175,10 @@ l2fwd_event_queue_setup_generic(struct l2fwd_resources *rsrc, + if (!evt_rsrc->evq.event_q_id) + rte_panic("Memory allocation failure\n"); + +- rte_event_queue_default_conf_get(event_d_id, 0, &def_q_conf); ++ ret = rte_event_queue_default_conf_get(event_d_id, 0, &def_q_conf); ++ if (ret < 0) ++ rte_panic("Error to get default config of event queue\n"); ++ + if (def_q_conf.nb_atomic_flows < event_q_conf.nb_atomic_flows) + event_q_conf.nb_atomic_flows = def_q_conf.nb_atomic_flows; + +diff --git a/dpdk/examples/l2fwd-event/l2fwd_event_internal_port.c b/dpdk/examples/l2fwd-event/l2fwd_event_internal_port.c +index b382763dd9..63d57b46c2 100644 +--- a/dpdk/examples/l2fwd-event/l2fwd_event_internal_port.c ++++ b/dpdk/examples/l2fwd-event/l2fwd_event_internal_port.c +@@ -27,7 +27,6 @@ l2fwd_event_device_setup_internal_port(struct l2fwd_resources *rsrc) + .nb_event_port_enqueue_depth = 128 + }; + struct rte_event_dev_info dev_info; +- uint8_t disable_implicit_release; + const uint8_t event_d_id = 0; /* Always use first event device only */ + uint32_t event_queue_cfg = 0; + uint16_t ethdev_count = 0; +@@ -44,10 +43,9 @@ l2fwd_event_device_setup_internal_port(struct l2fwd_resources *rsrc) + /* Event device configurtion */ + rte_event_dev_info_get(event_d_id, &dev_info); + +- disable_implicit_release = !!(dev_info.event_dev_cap & +- RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE); +- evt_rsrc->disable_implicit_release = +- disable_implicit_release; ++ /* Enable implicit release */ ++ if (dev_info.event_dev_cap & RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE) ++ evt_rsrc->disable_implicit_release = 0; + + if (dev_info.event_dev_cap & RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES) + event_queue_cfg |= RTE_EVENT_QUEUE_CFG_ALL_TYPES; +@@ -73,7 +71,8 @@ l2fwd_event_device_setup_internal_port(struct l2fwd_resources *rsrc) + event_d_conf.nb_event_port_enqueue_depth = + dev_info.max_event_port_enqueue_depth; + +- num_workers = rte_lcore_count(); ++ /* Ignore Master core. 
*/ ++ num_workers = rte_lcore_count() - 1; + if (dev_info.max_event_ports < num_workers) + num_workers = dev_info.max_event_ports; + +@@ -110,7 +109,10 @@ l2fwd_event_port_setup_internal_port(struct l2fwd_resources *rsrc) + if (!evt_rsrc->evp.event_p_id) + rte_panic("Failed to allocate memory for Event Ports\n"); + +- rte_event_port_default_conf_get(event_d_id, 0, &def_p_conf); ++ ret = rte_event_port_default_conf_get(event_d_id, 0, &def_p_conf); ++ if (ret < 0) ++ rte_panic("Error to get default configuration of event port\n"); ++ + if (def_p_conf.new_event_threshold < event_p_conf.new_event_threshold) + event_p_conf.new_event_threshold = + def_p_conf.new_event_threshold; +@@ -162,7 +164,10 @@ l2fwd_event_queue_setup_internal_port(struct l2fwd_resources *rsrc, + uint8_t event_q_id = 0; + int32_t ret; + +- rte_event_queue_default_conf_get(event_d_id, event_q_id, &def_q_conf); ++ ret = rte_event_queue_default_conf_get(event_d_id, event_q_id, ++ &def_q_conf); ++ if (ret < 0) ++ rte_panic("Error to get default config of event queue\n"); + + if (def_q_conf.nb_atomic_flows < event_q_conf.nb_atomic_flows) + event_q_conf.nb_atomic_flows = def_q_conf.nb_atomic_flows; +diff --git a/dpdk/examples/l2fwd-event/l2fwd_poll.c b/dpdk/examples/l2fwd-event/l2fwd_poll.c +index a3a3835582..2033c65e54 100644 +--- a/dpdk/examples/l2fwd-event/l2fwd_poll.c ++++ b/dpdk/examples/l2fwd-event/l2fwd_poll.c +@@ -116,6 +116,7 @@ l2fwd_poll_lcore_config(struct l2fwd_resources *rsrc) + + /* get the lcore_id for this port */ + while (rte_lcore_is_enabled(rx_lcore_id) == 0 || ++ rx_lcore_id == rte_get_master_lcore() || + poll_rsrc->lcore_queue_conf[rx_lcore_id].n_rx_port == + rsrc->rx_queue_per_lcore) { + rx_lcore_id++; +diff --git a/dpdk/examples/l2fwd-event/main.c b/dpdk/examples/l2fwd-event/main.c +index 89a6bb9a44..384b71238f 100644 +--- a/dpdk/examples/l2fwd-event/main.c ++++ b/dpdk/examples/l2fwd-event/main.c +@@ -263,7 +263,7 @@ check_all_ports_link_status(struct l2fwd_resources *rsrc, + "Port%d Link Up. Speed %u Mbps - %s\n", + port_id, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", port_id); + continue; +diff --git a/dpdk/examples/l2fwd-jobstats/main.c b/dpdk/examples/l2fwd-jobstats/main.c +index f975aa12d0..e0255080e2 100644 +--- a/dpdk/examples/l2fwd-jobstats/main.c ++++ b/dpdk/examples/l2fwd-jobstats/main.c +@@ -710,7 +710,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/l2fwd-keepalive/main.c b/dpdk/examples/l2fwd-keepalive/main.c +index b36834974e..3d59e2ca90 100644 +--- a/dpdk/examples/l2fwd-keepalive/main.c ++++ b/dpdk/examples/l2fwd-keepalive/main.c +@@ -44,7 +44,7 @@ + + #define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1 + +-#define NB_MBUF 8192 ++#define NB_MBUF_PER_PORT 3000 + + #define MAX_PKT_BURST 32 + #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ +@@ -475,7 +475,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +@@ -536,6 +536,7 @@ main(int argc, char **argv) + uint16_t portid, last_port; + unsigned lcore_id, rx_lcore_id; + unsigned nb_ports_in_mask = 0; ++ unsigned int total_nb_mbufs; + struct sigaction signal_handler; + struct rte_keepalive_shm *ka_shm; + +@@ -561,16 +562,19 @@ main(int argc, char **argv) + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n"); + +- /* create the mbuf pool */ +- l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 32, +- 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); +- if (l2fwd_pktmbuf_pool == NULL) +- rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); +- + nb_ports = rte_eth_dev_count_avail(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + ++ /* create the mbuf pool */ ++ total_nb_mbufs = NB_MBUF_PER_PORT * nb_ports; ++ ++ l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", ++ total_nb_mbufs, 32, 0, RTE_MBUF_DEFAULT_BUF_SIZE, ++ rte_socket_id()); ++ if (l2fwd_pktmbuf_pool == NULL) ++ rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); ++ + /* reset l2fwd_dst_ports */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) + l2fwd_dst_ports[portid] = 0; +diff --git a/dpdk/examples/l2fwd/main.c b/dpdk/examples/l2fwd/main.c +index 09257aab1c..fcef232731 100644 +--- a/dpdk/examples/l2fwd/main.c ++++ b/dpdk/examples/l2fwd/main.c +@@ -478,7 +478,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/l3fwd-acl/main.c b/dpdk/examples/l3fwd-acl/main.c +index fa92a28297..cfbeee962b 100644 +--- a/dpdk/examples/l3fwd-acl/main.c ++++ b/dpdk/examples/l3fwd-acl/main.c +@@ -1839,7 +1839,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/l3fwd-power/main.c b/dpdk/examples/l3fwd-power/main.c +index d049d8a5dc..aa6ff2627b 100644 +--- a/dpdk/examples/l3fwd-power/main.c ++++ b/dpdk/examples/l3fwd-power/main.c +@@ -880,9 +880,6 @@ sleep_until_rx_interrupt(int num) + port_id = ((uintptr_t)data) >> CHAR_BIT; + queue_id = ((uintptr_t)data) & + RTE_LEN2MASK(CHAR_BIT, uint8_t); +- rte_spinlock_lock(&(locks[port_id])); +- rte_eth_dev_rx_intr_disable(port_id, queue_id); +- rte_spinlock_unlock(&(locks[port_id])); + RTE_LOG(INFO, L3FWD_POWER, + "lcore %u is waked up from rx interrupt on" + " port %d queue %d\n", +@@ -892,7 +889,7 @@ sleep_until_rx_interrupt(int num) + return 0; + } + +-static void turn_on_intr(struct lcore_conf *qconf) ++static void turn_on_off_intr(struct lcore_conf *qconf, bool on) + { + int i; + struct lcore_rx_queue *rx_queue; +@@ -905,7 +902,10 @@ static void turn_on_intr(struct lcore_conf *qconf) + queue_id = rx_queue->queue_id; + + rte_spinlock_lock(&(locks[port_id])); +- rte_eth_dev_rx_intr_enable(port_id, queue_id); ++ if (on) ++ rte_eth_dev_rx_intr_enable(port_id, queue_id); ++ else ++ rte_eth_dev_rx_intr_disable(port_id, queue_id); + rte_spinlock_unlock(&(locks[port_id])); + } + } +@@ -1338,11 +1338,12 @@ main_loop(__attribute__((unused)) void *dummy) + */ + rte_delay_us(lcore_idle_hint); + else { +- /* suspend until rx interrupt trigges */ ++ /* suspend until rx interrupt triggers */ + if (intr_en) { +- turn_on_intr(qconf); ++ turn_on_off_intr(qconf, 1); + sleep_until_rx_interrupt( + qconf->n_rx_queue); ++ turn_on_off_intr(qconf, 0); + /** + * start receiving packets immediately + */ +@@ -1997,7 +1998,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); +diff --git a/dpdk/examples/l3fwd/main.c b/dpdk/examples/l3fwd/main.c +index 4dea12a653..3a8ec5a7f2 100644 +--- a/dpdk/examples/l3fwd/main.c ++++ b/dpdk/examples/l3fwd/main.c +@@ -747,7 +747,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps -%s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/link_status_interrupt/main.c b/dpdk/examples/link_status_interrupt/main.c +index a924aa2313..72f86e502f 100644 +--- a/dpdk/examples/link_status_interrupt/main.c ++++ b/dpdk/examples/link_status_interrupt/main.c +@@ -500,7 +500,7 @@ check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/multi_process/client_server_mp/mp_server/init.c b/dpdk/examples/multi_process/client_server_mp/mp_server/init.c +index ad9f46f0aa..c2ec07ac65 100644 +--- a/dpdk/examples/multi_process/client_server_mp/mp_server/init.c ++++ b/dpdk/examples/multi_process/client_server_mp/mp_server/init.c +@@ -209,7 +209,7 @@ check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) + "Mbps - %s\n", ports->id[portid], + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", + (uint8_t)ports->id[portid]); +diff --git a/dpdk/examples/multi_process/symmetric_mp/main.c b/dpdk/examples/multi_process/symmetric_mp/main.c +index 7f491452a7..c5cd8825e5 100644 +--- a/dpdk/examples/multi_process/symmetric_mp/main.c ++++ b/dpdk/examples/multi_process/symmetric_mp/main.c +@@ -389,7 +389,7 @@ check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/ntb/ntb_fwd.c b/dpdk/examples/ntb/ntb_fwd.c +index c914256dd4..17eedcf0b8 100644 +--- a/dpdk/examples/ntb/ntb_fwd.c ++++ b/dpdk/examples/ntb/ntb_fwd.c +@@ -19,6 +19,7 @@ + #include <rte_lcore.h> + #include <rte_cycles.h> + #include <rte_pmd_ntb.h> ++#include <rte_mbuf_pool_ops.h> + + /* Per-port statistics struct */ + struct ntb_port_statistics { +@@ -1256,6 +1257,11 @@ ntb_mbuf_pool_create(uint16_t mbuf_seg_size, uint32_t nb_mbuf, + if (mp == NULL) + return NULL; + ++ if (rte_mempool_set_ops_byname(mp, rte_mbuf_best_mempool_ops(), NULL)) { ++ printf("error setting mempool handler\n"); ++ goto fail; ++ } ++ + memset(&mbp_priv, 0, sizeof(mbp_priv)); + mbp_priv.mbuf_data_room_size = mbuf_seg_size; + mbp_priv.mbuf_priv_size = 0; +diff --git a/dpdk/examples/performance-thread/l3fwd-thread/main.c b/dpdk/examples/performance-thread/l3fwd-thread/main.c +index ad540fd842..f58a70b77f 100644 +--- a/dpdk/examples/performance-thread/l3fwd-thread/main.c ++++ b/dpdk/examples/performance-thread/l3fwd-thread/main.c +@@ -3457,7 +3457,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/qos_sched/cfg_file.c b/dpdk/examples/qos_sched/cfg_file.c +index 5714c3f36d..f078e4f7de 100644 +--- a/dpdk/examples/qos_sched/cfg_file.c ++++ b/dpdk/examples/qos_sched/cfg_file.c +@@ -20,6 +20,9 @@ + * for new entries do we add in */ + #define CFG_ALLOC_ENTRY_BATCH 16 + ++uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE]; ++uint32_t n_active_queues; ++ + int + cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port_params) + { +diff --git a/dpdk/examples/qos_sched/init.c b/dpdk/examples/qos_sched/init.c +index 0a17e0d4d5..9626c15b81 100644 +--- a/dpdk/examples/qos_sched/init.c ++++ b/dpdk/examples/qos_sched/init.c +@@ -164,7 +164,7 @@ app_init_port(uint16_t portid, struct rte_mempool *mp) + printf(" Link Up - speed %u Mbps - %s\n", + (uint32_t) link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + } else { + printf(" Link Down\n"); + } +diff --git a/dpdk/examples/qos_sched/main.h b/dpdk/examples/qos_sched/main.h +index baa2b3eadc..23bc418d97 100644 +--- a/dpdk/examples/qos_sched/main.h ++++ b/dpdk/examples/qos_sched/main.h +@@ -148,8 +148,8 @@ extern struct burst_conf burst_conf; + extern struct ring_thresh rx_thresh; + extern struct ring_thresh tx_thresh; + +-uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE]; +-uint32_t n_active_queues; ++extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE]; ++extern uint32_t n_active_queues; + + extern struct rte_sched_port_params port_params; + extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS]; +diff --git a/dpdk/examples/server_node_efd/server/init.c b/dpdk/examples/server_node_efd/server/init.c +index 00e2e40599..378a74fa5c 100644 +--- a/dpdk/examples/server_node_efd/server/init.c ++++ b/dpdk/examples/server_node_efd/server/init.c +@@ -272,7 +272,7 @@ check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) + info->id[portid], + link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", + info->id[portid]); +diff --git a/dpdk/examples/tep_termination/vxlan_setup.c b/dpdk/examples/tep_termination/vxlan_setup.c +index eca119a728..4b44ccc143 100644 +--- a/dpdk/examples/tep_termination/vxlan_setup.c ++++ b/dpdk/examples/tep_termination/vxlan_setup.c +@@ -194,8 +194,6 @@ vxlan_port_init(uint16_t port, struct rte_mempool *mbuf_pool) + ports_eth_addr[port].addr_bytes[5]); + + if (tso_segsz != 0) { +- struct rte_eth_dev_info dev_info; +- rte_eth_dev_info_get(port, &dev_info); + if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) == 0) + RTE_LOG(WARNING, PORT, + "hardware TSO offload is not supported\n"); +diff --git a/dpdk/examples/vhost_blk/vhost_blk.c b/dpdk/examples/vhost_blk/vhost_blk.c +index 3182a488bb..b757c9228b 100644 +--- a/dpdk/examples/vhost_blk/vhost_blk.c ++++ b/dpdk/examples/vhost_blk/vhost_blk.c +@@ -31,6 +31,8 @@ + (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \ + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)) + ++struct vhost_blk_ctrlr *g_vhost_ctrlr; ++ + /* Path to folder where character device will be created. Can be set by user. 
*/ + static char dev_pathname[PATH_MAX] = ""; + static sem_t exit_sem; +@@ -856,6 +858,7 @@ new_device(int vid) + ctrlr->bdev->vid, i, + &blk_vq->last_avail_idx, + &blk_vq->last_used_idx); ++ assert(ret == 0); + + blk_vq->avail_wrap_counter = blk_vq->last_avail_idx & + (1 << 15); +@@ -993,11 +996,7 @@ vhost_blk_ctrlr_construct(const char *ctrlr_name) + } + snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name); + +- if (access(dev_pathname, F_OK) != -1) { +- if (unlink(dev_pathname) != 0) +- rte_exit(EXIT_FAILURE, "Cannot remove %s.\n", +- dev_pathname); +- } ++ unlink(dev_pathname); + + if (rte_vhost_driver_register(dev_pathname, 0) != 0) { + fprintf(stderr, "socket %s already exists\n", dev_pathname); +@@ -1040,8 +1039,7 @@ signal_handler(__rte_unused int signum) + { + struct vhost_blk_ctrlr *ctrlr; + +- if (access(dev_pathname, F_OK) == 0) +- unlink(dev_pathname); ++ unlink(dev_pathname); + + if (g_should_stop != -1) { + g_should_stop = 1; +diff --git a/dpdk/examples/vhost_blk/vhost_blk.h b/dpdk/examples/vhost_blk/vhost_blk.h +index 933e2b7c57..17258d284b 100644 +--- a/dpdk/examples/vhost_blk/vhost_blk.h ++++ b/dpdk/examples/vhost_blk/vhost_blk.h +@@ -112,8 +112,8 @@ struct inflight_blk_task { + struct rte_vhost_inflight_info_packed *inflight_packed; + }; + +-struct vhost_blk_ctrlr *g_vhost_ctrlr; +-struct vhost_device_ops vhost_blk_device_ops; ++extern struct vhost_blk_ctrlr *g_vhost_ctrlr; ++extern struct vhost_device_ops vhost_blk_device_ops; + + int vhost_bdev_process_blk_commands(struct vhost_block_dev *bdev, + struct vhost_blk_task *task); +diff --git a/dpdk/examples/vm_power_manager/channel_manager.c b/dpdk/examples/vm_power_manager/channel_manager.c +index 4ac21f02c1..74a2a677e8 100644 +--- a/dpdk/examples/vm_power_manager/channel_manager.c ++++ b/dpdk/examples/vm_power_manager/channel_manager.c +@@ -4,7 +4,6 @@ + + #include <stdio.h> + #include <stdlib.h> +-#include <sys/un.h> + #include <fcntl.h> + #include <unistd.h> + #include <inttypes.h> +@@ -35,6 +34,8 @@ + + #define RTE_LOGTYPE_CHANNEL_MANAGER RTE_LOGTYPE_USER1 + ++struct libvirt_vm_info lvm_info[MAX_CLIENTS]; ++ + /* Global pointer to libvirt connection */ + static virConnectPtr global_vir_conn_ptr; + +diff --git a/dpdk/examples/vm_power_manager/channel_manager.h b/dpdk/examples/vm_power_manager/channel_manager.h +index 8284be0a18..e55376fcdb 100644 +--- a/dpdk/examples/vm_power_manager/channel_manager.h ++++ b/dpdk/examples/vm_power_manager/channel_manager.h +@@ -10,7 +10,7 @@ extern "C" { + #endif + + #include <linux/limits.h> +-#include <sys/un.h> ++#include <linux/un.h> + #include <rte_atomic.h> + #include <stdbool.h> + +@@ -26,11 +26,6 @@ extern "C" { + /* FIFO file name template */ + #define CHANNEL_MGR_FIFO_PATTERN_NAME "fifo" + +-#ifndef UNIX_PATH_MAX +-struct sockaddr_un _sockaddr_un; +-#define UNIX_PATH_MAX sizeof(_sockaddr_un.sun_path) +-#endif +- + #define MAX_CLIENTS 64 + #define MAX_VCPUS 20 + +@@ -41,7 +36,7 @@ struct libvirt_vm_info { + uint8_t num_cpus; + }; + +-struct libvirt_vm_info lvm_info[MAX_CLIENTS]; ++extern struct libvirt_vm_info lvm_info[MAX_CLIENTS]; + /* Communication Channel Status */ + enum channel_status { CHANNEL_MGR_CHANNEL_DISCONNECTED = 0, + CHANNEL_MGR_CHANNEL_CONNECTED, +diff --git a/dpdk/examples/vm_power_manager/channel_monitor.c b/dpdk/examples/vm_power_manager/channel_monitor.c +index 090c2a98b0..1d00a6cf6c 100644 +--- a/dpdk/examples/vm_power_manager/channel_monitor.c ++++ b/dpdk/examples/vm_power_manager/channel_monitor.c +@@ -868,7 +868,7 @@ 
process_request(struct channel_packet *pkt, struct channel_info *chan_info) + if (valid_unit) { + ret = send_ack_for_received_cmd(pkt, + chan_info, +- scale_res > 0 ? ++ scale_res >= 0 ? + CPU_POWER_CMD_ACK : + CPU_POWER_CMD_NACK); + if (ret < 0) +diff --git a/dpdk/examples/vm_power_manager/main.c b/dpdk/examples/vm_power_manager/main.c +index d39f044c1e..0409a832b5 100644 +--- a/dpdk/examples/vm_power_manager/main.c ++++ b/dpdk/examples/vm_power_manager/main.c +@@ -272,7 +272,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Mbps - %s\n", (uint16_t)portid, + (unsigned int)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", + (uint16_t)portid); +diff --git a/dpdk/examples/vm_power_manager/power_manager.c b/dpdk/examples/vm_power_manager/power_manager.c +index 7b4f4b3c4d..cd51d4741f 100644 +--- a/dpdk/examples/vm_power_manager/power_manager.c ++++ b/dpdk/examples/vm_power_manager/power_manager.c +@@ -6,7 +6,6 @@ + #include <stdlib.h> + #include <stdint.h> + #include <inttypes.h> +-#include <sys/un.h> + #include <fcntl.h> + #include <unistd.h> + #include <dirent.h> +diff --git a/dpdk/examples/vmdq/main.c b/dpdk/examples/vmdq/main.c +index 6e6fc91ec0..b082bc8c1c 100644 +--- a/dpdk/examples/vmdq/main.c ++++ b/dpdk/examples/vmdq/main.c +@@ -59,6 +59,7 @@ static uint32_t enabled_port_mask; + /* number of pools (if user does not specify any, 8 by default */ + static uint32_t num_queues = 8; + static uint32_t num_pools = 8; ++static uint8_t rss_enable; + + /* empty vmdq configuration structure. Filled in programatically */ + static const struct rte_eth_conf vmdq_conf_default = { +@@ -143,6 +144,13 @@ get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools) + (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf))); + (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf, + sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf))); ++ if (rss_enable) { ++ eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS; ++ eth_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ++ ETH_RSS_UDP | ++ ETH_RSS_TCP | ++ ETH_RSS_SCTP; ++ } + return 0; + } + +@@ -164,6 +172,7 @@ port_init(uint16_t port, struct rte_mempool *mbuf_pool) + uint16_t q; + uint16_t queues_per_pool; + uint32_t max_nb_pools; ++ uint64_t rss_hf_tmp; + + /* + * The max pool number from dev_info will be used to validate the pool +@@ -209,6 +218,17 @@ port_init(uint16_t port, struct rte_mempool *mbuf_pool) + if (!rte_eth_dev_is_valid_port(port)) + return -1; + ++ rss_hf_tmp = port_conf.rx_adv_conf.rss_conf.rss_hf; ++ port_conf.rx_adv_conf.rss_conf.rss_hf &= ++ dev_info.flow_type_rss_offloads; ++ if (port_conf.rx_adv_conf.rss_conf.rss_hf != rss_hf_tmp) { ++ printf("Port %u modified RSS hash function based on hardware support," ++ "requested:%#"PRIx64" configured:%#"PRIx64"\n", ++ port, ++ rss_hf_tmp, ++ port_conf.rx_adv_conf.rss_conf.rss_hf); ++ } ++ + /* + * Though in this example, we only receive packets from the first queue + * of each pool and send packets through first rte_lcore_count() tx +@@ -363,7 +383,8 @@ static void + vmdq_usage(const char *prgname) + { + printf("%s [EAL options] -- -p PORTMASK]\n" +- " --nb-pools NP: number of pools\n", ++ " --nb-pools NP: number of pools\n" ++ " --enable-rss: enable RSS (disabled by default)\n", + prgname); + } + +@@ -377,6 +398,7 @@ vmdq_parse_args(int argc, char **argv) + const char *prgname = argv[0]; + static struct option long_option[] = { + {"nb-pools", 
required_argument, NULL, 0}, ++ {"enable-rss", 0, NULL, 0}, + {NULL, 0, 0, 0} + }; + +@@ -394,11 +416,18 @@ vmdq_parse_args(int argc, char **argv) + } + break; + case 0: +- if (vmdq_parse_num_pools(optarg) == -1) { +- printf("invalid number of pools\n"); +- vmdq_usage(prgname); +- return -1; ++ if (!strcmp(long_option[option_index].name, ++ "nb-pools")) { ++ if (vmdq_parse_num_pools(optarg) == -1) { ++ printf("invalid number of pools\n"); ++ vmdq_usage(prgname); ++ return -1; ++ } + } ++ ++ if (!strcmp(long_option[option_index].name, ++ "enable-rss")) ++ rss_enable = 1; + break; + + default: +@@ -441,10 +470,11 @@ update_mac_address(struct rte_mbuf *m, unsigned dst_port) + static void + sighup_handler(int signum) + { +- unsigned q; +- for (q = 0; q < num_queues; q++) { +- if (q % (num_queues/num_pools) == 0) +- printf("\nPool %u: ", q/(num_queues/num_pools)); ++ unsigned int q = vmdq_queue_base; ++ for (; q < num_queues; q++) { ++ if ((q - vmdq_queue_base) % (num_vmdq_queues / num_pools) == 0) ++ printf("\nPool %u: ", (q - vmdq_queue_base) / ++ (num_vmdq_queues / num_pools)); + printf("%lu ", rxPackets[q]); + } + printf("\nFinished handling signal %d\n", signum); +diff --git a/dpdk/kernel/freebsd/contigmem/contigmem.c b/dpdk/kernel/freebsd/contigmem/contigmem.c +index 64e0a7fecd..abb76f241e 100644 +--- a/dpdk/kernel/freebsd/contigmem/contigmem.c ++++ b/dpdk/kernel/freebsd/contigmem/contigmem.c +@@ -165,9 +165,11 @@ contigmem_load() + + error: + for (i = 0; i < contigmem_num_buffers; i++) { +- if (contigmem_buffers[i].addr != NULL) ++ if (contigmem_buffers[i].addr != NULL) { + contigfree(contigmem_buffers[i].addr, + contigmem_buffer_size, M_CONTIGMEM); ++ contigmem_buffers[i].addr = NULL; ++ } + if (mtx_initialized(&contigmem_buffers[i].mtx)) + mtx_destroy(&contigmem_buffers[i].mtx); + } +diff --git a/dpdk/kernel/linux/kni/compat.h b/dpdk/kernel/linux/kni/compat.h +index 7109474ec5..9ee45dbf6f 100644 +--- a/dpdk/kernel/linux/kni/compat.h ++++ b/dpdk/kernel/linux/kni/compat.h +@@ -130,3 +130,7 @@ + #if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE + #define HAVE_IOVA_TO_KVA_MAPPING_SUPPORT + #endif ++ ++#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE ++#define HAVE_TX_TIMEOUT_TXQUEUE ++#endif +diff --git a/dpdk/kernel/linux/kni/kni_dev.h b/dpdk/kernel/linux/kni/kni_dev.h +index 5e75c6371f..ca5f92a47b 100644 +--- a/dpdk/kernel/linux/kni/kni_dev.h ++++ b/dpdk/kernel/linux/kni/kni_dev.h +@@ -32,7 +32,7 @@ + #define MBUF_BURST_SZ 32 + + /* Default carrier state for created KNI network interfaces */ +-extern uint32_t dflt_carrier; ++extern uint32_t kni_dflt_carrier; + + /** + * A structure describing the private information for a kni device. 
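Several hunks in this stretch — pipeline_common.h (fdata, cdata), qos_sched main.h (active_queues, n_active_queues), vhost_blk.h (g_vhost_ctrlr), channel_manager.h (lvm_info) — follow one pattern: a variable that used to be defined in a shared header is now only declared there with extern, and exactly one .c file carries the real definition. A minimal sketch of that pattern, using hypothetical names (foo.h, foo.c, foo_items) that do not appear anywhere in this patch:

/* foo.h - declaration only; every includer refers to one shared object */
#ifndef EXAMPLE_FOO_H
#define EXAMPLE_FOO_H

#include <stdint.h>

#define FOO_MAX_ITEMS 64

extern uint32_t foo_items[FOO_MAX_ITEMS];  /* defined once, in foo.c */
extern uint32_t foo_n_items;

#endif /* EXAMPLE_FOO_H */

/* foo.c - the single definition the linker resolves against */
#include "foo.h"

uint32_t foo_items[FOO_MAX_ITEMS];
uint32_t foo_n_items;

Without the extern, every translation unit that includes the header emits its own tentative definition of the object; toolchains that no longer merge tentative definitions (GCC 10 changed its default from -fcommon to -fno-common) then fail the link with multiple-definition errors, which appears to be what these hunks guard against. The kni_dev.h change just above is related but distinct: it renames the exported dflt_carrier symbol to kni_dflt_carrier so the module-scope global carries a module prefix and cannot collide with an identically named symbol elsewhere.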
+diff --git a/dpdk/kernel/linux/kni/kni_misc.c b/dpdk/kernel/linux/kni/kni_misc.c +index cda71bde08..2b464c4381 100644 +--- a/dpdk/kernel/linux/kni/kni_misc.c ++++ b/dpdk/kernel/linux/kni/kni_misc.c +@@ -39,7 +39,7 @@ static uint32_t multiple_kthread_on; + + /* Default carrier state for created KNI network interfaces */ + static char *carrier; +-uint32_t dflt_carrier; ++uint32_t kni_dflt_carrier; + + #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */ + +@@ -554,14 +554,14 @@ static int __init + kni_parse_carrier_state(void) + { + if (!carrier) { +- dflt_carrier = 0; ++ kni_dflt_carrier = 0; + return 0; + } + + if (strcmp(carrier, "off") == 0) +- dflt_carrier = 0; ++ kni_dflt_carrier = 0; + else if (strcmp(carrier, "on") == 0) +- dflt_carrier = 1; ++ kni_dflt_carrier = 1; + else + return -1; + +@@ -588,7 +588,7 @@ kni_init(void) + return -EINVAL; + } + +- if (dflt_carrier == 0) ++ if (kni_dflt_carrier == 0) + pr_debug("Default carrier state set to off.\n"); + else + pr_debug("Default carrier state set to on.\n"); +diff --git a/dpdk/kernel/linux/kni/kni_net.c b/dpdk/kernel/linux/kni/kni_net.c +index 1ba9b1b99f..c82c881a2c 100644 +--- a/dpdk/kernel/linux/kni/kni_net.c ++++ b/dpdk/kernel/linux/kni/kni_net.c +@@ -158,7 +158,7 @@ kni_net_open(struct net_device *dev) + struct kni_dev *kni = netdev_priv(dev); + + netif_start_queue(dev); +- if (dflt_carrier == 1) ++ if (kni_dflt_carrier == 1) + netif_carrier_on(dev); + else + netif_carrier_off(dev); +@@ -623,8 +623,13 @@ kni_net_rx(struct kni_dev *kni) + /* + * Deal with a transmit timeout. + */ ++#ifdef HAVE_TX_TIMEOUT_TXQUEUE ++static void ++kni_net_tx_timeout(struct net_device *dev, unsigned int txqueue) ++#else + static void + kni_net_tx_timeout(struct net_device *dev) ++#endif + { + pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies, + jiffies - dev_trans_start(dev)); +diff --git a/dpdk/kernel/linux/kni/meson.build b/dpdk/kernel/linux/kni/meson.build +index 955eec9496..f93e97fa09 100644 +--- a/dpdk/kernel/linux/kni/meson.build ++++ b/dpdk/kernel/linux/kni/meson.build +@@ -23,7 +23,6 @@ custom_target('rte_kni', + ' -I' + meson.current_source_dir(), + 'modules'], + depends: kni_mkfile, +- console: true, + install: true, + install_dir: kernel_dir + '/extra/dpdk', + build_by_default: get_option('enable_kmods')) +diff --git a/dpdk/lib/Makefile b/dpdk/lib/Makefile +index 46b91ae1a4..2cbb096f12 100644 +--- a/dpdk/lib/Makefile ++++ b/dpdk/lib/Makefile +@@ -113,7 +113,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_BPF) += librte_bpf + DEPDIRS-librte_bpf := librte_eal librte_mempool librte_mbuf librte_ethdev + DIRS-$(CONFIG_RTE_LIBRTE_IPSEC) += librte_ipsec + DEPDIRS-librte_ipsec := librte_eal librte_mbuf librte_cryptodev librte_security \ +- librte_net ++ librte_net librte_hash + DIRS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += librte_telemetry + DEPDIRS-librte_telemetry := librte_eal librte_metrics librte_ethdev + DIRS-$(CONFIG_RTE_LIBRTE_RCU) += librte_rcu +diff --git a/dpdk/lib/librte_acl/acl_bld.c b/dpdk/lib/librte_acl/acl_bld.c +index b06bbe9207..d1f920b09c 100644 +--- a/dpdk/lib/librte_acl/acl_bld.c ++++ b/dpdk/lib/librte_acl/acl_bld.c +@@ -778,9 +778,8 @@ acl_build_reset(struct rte_acl_ctx *ctx) + } + + static void +-acl_gen_range(struct acl_build_context *context, +- const uint8_t *hi, const uint8_t *lo, int size, int level, +- struct rte_acl_node *root, struct rte_acl_node *end) ++acl_gen_full_range(struct acl_build_context *context, struct rte_acl_node *root, ++ struct rte_acl_node *end, int size, int level) + { + struct rte_acl_node *node, 
*prev; + uint32_t n; +@@ -788,10 +787,71 @@ acl_gen_range(struct acl_build_context *context, + prev = root; + for (n = size - 1; n > 0; n--) { + node = acl_alloc_node(context, level++); +- acl_add_ptr_range(context, prev, node, lo[n], hi[n]); ++ acl_add_ptr_range(context, prev, node, 0, UINT8_MAX); + prev = node; + } +- acl_add_ptr_range(context, prev, end, lo[0], hi[0]); ++ acl_add_ptr_range(context, prev, end, 0, UINT8_MAX); ++} ++ ++static void ++acl_gen_range_mdl(struct acl_build_context *context, struct rte_acl_node *root, ++ struct rte_acl_node *end, uint8_t lo, uint8_t hi, int size, int level) ++{ ++ struct rte_acl_node *node; ++ ++ node = acl_alloc_node(context, level++); ++ acl_add_ptr_range(context, root, node, lo, hi); ++ acl_gen_full_range(context, node, end, size - 1, level); ++} ++ ++static void ++acl_gen_range_low(struct acl_build_context *context, struct rte_acl_node *root, ++ struct rte_acl_node *end, const uint8_t *lo, int size, int level) ++{ ++ struct rte_acl_node *node; ++ uint32_t n; ++ ++ n = size - 1; ++ if (n == 0) { ++ acl_add_ptr_range(context, root, end, lo[0], UINT8_MAX); ++ return; ++ } ++ ++ node = acl_alloc_node(context, level++); ++ acl_add_ptr_range(context, root, node, lo[n], lo[n]); ++ ++ /* generate lower-bound sub-trie */ ++ acl_gen_range_low(context, node, end, lo, n, level); ++ ++ /* generate middle sub-trie */ ++ if (n > 1 && lo[n - 1] != UINT8_MAX) ++ acl_gen_range_mdl(context, node, end, lo[n - 1] + 1, UINT8_MAX, ++ n, level); ++} ++ ++static void ++acl_gen_range_high(struct acl_build_context *context, struct rte_acl_node *root, ++ struct rte_acl_node *end, const uint8_t *hi, int size, int level) ++{ ++ struct rte_acl_node *node; ++ uint32_t n; ++ ++ n = size - 1; ++ if (n == 0) { ++ acl_add_ptr_range(context, root, end, 0, hi[0]); ++ return; ++ } ++ ++ node = acl_alloc_node(context, level++); ++ acl_add_ptr_range(context, root, node, hi[n], hi[n]); ++ ++ /* generate upper-bound sub-trie */ ++ acl_gen_range_high(context, node, end, hi, n, level); ++ ++ /* generate middle sub-trie */ ++ if (n > 1 && hi[n - 1] != 0) ++ acl_gen_range_mdl(context, node, end, 0, hi[n - 1] - 1, ++ n, level); + } + + static struct rte_acl_node * +@@ -799,52 +859,56 @@ acl_gen_range_trie(struct acl_build_context *context, + const void *min, const void *max, + int size, int level, struct rte_acl_node **pend) + { +- int32_t n; +- struct rte_acl_node *root; +- const uint8_t *lo = min; +- const uint8_t *hi = max; ++ int32_t k, n; ++ uint8_t hi_ff, lo_00; ++ struct rte_acl_node *node, *prev, *root; ++ const uint8_t *lo; ++ const uint8_t *hi; ++ ++ lo = min; ++ hi = max; + +- *pend = acl_alloc_node(context, level+size); ++ *pend = acl_alloc_node(context, level + size); + root = acl_alloc_node(context, level++); ++ prev = root; + +- if (lo[size - 1] == hi[size - 1]) { +- acl_gen_range(context, hi, lo, size, level, root, *pend); +- } else { +- uint8_t limit_lo[64]; +- uint8_t limit_hi[64]; +- uint8_t hi_ff = UINT8_MAX; +- uint8_t lo_00 = 0; ++ /* build common sub-trie till possible */ ++ for (n = size - 1; n > 0 && lo[n] == hi[n]; n--) { ++ node = acl_alloc_node(context, level++); ++ acl_add_ptr_range(context, prev, node, lo[n], hi[n]); ++ prev = node; ++ } + +- memset(limit_lo, 0, RTE_DIM(limit_lo)); +- memset(limit_hi, UINT8_MAX, RTE_DIM(limit_hi)); ++ /* no branch needed, just one sub-trie */ ++ if (n == 0) { ++ acl_add_ptr_range(context, prev, *pend, lo[0], hi[0]); ++ return root; ++ } + +- for (n = size - 2; n >= 0; n--) { +- hi_ff = (uint8_t)(hi_ff & hi[n]); +- lo_00 = 
(uint8_t)(lo_00 | lo[n]); +- } ++ /* gather information about divirgent paths */ ++ lo_00 = 0; ++ hi_ff = UINT8_MAX; ++ for (k = n - 1; k >= 0; k--) { ++ hi_ff &= hi[k]; ++ lo_00 |= lo[k]; ++ } + +- if (hi_ff != UINT8_MAX) { +- limit_lo[size - 1] = hi[size - 1]; +- acl_gen_range(context, hi, limit_lo, size, level, +- root, *pend); +- } ++ /* generate left (lower-bound) sub-trie */ ++ if (lo_00 != 0) ++ acl_gen_range_low(context, prev, *pend, lo, n + 1, level); + +- if (lo_00 != 0) { +- limit_hi[size - 1] = lo[size - 1]; +- acl_gen_range(context, limit_hi, lo, size, level, +- root, *pend); +- } ++ /* generate right (upper-bound) sub-trie */ ++ if (hi_ff != UINT8_MAX) ++ acl_gen_range_high(context, prev, *pend, hi, n + 1, level); + +- if (hi[size - 1] - lo[size - 1] > 1 || +- lo_00 == 0 || +- hi_ff == UINT8_MAX) { +- limit_lo[size-1] = (uint8_t)(lo[size-1] + (lo_00 != 0)); +- limit_hi[size-1] = (uint8_t)(hi[size-1] - +- (hi_ff != UINT8_MAX)); +- acl_gen_range(context, limit_hi, limit_lo, size, +- level, root, *pend); +- } ++ /* generate sub-trie in the middle */ ++ if (lo[n] + 1 != hi[n] || lo_00 == 0 || hi_ff == UINT8_MAX) { ++ lo_00 = lo[n] + (lo_00 != 0); ++ hi_ff = hi[n] - (hi_ff != UINT8_MAX); ++ acl_gen_range_mdl(context, prev, *pend, lo_00, hi_ff, ++ n + 1, level); + } ++ + return root; + } + +diff --git a/dpdk/lib/librte_bbdev/rte_bbdev.h b/dpdk/lib/librte_bbdev/rte_bbdev.h +index 591fb7914a..1f58a0762f 100644 +--- a/dpdk/lib/librte_bbdev/rte_bbdev.h ++++ b/dpdk/lib/librte_bbdev/rte_bbdev.h +@@ -440,21 +440,21 @@ TAILQ_HEAD(rte_bbdev_cb_list, rte_bbdev_callback); + * these fields, but should only write to the *_ops fields. + */ + struct __rte_cache_aligned rte_bbdev { +- /**< Enqueue encode function */ ++ /** Enqueue encode function */ + rte_bbdev_enqueue_enc_ops_t enqueue_enc_ops; +- /**< Enqueue decode function */ ++ /** Enqueue decode function */ + rte_bbdev_enqueue_dec_ops_t enqueue_dec_ops; +- /**< Dequeue encode function */ ++ /** Dequeue encode function */ + rte_bbdev_dequeue_enc_ops_t dequeue_enc_ops; +- /**< Dequeue decode function */ ++ /** Dequeue decode function */ + rte_bbdev_dequeue_dec_ops_t dequeue_dec_ops; +- /**< Enqueue encode function */ ++ /** Enqueue encode function */ + rte_bbdev_enqueue_enc_ops_t enqueue_ldpc_enc_ops; +- /**< Enqueue decode function */ ++ /** Enqueue decode function */ + rte_bbdev_enqueue_dec_ops_t enqueue_ldpc_dec_ops; +- /**< Dequeue encode function */ ++ /** Dequeue encode function */ + rte_bbdev_dequeue_enc_ops_t dequeue_ldpc_enc_ops; +- /**< Dequeue decode function */ ++ /** Dequeue decode function */ + rte_bbdev_dequeue_dec_ops_t dequeue_ldpc_dec_ops; + const struct rte_bbdev_ops *dev_ops; /**< Functions exported by PMD */ + struct rte_bbdev_data *data; /**< Pointer to device data */ +diff --git a/dpdk/lib/librte_bbdev/rte_bbdev_op.h b/dpdk/lib/librte_bbdev/rte_bbdev_op.h +index 1e119a757b..6e43495fb4 100644 +--- a/dpdk/lib/librte_bbdev/rte_bbdev_op.h ++++ b/dpdk/lib/librte_bbdev/rte_bbdev_op.h +@@ -389,12 +389,12 @@ struct rte_bbdev_op_turbo_dec { + */ + uint8_t num_maps; + +- /**< [0 - TB : 1 - CB] */ ++ /** [0 - TB : 1 - CB] */ + uint8_t code_block_mode; + union { +- /**< Struct which stores Code Block specific parameters */ ++ /** Struct which stores Code Block specific parameters */ + struct rte_bbdev_op_dec_turbo_cb_params cb_params; +- /**< Struct which stores Transport Block specific parameters */ ++ /** Struct which stores Transport Block specific parameters */ + struct rte_bbdev_op_dec_turbo_tb_params tb_params; + }; + }; +@@ 
-545,7 +545,7 @@ struct rte_bbdev_op_enc_turbo_tb_params { + * the Turbo operation when r >= C-, [K:3*Kpi] + */ + uint16_t ncb_pos; +- /**< The index of the first CB in the inbound mbuf data, default is 0 */ ++ /** The index of the first CB in the inbound mbuf data, default is 0 */ + uint8_t r; + }; + +@@ -744,11 +744,11 @@ enum { + + /** Structure specifying a single encode operation */ + struct rte_bbdev_enc_op { +- /**< Status of operation that was performed */ ++ /** Status of operation that was performed */ + int status; +- /**< Mempool which op instance is in */ ++ /** Mempool which op instance is in */ + struct rte_mempool *mempool; +- /**< Opaque pointer for user data */ ++ /** Opaque pointer for user data */ + void *opaque_data; + union { + /** Contains turbo decoder specific parameters */ +@@ -785,7 +785,7 @@ struct rte_bbdev_op_cap { + } cap; /**< Operation-type specific capabilities */ + }; + +-/**< @internal Private data structure stored with operation pool. */ ++/** @internal Private data structure stored with operation pool. */ + struct rte_bbdev_op_pool_private { + enum rte_bbdev_op_type type; /**< Type of operations in a pool */ + }; +diff --git a/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h b/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h +index 24ddcee7af..237e3361d7 100644 +--- a/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h ++++ b/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h +@@ -146,18 +146,18 @@ typedef int (*rte_bbdev_queue_intr_disable_t)(struct rte_bbdev *dev, + * fields are for non-vital operations + */ + struct rte_bbdev_ops { +- /**< Allocate and configure device memory. Optional. */ ++ /** Allocate and configure device memory. Optional. */ + rte_bbdev_setup_queues_t setup_queues; +- /**< Configure interrupts. Optional. */ ++ /** Configure interrupts. Optional. */ + rte_bbdev_intr_enable_t intr_enable; +- /**< Start device. Optional. */ ++ /** Start device. Optional. */ + rte_bbdev_start_t start; +- /**< Stop device. Optional. */ ++ /** Stop device. Optional. */ + rte_bbdev_stop_t stop; +- /**< Close device. Optional. */ ++ /** Close device. Optional. */ + rte_bbdev_close_t close; + +- /**< Get device info. Required. */ ++ /** Get device info. Required. */ + rte_bbdev_info_get_t info_get; + /** Get device statistics. Optional. */ + rte_bbdev_stats_get_t stats_get; +@@ -170,7 +170,7 @@ struct rte_bbdev_ops { + rte_bbdev_queue_release_t queue_release; + /** Start a queue. Optional. */ + rte_bbdev_queue_start_t queue_start; +- /**< Stop a queue pair. Optional. */ ++ /** Stop a queue pair. Optional. */ + rte_bbdev_queue_stop_t queue_stop; + + /** Enable queue interrupt. 
Optional */ +diff --git a/dpdk/lib/librte_bpf/meson.build b/dpdk/lib/librte_bpf/meson.build +index 13fc02db38..52cfaf9ac2 100644 +--- a/dpdk/lib/librte_bpf/meson.build ++++ b/dpdk/lib/librte_bpf/meson.build +@@ -14,7 +14,7 @@ elif dpdk_conf.has('RTE_ARCH_ARM64') + sources += files('bpf_jit_arm64.c') + endif + +-install_headers = files('bpf_def.h', ++install_headers('bpf_def.h', + 'rte_bpf.h', + 'rte_bpf_ethdev.h') + +diff --git a/dpdk/lib/librte_cfgfile/rte_cfgfile_version.map b/dpdk/lib/librte_cfgfile/rte_cfgfile_version.map +index 906eee96bf..22c999fe16 100644 +--- a/dpdk/lib/librte_cfgfile/rte_cfgfile_version.map ++++ b/dpdk/lib/librte_cfgfile/rte_cfgfile_version.map +@@ -15,6 +15,7 @@ DPDK_20.0 { + rte_cfgfile_section_entries; + rte_cfgfile_section_entries_by_index; + rte_cfgfile_section_num_entries; ++ rte_cfgfile_section_num_entries_by_index; + rte_cfgfile_sections; + rte_cfgfile_set_entry; + +diff --git a/dpdk/lib/librte_cryptodev/rte_crypto_sym.h b/dpdk/lib/librte_cryptodev/rte_crypto_sym.h +index ffa038dc40..4e05c7c6ac 100644 +--- a/dpdk/lib/librte_cryptodev/rte_crypto_sym.h ++++ b/dpdk/lib/librte_cryptodev/rte_crypto_sym.h +@@ -208,9 +208,12 @@ enum rte_crypto_auth_algorithm { + /**< HMAC using MD5 algorithm */ + + RTE_CRYPTO_AUTH_SHA1, +- /**< 128 bit SHA algorithm. */ ++ /**< 160 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA1_HMAC, +- /**< HMAC using 128 bit SHA algorithm. */ ++ /**< HMAC using 160 bit SHA algorithm. ++ * HMAC-SHA-1-96 can be generated by setting ++ * digest_length to 12 bytes in auth/aead xforms. ++ */ + RTE_CRYPTO_AUTH_SHA224, + /**< 224 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA224_HMAC, +diff --git a/dpdk/lib/librte_cryptodev/rte_cryptodev.c b/dpdk/lib/librte_cryptodev/rte_cryptodev.c +index 89aa2ed3e2..ed9de3eb92 100644 +--- a/dpdk/lib/librte_cryptodev/rte_cryptodev.c ++++ b/dpdk/lib/librte_cryptodev/rte_cryptodev.c +@@ -491,6 +491,8 @@ rte_cryptodev_get_feature_name(uint64_t flag) + return "RSA_PRIV_OP_KEY_QT"; + case RTE_CRYPTODEV_FF_DIGEST_ENCRYPTED: + return "DIGEST_ENCRYPTED"; ++ case RTE_CRYPTODEV_FF_ASYM_SESSIONLESS: ++ return "ASYM_SESSIONLESS"; + default: + return NULL; + } +@@ -525,7 +527,8 @@ rte_cryptodev_pmd_get_named_dev(const char *name) + static inline uint8_t + rte_cryptodev_is_valid_device_data(uint8_t dev_id) + { +- if (rte_crypto_devices[dev_id].data == NULL) ++ if (dev_id >= RTE_CRYPTO_MAX_DEVS || ++ rte_crypto_devices[dev_id].data == NULL) + return 0; + + return 1; +@@ -617,8 +620,9 @@ rte_cryptodev_devices_get(const char *driver_name, uint8_t *devices, + void * + rte_cryptodev_get_sec_ctx(uint8_t dev_id) + { +- if (rte_crypto_devices[dev_id].feature_flags & +- RTE_CRYPTODEV_FF_SECURITY) ++ if (dev_id < RTE_CRYPTO_MAX_DEVS && ++ (rte_crypto_devices[dev_id].feature_flags & ++ RTE_CRYPTODEV_FF_SECURITY)) + return rte_crypto_devices[dev_id].security_ctx; + + return NULL; +@@ -789,6 +793,11 @@ rte_cryptodev_queue_pair_count(uint8_t dev_id) + { + struct rte_cryptodev *dev; + ++ if (!rte_cryptodev_is_valid_device_data(dev_id)) { ++ CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); ++ return 0; ++ } ++ + dev = &rte_crypto_devices[dev_id]; + return dev->data->nb_queue_pairs; + } +@@ -1254,6 +1263,11 @@ rte_cryptodev_sym_session_init(uint8_t dev_id, + uint8_t index; + int ret; + ++ if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { ++ CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); ++ return -EINVAL; ++ } ++ + dev = rte_cryptodev_pmd_get_dev(dev_id); + + if (sess == NULL || xforms == NULL || dev == NULL) +@@ -1293,6 +1307,11 @@ 
rte_cryptodev_asym_session_init(uint8_t dev_id, + uint8_t index; + int ret; + ++ if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { ++ CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); ++ return -EINVAL; ++ } ++ + dev = rte_cryptodev_pmd_get_dev(dev_id); + + if (sess == NULL || xforms == NULL || dev == NULL) +@@ -1428,6 +1447,11 @@ rte_cryptodev_sym_session_clear(uint8_t dev_id, + struct rte_cryptodev *dev; + uint8_t driver_id; + ++ if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { ++ CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); ++ return -EINVAL; ++ } ++ + dev = rte_cryptodev_pmd_get_dev(dev_id); + + if (dev == NULL || sess == NULL) +@@ -1452,6 +1476,11 @@ rte_cryptodev_asym_session_clear(uint8_t dev_id, + { + struct rte_cryptodev *dev; + ++ if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { ++ CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); ++ return -EINVAL; ++ } ++ + dev = rte_cryptodev_pmd_get_dev(dev_id); + + if (dev == NULL || sess == NULL) +@@ -1754,8 +1783,14 @@ rte_cryptodev_driver_id_get(const char *name) + const char * + rte_cryptodev_name_get(uint8_t dev_id) + { +- struct rte_cryptodev *dev = rte_cryptodev_pmd_get_dev(dev_id); ++ struct rte_cryptodev *dev; + ++ if (!rte_cryptodev_is_valid_device_data(dev_id)) { ++ CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); ++ return NULL; ++ } ++ ++ dev = rte_cryptodev_pmd_get_dev(dev_id); + if (dev == NULL) + return NULL; + +diff --git a/dpdk/lib/librte_distributor/meson.build b/dpdk/lib/librte_distributor/meson.build +index 50b91887b5..266af64348 100644 +--- a/dpdk/lib/librte_distributor/meson.build ++++ b/dpdk/lib/librte_distributor/meson.build +@@ -9,7 +9,6 @@ else + endif + headers = files('rte_distributor.h') + deps += ['mbuf'] +-use_function_versioning = true + + # for clang 32-bit compiles we need libatomic for 64-bit atomic ops + if cc.get_id() == 'clang' and dpdk_conf.get('RTE_ARCH_64') == false +diff --git a/dpdk/lib/librte_distributor/rte_distributor.c b/dpdk/lib/librte_distributor/rte_distributor.c +index 6c5b0c86e8..1c047f065a 100644 +--- a/dpdk/lib/librte_distributor/rte_distributor.c ++++ b/dpdk/lib/librte_distributor/rte_distributor.c +@@ -8,7 +8,6 @@ + #include <rte_mbuf.h> + #include <rte_memory.h> + #include <rte_cycles.h> +-#include <rte_function_versioning.h> + #include <rte_memzone.h> + #include <rte_errno.h> + #include <rte_string_fns.h> +diff --git a/dpdk/lib/librte_distributor/rte_distributor_single.c b/dpdk/lib/librte_distributor/rte_distributor_single.c +index 91d8824c64..abaf7730c3 100644 +--- a/dpdk/lib/librte_distributor/rte_distributor_single.c ++++ b/dpdk/lib/librte_distributor/rte_distributor_single.c +@@ -9,7 +9,6 @@ + #include <rte_memory.h> + #include <rte_memzone.h> + #include <rte_errno.h> +-#include <rte_function_versioning.h> + #include <rte_string_fns.h> + #include <rte_eal_memconfig.h> + #include <rte_pause.h> +diff --git a/dpdk/lib/librte_eal/common/eal_common_fbarray.c b/dpdk/lib/librte_eal/common/eal_common_fbarray.c +index 1312f936b8..4f8f1af73c 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_fbarray.c ++++ b/dpdk/lib/librte_eal/common/eal_common_fbarray.c +@@ -1337,7 +1337,7 @@ fbarray_find_biggest(struct rte_fbarray *arr, unsigned int start, bool used, + */ + + /* the API's called are thread-safe, but something may still happen +- * inbetween the API calls, so lock the fbarray. all other API's are ++ * between the API calls, so lock the fbarray. all other API's are + * read-locking the fbarray, so read lock here is OK. 
+ */ + rte_rwlock_read_lock(&arr->rwlock); +diff --git a/dpdk/lib/librte_eal/common/eal_common_log.c b/dpdk/lib/librte_eal/common/eal_common_log.c +index c0efd5214f..975aea90db 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_log.c ++++ b/dpdk/lib/librte_eal/common/eal_common_log.c +@@ -302,7 +302,7 @@ rte_log_register_type_and_pick_level(const char *name, uint32_t level_def) + continue; + + if (opt_ll->pattern) { +- if (fnmatch(opt_ll->pattern, name, 0)) ++ if (fnmatch(opt_ll->pattern, name, 0) == 0) + level = opt_ll->level; + } else { + if (regexec(&opt_ll->re_match, name, 0, NULL, 0) == 0) +diff --git a/dpdk/lib/librte_eal/common/eal_common_memory.c b/dpdk/lib/librte_eal/common/eal_common_memory.c +index 4a9cc1f19a..cc7d54e0c7 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_memory.c ++++ b/dpdk/lib/librte_eal/common/eal_common_memory.c +@@ -97,7 +97,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size, + return NULL; + } + +- mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_READ, ++ mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE, + mmap_flags, -1, 0); + if (mapped_addr == MAP_FAILED && allow_shrink) + *size -= page_sz; +diff --git a/dpdk/lib/librte_eal/common/eal_common_options.c b/dpdk/lib/librte_eal/common/eal_common_options.c +index a7f9c5f9bd..f791e9671d 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_options.c ++++ b/dpdk/lib/librte_eal/common/eal_common_options.c +@@ -1039,7 +1039,7 @@ eal_parse_log_level(const char *arg) + if (regex) { + if (rte_log_set_level_regexp(regex, priority) < 0) { + fprintf(stderr, "cannot set log level %s,%d\n", +- pattern, priority); ++ regex, priority); + goto fail; + } + if (rte_log_save_regexp(regex, priority) < 0) +diff --git a/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h b/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h +index 859b09748c..f79718ce8c 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h ++++ b/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h +@@ -57,7 +57,7 @@ __rte_rdtsc_syscall(void) + * asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(29)); + * asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x8000000f)); + * +- * which is possible only from the priviledged mode (kernel space). ++ * which is possible only from the privileged mode (kernel space). 
+ */ + static inline uint64_t + __rte_rdtsc_pmccntr(void) +diff --git a/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h +index 68e7c73384..da557b6a10 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h ++++ b/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h +@@ -62,7 +62,7 @@ rte_rdtsc(void) + static inline uint64_t + rte_rdtsc_precise(void) + { +- rte_mb(); ++ asm volatile("isb" : : : "memory"); + return rte_rdtsc(); + } + +diff --git a/dpdk/lib/librte_eal/common/include/arch/ppc_64/meson.build b/dpdk/lib/librte_eal/common/include/arch/ppc_64/meson.build +index 00f9611768..7949c86258 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/ppc_64/meson.build ++++ b/dpdk/lib/librte_eal/common/include/arch/ppc_64/meson.build +@@ -2,6 +2,7 @@ + # Copyright(c) 2018 Luca Boccassi <bluca@debian.org> + + install_headers( ++ 'rte_altivec.h', + 'rte_atomic.h', + 'rte_byteorder.h', + 'rte_cpuflags.h', +diff --git a/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_altivec.h b/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_altivec.h +new file mode 100644 +index 0000000000..1551a94544 +--- /dev/null ++++ b/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_altivec.h +@@ -0,0 +1,22 @@ ++/* ++ * SPDX-License-Identifier: BSD-3-Clause ++ * Copyright (C) Mellanox 2020. ++ */ ++ ++#ifndef _RTE_ALTIVEC_H_ ++#define _RTE_ALTIVEC_H_ ++ ++/* To include altivec.h, GCC version must be >= 4.8 */ ++#include <altivec.h> ++ ++/* ++ * Compilation workaround for PPC64 when AltiVec is fully enabled, e.g. std=c11. ++ * Otherwise there would be a type conflict between stdbool and altivec. ++ */ ++#if defined(__PPC64__) && !defined(__APPLE_ALTIVEC__) ++#undef bool ++/* redefine as in stdbool.h */ ++#define bool _Bool ++#endif ++ ++#endif /* _RTE_ALTIVEC_H_ */ +diff --git a/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h b/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h +index 25311ba1d7..e63a1211a8 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h ++++ b/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h +@@ -8,8 +8,10 @@ + + #include <stdint.h> + #include <string.h> +-/*To include altivec.h, GCC version must >= 4.8 */ +-#include <altivec.h> ++ ++#include "rte_altivec.h" ++ ++#include "rte_common.h" + + #ifdef __cplusplus + extern "C" { +@@ -17,6 +19,11 @@ extern "C" { + + #include "generic/rte_memcpy.h" + ++#if (GCC_VERSION >= 90000 && GCC_VERSION < 90400) ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Warray-bounds" ++#endif ++ + static inline void + rte_mov16(uint8_t *dst, const uint8_t *src) + { +@@ -192,6 +199,10 @@ rte_memcpy_func(void *dst, const void *src, size_t n) + return ret; + } + ++#if (GCC_VERSION >= 90000 && GCC_VERSION < 90400) ++#pragma GCC diagnostic pop ++#endif ++ + #ifdef __cplusplus + } + #endif +diff --git a/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h b/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h +index 068c805b22..4caafd9d2b 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h ++++ b/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_vect.h +@@ -6,7 +6,8 @@ + #ifndef _RTE_VECT_PPC_64_H_ + #define _RTE_VECT_PPC_64_H_ + +-#include <altivec.h> ++#include "rte_altivec.h" ++ + #include "generic/rte_vect.h" + + #ifdef __cplusplus +diff --git a/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h +index 
148398f50a..b9dcd30aba 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h ++++ b/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h +@@ -55,7 +55,7 @@ extern "C" { + * + * As pointed by Java guys, that makes possible to use lock-prefixed + * instructions to get the same effect as mfence and on most modern HW +- * that gives a better perfomance then using mfence: ++ * that gives a better performance then using mfence: + * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ + * Basic idea is to use lock prefixed add with some dummy memory location + * as the destination. From their experiments 128B(2 cache lines) below +diff --git a/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +index ba44c4a328..9c67232df9 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h ++++ b/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +@@ -22,6 +22,11 @@ + extern "C" { + #endif + ++#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 100000) ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wstringop-overflow" ++#endif ++ + /** + * Copy bytes from one location to another. The locations must not overlap. + * +@@ -869,6 +874,10 @@ rte_memcpy(void *dst, const void *src, size_t n) + return rte_memcpy_generic(dst, src, n); + } + ++#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 100000) ++#pragma GCC diagnostic pop ++#endif ++ + #ifdef __cplusplus + } + #endif +diff --git a/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h b/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h +index 38e8cfd32b..9ca960932f 100644 +--- a/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h ++++ b/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h +@@ -93,9 +93,9 @@ + #define RTE_BE16(v) (rte_be16_t)(RTE_STATIC_BSWAP16(v)) + #define RTE_BE32(v) (rte_be32_t)(RTE_STATIC_BSWAP32(v)) + #define RTE_BE64(v) (rte_be64_t)(RTE_STATIC_BSWAP64(v)) +-#define RTE_LE16(v) (rte_be16_t)(v) +-#define RTE_LE32(v) (rte_be32_t)(v) +-#define RTE_LE64(v) (rte_be64_t)(v) ++#define RTE_LE16(v) (rte_le16_t)(v) ++#define RTE_LE32(v) (rte_le32_t)(v) ++#define RTE_LE64(v) (rte_le64_t)(v) + #else + #error Unsupported endianness. + #endif +diff --git a/dpdk/lib/librte_eal/common/include/rte_common.h b/dpdk/lib/librte_eal/common/include/rte_common.h +index 459d082d14..41e2778ec1 100644 +--- a/dpdk/lib/librte_eal/common/include/rte_common.h ++++ b/dpdk/lib/librte_eal/common/include/rte_common.h +@@ -347,7 +347,7 @@ typedef uint64_t rte_iova_t; + * The combined value. + */ + static inline uint32_t +-rte_combine32ms1b(register uint32_t x) ++rte_combine32ms1b(uint32_t x) + { + x |= x >> 1; + x |= x >> 2; +@@ -369,7 +369,7 @@ rte_combine32ms1b(register uint32_t x) + * The combined value. + */ + static inline uint64_t +-rte_combine64ms1b(register uint64_t v) ++rte_combine64ms1b(uint64_t v) + { + v |= v >> 1; + v |= v >> 2; +@@ -538,6 +538,9 @@ rte_bsf32_safe(uint64_t v, uint32_t *pos) + /** + * Return the rounded-up log2 of a integer. + * ++ * @note Contrary to the logarithm mathematical operation, ++ * rte_log2_u32(0) == 0 and not -inf. ++ * + * @param v + * The input parameter. + * @return +@@ -632,6 +635,9 @@ rte_fls_u64(uint64_t x) + /** + * Return the rounded-up log2 of a 64-bit integer. + * ++ * @note Contrary to the logarithm mathematical operation, ++ * rte_log2_u64(0) == 0 and not -inf. ++ * + * @param v + * The input parameter. 
+ * @return +diff --git a/dpdk/lib/librte_eal/common/include/rte_service.h b/dpdk/lib/librte_eal/common/include/rte_service.h +index d8701dd4cf..3a1c735c58 100644 +--- a/dpdk/lib/librte_eal/common/include/rte_service.h ++++ b/dpdk/lib/librte_eal/common/include/rte_service.h +@@ -104,12 +104,16 @@ int32_t rte_service_probe_capability(uint32_t id, uint32_t capability); + * Each core can be added or removed from running a specific service. This + * function enables or disables *lcore* to run *service_id*. + * +- * If multiple cores are enabled on a service, an atomic is used to ensure that +- * only one cores runs the service at a time. The exception to this is when ++ * If multiple cores are enabled on a service, a lock is used to ensure that ++ * only one core runs the service at a time. The exception to this is when + * a service indicates that it is multi-thread safe by setting the capability + * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set, + * the service function can be run on multiple threads at the same time. + * ++ * If the service is known to be mapped to a single lcore, setting the ++ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve ++ * better performance by avoiding the use of lock. ++ * + * @param service_id the service to apply the lcore to + * @param lcore The lcore that will be mapped to service + * @param enable Zero to unmap or disable the core, non-zero to enable +diff --git a/dpdk/lib/librte_eal/common/include/rte_service_component.h b/dpdk/lib/librte_eal/common/include/rte_service_component.h +index 16eab79eea..b75aba11b9 100644 +--- a/dpdk/lib/librte_eal/common/include/rte_service_component.h ++++ b/dpdk/lib/librte_eal/common/include/rte_service_component.h +@@ -43,7 +43,7 @@ struct rte_service_spec { + /** + * Register a new service. + * +- * A service represents a component that the requires CPU time periodically to ++ * A service represents a component that requires CPU time periodically to + * achieve its purpose. + * + * For example the eventdev SW PMD requires CPU cycles to perform its +@@ -56,6 +56,10 @@ struct rte_service_spec { + * *rte_service_component_runstate_set*, which indicates that the service + * component is ready to be executed. + * ++ * If the service is known to be mapped to a single lcore, setting the ++ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve ++ * better performance. ++ * + * @param spec The specification of the service to register + * @param[out] service_id A pointer to a uint32_t, which will be filled in + * during registration of the service. 
It is set to the integers +diff --git a/dpdk/lib/librte_eal/common/malloc_elem.c b/dpdk/lib/librte_eal/common/malloc_elem.c +index 885d00424b..51cdfc5d59 100644 +--- a/dpdk/lib/librte_eal/common/malloc_elem.c ++++ b/dpdk/lib/librte_eal/common/malloc_elem.c +@@ -171,7 +171,7 @@ malloc_elem_insert(struct malloc_elem *elem) + next_elem = NULL; + heap->last = elem; + } else { +- /* the new memory is somewhere inbetween start and end */ ++ /* the new memory is somewhere between start and end */ + uint64_t dist_from_start, dist_from_end; + + dist_from_end = RTE_PTR_DIFF(heap->last, elem); +diff --git a/dpdk/lib/librte_eal/common/malloc_heap.c b/dpdk/lib/librte_eal/common/malloc_heap.c +index 842eb9de75..bd5065698d 100644 +--- a/dpdk/lib/librte_eal/common/malloc_heap.c ++++ b/dpdk/lib/librte_eal/common/malloc_heap.c +@@ -241,6 +241,9 @@ heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size, + size = RTE_CACHE_LINE_ROUNDUP(size); + align = RTE_CACHE_LINE_ROUNDUP(align); + ++ /* roundup might cause an overflow */ ++ if (size == 0) ++ return NULL; + elem = find_suitable_element(heap, size, flags, align, bound, contig); + if (elem != NULL) { + elem = malloc_elem_alloc(elem, size, align, bound, contig); +diff --git a/dpdk/lib/librte_eal/common/rte_random.c b/dpdk/lib/librte_eal/common/rte_random.c +index 57ec8fb2b3..b7a089ac4f 100644 +--- a/dpdk/lib/librte_eal/common/rte_random.c ++++ b/dpdk/lib/librte_eal/common/rte_random.c +@@ -198,7 +198,7 @@ __rte_random_initial_seed(void) + return (uint64_t)rdseed_low | ((uint64_t)rdseed_high << 32); + #endif + /* second fallback: seed using rdtsc */ +- return rte_get_timer_cycles(); ++ return rte_get_tsc_cycles(); + } + + RTE_INIT(rte_rand_init) +diff --git a/dpdk/lib/librte_eal/common/rte_service.c b/dpdk/lib/librte_eal/common/rte_service.c +index 79235c03f8..d5dd32d8d9 100644 +--- a/dpdk/lib/librte_eal/common/rte_service.c ++++ b/dpdk/lib/librte_eal/common/rte_service.c +@@ -50,6 +50,10 @@ struct rte_service_spec_impl { + uint8_t internal_flags; + + /* per service statistics */ ++ /* Indicates how many cores the service is mapped to run on. ++ * It does not indicate the number of cores the service is running ++ * on currently. ++ */ + rte_atomic32_t num_mapped_cores; + uint64_t calls; + uint64_t cycles_spent; +@@ -122,6 +126,9 @@ rte_service_finalize(void) + if (!rte_service_library_initialized) + return; + ++ rte_service_lcore_reset_all(); ++ rte_eal_mp_wait_lcore(); ++ + rte_free(rte_services); + rte_free(lcore_states); + +@@ -137,6 +144,12 @@ service_valid(uint32_t id) + return !!(rte_services[id].internal_flags & SERVICE_F_REGISTERED); + } + ++static struct rte_service_spec_impl * ++service_get(uint32_t id) ++{ ++ return &rte_services[id]; ++} ++ + /* validate ID and retrieve service pointer, or return error value */ + #define SERVICE_VALID_GET_OR_ERR_RET(id, service, retval) do { \ + if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id)) \ +@@ -327,8 +340,8 @@ rte_service_runstate_get(uint32_t id) + } + + static inline void +-rte_service_runner_do_callback(struct rte_service_spec_impl *s, +- struct core_state *cs, uint32_t service_idx) ++service_runner_do_callback(struct rte_service_spec_impl *s, ++ struct core_state *cs, uint32_t service_idx) + { + void *userdata = s->spec.callback_userdata; + +@@ -344,12 +357,14 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s, + } + + +-static inline int32_t +-service_run(uint32_t i, struct core_state *cs, uint64_t service_mask) ++/* Expects the service 's' is valid. 
*/ ++static int32_t ++service_run(uint32_t i, struct core_state *cs, uint64_t service_mask, ++ struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe) + { +- if (!service_valid(i)) ++ if (!s) + return -EINVAL; +- struct rte_service_spec_impl *s = &rte_services[i]; ++ + if (s->comp_runstate != RUNSTATE_RUNNING || + s->app_runstate != RUNSTATE_RUNNING || + !(service_mask & (UINT64_C(1) << i))) { +@@ -359,19 +374,14 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask) + + cs->service_active_on_lcore[i] = 1; + +- /* check do we need cmpset, if MT safe or <= 1 core +- * mapped, atomic ops are not required. +- */ +- const int use_atomics = (service_mt_safe(s) == 0) && +- (rte_atomic32_read(&s->num_mapped_cores) > 1); +- if (use_atomics) { ++ if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) { + if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1)) + return -EBUSY; + +- rte_service_runner_do_callback(s, cs, i); ++ service_runner_do_callback(s, cs, i); + rte_atomic32_clear(&s->execute_lock); + } else +- rte_service_runner_do_callback(s, cs, i); ++ service_runner_do_callback(s, cs, i); + + return 0; + } +@@ -383,7 +393,7 @@ rte_service_may_be_active(uint32_t id) + int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE); + int i; + +- if (!service_valid(id)) ++ if (id >= RTE_SERVICE_NUM_MAX || !service_valid(id)) + return -EINVAL; + + for (i = 0; i < lcore_count; i++) { +@@ -397,49 +407,39 @@ rte_service_may_be_active(uint32_t id) + int32_t + rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe) + { +- /* run service on calling core, using all-ones as the service mask */ +- if (!service_valid(id)) +- return -EINVAL; +- + struct core_state *cs = &lcore_states[rte_lcore_id()]; +- struct rte_service_spec_impl *s = &rte_services[id]; ++ struct rte_service_spec_impl *s; + +- /* Atomically add this core to the mapped cores first, then examine if +- * we can run the service. This avoids a race condition between +- * checking the value, and atomically adding to the mapped count. +- */ +- if (serialize_mt_unsafe) +- rte_atomic32_inc(&s->num_mapped_cores); ++ SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + +- if (service_mt_safe(s) == 0 && +- rte_atomic32_read(&s->num_mapped_cores) > 1) { +- if (serialize_mt_unsafe) +- rte_atomic32_dec(&s->num_mapped_cores); +- return -EBUSY; +- } ++ /* Increment num_mapped_cores to reflect that this core is ++ * now mapped capable of running the service. 
++ */ ++ rte_atomic32_inc(&s->num_mapped_cores); + +- int ret = service_run(id, cs, UINT64_MAX); ++ int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe); + +- if (serialize_mt_unsafe) +- rte_atomic32_dec(&s->num_mapped_cores); ++ rte_atomic32_dec(&s->num_mapped_cores); + + return ret; + } + + static int32_t +-rte_service_runner_func(void *arg) ++service_runner_func(void *arg) + { + RTE_SET_USED(arg); + uint32_t i; + const int lcore = rte_lcore_id(); + struct core_state *cs = &lcore_states[lcore]; + +- while (lcore_states[lcore].runstate == RUNSTATE_RUNNING) { ++ while (cs->runstate == RUNSTATE_RUNNING) { + const uint64_t service_mask = cs->service_mask; + + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { ++ if (!service_valid(i)) ++ continue; + /* return value ignored as no change to code flow */ +- service_run(i, cs, service_mask); ++ service_run(i, cs, service_mask, service_get(i), 1); + } + + cs->loops++; +@@ -693,9 +693,9 @@ rte_service_lcore_start(uint32_t lcore) + /* set core to run state first, and then launch otherwise it will + * return immediately as runstate keeps it in the service poll loop + */ +- lcore_states[lcore].runstate = RUNSTATE_RUNNING; ++ cs->runstate = RUNSTATE_RUNNING; + +- int ret = rte_eal_remote_launch(rte_service_runner_func, 0, lcore); ++ int ret = rte_eal_remote_launch(service_runner_func, 0, lcore); + /* returns -EBUSY if the core is already launched, 0 on success */ + return ret; + } +@@ -774,13 +774,9 @@ rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id, + } + + static void +-rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s, +- uint64_t all_cycles, uint32_t reset) ++service_dump_one(FILE *f, struct rte_service_spec_impl *s, uint32_t reset) + { + /* avoid divide by zero */ +- if (all_cycles == 0) +- all_cycles = 1; +- + int calls = 1; + if (s->calls != 0) + calls = s->calls; +@@ -807,7 +803,7 @@ rte_service_attr_reset_all(uint32_t id) + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + int reset = 1; +- rte_service_dump_one(NULL, s, 0, reset); ++ service_dump_one(NULL, s, reset); + return 0; + } + +@@ -851,21 +847,13 @@ rte_service_dump(FILE *f, uint32_t id) + uint32_t i; + int print_one = (id != UINT32_MAX); + +- uint64_t total_cycles = 0; +- +- for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { +- if (!service_valid(i)) +- continue; +- total_cycles += rte_services[i].cycles_spent; +- } +- + /* print only the specified service */ + if (print_one) { + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + fprintf(f, "Service %s Summary\n", s->spec.name); + uint32_t reset = 0; +- rte_service_dump_one(f, s, total_cycles, reset); ++ service_dump_one(f, s, reset); + return 0; + } + +@@ -875,7 +863,7 @@ rte_service_dump(FILE *f, uint32_t id) + if (!service_valid(i)) + continue; + uint32_t reset = 0; +- rte_service_dump_one(f, &rte_services[i], total_cycles, reset); ++ service_dump_one(f, &rte_services[i], reset); + } + + fprintf(f, "Service Cores Summary\n"); +diff --git a/dpdk/lib/librte_eal/freebsd/eal/eal_interrupts.c b/dpdk/lib/librte_eal/freebsd/eal/eal_interrupts.c +index f6831b7902..3fee762be9 100644 +--- a/dpdk/lib/librte_eal/freebsd/eal/eal_interrupts.c ++++ b/dpdk/lib/librte_eal/freebsd/eal/eal_interrupts.c +@@ -83,9 +83,9 @@ int + rte_intr_callback_register(const struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, void *cb_arg) + { +- struct rte_intr_callback *callback = NULL; +- struct rte_intr_source *src = NULL; +- int ret, add_event; ++ struct rte_intr_callback *callback; ++ struct 
rte_intr_source *src; ++ int ret, add_event = 0; + + /* first do parameter checking */ + if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) { +@@ -98,47 +98,53 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle, + return -ENODEV; + } + +- /* allocate a new interrupt callback entity */ +- callback = calloc(1, sizeof(*callback)); +- if (callback == NULL) { +- RTE_LOG(ERR, EAL, "Can not allocate memory\n"); +- return -ENOMEM; +- } +- callback->cb_fn = cb; +- callback->cb_arg = cb_arg; +- callback->pending_delete = 0; +- callback->ucb_fn = NULL; +- + rte_spinlock_lock(&intr_lock); + +- /* check if there is at least one callback registered for the fd */ ++ /* find the source for this intr_handle */ + TAILQ_FOREACH(src, &intr_sources, next) { +- if (src->intr_handle.fd == intr_handle->fd) { +- /* we had no interrupts for this */ +- if (TAILQ_EMPTY(&src->callbacks)) +- add_event = 1; +- +- TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); +- ret = 0; ++ if (src->intr_handle.fd == intr_handle->fd) + break; +- } + } + +- /* no existing callbacks for this - add new source */ +- if (src == NULL) { +- src = calloc(1, sizeof(*src)); +- if (src == NULL) { ++ /* if this is an alarm interrupt and it already has a callback, ++ * then we don't want to create a new callback because the only ++ * thing on the list should be eal_alarm_callback() and we may ++ * be called just to reset the timer. ++ */ ++ if (src != NULL && src->intr_handle.type == RTE_INTR_HANDLE_ALARM && ++ !TAILQ_EMPTY(&src->callbacks)) { ++ callback = NULL; ++ } else { ++ /* allocate a new interrupt callback entity */ ++ callback = calloc(1, sizeof(*callback)); ++ if (callback == NULL) { + RTE_LOG(ERR, EAL, "Can not allocate memory\n"); + ret = -ENOMEM; + goto fail; +- } else { +- src->intr_handle = *intr_handle; +- TAILQ_INIT(&src->callbacks); +- TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); +- TAILQ_INSERT_TAIL(&intr_sources, src, next); +- add_event = 1; +- ret = 0; + } ++ callback->cb_fn = cb; ++ callback->cb_arg = cb_arg; ++ callback->pending_delete = 0; ++ callback->ucb_fn = NULL; ++ ++ if (src == NULL) { ++ src = calloc(1, sizeof(*src)); ++ if (src == NULL) { ++ RTE_LOG(ERR, EAL, "Can not allocate memory\n"); ++ ret = -ENOMEM; ++ goto fail; ++ } else { ++ src->intr_handle = *intr_handle; ++ TAILQ_INIT(&src->callbacks); ++ TAILQ_INSERT_TAIL(&intr_sources, src, next); ++ } ++ } ++ ++ /* we had no interrupts for this */ ++ if (TAILQ_EMPTY(&src->callbacks)) ++ add_event = 1; ++ ++ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); + } + + /* add events to the queue. 
timer events are special as we need to +@@ -178,11 +184,12 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle, + } + rte_spinlock_unlock(&intr_lock); + +- return ret; ++ return 0; + fail: + /* clean up */ + if (src != NULL) { +- TAILQ_REMOVE(&(src->callbacks), callback, next); ++ if (callback != NULL) ++ TAILQ_REMOVE(&(src->callbacks), callback, next); + if (TAILQ_EMPTY(&(src->callbacks))) { + TAILQ_REMOVE(&intr_sources, src, next); + free(src); +diff --git a/dpdk/lib/librte_eal/freebsd/eal/eal_memory.c b/dpdk/lib/librte_eal/freebsd/eal/eal_memory.c +index a97d8f0f0c..5bc2da160c 100644 +--- a/dpdk/lib/librte_eal/freebsd/eal/eal_memory.c ++++ b/dpdk/lib/librte_eal/freebsd/eal/eal_memory.c +@@ -449,7 +449,7 @@ memseg_primary_init(void) + * + * we need (N*2)-1 segments because we cannot guarantee that + * each segment will be IOVA-contiguous with the previous one, +- * so we will allocate more and put spaces inbetween segments ++ * so we will allocate more and put spaces between segments + * that are non-contiguous. + */ + avail_segs = (hpi->num_pages[0] * 2) - 1; +diff --git a/dpdk/lib/librte_eal/linux/eal/eal.c b/dpdk/lib/librte_eal/linux/eal/eal.c +index c4233ec3c8..e6d4cc7178 100644 +--- a/dpdk/lib/librte_eal/linux/eal/eal.c ++++ b/dpdk/lib/librte_eal/linux/eal/eal.c +@@ -25,6 +25,7 @@ + #if defined(RTE_ARCH_X86) + #include <sys/io.h> + #endif ++#include <linux/version.h> + + #include <rte_compat.h> + #include <rte_common.h> +@@ -1076,7 +1077,7 @@ rte_eal_init(int argc, char **argv) + #if defined(RTE_LIBRTE_KNI) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + } else if (rte_eal_check_module("rte_kni") == 1) { + iova_mode = RTE_IOVA_PA; +- RTE_LOG(DEBUG, EAL, "KNI is loaded, selecting IOVA as PA mode for better KNI perfomance.\n"); ++ RTE_LOG(DEBUG, EAL, "KNI is loaded, selecting IOVA as PA mode for better KNI performance.\n"); + #endif + } else if (is_iommu_enabled()) { + /* we have an IOMMU, pick IOVA as VA mode */ +diff --git a/dpdk/lib/librte_eal/linux/eal/eal_interrupts.c b/dpdk/lib/librte_eal/linux/eal/eal_interrupts.c +index 1955324d30..14ebb108ce 100644 +--- a/dpdk/lib/librte_eal/linux/eal/eal_interrupts.c ++++ b/dpdk/lib/librte_eal/linux/eal/eal_interrupts.c +@@ -1045,8 +1045,6 @@ eal_intr_handle_interrupts(int pfd, unsigned totalfds) + static __attribute__((noreturn)) void * + eal_intr_thread_main(__rte_unused void *arg) + { +- struct epoll_event ev; +- + /* host thread, never break out */ + for (;;) { + /* build up the epoll fd with all descriptors we are to +@@ -1078,8 +1076,11 @@ eal_intr_thread_main(__rte_unused void *arg) + rte_spinlock_lock(&intr_lock); + + TAILQ_FOREACH(src, &intr_sources, next) { ++ struct epoll_event ev; ++ + if (src->callbacks.tqh_first == NULL) + continue; /* skip those with no callbacks */ ++ memset(&ev, 0, sizeof(ev)); + ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP; + ev.data.fd = src->intr_handle.fd; + +diff --git a/dpdk/lib/librte_eal/linux/eal/eal_memalloc.c b/dpdk/lib/librte_eal/linux/eal/eal_memalloc.c +index af6d0d023a..678094acf9 100644 +--- a/dpdk/lib/librte_eal/linux/eal/eal_memalloc.c ++++ b/dpdk/lib/librte_eal/linux/eal/eal_memalloc.c +@@ -680,7 +680,7 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi, + /* erase page data */ + memset(ms->addr, 0, ms->len); + +- if (mmap(ms->addr, ms->len, PROT_READ, ++ if (mmap(ms->addr, ms->len, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) == + MAP_FAILED) { + RTE_LOG(DEBUG, EAL, "couldn't unmap page\n"); +diff --git 
a/dpdk/lib/librte_eal/linux/eal/eal_memory.c b/dpdk/lib/librte_eal/linux/eal/eal_memory.c +index 43e4ffc757..7a9c97ff88 100644 +--- a/dpdk/lib/librte_eal/linux/eal/eal_memory.c ++++ b/dpdk/lib/librte_eal/linux/eal/eal_memory.c +@@ -1340,6 +1340,8 @@ eal_legacy_hugepage_init(void) + + /* hugetlbfs can be disabled */ + if (internal_config.no_hugetlbfs) { ++ void *prealloc_addr; ++ size_t mem_sz; + struct rte_memseg_list *msl; + int n_segs, cur_seg, fd, flags; + #ifdef MEMFD_SUPPORTED +@@ -1395,17 +1397,31 @@ eal_legacy_hugepage_init(void) + } + } + #endif +- addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE, +- flags, fd, 0); +- if (addr == MAP_FAILED) { ++ /* preallocate address space for the memory, so that it can be ++ * fit into the DMA mask. ++ */ ++ mem_sz = internal_config.memory; ++ prealloc_addr = eal_get_virtual_area( ++ NULL, &mem_sz, page_sz, 0, 0); ++ if (prealloc_addr == NULL) { ++ RTE_LOG(ERR, EAL, ++ "%s: reserving memory area failed: " ++ "%s\n", ++ __func__, strerror(errno)); ++ return -1; ++ } ++ addr = mmap(prealloc_addr, mem_sz, PROT_READ | PROT_WRITE, ++ flags | MAP_FIXED, fd, 0); ++ if (addr == MAP_FAILED || addr != prealloc_addr) { + RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, + strerror(errno)); ++ munmap(prealloc_addr, mem_sz); + return -1; + } + msl->base_va = addr; + msl->page_sz = page_sz; + msl->socket_id = 0; +- msl->len = internal_config.memory; ++ msl->len = mem_sz; + msl->heap = 1; + + /* we're in single-file segments mode, so only the segment list +@@ -1928,7 +1944,7 @@ eal_legacy_hugepage_attach(void) + if (flock(fd, LOCK_SH) < 0) { + RTE_LOG(DEBUG, EAL, "%s(): Locking file failed: %s\n", + __func__, strerror(errno)); +- goto fd_error; ++ goto mmap_error; + } + + /* find segment data */ +@@ -1936,13 +1952,13 @@ eal_legacy_hugepage_attach(void) + if (msl == NULL) { + RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg list\n", + __func__); +- goto fd_error; ++ goto mmap_error; + } + ms = rte_mem_virt2memseg(map_addr, msl); + if (ms == NULL) { + RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg\n", + __func__); +- goto fd_error; ++ goto mmap_error; + } + + msl_idx = msl - mcfg->memsegs; +@@ -1950,7 +1966,7 @@ eal_legacy_hugepage_attach(void) + if (ms_idx < 0) { + RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg idx\n", + __func__); +- goto fd_error; ++ goto mmap_error; + } + + /* store segment fd internally */ +@@ -1963,18 +1979,15 @@ eal_legacy_hugepage_attach(void) + close(fd_hugepage); + return 0; + ++mmap_error: ++ munmap(hp[i].final_va, hp[i].size); + fd_error: + close(fd); + error: +- /* map all segments into memory to make sure we get the addrs */ +- cur_seg = 0; +- for (cur_seg = 0; cur_seg < i; cur_seg++) { +- struct hugepage_file *hf = &hp[i]; +- size_t map_sz = hf->size; +- void *map_addr = hf->final_va; ++ /* unwind mmap's done so far */ ++ for (cur_seg = 0; cur_seg < i; cur_seg++) ++ munmap(hp[cur_seg].final_va, hp[cur_seg].size); + +- munmap(map_addr, map_sz); +- } + if (hp != NULL && hp != MAP_FAILED) + munmap(hp, size); + if (fd_hugepage >= 0) +diff --git a/dpdk/lib/librte_eal/linux/eal/eal_vfio.c b/dpdk/lib/librte_eal/linux/eal/eal_vfio.c +index 95f615c2e3..62ffe13e0e 100644 +--- a/dpdk/lib/librte_eal/linux/eal/eal_vfio.c ++++ b/dpdk/lib/librte_eal/linux/eal/eal_vfio.c +@@ -379,7 +379,7 @@ vfio_get_group_fd(struct vfio_config *vfio_cfg, + } + + vfio_group_fd = vfio_open_group_fd(iommu_group_num); +- if (vfio_group_fd < 0) { ++ if (vfio_group_fd <= 0) { + RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num); + return 
-1; + } +@@ -532,6 +532,17 @@ vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len, + return; + } + ++#ifdef RTE_ARCH_PPC_64 ++ ms = rte_mem_virt2memseg(addr, msl); ++ while (cur_len < len) { ++ int idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); ++ ++ rte_fbarray_set_free(&msl->memseg_arr, idx); ++ cur_len += ms->len; ++ ++ms; ++ } ++ cur_len = 0; ++#endif + /* memsegs are contiguous in memory */ + ms = rte_mem_virt2memseg(addr, msl); + while (cur_len < len) { +@@ -551,6 +562,17 @@ vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len, + cur_len += ms->len; + ++ms; + } ++#ifdef RTE_ARCH_PPC_64 ++ cur_len = 0; ++ ms = rte_mem_virt2memseg(addr, msl); ++ while (cur_len < len) { ++ int idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); ++ ++ rte_fbarray_set_used(&msl->memseg_arr, idx); ++ cur_len += ms->len; ++ ++ms; ++ } ++#endif + } + + static int +@@ -1027,6 +1049,7 @@ vfio_get_default_container_fd(void) + struct rte_mp_reply mp_reply = {0}; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; ++ int container_fd; + + if (default_vfio_cfg->vfio_enabled) + return default_vfio_cfg->vfio_container_fd; +@@ -1049,8 +1072,9 @@ vfio_get_default_container_fd(void) + mp_rep = &mp_reply.msgs[0]; + p = (struct vfio_mp_param *)mp_rep->param; + if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { ++ container_fd = mp_rep->fds[0]; + free(mp_reply.msgs); +- return mp_rep->fds[0]; ++ return container_fd; + } + } + +@@ -1416,16 +1440,11 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + return 0; + } + +-struct spapr_remap_walk_param { +- int vfio_container_fd; +- uint64_t addr_64; +-}; +- + static int + vfio_spapr_map_walk(const struct rte_memseg_list *msl, + const struct rte_memseg *ms, void *arg) + { +- struct spapr_remap_walk_param *param = arg; ++ int *vfio_container_fd = arg; + + /* skip external memory that isn't a heap */ + if (msl->external && !msl->heap) +@@ -1435,10 +1454,7 @@ vfio_spapr_map_walk(const struct rte_memseg_list *msl, + if (ms->iova == RTE_BAD_IOVA) + return 0; + +- if (ms->addr_64 == param->addr_64) +- return 0; +- +- return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova, ++ return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova, + ms->len, 1); + } + +@@ -1446,7 +1462,7 @@ static int + vfio_spapr_unmap_walk(const struct rte_memseg_list *msl, + const struct rte_memseg *ms, void *arg) + { +- struct spapr_remap_walk_param *param = arg; ++ int *vfio_container_fd = arg; + + /* skip external memory that isn't a heap */ + if (msl->external && !msl->heap) +@@ -1456,17 +1472,13 @@ vfio_spapr_unmap_walk(const struct rte_memseg_list *msl, + if (ms->iova == RTE_BAD_IOVA) + return 0; + +- if (ms->addr_64 == param->addr_64) +- return 0; +- +- return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova, ++ return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova, + ms->len, 0); + } + + struct spapr_walk_param { + uint64_t window_size; + uint64_t hugepage_sz; +- uint64_t addr_64; + }; + + static int +@@ -1484,10 +1496,6 @@ vfio_spapr_window_size_walk(const struct rte_memseg_list *msl, + if (ms->iova == RTE_BAD_IOVA) + return 0; + +- /* do not iterate ms we haven't mapped yet */ +- if (param->addr_64 && ms->addr_64 == param->addr_64) +- return 0; +- + if (max > param->window_size) { + param->hugepage_sz = ms->hugepage_sz; + param->window_size = max; +@@ -1531,20 +1539,11 @@ 
vfio_spapr_create_new_dma_window(int vfio_container_fd, + /* try possible page_shift and levels for workaround */ + uint32_t levels; + +- for (levels = 1; levels <= info.ddw.levels; levels++) { +- uint32_t pgsizes = info.ddw.pgsizes; +- +- while (pgsizes != 0) { +- create->page_shift = 31 - __builtin_clz(pgsizes); +- create->levels = levels; +- ret = ioctl(vfio_container_fd, +- VFIO_IOMMU_SPAPR_TCE_CREATE, create); +- if (!ret) +- break; +- pgsizes &= ~(1 << create->page_shift); +- } +- if (!ret) +- break; ++ for (levels = create->levels + 1; ++ ret && levels <= info.ddw.levels; levels++) { ++ create->levels = levels; ++ ret = ioctl(vfio_container_fd, ++ VFIO_IOMMU_SPAPR_TCE_CREATE, create); + } + #endif + if (ret) { +@@ -1585,7 +1584,6 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + + /* check if window size needs to be adjusted */ + memset(¶m, 0, sizeof(param)); +- param.addr_64 = vaddr; + + /* we're inside a callback so use thread-unsafe version */ + if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk, +@@ -1610,14 +1608,9 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + if (do_map) { + /* re-create window and remap the entire memory */ + if (iova + len > create.window_size) { +- struct spapr_remap_walk_param remap_param = { +- .vfio_container_fd = vfio_container_fd, +- .addr_64 = vaddr, +- }; +- + /* release all maps before recreating the window */ + if (rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk, +- &remap_param) < 0) { ++ &vfio_container_fd) < 0) { + RTE_LOG(ERR, EAL, "Could not release DMA maps\n"); + ret = -1; + goto out; +@@ -1644,7 +1637,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + /* we're inside a callback, so use thread-unsafe version + */ + if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk, +- &remap_param) < 0) { ++ &vfio_container_fd) < 0) { + RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n"); + ret = -1; + goto out; +@@ -1691,7 +1684,6 @@ vfio_spapr_dma_map(int vfio_container_fd) + struct spapr_walk_param param; + + memset(¶m, 0, sizeof(param)); +- param.addr_64 = 0UL; + + /* create DMA window from 0 to max(phys_addr + len) */ + rte_memseg_walk(vfio_spapr_window_size_walk, ¶m); +diff --git a/dpdk/lib/librte_eal/windows/eal/include/sched.h b/dpdk/lib/librte_eal/windows/eal/include/sched.h +index 257060594c..29868c93d1 100644 +--- a/dpdk/lib/librte_eal/windows/eal/include/sched.h ++++ b/dpdk/lib/librte_eal/windows/eal/include/sched.h +@@ -14,8 +14,8 @@ + extern "C" { + #endif + +-#ifndef CPU_SET_SIZE +-#define CPU_SET_SIZE RTE_MAX_LCORE ++#ifndef CPU_SETSIZE ++#define CPU_SETSIZE RTE_MAX_LCORE + #endif + + #define _BITS_PER_SET (sizeof(long long) * 8) +@@ -26,7 +26,7 @@ extern "C" { + #define _WHICH_BIT(b) ((b) & (_BITS_PER_SET - 1)) + + typedef struct _rte_cpuset_s { +- long long _bits[_NUM_SETS(CPU_SET_SIZE)]; ++ long long _bits[_NUM_SETS(CPU_SETSIZE)]; + } rte_cpuset_t; + + #define CPU_SET(b, s) ((s)->_bits[_WHICH_SET(b)] |= (1LL << _WHICH_BIT(b))) +@@ -35,7 +35,7 @@ typedef struct _rte_cpuset_s { + do { \ + unsigned int _i; \ + \ +- for (_i = 0; _i < _NUM_SETS(CPU_SET_SIZE); _i++) \ ++ for (_i = 0; _i < _NUM_SETS(CPU_SETSIZE); _i++) \ + (s)->_bits[_i] = 0LL; \ + } while (0) + +diff --git a/dpdk/lib/librte_ethdev/ethdev_profile.h b/dpdk/lib/librte_ethdev/ethdev_profile.h +index 65031e6f3f..e5ee4df824 100644 +--- a/dpdk/lib/librte_ethdev/ethdev_profile.h ++++ b/dpdk/lib/librte_ethdev/ethdev_profile.h +@@ -24,4 +24,13 @@ + int + 
__rte_eth_dev_profile_init(uint16_t port_id, struct rte_eth_dev *dev); + ++#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE ++ ++uint16_t ++profile_hook_rx_burst_cb(uint16_t port_id, uint16_t queue_id, ++ struct rte_mbuf *pkts[], uint16_t nb_pkts, ++ uint16_t max_pkts, void *user_param); ++ ++#endif /* RTE_ETHDEV_PROFILE_WITH_VTUNE */ ++ + #endif +diff --git a/dpdk/lib/librte_ethdev/rte_ethdev.c b/dpdk/lib/librte_ethdev/rte_ethdev.c +index 6e9cb243ea..c3657509c5 100644 +--- a/dpdk/lib/librte_ethdev/rte_ethdev.c ++++ b/dpdk/lib/librte_ethdev/rte_ethdev.c +@@ -1166,14 +1166,14 @@ check_lro_pkt_size(uint16_t port_id, uint32_t config_size, + + /* + * Validate offloads that are requested through rte_eth_dev_configure against +- * the offloads successfuly set by the ethernet device. ++ * the offloads successfully set by the ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param req_offloads + * The offloads that have been requested through `rte_eth_dev_configure`. + * @param set_offloads +- * The offloads successfuly set by the ethernet device. ++ * The offloads successfully set by the ethernet device. + * @param offload_type + * The offload type i.e. Rx/Tx string. + * @param offload_name +@@ -1202,7 +1202,7 @@ validate_offloads(uint16_t port_id, uint64_t req_offloads, + ret = -EINVAL; + } + +- /* Chech if offload couldn't be disabled. */ ++ /* Check if offload couldn't be disabled. */ + if (offload & set_offloads) { + RTE_ETHDEV_LOG(DEBUG, + "Port %u %s offload %s is not requested but enabled\n", +@@ -2968,6 +2968,7 @@ rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info) + * return status and does not know if get is successful or not. + */ + memset(dev_info, 0, sizeof(struct rte_eth_dev_info)); ++ dev_info->switch_info.domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; +@@ -3253,53 +3254,53 @@ rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask) + int mask = 0; + int cur, org = 0; + uint64_t orig_offloads; +- uint64_t *dev_offloads; ++ uint64_t dev_offloads; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + /* save original values in case of failure */ + orig_offloads = dev->data->dev_conf.rxmode.offloads; +- dev_offloads = &dev->data->dev_conf.rxmode.offloads; ++ dev_offloads = orig_offloads; + +- /*check which option changed by application*/ ++ /* check which option changed by application */ + cur = !!(offload_mask & ETH_VLAN_STRIP_OFFLOAD); +- org = !!(*dev_offloads & DEV_RX_OFFLOAD_VLAN_STRIP); ++ org = !!(dev_offloads & DEV_RX_OFFLOAD_VLAN_STRIP); + if (cur != org) { + if (cur) +- *dev_offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; ++ dev_offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; + else +- *dev_offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP; ++ dev_offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP; + mask |= ETH_VLAN_STRIP_MASK; + } + + cur = !!(offload_mask & ETH_VLAN_FILTER_OFFLOAD); +- org = !!(*dev_offloads & DEV_RX_OFFLOAD_VLAN_FILTER); ++ org = !!(dev_offloads & DEV_RX_OFFLOAD_VLAN_FILTER); + if (cur != org) { + if (cur) +- *dev_offloads |= DEV_RX_OFFLOAD_VLAN_FILTER; ++ dev_offloads |= DEV_RX_OFFLOAD_VLAN_FILTER; + else +- *dev_offloads &= ~DEV_RX_OFFLOAD_VLAN_FILTER; ++ dev_offloads &= ~DEV_RX_OFFLOAD_VLAN_FILTER; + mask |= ETH_VLAN_FILTER_MASK; + } + + cur = !!(offload_mask & ETH_VLAN_EXTEND_OFFLOAD); +- org = !!(*dev_offloads & DEV_RX_OFFLOAD_VLAN_EXTEND); ++ org = !!(dev_offloads & DEV_RX_OFFLOAD_VLAN_EXTEND); + 
if (cur != org) { + if (cur) +- *dev_offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND; ++ dev_offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND; + else +- *dev_offloads &= ~DEV_RX_OFFLOAD_VLAN_EXTEND; ++ dev_offloads &= ~DEV_RX_OFFLOAD_VLAN_EXTEND; + mask |= ETH_VLAN_EXTEND_MASK; + } + + cur = !!(offload_mask & ETH_QINQ_STRIP_OFFLOAD); +- org = !!(*dev_offloads & DEV_RX_OFFLOAD_QINQ_STRIP); ++ org = !!(dev_offloads & DEV_RX_OFFLOAD_QINQ_STRIP); + if (cur != org) { + if (cur) +- *dev_offloads |= DEV_RX_OFFLOAD_QINQ_STRIP; ++ dev_offloads |= DEV_RX_OFFLOAD_QINQ_STRIP; + else +- *dev_offloads &= ~DEV_RX_OFFLOAD_QINQ_STRIP; ++ dev_offloads &= ~DEV_RX_OFFLOAD_QINQ_STRIP; + mask |= ETH_QINQ_STRIP_MASK; + } + +@@ -3308,10 +3309,11 @@ rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask) + return ret; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_offload_set, -ENOTSUP); ++ dev->data->dev_conf.rxmode.offloads = dev_offloads; + ret = (*dev->dev_ops->vlan_offload_set)(dev, mask); + if (ret) { + /* hit an error restore original values */ +- *dev_offloads = orig_offloads; ++ dev->data->dev_conf.rxmode.offloads = orig_offloads; + } + + return eth_err(port_id, ret); +@@ -4039,7 +4041,7 @@ rte_eth_dev_callback_unregister(uint16_t port_id, + next = TAILQ_NEXT(cb, next); + + if (cb->cb_fn != cb_fn || cb->event != event || +- (cb->cb_arg != (void *)-1 && cb->cb_arg != cb_arg)) ++ (cb_arg != (void *)-1 && cb->cb_arg != cb_arg)) + continue; + + /* +@@ -4452,7 +4454,7 @@ rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id, + cb->param = user_param; + + rte_spinlock_lock(&rte_eth_rx_cb_lock); +- /* Add the callbacks at fisrt position*/ ++ /* Add the callbacks at first position */ + cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id]; + rte_smp_wmb(); + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb; +@@ -5064,8 +5066,7 @@ rte_eth_switch_domain_alloc(uint16_t *domain_id) + + *domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; + +- for (i = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID + 1; +- i < RTE_MAX_ETHPORTS; i++) { ++ for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (rte_eth_switch_domains[i].state == + RTE_ETH_SWITCH_DOMAIN_UNUSED) { + rte_eth_switch_domains[i].state = +diff --git a/dpdk/lib/librte_ethdev/rte_ethdev.h b/dpdk/lib/librte_ethdev/rte_ethdev.h +index 18a9defc24..d1a593ad11 100644 +--- a/dpdk/lib/librte_ethdev/rte_ethdev.h ++++ b/dpdk/lib/librte_ethdev/rte_ethdev.h +@@ -1196,7 +1196,7 @@ struct rte_eth_dev_portconf { + * Default values for switch domain id when ethdev does not support switch + * domain definitions. + */ +-#define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID (0) ++#define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID (UINT16_MAX) + + /** + * Ethernet device associated switch information +diff --git a/dpdk/lib/librte_ethdev/rte_ethdev_pci.h b/dpdk/lib/librte_ethdev/rte_ethdev_pci.h +index ccdbb46ec0..cca94ec864 100644 +--- a/dpdk/lib/librte_ethdev/rte_ethdev_pci.h ++++ b/dpdk/lib/librte_ethdev/rte_ethdev_pci.h +@@ -42,6 +42,8 @@ + + /** + * Copy pci device info to the Ethernet device data. ++ * Shared memory (eth_dev->data) only updated by primary process, so it is safe ++ * to call this function from both primary and secondary processes. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. 
+@@ -60,14 +62,16 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, + + eth_dev->intr_handle = &pci_dev->intr_handle; + +- eth_dev->data->dev_flags = 0; +- if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) +- eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; +- if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_RMV) +- eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV; +- +- eth_dev->data->kdrv = pci_dev->kdrv; +- eth_dev->data->numa_node = pci_dev->device.numa_node; ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { ++ eth_dev->data->dev_flags = 0; ++ if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) ++ eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; ++ if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_RMV) ++ eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV; ++ ++ eth_dev->data->kdrv = pci_dev->kdrv; ++ eth_dev->data->numa_node = pci_dev->device.numa_node; ++ } + } + + static inline int +diff --git a/dpdk/lib/librte_ethdev/rte_flow.c b/dpdk/lib/librte_ethdev/rte_flow.c +index 87a3e8c4c6..391165646a 100644 +--- a/dpdk/lib/librte_ethdev/rte_flow.c ++++ b/dpdk/lib/librte_ethdev/rte_flow.c +@@ -19,7 +19,7 @@ + #include "rte_flow.h" + + /* Mbuf dynamic field name for metadata. */ +-int rte_flow_dynf_metadata_offs = -1; ++int32_t rte_flow_dynf_metadata_offs = -1; + + /* Mbuf dynamic field flag bit number for metadata. */ + uint64_t rte_flow_dynf_metadata_mask; +diff --git a/dpdk/lib/librte_ethdev/rte_flow.h b/dpdk/lib/librte_ethdev/rte_flow.h +index 452d359a16..693824da8a 100644 +--- a/dpdk/lib/librte_ethdev/rte_flow.h ++++ b/dpdk/lib/librte_ethdev/rte_flow.h +@@ -502,7 +502,7 @@ enum rte_flow_item_type { + */ + RTE_FLOW_ITEM_TYPE_HIGIG2, + +- /* ++ /** + * [META] + * + * Matches a tag value. +@@ -2531,7 +2531,7 @@ struct rte_flow_action_set_meta { + }; + + /* Mbuf dynamic field offset for metadata. */ +-extern int rte_flow_dynf_metadata_offs; ++extern int32_t rte_flow_dynf_metadata_offs; + + /* Mbuf dynamic field flag mask for metadata. 
*/ + extern uint64_t rte_flow_dynf_metadata_mask; +diff --git a/dpdk/lib/librte_eventdev/rte_eventdev.c b/dpdk/lib/librte_eventdev/rte_eventdev.c +index b987e07454..9aca7fbd52 100644 +--- a/dpdk/lib/librte_eventdev/rte_eventdev.c ++++ b/dpdk/lib/librte_eventdev/rte_eventdev.c +@@ -1364,14 +1364,17 @@ rte_event_pmd_allocate(const char *name, int socket_id) + + eventdev->data = eventdev_data; + +- strlcpy(eventdev->data->name, name, RTE_EVENTDEV_NAME_MAX_LEN); ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + +- eventdev->data->dev_id = dev_id; +- eventdev->data->socket_id = socket_id; +- eventdev->data->dev_started = 0; ++ strlcpy(eventdev->data->name, name, ++ RTE_EVENTDEV_NAME_MAX_LEN); + +- eventdev->attached = RTE_EVENTDEV_ATTACHED; ++ eventdev->data->dev_id = dev_id; ++ eventdev->data->socket_id = socket_id; ++ eventdev->data->dev_started = 0; ++ } + ++ eventdev->attached = RTE_EVENTDEV_ATTACHED; + eventdev_globals.nb_devs++; + } + +diff --git a/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h b/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h +index 8fb61386fd..443cd38c23 100644 +--- a/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h ++++ b/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h +@@ -112,9 +112,11 @@ rte_event_pmd_pci_remove(struct rte_pci_device *pci_dev, + if (eventdev == NULL) + return -ENODEV; + +- ret = rte_event_dev_close(eventdev->data->dev_id); +- if (ret < 0) +- return ret; ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { ++ ret = rte_event_dev_close(eventdev->data->dev_id); ++ if (ret < 0) ++ return ret; ++ } + + /* Invoke PMD device un-init function */ + if (devuninit) +diff --git a/dpdk/lib/librte_fib/rte_fib.h b/dpdk/lib/librte_fib/rte_fib.h +index d06c5ef55a..af3bbf07ee 100644 +--- a/dpdk/lib/librte_fib/rte_fib.h ++++ b/dpdk/lib/librte_fib/rte_fib.h +@@ -14,6 +14,10 @@ + + #include <rte_compat.h> + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + struct rte_fib; + struct rte_rib; + +@@ -185,4 +189,8 @@ __rte_experimental + struct rte_rib * + rte_fib_get_rib(struct rte_fib *fib); + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_FIB_H_ */ +diff --git a/dpdk/lib/librte_fib/rte_fib6.h b/dpdk/lib/librte_fib/rte_fib6.h +index 4268704038..66c71c84c9 100644 +--- a/dpdk/lib/librte_fib/rte_fib6.h ++++ b/dpdk/lib/librte_fib/rte_fib6.h +@@ -14,6 +14,10 @@ + + #include <rte_compat.h> + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #define RTE_FIB6_IPV6_ADDR_SIZE 16 + /** Maximum depth value possible for IPv6 FIB. 
*/ + #define RTE_FIB6_MAXDEPTH 128 +@@ -190,4 +194,8 @@ __rte_experimental + struct rte_rib6 * + rte_fib6_get_rib(struct rte_fib6 *fib); + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_FIB6_H_ */ +diff --git a/dpdk/lib/librte_fib/trie.c b/dpdk/lib/librte_fib/trie.c +index 124aa8b98b..2ae2add4f3 100644 +--- a/dpdk/lib/librte_fib/trie.c ++++ b/dpdk/lib/librte_fib/trie.c +@@ -240,9 +240,8 @@ tbl8_alloc(struct rte_trie_tbl *dp, uint64_t nh) + tbl8_idx = tbl8_get(dp); + if (tbl8_idx < 0) + return tbl8_idx; +- tbl8_ptr = (uint8_t *)dp->tbl8 + +- ((tbl8_idx * TRIE_TBL8_GRP_NUM_ENT) << +- dp->nh_sz); ++ tbl8_ptr = get_tbl_p_by_idx(dp->tbl8, ++ tbl8_idx * TRIE_TBL8_GRP_NUM_ENT, dp->nh_sz); + /*Init tbl8 entries with nexthop from tbl24*/ + write_to_dp((void *)tbl8_ptr, nh, dp->nh_sz, + TRIE_TBL8_GRP_NUM_ENT); +@@ -317,7 +316,7 @@ get_idx(const uint8_t *ip, uint32_t prev_idx, int bytes, int first_byte) + bitshift = (int8_t)(((first_byte + bytes - 1) - i)*BYTE_SIZE); + idx |= ip[i] << bitshift; + } +- return (prev_idx * 256) + idx; ++ return (prev_idx * TRIE_TBL8_GRP_NUM_ENT) + idx; + } + + static inline uint64_t +@@ -354,8 +353,8 @@ recycle_root_path(struct rte_trie_tbl *dp, const uint8_t *ip_part, + return; + + if (common_tbl8 != 0) { +- p = get_tbl_p_by_idx(dp->tbl8, (val >> 1) * 256 + *ip_part, +- dp->nh_sz); ++ p = get_tbl_p_by_idx(dp->tbl8, (val >> 1) * ++ TRIE_TBL8_GRP_NUM_ENT + *ip_part, dp->nh_sz); + recycle_root_path(dp, ip_part + 1, common_tbl8 - 1, p); + } + tbl8_recycle(dp, prev, val >> 1); +@@ -388,7 +387,8 @@ build_common_root(struct rte_trie_tbl *dp, const uint8_t *ip, + j = i; + cur_tbl = dp->tbl8; + } +- *tbl = get_tbl_p_by_idx(cur_tbl, prev_idx * 256, dp->nh_sz); ++ *tbl = get_tbl_p_by_idx(cur_tbl, prev_idx * TRIE_TBL8_GRP_NUM_ENT, ++ dp->nh_sz); + return 0; + } + +@@ -411,8 +411,8 @@ write_edge(struct rte_trie_tbl *dp, const uint8_t *ip_part, uint64_t next_hop, + return tbl8_idx; + val = (tbl8_idx << 1)|TRIE_EXT_ENT; + } +- p = get_tbl_p_by_idx(dp->tbl8, (tbl8_idx * 256) + *ip_part, +- dp->nh_sz); ++ p = get_tbl_p_by_idx(dp->tbl8, (tbl8_idx * ++ TRIE_TBL8_GRP_NUM_ENT) + *ip_part, dp->nh_sz); + ret = write_edge(dp, ip_part + 1, next_hop, len - 1, edge, p); + if (ret < 0) + return ret; +@@ -420,8 +420,8 @@ write_edge(struct rte_trie_tbl *dp, const uint8_t *ip_part, uint64_t next_hop, + write_to_dp((uint8_t *)p + (1 << dp->nh_sz), + next_hop << 1, dp->nh_sz, UINT8_MAX - *ip_part); + } else { +- write_to_dp(get_tbl_p_by_idx(dp->tbl8, tbl8_idx * 256, +- dp->nh_sz), ++ write_to_dp(get_tbl_p_by_idx(dp->tbl8, tbl8_idx * ++ TRIE_TBL8_GRP_NUM_ENT, dp->nh_sz), + next_hop << 1, dp->nh_sz, *ip_part); + } + tbl8_recycle(dp, &val, tbl8_idx); +diff --git a/dpdk/lib/librte_hash/meson.build b/dpdk/lib/librte_hash/meson.build +index 5d02b3084f..bce11ad9e0 100644 +--- a/dpdk/lib/librte_hash/meson.build ++++ b/dpdk/lib/librte_hash/meson.build +@@ -1,10 +1,7 @@ + # SPDX-License-Identifier: BSD-3-Clause + # Copyright(c) 2017 Intel Corporation + +-headers = files('rte_cmp_arm64.h', +- 'rte_cmp_x86.h', +- 'rte_crc_arm64.h', +- 'rte_cuckoo_hash.h', ++headers = files('rte_crc_arm64.h', + 'rte_fbk_hash.h', + 'rte_hash_crc.h', + 'rte_hash.h', +diff --git a/dpdk/lib/librte_hash/rte_hash.h b/dpdk/lib/librte_hash/rte_hash.h +index 0d73370dc4..ab7be1d528 100644 +--- a/dpdk/lib/librte_hash/rte_hash.h ++++ b/dpdk/lib/librte_hash/rte_hash.h +@@ -51,8 +51,6 @@ extern "C" { + + /** Flag to support lock free reader writer concurrency. Both single writer + * and multi writer use cases are supported. 
+- * Currently, extendable bucket table feature is not supported with +- * this feature. + */ + #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF 0x20 + +diff --git a/dpdk/lib/librte_ipsec/ipsec_sad.c b/dpdk/lib/librte_ipsec/ipsec_sad.c +index db2c44c804..31b5956d89 100644 +--- a/dpdk/lib/librte_ipsec/ipsec_sad.c ++++ b/dpdk/lib/librte_ipsec/ipsec_sad.c +@@ -94,6 +94,8 @@ add_specific(struct rte_ipsec_sad *sad, const void *key, + + /* Update a counter for a given SPI */ + ret = rte_hash_lookup(sad->hash[RTE_IPSEC_SAD_SPI_ONLY], key); ++ if (ret < 0) ++ return ret; + if (key_type == RTE_IPSEC_SAD_SPI_DIP) + sad->cnt_arr[ret].cnt_dip += notexist; + else +diff --git a/dpdk/lib/librte_ipsec/sa.h b/dpdk/lib/librte_ipsec/sa.h +index 51e69ad05a..0cfe82f634 100644 +--- a/dpdk/lib/librte_ipsec/sa.h ++++ b/dpdk/lib/librte_ipsec/sa.h +@@ -113,7 +113,7 @@ struct rte_ipsec_sa { + * sqn and replay window + * In case of SA handled by multiple threads *sqn* cacheline + * could be shared by multiple cores. +- * To minimise perfomance impact, we try to locate in a separate ++ * To minimise performance impact, we try to locate in a separate + * place from other frequently accesed data. + */ + union { +diff --git a/dpdk/lib/librte_kni/rte_kni.c b/dpdk/lib/librte_kni/rte_kni.c +index e388751e33..bcf82cc2d5 100644 +--- a/dpdk/lib/librte_kni/rte_kni.c ++++ b/dpdk/lib/librte_kni/rte_kni.c +@@ -145,31 +145,38 @@ kni_reserve_mz(struct rte_kni *kni) + char mz_name[RTE_MEMZONE_NAMESIZE]; + + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_TX_Q_MZ_NAME_FMT, kni->name); +- kni->m_tx_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); ++ kni->m_tx_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, ++ RTE_MEMZONE_IOVA_CONTIG); + KNI_MEM_CHECK(kni->m_tx_q == NULL, tx_q_fail); + + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_RX_Q_MZ_NAME_FMT, kni->name); +- kni->m_rx_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); ++ kni->m_rx_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, ++ RTE_MEMZONE_IOVA_CONTIG); + KNI_MEM_CHECK(kni->m_rx_q == NULL, rx_q_fail); + + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_ALLOC_Q_MZ_NAME_FMT, kni->name); +- kni->m_alloc_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); ++ kni->m_alloc_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, ++ RTE_MEMZONE_IOVA_CONTIG); + KNI_MEM_CHECK(kni->m_alloc_q == NULL, alloc_q_fail); + + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_FREE_Q_MZ_NAME_FMT, kni->name); +- kni->m_free_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); ++ kni->m_free_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, ++ RTE_MEMZONE_IOVA_CONTIG); + KNI_MEM_CHECK(kni->m_free_q == NULL, free_q_fail); + + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_REQ_Q_MZ_NAME_FMT, kni->name); +- kni->m_req_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); ++ kni->m_req_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, ++ RTE_MEMZONE_IOVA_CONTIG); + KNI_MEM_CHECK(kni->m_req_q == NULL, req_q_fail); + + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_RESP_Q_MZ_NAME_FMT, kni->name); +- kni->m_resp_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0); ++ kni->m_resp_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, ++ RTE_MEMZONE_IOVA_CONTIG); + KNI_MEM_CHECK(kni->m_resp_q == NULL, resp_q_fail); + + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_SYNC_ADDR_MZ_NAME_FMT, kni->name); +- kni->m_sync_addr = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, 
SOCKET_ID_ANY, 0); ++ kni->m_sync_addr = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, ++ RTE_MEMZONE_IOVA_CONTIG); + KNI_MEM_CHECK(kni->m_sync_addr == NULL, sync_addr_fail); + + return 0; +diff --git a/dpdk/lib/librte_kvargs/rte_kvargs.c b/dpdk/lib/librte_kvargs/rte_kvargs.c +index d39332999e..285081c86c 100644 +--- a/dpdk/lib/librte_kvargs/rte_kvargs.c ++++ b/dpdk/lib/librte_kvargs/rte_kvargs.c +@@ -50,6 +50,8 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params) + /* Find the end of the list. */ + while (str[strlen(str) - 1] != ']') { + /* Restore the comma erased by strtok_r(). */ ++ if (ctx1 == NULL || ctx1[0] == '\0') ++ return -1; /* no closing bracket */ + str[strlen(str)] = ','; + /* Parse until next comma. */ + str = strtok_r(NULL, RTE_KVARGS_PAIRS_DELIM, &ctx1); +diff --git a/dpdk/lib/librte_kvargs/rte_kvargs.h b/dpdk/lib/librte_kvargs/rte_kvargs.h +index 1946195de4..eff598e08b 100644 +--- a/dpdk/lib/librte_kvargs/rte_kvargs.h ++++ b/dpdk/lib/librte_kvargs/rte_kvargs.h +@@ -171,7 +171,7 @@ unsigned rte_kvargs_count(const struct rte_kvargs *kvlist, + * 0 if the strings match. + * !0 otherwise or on error. + * +- * Unless strcmp, comparison ordering is not kept. ++ * Unlike strcmp, comparison ordering is not kept. + * In order for rte_kvargs_process to stop processing on match error, + * a negative value is returned even if strcmp had returned a positive one. + */ +diff --git a/dpdk/lib/librte_latencystats/rte_latencystats.c b/dpdk/lib/librte_latencystats/rte_latencystats.c +index 98e018939e..ba2fff3bcb 100644 +--- a/dpdk/lib/librte_latencystats/rte_latencystats.c ++++ b/dpdk/lib/librte_latencystats/rte_latencystats.c +@@ -42,6 +42,7 @@ struct rte_latency_stats { + float avg_latency; /**< Average latency in nano seconds */ + float max_latency; /**< Maximum latency in nano seconds */ + float jitter; /** Latency variation */ ++ rte_spinlock_t lock; /** Latency calculation lock */ + }; + + static struct rte_latency_stats *glob_stats; +@@ -164,6 +165,7 @@ calc_latency(uint16_t pid __rte_unused, + latency[cnt++] = now - pkts[i]->timestamp; + } + ++ rte_spinlock_lock(&glob_stats->lock); + for (i = 0; i < cnt; i++) { + /* + * The jitter is calculated as statistical mean of interpacket +@@ -193,6 +195,7 @@ calc_latency(uint16_t pid __rte_unused, + alpha * (latency[i] - glob_stats->avg_latency); + prev_latency = latency[i]; + } ++ rte_spinlock_unlock(&glob_stats->lock); + + return nb_pkts; + } +@@ -223,6 +226,7 @@ rte_latencystats_init(uint64_t app_samp_intvl, + } + + glob_stats = mz->addr; ++ rte_spinlock_init(&glob_stats->lock); + samp_intvl = app_samp_intvl * latencystat_cycles_per_ns(); + + /** Register latency stats with stats library */ +diff --git a/dpdk/lib/librte_lpm/meson.build b/dpdk/lib/librte_lpm/meson.build +index 27ce45b531..021ac6d8d4 100644 +--- a/dpdk/lib/librte_lpm/meson.build ++++ b/dpdk/lib/librte_lpm/meson.build +@@ -7,4 +7,3 @@ headers = files('rte_lpm.h', 'rte_lpm6.h') + # without worrying about which architecture we actually need + headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h') + deps += ['hash'] +-use_function_versioning = true +diff --git a/dpdk/lib/librte_lpm/rte_lpm.c b/dpdk/lib/librte_lpm/rte_lpm.c +index b78c487447..2687564194 100644 +--- a/dpdk/lib/librte_lpm/rte_lpm.c ++++ b/dpdk/lib/librte_lpm/rte_lpm.c +@@ -22,7 +22,6 @@ + #include <rte_rwlock.h> + #include <rte_spinlock.h> + #include <rte_tailq.h> +-#include <rte_function_versioning.h> + + #include "rte_lpm.h" + +diff --git 
a/dpdk/lib/librte_lpm/rte_lpm6.c b/dpdk/lib/librte_lpm/rte_lpm6.c +index c46e557e23..6e1b18d6fd 100644 +--- a/dpdk/lib/librte_lpm/rte_lpm6.c ++++ b/dpdk/lib/librte_lpm/rte_lpm6.c +@@ -25,7 +25,6 @@ + #include <assert.h> + #include <rte_jhash.h> + #include <rte_tailq.h> +-#include <rte_function_versioning.h> + + #include "rte_lpm6.h" + +@@ -727,7 +726,8 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, + tbl8_group_start = tbl8_gindex * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + memset(&lpm->tbl8[tbl8_group_start], 0, +- RTE_LPM6_TBL8_GROUP_NUM_ENTRIES); ++ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * ++ sizeof(struct rte_lpm6_tbl_entry)); + + /* init the new table's header: + * save the reference to the owner table +@@ -814,7 +814,7 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, + * + * Returns: + * 0 on success +- * -ENOSPC not enought tbl8 left ++ * -ENOSPC not enough tbl8 left + */ + static int + simulate_add(struct rte_lpm6 *lpm, const uint8_t *masked_ip, uint8_t depth) +@@ -844,7 +844,7 @@ simulate_add(struct rte_lpm6 *lpm, const uint8_t *masked_ip, uint8_t depth) + } + + if (tbl8_available(lpm) < total_need_tbl_nb) +- /* not enought tbl8 to add a rule */ ++ /* not enough tbl8 to add a rule */ + return -ENOSPC; + + return 0; +@@ -1212,7 +1212,7 @@ rule_find_range(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth, + /* minus top level */ + depth -= 24; + +- /* interate through levels (tbl8s) ++ /* iterate through levels (tbl8s) + * until we reach the last one + */ + while (depth > 8) { +diff --git a/dpdk/lib/librte_mbuf/rte_mbuf.h b/dpdk/lib/librte_mbuf/rte_mbuf.h +index 219b110b76..6d080527f6 100644 +--- a/dpdk/lib/librte_mbuf/rte_mbuf.h ++++ b/dpdk/lib/librte_mbuf/rte_mbuf.h +@@ -1535,7 +1535,7 @@ static inline int rte_pktmbuf_trim(struct rte_mbuf *m, uint16_t len) + static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m) + { + __rte_mbuf_sanity_check(m, 1); +- return !!(m->nb_segs == 1); ++ return m->nb_segs == 1; + } + + /** +diff --git a/dpdk/lib/librte_mempool/rte_mempool.c b/dpdk/lib/librte_mempool/rte_mempool.c +index 78d8eb941e..08906df9ee 100644 +--- a/dpdk/lib/librte_mempool/rte_mempool.c ++++ b/dpdk/lib/librte_mempool/rte_mempool.c +@@ -297,8 +297,8 @@ mempool_ops_alloc_once(struct rte_mempool *mp) + * zone. Return the number of objects added, or a negative value + * on error. 
+ */ +-int +-rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, ++static int ++__rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, + rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, + void *opaque) + { +@@ -332,7 +332,7 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, + off = RTE_PTR_ALIGN_CEIL(vaddr, RTE_MEMPOOL_ALIGN) - vaddr; + + if (off > len) { +- ret = -EINVAL; ++ ret = 0; + goto fail; + } + +@@ -343,7 +343,7 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, + + /* not enough room to store one object */ + if (i == 0) { +- ret = -EINVAL; ++ ret = 0; + goto fail; + } + +@@ -356,6 +356,21 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, + return ret; + } + ++int ++rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, ++ rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb, ++ void *opaque) ++{ ++ int ret; ++ ++ ret = __rte_mempool_populate_iova(mp, vaddr, iova, len, free_cb, ++ opaque); ++ if (ret == 0) ++ ret = -EINVAL; ++ ++ return ret; ++} ++ + static rte_iova_t + get_iova(void *addr) + { +@@ -406,8 +421,10 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr, + break; + } + +- ret = rte_mempool_populate_iova(mp, addr + off, iova, ++ ret = __rte_mempool_populate_iova(mp, addr + off, iova, + phys_len, free_cb, opaque); ++ if (ret == 0) ++ continue; + if (ret < 0) + goto fail; + /* no need to call the free callback for next chunks */ +@@ -415,6 +432,9 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr, + cnt += ret; + } + ++ if (cnt == 0) ++ return -EINVAL; ++ + return cnt; + + fail: +@@ -463,6 +483,7 @@ rte_mempool_populate_default(struct rte_mempool *mp) + unsigned mz_id, n; + int ret; + bool need_iova_contig_obj; ++ size_t max_alloc_size = SIZE_MAX; + + ret = mempool_ops_alloc_once(mp); + if (ret != 0) +@@ -542,30 +563,24 @@ rte_mempool_populate_default(struct rte_mempool *mp) + if (min_chunk_size == (size_t)mem_size) + mz_flags |= RTE_MEMZONE_IOVA_CONTIG; + +- mz = rte_memzone_reserve_aligned(mz_name, mem_size, ++ /* Allocate a memzone, retrying with a smaller area on ENOMEM */ ++ do { ++ mz = rte_memzone_reserve_aligned(mz_name, ++ RTE_MIN((size_t)mem_size, max_alloc_size), + mp->socket_id, mz_flags, align); + +- /* don't try reserving with 0 size if we were asked to reserve +- * IOVA-contiguous memory. 
+- */ +- if (min_chunk_size < (size_t)mem_size && mz == NULL) { +- /* not enough memory, retry with the biggest zone we +- * have +- */ +- mz = rte_memzone_reserve_aligned(mz_name, 0, +- mp->socket_id, mz_flags, align); +- } ++ if (mz == NULL && rte_errno != ENOMEM) ++ break; ++ ++ max_alloc_size = RTE_MIN(max_alloc_size, ++ (size_t)mem_size) / 2; ++ } while (mz == NULL && max_alloc_size >= min_chunk_size); ++ + if (mz == NULL) { + ret = -rte_errno; + goto fail; + } + +- if (mz->len < min_chunk_size) { +- rte_memzone_free(mz); +- ret = -ENOMEM; +- goto fail; +- } +- + if (need_iova_contig_obj) + iova = mz->iova; + else +@@ -645,8 +660,10 @@ rte_mempool_populate_anon(struct rte_mempool *mp) + } + + ret = mempool_ops_alloc_once(mp); +- if (ret != 0) +- return ret; ++ if (ret < 0) { ++ rte_errno = -ret; ++ return 0; ++ } + + size = get_anon_size(mp); + if (size < 0) { +@@ -670,8 +687,10 @@ rte_mempool_populate_anon(struct rte_mempool *mp) + + ret = rte_mempool_populate_virt(mp, addr, size, getpagesize(), + rte_mempool_memchunk_anon_free, addr); +- if (ret == 0) ++ if (ret < 0) { ++ rte_errno = -ret; + goto fail; ++ } + + return mp->populated_size; + +diff --git a/dpdk/lib/librte_mempool/rte_mempool.h b/dpdk/lib/librte_mempool/rte_mempool.h +index f81152af96..4907c0808e 100644 +--- a/dpdk/lib/librte_mempool/rte_mempool.h ++++ b/dpdk/lib/librte_mempool/rte_mempool.h +@@ -1167,8 +1167,8 @@ int rte_mempool_populate_default(struct rte_mempool *mp); + * A pointer to the mempool structure. + * @return + * The number of objects added on success. +- * On error, the chunk is not added in the memory list of the +- * mempool and a negative errno is returned. ++ * On error, 0 is returned, rte_errno is set, and the chunk is not added in ++ * the memory list of the mempool. 
+ */ + int rte_mempool_populate_anon(struct rte_mempool *mp); + +@@ -1653,7 +1653,7 @@ rte_mempool_in_use_count(const struct rte_mempool *mp); + static inline int + rte_mempool_full(const struct rte_mempool *mp) + { +- return !!(rte_mempool_avail_count(mp) == mp->size); ++ return rte_mempool_avail_count(mp) == mp->size; + } + + /** +@@ -1672,7 +1672,7 @@ rte_mempool_full(const struct rte_mempool *mp) + static inline int + rte_mempool_empty(const struct rte_mempool *mp) + { +- return !!(rte_mempool_avail_count(mp) == 0); ++ return rte_mempool_avail_count(mp) == 0; + } + + /** +diff --git a/dpdk/lib/librte_mempool/rte_mempool_version.map b/dpdk/lib/librte_mempool/rte_mempool_version.map +index d002dfc46f..d67ed2e2b9 100644 +--- a/dpdk/lib/librte_mempool/rte_mempool_version.map ++++ b/dpdk/lib/librte_mempool/rte_mempool_version.map +@@ -4,18 +4,14 @@ DPDK_20.0 { + rte_mempool_audit; + rte_mempool_avail_count; + rte_mempool_cache_create; +- rte_mempool_cache_flush; + rte_mempool_cache_free; + rte_mempool_calc_obj_size; + rte_mempool_check_cookies; + rte_mempool_contig_blocks_check_cookies; + rte_mempool_create; + rte_mempool_create_empty; +- rte_mempool_default_cache; + rte_mempool_dump; + rte_mempool_free; +- rte_mempool_generic_get; +- rte_mempool_generic_put; + rte_mempool_in_use_count; + rte_mempool_list_dump; + rte_mempool_lookup; +diff --git a/dpdk/lib/librte_pci/rte_pci.c b/dpdk/lib/librte_pci/rte_pci.c +index a753cf3eca..5f7726fa89 100644 +--- a/dpdk/lib/librte_pci/rte_pci.c ++++ b/dpdk/lib/librte_pci/rte_pci.c +@@ -20,6 +20,7 @@ + #include <rte_eal.h> + #include <rte_string_fns.h> + #include <rte_common.h> ++#include <rte_debug.h> + + #include "rte_pci.h" + +@@ -34,6 +35,12 @@ get_u8_pciaddr_field(const char *in, void *_u8, char dlm) + if (*in == '\0') + return NULL; + ++ /* PCI field starting with spaces is forbidden. ++ * Negative wrap-around is not reported as an error by strtoul. ++ */ ++ if (*in == ' ' || *in == '-') ++ return NULL; ++ + errno = 0; + val = strtoul(in, &end, 16); + if (errno != 0 || end[0] != dlm || val > UINT8_MAX) { +@@ -69,11 +76,17 @@ pci_dbdf_parse(const char *input, struct rte_pci_addr *dev_addr) + unsigned long val; + char *end; + ++ /* PCI id starting with spaces is forbidden. ++ * Negative wrap-around is not reported as an error by strtoul. 
++ */ ++ if (*in == ' ' || *in == '-') ++ return -EINVAL; ++ + errno = 0; + val = strtoul(in, &end, 16); +- if (errno != 0 || end[0] != ':' || val > UINT16_MAX) ++ if (errno != 0 || end[0] != ':' || val > UINT32_MAX) + return -EINVAL; +- dev_addr->domain = (uint16_t)val; ++ dev_addr->domain = (uint32_t)val; + in = end + 1; + in = get_u8_pciaddr_field(in, &dev_addr->bus, ':'); + if (in == NULL) +diff --git a/dpdk/lib/librte_pci/rte_pci.h b/dpdk/lib/librte_pci/rte_pci.h +index c87891405c..4087771c1e 100644 +--- a/dpdk/lib/librte_pci/rte_pci.h ++++ b/dpdk/lib/librte_pci/rte_pci.h +@@ -17,16 +17,10 @@ extern "C" { + #endif + + #include <stdio.h> +-#include <stdlib.h> + #include <limits.h> +-#include <errno.h> + #include <sys/queue.h> +-#include <stdint.h> + #include <inttypes.h> + +-#include <rte_debug.h> +-#include <rte_interrupts.h> +- + /** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ + #define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 + #define PCI_PRI_STR_SIZE sizeof("XXXXXXXX:XX:XX.X") +diff --git a/dpdk/lib/librte_security/rte_security.c b/dpdk/lib/librte_security/rte_security.c +index bc81ce15d1..dc9a3e89cd 100644 +--- a/dpdk/lib/librte_security/rte_security.c ++++ b/dpdk/lib/librte_security/rte_security.c +@@ -1,6 +1,7 @@ + /* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 NXP. + * Copyright(c) 2017 Intel Corporation. ++ * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved + */ + + #include <rte_malloc.h> +@@ -9,6 +10,19 @@ + #include "rte_security.h" + #include "rte_security_driver.h" + ++/* Macro to check for invalid pointers */ ++#define RTE_PTR_OR_ERR_RET(ptr, retval) do { \ ++ if ((ptr) == NULL) \ ++ return retval; \ ++} while (0) ++ ++/* Macro to check for invalid pointers chains */ ++#define RTE_PTR_CHAIN3_OR_ERR_RET(p1, p2, p3, retval, last_retval) do { \ ++ RTE_PTR_OR_ERR_RET(p1, retval); \ ++ RTE_PTR_OR_ERR_RET(p1->p2, retval); \ ++ RTE_PTR_OR_ERR_RET(p1->p2->p3, last_retval); \ ++} while (0) ++ + struct rte_security_session * + rte_security_session_create(struct rte_security_ctx *instance, + struct rte_security_session_conf *conf, +@@ -16,10 +30,9 @@ rte_security_session_create(struct rte_security_ctx *instance, + { + struct rte_security_session *sess = NULL; + +- if (conf == NULL) +- return NULL; +- +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_create, NULL); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_create, NULL, NULL); ++ RTE_PTR_OR_ERR_RET(conf, NULL); ++ RTE_PTR_OR_ERR_RET(mp, NULL); + + if (rte_mempool_get(mp, (void **)&sess)) + return NULL; +@@ -38,14 +51,19 @@ rte_security_session_update(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_security_session_conf *conf) + { +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_update, -ENOTSUP); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_update, -EINVAL, ++ -ENOTSUP); ++ RTE_PTR_OR_ERR_RET(sess, -EINVAL); ++ RTE_PTR_OR_ERR_RET(conf, -EINVAL); ++ + return instance->ops->session_update(instance->device, sess, conf); + } + + unsigned int + rte_security_session_get_size(struct rte_security_ctx *instance) + { +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_get_size, 0); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_get_size, 0, 0); ++ + return instance->ops->session_get_size(instance->device); + } + +@@ -54,7 +72,11 @@ rte_security_session_stats_get(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_security_stats *stats) + { +- 
RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_stats_get, -ENOTSUP); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_stats_get, -EINVAL, ++ -ENOTSUP); ++ /* Parameter sess can be NULL in case of getting global statistics. */ ++ RTE_PTR_OR_ERR_RET(stats, -EINVAL); ++ + return instance->ops->session_stats_get(instance->device, sess, stats); + } + +@@ -64,16 +86,20 @@ rte_security_session_destroy(struct rte_security_ctx *instance, + { + int ret; + +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_destroy, -ENOTSUP); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_destroy, -EINVAL, ++ -ENOTSUP); ++ RTE_PTR_OR_ERR_RET(sess, -EINVAL); ++ ++ ret = instance->ops->session_destroy(instance->device, sess); ++ if (ret != 0) ++ return ret; ++ ++ rte_mempool_put(rte_mempool_from_obj(sess), (void *)sess); + + if (instance->sess_cnt) + instance->sess_cnt--; + +- ret = instance->ops->session_destroy(instance->device, sess); +- if (!ret) +- rte_mempool_put(rte_mempool_from_obj(sess), (void *)sess); +- +- return ret; ++ return 0; + } + + int +@@ -81,6 +107,11 @@ rte_security_set_pkt_metadata(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_mbuf *m, void *params) + { ++#ifdef RTE_DEBUG ++ RTE_PTR_OR_ERR_RET(sess, -EINVAL); ++ RTE_PTR_OR_ERR_RET(instance, -EINVAL); ++ RTE_PTR_OR_ERR_RET(instance->ops, -EINVAL); ++#endif + RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->set_pkt_metadata, -ENOTSUP); + return instance->ops->set_pkt_metadata(instance->device, + sess, m, params); +@@ -91,6 +122,10 @@ rte_security_get_userdata(struct rte_security_ctx *instance, uint64_t md) + { + void *userdata = NULL; + ++#ifdef RTE_DEBUG ++ RTE_PTR_OR_ERR_RET(instance, NULL); ++ RTE_PTR_OR_ERR_RET(instance->ops, NULL); ++#endif + RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->get_userdata, NULL); + if (instance->ops->get_userdata(instance->device, md, &userdata)) + return NULL; +@@ -101,7 +136,8 @@ rte_security_get_userdata(struct rte_security_ctx *instance, uint64_t md) + const struct rte_security_capability * + rte_security_capabilities_get(struct rte_security_ctx *instance) + { +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->capabilities_get, NULL); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, capabilities_get, NULL, NULL); ++ + return instance->ops->capabilities_get(instance->device); + } + +@@ -113,7 +149,9 @@ rte_security_capability_get(struct rte_security_ctx *instance, + const struct rte_security_capability *capability; + uint16_t i = 0; + +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->capabilities_get, NULL); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, capabilities_get, NULL, NULL); ++ RTE_PTR_OR_ERR_RET(idx, NULL); ++ + capabilities = instance->ops->capabilities_get(instance->device); + + if (capabilities == NULL) +@@ -121,7 +159,7 @@ rte_security_capability_get(struct rte_security_ctx *instance, + + while ((capability = &capabilities[i++])->action + != RTE_SECURITY_ACTION_TYPE_NONE) { +- if (capability->action == idx->action && ++ if (capability->action == idx->action && + capability->protocol == idx->protocol) { + if (idx->protocol == RTE_SECURITY_PROTOCOL_IPSEC) { + if (capability->ipsec.proto == +diff --git a/dpdk/lib/librte_security/rte_security.h b/dpdk/lib/librte_security/rte_security.h +index 546779df2b..b4b4eb2d85 100644 +--- a/dpdk/lib/librte_security/rte_security.h ++++ b/dpdk/lib/librte_security/rte_security.h +@@ -374,7 +374,7 @@ rte_security_session_create(struct rte_security_ctx *instance, + * @param conf update configuration parameters + * @return + * - On success returns 0 +- * - On 
failure return errno ++ * - On failure returns a negative errno value. + */ + __rte_experimental + int +@@ -399,12 +399,14 @@ rte_security_session_get_size(struct rte_security_ctx *instance); + * return it to its original mempool. + * + * @param instance security instance +- * @param sess security session to freed ++ * @param sess security session to be freed + * + * @return + * - 0 if successful. +- * - -EINVAL if session is NULL. ++ * - -EINVAL if session or context instance is NULL. + * - -EBUSY if not all device private data has been freed. ++ * - -ENOTSUP if destroying private data is not supported. ++ * - other negative values in case of freeing private data errors. + */ + int + rte_security_session_destroy(struct rte_security_ctx *instance, +diff --git a/dpdk/lib/librte_telemetry/rte_telemetry_parser.c b/dpdk/lib/librte_telemetry/rte_telemetry_parser.c +index 9601323970..e8c269e85e 100644 +--- a/dpdk/lib/librte_telemetry/rte_telemetry_parser.c ++++ b/dpdk/lib/librte_telemetry/rte_telemetry_parser.c +@@ -456,9 +456,9 @@ rte_telemetry_command_ports_stats_values_by_name(struct telemetry_impl + size_t index; + json_t *value; + ++ memset(&ep, 0, sizeof(ep)); + ep.pp.num_port_ids = json_array_size(port_ids_json); + ep.pp.num_metric_ids = num_stat_names; +- memset(&ep, 0, sizeof(ep)); + if (telemetry == NULL) { + TELEMETRY_LOG_ERR("Invalid telemetry argument"); + return -1; +diff --git a/dpdk/lib/librte_timer/meson.build b/dpdk/lib/librte_timer/meson.build +index b7edfe2e7d..d3b828ce9d 100644 +--- a/dpdk/lib/librte_timer/meson.build ++++ b/dpdk/lib/librte_timer/meson.build +@@ -4,4 +4,3 @@ + sources = files('rte_timer.c') + headers = files('rte_timer.h') + allow_experimental_apis = true +-use_function_versioning = true +diff --git a/dpdk/lib/librte_timer/rte_timer.c b/dpdk/lib/librte_timer/rte_timer.c +index ca88454ff6..99862a3ba1 100644 +--- a/dpdk/lib/librte_timer/rte_timer.c ++++ b/dpdk/lib/librte_timer/rte_timer.c +@@ -26,7 +26,6 @@ + #include <rte_memzone.h> + #include <rte_malloc.h> + #include <rte_errno.h> +-#include <rte_function_versioning.h> + + #include "rte_timer.h" + +@@ -146,11 +145,13 @@ rte_timer_subsystem_init(void) + const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt); + bool do_full_init = true; + +- if (rte_timer_subsystem_initialized) +- return -EALREADY; +- + rte_mcfg_timer_lock(); + ++ if (rte_timer_subsystem_initialized) { ++ rte_mcfg_timer_unlock(); ++ return -EALREADY; ++ } ++ + mz = rte_memzone_lookup(mz_name); + if (mz == NULL) { + mz = rte_memzone_reserve_aligned(mz_name, mem_size, +@@ -184,27 +185,29 @@ rte_timer_subsystem_init(void) + rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED; + (*rte_timer_mz_refcnt)++; + +- rte_mcfg_timer_unlock(); +- + rte_timer_subsystem_initialized = 1; + ++ rte_mcfg_timer_unlock(); ++ + return 0; + } + + void + rte_timer_subsystem_finalize(void) + { +- if (!rte_timer_subsystem_initialized) +- return; +- + rte_mcfg_timer_lock(); + ++ if (!rte_timer_subsystem_initialized) { ++ rte_mcfg_timer_unlock(); ++ return; ++ } ++ + if (--(*rte_timer_mz_refcnt) == 0) + rte_memzone_free(rte_timer_data_mz); + +- rte_mcfg_timer_unlock(); +- + rte_timer_subsystem_initialized = 0; ++ ++ rte_mcfg_timer_unlock(); + } + + /* Initialize the timer handle tim for use */ +diff --git a/dpdk/lib/librte_vhost/iotlb.c b/dpdk/lib/librte_vhost/iotlb.c +index 4a1d8c1253..07443a94bc 100644 +--- a/dpdk/lib/librte_vhost/iotlb.c ++++ b/dpdk/lib/librte_vhost/iotlb.c +@@ -308,8 +308,9 @@ vhost_user_iotlb_init(struct virtio_net *dev, 
int vq_index) + TAILQ_INIT(&vq->iotlb_list); + TAILQ_INIT(&vq->iotlb_pending_list); + +- snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d", +- dev->vid, vq_index); ++ snprintf(pool_name, sizeof(pool_name), "iotlb_%u_%d_%d", ++ getpid(), dev->vid, vq_index); ++ RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB cache name: %s\n", pool_name); + + /* If already created, free it and recreate */ + vq->iotlb_pool = rte_mempool_lookup(pool_name); +diff --git a/dpdk/lib/librte_vhost/rte_vhost.h b/dpdk/lib/librte_vhost/rte_vhost.h +index 7b5dc87c2e..532ee0dec7 100644 +--- a/dpdk/lib/librte_vhost/rte_vhost.h ++++ b/dpdk/lib/librte_vhost/rte_vhost.h +@@ -68,6 +68,10 @@ extern "C" { + #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8 + #endif + ++#ifndef VHOST_USER_PROTOCOL_F_CONFIG ++#define VHOST_USER_PROTOCOL_F_CONFIG 9 ++#endif ++ + #ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD + #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10 + #endif +@@ -85,6 +89,7 @@ extern "C" { + #define VHOST_USER_F_PROTOCOL_FEATURES 30 + #endif + ++ + /** + * Information relating to memory regions including offsets to + * addresses in QEMUs memory file. +@@ -253,7 +258,7 @@ struct vhost_device_ops { + + /** + * This callback gets called each time a guest gets notified +- * about waiting packets. This is the interrupt handling trough ++ * about waiting packets. This is the interrupt handling through + * the eventfd_write(callfd), which can be used for counting these + * "slow" syscalls. + */ +diff --git a/dpdk/lib/librte_vhost/socket.c b/dpdk/lib/librte_vhost/socket.c +index ebb2ff6c28..2461549fea 100644 +--- a/dpdk/lib/librte_vhost/socket.c ++++ b/dpdk/lib/librte_vhost/socket.c +@@ -127,7 +127,8 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds, + + ret = recvmsg(sockfd, &msgh, 0); + if (ret <= 0) { +- RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); ++ if (ret) ++ RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); + return ret; + } + +@@ -318,16 +319,16 @@ vhost_user_read_cb(int connfd, void *dat, int *remove) + + vhost_destroy_device(conn->vid); + ++ if (vsocket->reconnect) { ++ create_unix_socket(vsocket); ++ vhost_user_start_client(vsocket); ++ } ++ + pthread_mutex_lock(&vsocket->conn_mutex); + TAILQ_REMOVE(&vsocket->conn_list, conn, next); + pthread_mutex_unlock(&vsocket->conn_mutex); + + free(conn); +- +- if (vsocket->reconnect) { +- create_unix_socket(vsocket); +- vhost_user_start_client(vsocket); +- } + } + } + +@@ -877,6 +878,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags) + "error: failed to init connection mutex\n"); + goto out_free; + } ++ vsocket->vdpa_dev_id = -1; + vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY; + vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT; + vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT; +@@ -924,6 +926,12 @@ rte_vhost_driver_register(const char *path, uint64_t flags) + ret = -1; + goto out_mutex; + } ++ if ((flags & RTE_VHOST_USER_CLIENT) != 0) { ++ RTE_LOG(ERR, VHOST_CONFIG, ++ "error: zero copy is incompatible with vhost client mode\n"); ++ ret = -1; ++ goto out_mutex; ++ } + vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER); + vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER); + +@@ -1051,9 +1059,10 @@ rte_vhost_driver_unregister(const char *path) + next = TAILQ_NEXT(conn, next); + + /* +- * If r/wcb is executing, release the +- * conn_mutex lock, and try again since +- * the r/wcb may use the conn_mutex lock. 
++ * If r/wcb is executing, release vsocket's ++ * conn_mutex and vhost_user's mutex locks, and ++ * try again since the r/wcb may use the ++ * conn_mutex and mutex locks. + */ + if (fdset_try_del(&vhost_user.fdset, + conn->connfd) == -1) { +@@ -1074,8 +1083,17 @@ rte_vhost_driver_unregister(const char *path) + pthread_mutex_unlock(&vsocket->conn_mutex); + + if (vsocket->is_server) { +- fdset_del(&vhost_user.fdset, +- vsocket->socket_fd); ++ /* ++ * If r/wcb is executing, release vhost_user's ++ * mutex lock, and try again since the r/wcb ++ * may use the mutex lock. ++ */ ++ if (fdset_try_del(&vhost_user.fdset, ++ vsocket->socket_fd) == -1) { ++ pthread_mutex_unlock(&vhost_user.mutex); ++ goto again; ++ } ++ + close(vsocket->socket_fd); + unlink(path); + } else if (vsocket->reconnect) { +diff --git a/dpdk/lib/librte_vhost/vhost.c b/dpdk/lib/librte_vhost/vhost.c +index 1cbe948f74..20fda61518 100644 +--- a/dpdk/lib/librte_vhost/vhost.c ++++ b/dpdk/lib/librte_vhost/vhost.c +@@ -350,6 +350,57 @@ free_device(struct virtio_net *dev) + rte_free(dev); + } + ++static __rte_always_inline int ++log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq) ++{ ++ if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG)))) ++ return 0; ++ ++ vq->log_guest_addr = translate_log_addr(dev, vq, ++ vq->ring_addrs.log_guest_addr); ++ if (vq->log_guest_addr == 0) ++ return -1; ++ ++ return 0; ++} ++ ++/* ++ * Converts vring log address to GPA ++ * If IOMMU is enabled, the log address is IOVA ++ * If IOMMU not enabled, the log address is already GPA ++ * ++ * Caller should have iotlb_lock read-locked ++ */ ++uint64_t ++translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq, ++ uint64_t log_addr) ++{ ++ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) { ++ const uint64_t exp_size = sizeof(uint64_t); ++ uint64_t hva, gpa; ++ uint64_t size = exp_size; ++ ++ hva = vhost_iova_to_vva(dev, vq, log_addr, ++ &size, VHOST_ACCESS_RW); ++ ++ if (size != exp_size) ++ return 0; ++ ++ gpa = hva_to_gpa(dev, hva, exp_size); ++ if (!gpa) { ++ RTE_LOG(ERR, VHOST_CONFIG, ++ "VQ: Failed to find GPA for log_addr: 0x%" ++ PRIx64 " hva: 0x%" PRIx64 "\n", ++ log_addr, hva); ++ return 0; ++ } ++ return gpa; ++ ++ } else ++ return log_addr; ++} ++ ++/* Caller should have iotlb_lock read-locked */ + static int + vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq) + { +@@ -388,6 +439,7 @@ vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq) + return 0; + } + ++/* Caller should have iotlb_lock read-locked */ + static int + vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq) + { +@@ -434,6 +486,10 @@ vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq) + if (vring_translate_split(dev, vq) < 0) + return -1; + } ++ ++ if (log_translate(dev, vq) < 0) ++ return -1; ++ + vq->access_ok = 1; + + return 0; +diff --git a/dpdk/lib/librte_vhost/vhost.h b/dpdk/lib/librte_vhost/vhost.h +index 9f11b28a31..844904ca3b 100644 +--- a/dpdk/lib/librte_vhost/vhost.h ++++ b/dpdk/lib/librte_vhost/vhost.h +@@ -462,14 +462,23 @@ static __rte_always_inline void + vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) + { +- vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, len); ++ if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL))) { ++ if (unlikely(vq->log_guest_addr == 0)) ++ return; ++ __vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, ++ len); ++ } + } + + static 
__rte_always_inline void + vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) + { +- vhost_log_write(dev, vq->log_guest_addr + offset, len); ++ if (unlikely(dev->features & (1ULL << VHOST_F_LOG_ALL))) { ++ if (unlikely(vq->log_guest_addr == 0)) ++ return; ++ __vhost_log_write(dev, vq->log_guest_addr + offset, len); ++ } + } + + static __rte_always_inline void +@@ -528,7 +537,6 @@ vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq, + #define PRINT_PACKET(device, addr, size, header) do {} while (0) + #endif + +-extern uint64_t VHOST_FEATURES; + #define MAX_VHOST_DEVICE 1024 + extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; + +@@ -620,6 +628,8 @@ void *vhost_alloc_copy_ind_table(struct virtio_net *dev, + struct vhost_virtqueue *vq, + uint64_t desc_addr, uint64_t desc_len); + int vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq); ++uint64_t translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq, ++ uint64_t log_addr); + void vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq); + + static __rte_always_inline uint64_t +diff --git a/dpdk/lib/librte_vhost/vhost_crypto.c b/dpdk/lib/librte_vhost/vhost_crypto.c +index 684fddc30b..0f9df4059d 100644 +--- a/dpdk/lib/librte_vhost/vhost_crypto.c ++++ b/dpdk/lib/librte_vhost/vhost_crypto.c +@@ -40,7 +40,8 @@ + (1 << VIRTIO_RING_F_EVENT_IDX) | \ + (1 << VIRTIO_CRYPTO_SERVICE_CIPHER) | \ + (1 << VIRTIO_CRYPTO_SERVICE_MAC) | \ +- (1 << VIRTIO_NET_F_CTRL_VQ)) ++ (1 << VIRTIO_NET_F_CTRL_VQ) | \ ++ (1 << VHOST_USER_PROTOCOL_F_CONFIG)) + + #define IOVA_TO_VVA(t, r, a, l, p) \ + ((t)(uintptr_t)vhost_iova_to_vva(r->dev, r->vq, a, l, p)) +@@ -237,6 +238,11 @@ transform_cipher_param(struct rte_crypto_sym_xform *xform, + if (unlikely(ret < 0)) + return ret; + ++ if (param->cipher_key_len > VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH) { ++ VC_LOG_DBG("Invalid cipher key length\n"); ++ return -VIRTIO_CRYPTO_BADMSG; ++ } ++ + xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + xform->cipher.key.length = param->cipher_key_len; + if (xform->cipher.key.length > 0) +@@ -287,6 +293,12 @@ transform_chain_param(struct rte_crypto_sym_xform *xforms, + &xform_cipher->cipher.algo); + if (unlikely(ret < 0)) + return ret; ++ ++ if (param->cipher_key_len > VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH) { ++ VC_LOG_DBG("Invalid cipher key length\n"); ++ return -VIRTIO_CRYPTO_BADMSG; ++ } ++ + xform_cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + xform_cipher->cipher.key.length = param->cipher_key_len; + xform_cipher->cipher.key.data = param->cipher_key_buf; +@@ -301,6 +313,12 @@ transform_chain_param(struct rte_crypto_sym_xform *xforms, + ret = auth_algo_transform(param->hash_algo, &xform_auth->auth.algo); + if (unlikely(ret < 0)) + return ret; ++ ++ if (param->auth_key_len > VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH) { ++ VC_LOG_DBG("Invalid auth key length\n"); ++ return -VIRTIO_CRYPTO_BADMSG; ++ } ++ + xform_auth->auth.digest_length = param->digest_len; + xform_auth->auth.key.length = param->auth_key_len; + xform_auth->auth.key.data = param->auth_key_buf; +@@ -1539,18 +1557,18 @@ rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, + + if (unlikely(dev == NULL)) { + VC_LOG_ERR("Invalid vid %i", vid); +- return -EINVAL; ++ return 0; + } + + if (unlikely(qid >= VHOST_MAX_QUEUE_PAIRS)) { + VC_LOG_ERR("Invalid qid %u", qid); +- return -EINVAL; ++ return 0; + } + + vcrypto = (struct vhost_crypto *)dev->extern_data; + if (unlikely(vcrypto == NULL)) { + VC_LOG_ERR("Cannot 
find required data, is it initialized?"); +- return -ENOENT; ++ return 0; + } + + vq = dev->virtqueue[qid]; +@@ -1572,7 +1590,7 @@ rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, + if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool, + (void **)mbufs, count * 2) < 0)) { + VC_LOG_ERR("Insufficient memory"); +- return -ENOMEM; ++ return 0; + } + + for (i = 0; i < count; i++) { +@@ -1602,7 +1620,7 @@ rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, + if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool, + (void **)mbufs, count) < 0)) { + VC_LOG_ERR("Insufficient memory"); +- return -ENOMEM; ++ return 0; + } + + for (i = 0; i < count; i++) { +diff --git a/dpdk/lib/librte_vhost/vhost_user.c b/dpdk/lib/librte_vhost/vhost_user.c +index 0cfb8b792b..31080be2bd 100644 +--- a/dpdk/lib/librte_vhost/vhost_user.c ++++ b/dpdk/lib/librte_vhost/vhost_user.c +@@ -206,7 +206,7 @@ vhost_backend_cleanup(struct virtio_net *dev) + dev->inflight_info->addr = NULL; + } + +- if (dev->inflight_info->fd > 0) { ++ if (dev->inflight_info->fd >= 0) { + close(dev->inflight_info->fd); + dev->inflight_info->fd = -1; + } +@@ -656,13 +656,11 @@ ring_addr_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, + { + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) { + uint64_t vva; +- uint64_t req_size = *size; + +- vva = vhost_user_iotlb_cache_find(vq, ra, ++ vhost_user_iotlb_rd_lock(vq); ++ vva = vhost_iova_to_vva(dev, vq, ra, + size, VHOST_ACCESS_RW); +- if (req_size != *size) +- vhost_user_iotlb_miss(dev, (ra + *size), +- VHOST_ACCESS_RW); ++ vhost_user_iotlb_rd_unlock(vq); + + return vva; + } +@@ -670,37 +668,16 @@ ring_addr_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq, + return qva_to_vva(dev, ra, size); + } + +-/* +- * Converts vring log address to GPA +- * If IOMMU is enabled, the log address is IOVA +- * If IOMMU not enabled, the log address is already GPA +- */ + static uint64_t +-translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq, +- uint64_t log_addr) ++log_addr_to_gpa(struct virtio_net *dev, struct vhost_virtqueue *vq) + { +- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) { +- const uint64_t exp_size = sizeof(struct vring_used) + +- sizeof(struct vring_used_elem) * vq->size; +- uint64_t hva, gpa; +- uint64_t size = exp_size; +- +- hva = vhost_iova_to_vva(dev, vq, log_addr, +- &size, VHOST_ACCESS_RW); +- if (size != exp_size) +- return 0; ++ uint64_t log_gpa; + +- gpa = hva_to_gpa(dev, hva, exp_size); +- if (!gpa) { +- RTE_LOG(ERR, VHOST_CONFIG, +- "VQ: Failed to find GPA for log_addr: 0x%" PRIx64 " hva: 0x%" PRIx64 "\n", +- log_addr, hva); +- return 0; +- } +- return gpa; ++ vhost_user_iotlb_rd_lock(vq); ++ log_gpa = translate_log_addr(dev, vq, vq->ring_addrs.log_guest_addr); ++ vhost_user_iotlb_rd_unlock(vq); + +- } else +- return log_addr; ++ return log_gpa; + } + + static struct virtio_net * +@@ -712,7 +689,7 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) + + if (addr->flags & (1 << VHOST_VRING_F_LOG)) { + vq->log_guest_addr = +- translate_log_addr(dev, vq, addr->log_guest_addr); ++ log_addr_to_gpa(dev, vq); + if (vq->log_guest_addr == 0) { + RTE_LOG(DEBUG, VHOST_CONFIG, + "(%d) failed to map log_guest_addr.\n", +@@ -1145,6 +1122,21 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg, + goto err_mmap; + } + mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment); ++ if (mmap_size == 0) { ++ /* ++ * It could happen if initial mmap_size + alignment ++ * overflows the sizeof uint64, which could happen if ++ * 
either mmap_size or alignment value is wrong. ++ * ++ * mmap() kernel implementation would return an error, ++ * but better catch it before and provide useful info ++ * in the logs. ++ */ ++ RTE_LOG(ERR, VHOST_CONFIG, "mmap size (0x%" PRIx64 ") " ++ "or alignment (0x%" PRIx64 ") is invalid\n", ++ reg->size + mmap_offset, alignment); ++ goto err_mmap; ++ } + + populate = (dev->dequeue_zero_copy) ? MAP_POPULATE : 0; + mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, +@@ -1298,7 +1290,8 @@ vq_is_ready(struct virtio_net *dev, struct vhost_virtqueue *vq) + return false; + + if (vq_is_packed(dev)) +- rings_ok = !!vq->desc_packed; ++ rings_ok = vq->desc_packed && vq->driver_event && ++ vq->device_event; + else + rings_ok = vq->desc && vq->avail && vq->used; + +@@ -1415,6 +1408,7 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, + "failed to alloc dev inflight area\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } ++ dev->inflight_info->fd = -1; + } + + num_queues = msg->payload.inflight.num_queues; +@@ -1440,6 +1434,16 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, + } + memset(addr, 0, mmap_size); + ++ if (dev->inflight_info->addr) { ++ munmap(dev->inflight_info->addr, dev->inflight_info->size); ++ dev->inflight_info->addr = NULL; ++ } ++ ++ if (dev->inflight_info->fd >= 0) { ++ close(dev->inflight_info->fd); ++ dev->inflight_info->fd = -1; ++ } ++ + dev->inflight_info->addr = addr; + dev->inflight_info->size = msg->payload.inflight.mmap_size = mmap_size; + dev->inflight_info->fd = msg->fds[0] = fd; +@@ -1522,10 +1526,13 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + "failed to alloc dev inflight area\n"); + return RTE_VHOST_MSG_RESULT_ERR; + } ++ dev->inflight_info->fd = -1; + } + +- if (dev->inflight_info->addr) ++ if (dev->inflight_info->addr) { + munmap(dev->inflight_info->addr, dev->inflight_info->size); ++ dev->inflight_info->addr = NULL; ++ } + + addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, mmap_offset); +@@ -1534,8 +1541,10 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + return RTE_VHOST_MSG_RESULT_ERR; + } + +- if (dev->inflight_info->fd) ++ if (dev->inflight_info->fd >= 0) { + close(dev->inflight_info->fd); ++ dev->inflight_info->fd = -1; ++ } + + dev->inflight_info->fd = fd; + dev->inflight_info->addr = addr; +@@ -1629,8 +1638,11 @@ vhost_check_queue_inflights_split(struct virtio_net *dev, + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return RTE_VHOST_MSG_RESULT_OK; + ++ /* The frontend may still not support the inflight feature ++ * although we negotiate the protocol feature. ++ */ + if ((!vq->inflight_split)) +- return RTE_VHOST_MSG_RESULT_ERR; ++ return RTE_VHOST_MSG_RESULT_OK; + + if (!vq->inflight_split->version) { + vq->inflight_split->version = INFLIGHT_VERSION; +@@ -1710,8 +1722,11 @@ vhost_check_queue_inflights_packed(struct virtio_net *dev, + (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))) + return RTE_VHOST_MSG_RESULT_OK; + ++ /* The frontend may still not support the inflight feature ++ * although we negotiate the protocol feature. 
++ */ + if ((!vq->inflight_packed)) +- return RTE_VHOST_MSG_RESULT_ERR; ++ return RTE_VHOST_MSG_RESULT_OK; + + if (!vq->inflight_packed->version) { + vq->inflight_packed->version = INFLIGHT_VERSION; +@@ -2060,10 +2075,10 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg, + size = msg->payload.log.mmap_size; + off = msg->payload.log.mmap_offset; + +- /* Don't allow mmap_offset to point outside the mmap region */ +- if (off > size) { ++ /* Check for mmap size and offset overflow. */ ++ if (off >= -size) { + RTE_LOG(ERR, VHOST_CONFIG, +- "log offset %#"PRIx64" exceeds log size %#"PRIx64"\n", ++ "log offset %#"PRIx64" and log size %#"PRIx64" overflow\n", + off, size); + return RTE_VHOST_MSG_RESULT_ERR; + } +@@ -2229,6 +2244,13 @@ is_vring_iotlb_split(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg) + if (ra->used_user_addr < end && (ra->used_user_addr + len) > start) + return 1; + ++ if (ra->flags & (1 << VHOST_VRING_F_LOG)) { ++ len = sizeof(uint64_t); ++ if (ra->log_guest_addr < end && ++ (ra->log_guest_addr + len) > start) ++ return 1; ++ } ++ + return 0; + } + +@@ -2254,6 +2276,13 @@ is_vring_iotlb_packed(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg) + if (ra->used_user_addr < end && (ra->used_user_addr + len) > start) + return 1; + ++ if (ra->flags & (1 << VHOST_VRING_F_LOG)) { ++ len = sizeof(uint64_t); ++ if (ra->log_guest_addr < end && ++ (ra->log_guest_addr + len) > start) ++ return 1; ++ } ++ + return 0; + } + +@@ -2440,8 +2469,13 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg) + + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, + msg->fds, VHOST_MEMORY_MAX_NREGIONS, &msg->fd_num); +- if (ret <= 0) ++ if (ret <= 0) { + return ret; ++ } else if (ret != VHOST_USER_HDR_SIZE) { ++ RTE_LOG(ERR, VHOST_CONFIG, "Unexpected header size read\n"); ++ close_msg_fds(msg); ++ return -1; ++ } + + if (msg->size) { + if (msg->size > sizeof(msg->payload)) { +@@ -2508,7 +2542,7 @@ static int + vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, + struct VhostUserMsg *msg) + { +- uint16_t vring_idx; ++ uint32_t vring_idx; + + switch (msg->request.master) { + case VHOST_USER_SET_VRING_KICK: +@@ -2794,11 +2828,19 @@ static int process_slave_message_reply(struct virtio_net *dev, + if ((msg->flags & VHOST_USER_NEED_REPLY) == 0) + return 0; + +- if (read_vhost_message(dev->slave_req_fd, &msg_reply) < 0) { ++ ret = read_vhost_message(dev->slave_req_fd, &msg_reply); ++ if (ret <= 0) { ++ if (ret < 0) ++ RTE_LOG(ERR, VHOST_CONFIG, ++ "vhost read slave message reply failed\n"); ++ else ++ RTE_LOG(INFO, VHOST_CONFIG, ++ "vhost peer closed\n"); + ret = -1; + goto out; + } + ++ ret = 0; + if (msg_reply.request.slave != msg->request.slave) { + RTE_LOG(ERR, VHOST_CONFIG, + "Received unexpected msg type (%u), expected %u\n", +diff --git a/dpdk/lib/librte_vhost/virtio_net.c b/dpdk/lib/librte_vhost/virtio_net.c +index 21c311732a..a6c106c13c 100644 +--- a/dpdk/lib/librte_vhost/virtio_net.c ++++ b/dpdk/lib/librte_vhost/virtio_net.c +@@ -43,6 +43,36 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring) + return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring; + } + ++static inline void ++do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) ++{ ++ struct batch_copy_elem *elem = vq->batch_copy_elems; ++ uint16_t count = vq->batch_copy_nb_elems; ++ int i; ++ ++ for (i = 0; i < count; i++) { ++ rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); ++ vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, ++ 
elem[i].len); ++ PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); ++ } ++ ++ vq->batch_copy_nb_elems = 0; ++} ++ ++static inline void ++do_data_copy_dequeue(struct vhost_virtqueue *vq) ++{ ++ struct batch_copy_elem *elem = vq->batch_copy_elems; ++ uint16_t count = vq->batch_copy_nb_elems; ++ int i; ++ ++ for (i = 0; i < count; i++) ++ rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); ++ ++ vq->batch_copy_nb_elems = 0; ++} ++ + static __rte_always_inline void + do_flush_shadow_used_ring_split(struct virtio_net *dev, + struct vhost_virtqueue *vq, +@@ -186,6 +216,11 @@ vhost_flush_enqueue_batch_packed(struct virtio_net *dev, + uint16_t i; + uint16_t flags; + ++ if (vq->shadow_used_idx) { ++ do_data_copy_enqueue(dev, vq); ++ vhost_flush_enqueue_shadow_packed(dev, vq); ++ } ++ + flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter); + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { +@@ -325,36 +360,6 @@ vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq, + vq_inc_last_used_packed(vq, count); + } + +-static inline void +-do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq) +-{ +- struct batch_copy_elem *elem = vq->batch_copy_elems; +- uint16_t count = vq->batch_copy_nb_elems; +- int i; +- +- for (i = 0; i < count; i++) { +- rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); +- vhost_log_cache_write_iova(dev, vq, elem[i].log_addr, +- elem[i].len); +- PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0); +- } +- +- vq->batch_copy_nb_elems = 0; +-} +- +-static inline void +-do_data_copy_dequeue(struct vhost_virtqueue *vq) +-{ +- struct batch_copy_elem *elem = vq->batch_copy_elems; +- uint16_t count = vq->batch_copy_nb_elems; +- int i; +- +- for (i = 0; i < count; i++) +- rte_memcpy(elem[i].dst, elem[i].src, elem[i].len); +- +- vq->batch_copy_nb_elems = 0; +-} +- + static __rte_always_inline void + vhost_shadow_enqueue_single_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, +@@ -382,25 +387,6 @@ vhost_shadow_enqueue_single_packed(struct virtio_net *dev, + } + } + +-static __rte_always_inline void +-vhost_flush_dequeue_packed(struct virtio_net *dev, +- struct vhost_virtqueue *vq) +-{ +- int shadow_count; +- if (!vq->shadow_used_idx) +- return; +- +- shadow_count = vq->last_used_idx - vq->shadow_last_used_idx; +- if (shadow_count <= 0) +- shadow_count += vq->size; +- +- if ((uint32_t)shadow_count >= (vq->size - MAX_PKT_BURST)) { +- do_data_copy_dequeue(vq); +- vhost_flush_dequeue_shadow_packed(dev, vq); +- vhost_vring_call_packed(dev, vq); +- } +-} +- + /* avoid write operation when necessary, to lessen cache issues */ + #define ASSIGN_UNLESS_EQUAL(var, val) do { \ + if ((var) != (val)) \ +@@ -1086,6 +1072,8 @@ virtio_dev_rx_batch_packed(struct virtio_net *dev, + VHOST_ACCESS_RW); + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { ++ if (unlikely(!desc_addrs[i])) ++ return -1; + if (unlikely(lens[i] != descs[avail_idx + i].len)) + return -1; + } +@@ -1688,6 +1676,8 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, + { + uint16_t i; + uint16_t free_entries; ++ uint16_t dropped = 0; ++ static bool allocerr_warned; + + if (unlikely(dev->dequeue_zero_copy)) { + struct zcopy_mbuf *zmbuf, *next; +@@ -1751,13 +1741,35 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, + update_shadow_used_ring_split(vq, head_idx, 0); + + pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len); +- if (unlikely(pkts[i] == NULL)) ++ if (unlikely(pkts[i] == NULL)) { ++ /* ++ * mbuf 
allocation fails for jumbo packets when external ++ * buffer allocation is not allowed and linear buffer ++ * is required. Drop this packet. ++ */ ++ if (!allocerr_warned) { ++ RTE_LOG(ERR, VHOST_DATA, ++ "Failed mbuf alloc of size %d from %s on %s.\n", ++ buf_len, mbuf_pool->name, dev->ifname); ++ allocerr_warned = true; ++ } ++ dropped += 1; ++ i++; + break; ++ } + + err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], + mbuf_pool); + if (unlikely(err)) { + rte_pktmbuf_free(pkts[i]); ++ if (!allocerr_warned) { ++ RTE_LOG(ERR, VHOST_DATA, ++ "Failed to copy desc to mbuf on %s.\n", ++ dev->ifname); ++ allocerr_warned = true; ++ } ++ dropped += 1; ++ i++; + break; + } + +@@ -1767,6 +1779,8 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, + zmbuf = get_zmbuf(vq); + if (!zmbuf) { + rte_pktmbuf_free(pkts[i]); ++ dropped += 1; ++ i++; + break; + } + zmbuf->mbuf = pkts[i]; +@@ -1796,7 +1810,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, + } + } + +- return i; ++ return (i - dropped); + } + + static __rte_always_inline int +@@ -1841,6 +1855,8 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev, + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { ++ if (unlikely(!desc_addrs[i])) ++ return -1; + if (unlikely((lens[i] != descs[avail_idx + i].len))) + return -1; + } +@@ -1928,6 +1944,7 @@ vhost_dequeue_single_packed(struct virtio_net *dev, + uint32_t buf_len; + uint16_t nr_vec = 0; + int err; ++ static bool allocerr_warned; + + if (unlikely(fill_vec_buf_packed(dev, vq, + vq->last_avail_idx, desc_count, +@@ -1938,14 +1955,24 @@ vhost_dequeue_single_packed(struct virtio_net *dev, + + *pkts = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len); + if (unlikely(*pkts == NULL)) { +- RTE_LOG(ERR, VHOST_DATA, +- "Failed to allocate memory for mbuf.\n"); ++ if (!allocerr_warned) { ++ RTE_LOG(ERR, VHOST_DATA, ++ "Failed mbuf alloc of size %d from %s on %s.\n", ++ buf_len, mbuf_pool->name, dev->ifname); ++ allocerr_warned = true; ++ } + return -1; + } + + err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, *pkts, + mbuf_pool); + if (unlikely(err)) { ++ if (!allocerr_warned) { ++ RTE_LOG(ERR, VHOST_DATA, ++ "Failed to copy desc to mbuf on %s.\n", ++ dev->ifname); ++ allocerr_warned = true; ++ } + rte_pktmbuf_free(*pkts); + return -1; + } +@@ -1960,21 +1987,24 @@ virtio_dev_tx_single_packed(struct virtio_net *dev, + struct rte_mbuf **pkts) + { + +- uint16_t buf_id, desc_count; ++ uint16_t buf_id, desc_count = 0; ++ int ret; + +- if (vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, +- &desc_count)) +- return -1; ++ ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, ++ &desc_count); + +- if (virtio_net_is_inorder(dev)) +- vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, +- desc_count); +- else +- vhost_shadow_dequeue_single_packed(vq, buf_id, desc_count); ++ if (likely(desc_count > 0)) { ++ if (virtio_net_is_inorder(dev)) ++ vhost_shadow_dequeue_single_packed_inorder(vq, buf_id, ++ desc_count); ++ else ++ vhost_shadow_dequeue_single_packed(vq, buf_id, ++ desc_count); + +- vq_inc_last_avail_packed(vq, desc_count); ++ vq_inc_last_avail_packed(vq, desc_count); ++ } + +- return 0; ++ return ret; + } + + static __rte_always_inline int +@@ -2004,7 +2034,7 @@ virtio_dev_tx_batch_packed_zmbuf(struct virtio_net *dev, + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + zmbufs[i]->mbuf = pkts[i]; +- zmbufs[i]->desc_idx = avail_idx + i; ++ zmbufs[i]->desc_idx = ids[i]; + zmbufs[i]->desc_count = 1; + } + +@@ 
-2045,7 +2075,7 @@ virtio_dev_tx_single_packed_zmbuf(struct virtio_net *dev, + return -1; + } + zmbuf->mbuf = *pkts; +- zmbuf->desc_idx = vq->last_avail_idx; ++ zmbuf->desc_idx = buf_id; + zmbuf->desc_count = desc_count; + + rte_mbuf_refcnt_update(*pkts, 1); +@@ -2149,7 +2179,6 @@ virtio_dev_tx_packed(struct virtio_net *dev, + if (remained >= PACKED_BATCH_SIZE) { + if (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool, + &pkts[pkt_idx])) { +- vhost_flush_dequeue_packed(dev, vq); + pkt_idx += PACKED_BATCH_SIZE; + remained -= PACKED_BATCH_SIZE; + continue; +@@ -2159,15 +2188,18 @@ virtio_dev_tx_packed(struct virtio_net *dev, + if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool, + &pkts[pkt_idx])) + break; +- vhost_flush_dequeue_packed(dev, vq); + pkt_idx++; + remained--; + + } while (remained); + +- if (vq->shadow_used_idx) ++ if (vq->shadow_used_idx) { + do_data_copy_dequeue(vq); + ++ vhost_flush_dequeue_shadow_packed(dev, vq); ++ vhost_vring_call_packed(dev, vq); ++ } ++ + return pkt_idx; + } + +diff --git a/dpdk/lib/meson.build b/dpdk/lib/meson.build +index 6ceb5e756e..d5a507fb43 100644 +--- a/dpdk/lib/meson.build ++++ b/dpdk/lib/meson.build +@@ -148,12 +148,16 @@ foreach l:libraries + command: [map_to_def_cmd, '@INPUT@', '@OUTPUT@'], + input: version_map, + output: 'rte_@0@_exports.def'.format(name)) +- lk_deps = [version_map, def_file] +- if is_windows ++ ++ if is_ms_linker + lk_args = ['-Wl,/def:' + def_file.full_path(), + '-Wl,/implib:lib\\' + implib] + else + lk_args = ['-Wl,--version-script=' + version_map] ++ endif ++ ++ lk_deps = [version_map, def_file] ++ if not is_windows + # on unix systems check the output of the + # experimental syms script, using it as a + # dependency of the .so build +diff --git a/dpdk/meson_options.txt b/dpdk/meson_options.txt +index bc369d06c9..0de16b4fdb 100644 +--- a/dpdk/meson_options.txt ++++ b/dpdk/meson_options.txt +@@ -12,8 +12,8 @@ option('examples', type: 'string', value: '', + description: 'Comma-separated list of examples to build by default') + option('flexran_sdk', type: 'string', value: '', + description: 'Path to FlexRAN SDK optional Libraries for BBDEV device') +-option('ibverbs_link', type: 'combo', choices : ['shared', 'dlopen'], value: 'shared', +- description: 'Linkage method (shared/dlopen) for Mellanox PMDs with ibverbs dependencies.') ++option('ibverbs_link', type: 'combo', choices : ['static', 'shared', 'dlopen'], value: 'shared', ++ description: 'Linkage method (static/shared/dlopen) for Mellanox PMDs with ibverbs dependencies.') + option('include_subdir_arch', type: 'string', value: '', + description: 'subdirectory where to install arch-dependent headers') + option('kernel_dir', type: 'string', value: '', +diff --git a/dpdk/mk/internal/rte.compile-pre.mk b/dpdk/mk/internal/rte.compile-pre.mk +index 0cf3791b4d..82fe098f7c 100644 +--- a/dpdk/mk/internal/rte.compile-pre.mk ++++ b/dpdk/mk/internal/rte.compile-pre.mk +@@ -61,7 +61,7 @@ CHECK_EXPERIMENTAL = $(EXPERIMENTAL_CHECK) $(SRCDIR)/$(EXPORT_MAP) $@ + + PMDINFO_GEN = $(RTE_SDK_BIN)/app/dpdk-pmdinfogen $@ $@.pmd.c + PMDINFO_CC = $(CC) $(CPPFLAGS) $(CFLAGS) $(EXTRA_CFLAGS) -c -o $@.pmd.o $@.pmd.c +-PMDINFO_LD = $(CROSS)ld $(LDFLAGS) -r -o $@.o $@.pmd.o $@ ++PMDINFO_LD = $(CROSS)ld -r $(filter-out -export-dynamic,$(LDFLAGS)) -o $@.o $@.pmd.o $@ + PMDINFO_TO_O = if grep -q 'RTE_PMD_REGISTER_.*(.*)' $<; then \ + echo "$(if $V,$(PMDINFO_GEN), PMDINFO $@.pmd.c)" && \ + $(PMDINFO_GEN) && \ +diff --git a/dpdk/mk/rte.app.mk b/dpdk/mk/rte.app.mk +index 05ea034b99..44dd684cb1 100644 +--- 
a/dpdk/mk/rte.app.mk ++++ b/dpdk/mk/rte.app.mk +@@ -196,8 +196,12 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -ldl + _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += -ldl + else ifeq ($(CONFIG_RTE_IBVERBS_LINK_STATIC),y) + LIBS_IBVERBS_STATIC = $(shell $(RTE_SDK)/buildtools/options-ibverbs-static.sh) ++_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += --no-whole-archive + _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += $(LIBS_IBVERBS_STATIC) ++_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += --whole-archive ++_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += --no-whole-archive + _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += $(LIBS_IBVERBS_STATIC) ++_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += --whole-archive + else + _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -libverbs -lmlx4 + _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += -libverbs -lmlx5 +diff --git a/dpdk/mk/toolchain/gcc/rte.vars.mk b/dpdk/mk/toolchain/gcc/rte.vars.mk +index 9fc704193b..b3473c06fd 100644 +--- a/dpdk/mk/toolchain/gcc/rte.vars.mk ++++ b/dpdk/mk/toolchain/gcc/rte.vars.mk +@@ -83,6 +83,11 @@ ifeq ($(shell test $(GCC_VERSION) -lt 47 && echo 1), 1) + WERROR_FLAGS += -Wno-uninitialized + endif + ++ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1) ++# FIXME: Bugzilla 396 ++WERROR_FLAGS += -Wno-zero-length-bounds ++endif ++ + HOST_WERROR_FLAGS := $(WERROR_FLAGS) + + ifeq ($(shell test $(HOST_GCC_VERSION) -gt 70 && echo 1), 1) +diff --git a/dpdk/usertools/dpdk-pmdinfo.py b/dpdk/usertools/dpdk-pmdinfo.py +index 069a3bf124..12f20735e0 100755 +--- a/dpdk/usertools/dpdk-pmdinfo.py ++++ b/dpdk/usertools/dpdk-pmdinfo.py +@@ -539,7 +539,7 @@ def scan_for_autoload_pmds(dpdk_path): + return + + (autoload_path, scannedfile) = readelf.search_for_autoload_path() +- if (autoload_path is None or autoload_path is ""): ++ if not autoload_path: + if (raw_output is False): + print("No autoload path configured in %s" % dpdk_path) + return +@@ -561,7 +561,10 @@ def main(stream=None): + + pcifile_default = "./pci.ids" # For unknown OS's assume local file + if platform.system() == 'Linux': +- pcifile_default = "/usr/share/hwdata/pci.ids" ++ # hwdata is the legacy location, misc is supported going forward ++ pcifile_default = "/usr/share/misc/pci.ids" ++ if not os.path.exists(pcifile_default): ++ pcifile_default = "/usr/share/hwdata/pci.ids" + elif platform.system() == 'FreeBSD': + pcifile_default = "/usr/local/share/pciids/pci.ids" + if not os.path.exists(pcifile_default): +diff --git a/dpdk/usertools/dpdk-telemetry-client.py b/dpdk/usertools/dpdk-telemetry-client.py +index 290345dcc4..35edb7cd26 100755 +--- a/dpdk/usertools/dpdk-telemetry-client.py ++++ b/dpdk/usertools/dpdk-telemetry-client.py +@@ -3,6 +3,7 @@ + # Copyright(c) 2018 Intel Corporation + + from __future__ import print_function ++from __future__ import unicode_literals + + import socket + import os +@@ -65,18 +66,19 @@ def register(self): # Connects a client to DPDK-instance + self.socket.recv_fd.settimeout(2) + self.socket.send_fd.connect("/var/run/dpdk/rte/telemetry") + JSON = (API_REG + self.file_path + "\"}}") +- self.socket.send_fd.sendall(JSON) ++ self.socket.send_fd.sendall(JSON.encode()) ++ + self.socket.recv_fd.listen(1) + self.socket.client_fd = self.socket.recv_fd.accept()[0] + + def unregister(self): # Unregister a given client +- self.socket.client_fd.send(API_UNREG + self.file_path + "\"}}") ++ self.socket.client_fd.send((API_UNREG + self.file_path + "\"}}").encode()) + self.socket.client_fd.close() + + def requestMetrics(self): # Requests metrics for given client +- self.socket.client_fd.send(METRICS_REQ) +- data = 
self.socket.client_fd.recv(BUFFER_SIZE) +- print("\nResponse: \n", str(data)) ++ self.socket.client_fd.send(METRICS_REQ.encode()) ++ data = self.socket.client_fd.recv(BUFFER_SIZE).decode() ++ print("\nResponse: \n", data) + + def repeatedlyRequestMetrics(self, sleep_time): # Recursively requests metrics for given client + print("\nPlease enter the number of times you'd like to continuously request Metrics:") +@@ -88,9 +90,9 @@ def repeatedlyRequestMetrics(self, sleep_time): # Recursively requests metrics f + time.sleep(sleep_time) + + def requestGlobalMetrics(self): #Requests global metrics for given client +- self.socket.client_fd.send(GLOBAL_METRICS_REQ) +- data = self.socket.client_fd.recv(BUFFER_SIZE) +- print("\nResponse: \n", str(data)) ++ self.socket.client_fd.send(GLOBAL_METRICS_REQ.encode()) ++ data = self.socket.client_fd.recv(BUFFER_SIZE).decode() ++ print("\nResponse: \n", data) + + def interactiveMenu(self, sleep_time): # Creates Interactive menu within the script + while self.choice != 4: +diff --git a/include/openvswitch/compiler.h b/include/openvswitch/compiler.h +index 5289a70f6e..cf009f8264 100644 +--- a/include/openvswitch/compiler.h ++++ b/include/openvswitch/compiler.h +@@ -113,6 +113,8 @@ + * OVS_REQUIRES OVS_REQ_RDLOCK OVS_REQ_WRLOCK + * OVS_EXCLUDED OVS_EXCLUDED OVS_EXCLUDED + */ ++ ++/* Please keep OVS_CTAGS_IDENTIFIERS up-to-date in acinclude.m4. */ + #define OVS_LOCKABLE __attribute__((lockable)) + #define OVS_REQ_RDLOCK(...) __attribute__((shared_locks_required(__VA_ARGS__))) + #define OVS_ACQ_RDLOCK(...) __attribute__((shared_lock_function(__VA_ARGS__))) +diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in +index 37e3703245..1c185bbd85 100755 +--- a/ipsec/ovs-monitor-ipsec.in ++++ b/ipsec/ovs-monitor-ipsec.in +@@ -101,7 +101,7 @@ class XFRM(object): + proc = subprocess.Popen([self.IP, 'xfrm', 'policy'], + stdout=subprocess.PIPE) + while True: +- line = proc.stdout.readline().strip() ++ line = proc.stdout.readline().strip().decode() + if line == '': + break + a = line.split(" ") +@@ -124,7 +124,7 @@ class XFRM(object): + proc = subprocess.Popen([self.IP, 'xfrm', 'state'], + stdout=subprocess.PIPE) + while True: +- line = proc.stdout.readline().strip() ++ line = proc.stdout.readline().strip().decode() + if line == '': + break + a = line.split(" ") +@@ -246,7 +246,7 @@ conn prevent_unencrypted_vxlan + proc = subprocess.Popen([self.IPSEC, 'status'], stdout=subprocess.PIPE) + + while True: +- line = proc.stdout.readline().strip() ++ line = proc.stdout.readline().strip().decode() + if line == '': + break + tunnel_name = line.split(":") +@@ -340,7 +340,7 @@ conn prevent_unencrypted_vxlan + # about possibility of ovs-monitor-ipsec to block for each tunnel + # while strongSwan sends IKE messages over Internet. 
+ conns_dict = self.get_active_conns() +- for ifname, conns in conns_dict.iteritems(): ++ for ifname, conns in conns_dict.items(): + tunnel = monitor.tunnels.get(ifname) + for conn in conns: + # IPsec "connection" names that we choose in strongswan +@@ -536,7 +536,7 @@ conn prevent_unencrypted_vxlan + + # Delete old connections + conns_dict = self.get_active_conns() +- for ifname, conns in conns_dict.iteritems(): ++ for ifname, conns in conns_dict.items(): + tunnel = monitor.tunnels.get(ifname) + + for conn in conns: +@@ -608,7 +608,7 @@ conn prevent_unencrypted_vxlan + proc = subprocess.Popen([self.IPSEC, 'status'], stdout=subprocess.PIPE) + + while True: +- line = proc.stdout.readline().strip() ++ line = proc.stdout.readline().strip().decode() + if line == '': + break + +@@ -989,7 +989,7 @@ class IPsecMonitor(object): + skb_mark = None + is_valid = False + +- for row in data["Open_vSwitch"].rows.itervalues(): ++ for row in data["Open_vSwitch"].rows.values(): + pki[0] = row.other_config.get("certificate") + pki[1] = row.other_config.get("private_key") + pki[2] = row.other_config.get("ca_cert") +@@ -1016,7 +1016,7 @@ class IPsecMonitor(object): + table.""" + ifaces = set() + +- for row in data["Interface"].rows.itervalues(): ++ for row in data["Interface"].rows.values(): + if not self.is_tunneling_type_supported(row.type): + continue + if not self.is_ipsec_required(row.options): +@@ -1047,7 +1047,7 @@ class IPsecMonitor(object): + return + s = "" + conns = self.ike_helper.get_active_conns() +- for name, tunnel in self.tunnels.iteritems(): ++ for name, tunnel in self.tunnels.items(): + s += tunnel.show(policies, securities, conns) + unix_conn.reply(s) + +@@ -1064,7 +1064,7 @@ class IPsecMonitor(object): + if self.ike_helper.config_global(self): + needs_refresh = True + +- for name, tunnel in self.tunnels.iteritems(): ++ for name, tunnel in self.tunnels.items(): + if tunnel.last_refreshed_version != tunnel.version: + tunnel.last_refreshed_version = tunnel.version + needs_refresh = True +@@ -1094,7 +1094,7 @@ class IPsecMonitor(object): + proc.wait() + if proc.returncode: + raise Exception(proc.stderr.read()) +- m = re.search(r"CN=(.+?),", proc.stdout.readline()) ++ m = re.search(r"CN=(.+?),", proc.stdout.readline().decode()) + if not m: + raise Exception("No CN in the certificate subject.") + except Exception as e: +diff --git a/lib/classifier.c b/lib/classifier.c +index 0fad953213..f2c3497c2d 100644 +--- a/lib/classifier.c ++++ b/lib/classifier.c +@@ -393,7 +393,9 @@ classifier_set_prefix_fields(struct classifier *cls, + bitmap_set1(fields.bm, trie_fields[i]); + + new_fields[n_tries] = NULL; +- if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) { ++ const struct mf_field *cls_field ++ = ovsrcu_get(struct mf_field *, &cls->tries[n_tries].field); ++ if (n_tries >= cls->n_tries || field != cls_field) { + new_fields[n_tries] = field; + changed = true; + } +@@ -454,7 +456,7 @@ trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field) + } else { + ovsrcu_set_hidden(&trie->root, NULL); + } +- trie->field = field; ++ ovsrcu_set_hidden(&trie->field, CONST_CAST(struct mf_field *, field)); + + /* Add existing rules to the new trie. */ + CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) { +@@ -839,7 +841,6 @@ classifier_remove_assert(struct classifier *cls, + struct trie_ctx { + const struct cls_trie *trie; + bool lookup_done; /* Status of the lookup. */ +- uint8_t be32ofs; /* U32 offset of the field in question. 
*/ + unsigned int maskbits; /* Prefix length needed to avoid false matches. */ + union trie_prefix match_plens; /* Bitmask of prefix lengths with possible + * matches. */ +@@ -849,7 +850,6 @@ static void + trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie) + { + ctx->trie = trie; +- ctx->be32ofs = trie->field->flow_be32ofs; + ctx->lookup_done = false; + } + +@@ -1531,8 +1531,10 @@ insert_subtable(struct classifier *cls, const struct minimask *mask) + *CONST_CAST(uint8_t *, &subtable->n_indices) = index; + + for (i = 0; i < cls->n_tries; i++) { +- subtable->trie_plen[i] = minimask_get_prefix_len(mask, +- cls->tries[i].field); ++ const struct mf_field *field ++ = ovsrcu_get(struct mf_field *, &cls->tries[i].field); ++ subtable->trie_plen[i] ++ = field ? minimask_get_prefix_len(mask, field) : 0; + } + + /* Ports trie. */ +@@ -1575,11 +1577,17 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, + * fields using the prefix tries. The trie checks are done only as + * needed to avoid folding in additional bits to the wildcards mask. */ + for (j = 0; j < n_tries; j++) { +- /* Is the trie field relevant for this subtable, and +- is the trie field within the current range of fields? */ +- if (field_plen[j] && +- flowmap_is_set(&range_map, trie_ctx[j].be32ofs / 2)) { ++ /* Is the trie field relevant for this subtable? */ ++ if (field_plen[j]) { + struct trie_ctx *ctx = &trie_ctx[j]; ++ const struct mf_field *ctx_field ++ = ovsrcu_get(struct mf_field *, &ctx->trie->field); ++ ++ /* Is the trie field within the current range of fields? */ ++ if (!ctx_field ++ || !flowmap_is_set(&range_map, ctx_field->flow_be32ofs / 2)) { ++ continue; ++ } + + /* On-demand trie lookup. */ + if (!ctx->lookup_done) { +@@ -1601,14 +1609,16 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, + * than this subtable would otherwise. */ + if (ctx->maskbits <= field_plen[j]) { + /* Unwildcard the bits and skip the rest. */ +- mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits); ++ mask_set_prefix_bits(wc, ctx_field->flow_be32ofs, ++ ctx->maskbits); + /* Note: Prerequisite already unwildcarded, as the only + * prerequisite of the supported trie lookup fields is + * the ethertype, which is always unwildcarded. */ + return true; + } + /* Can skip if the field is already unwildcarded. */ +- if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) { ++ if (mask_prefix_bits_set(wc, ctx_field->flow_be32ofs, ++ ctx->maskbits)) { + return true; + } + } +@@ -2001,12 +2011,12 @@ static unsigned int + trie_lookup(const struct cls_trie *trie, const struct flow *flow, + union trie_prefix *plens) + { +- const struct mf_field *mf = trie->field; ++ const struct mf_field *mf = ovsrcu_get(struct mf_field *, &trie->field); + + /* Check that current flow matches the prerequisites for the trie + * field. Some match fields are used for multiple purposes, so we + * must check that the trie is relevant for this flow. */ +- if (mf_are_prereqs_ok(mf, flow, NULL)) { ++ if (mf && mf_are_prereqs_ok(mf, flow, NULL)) { + return trie_lookup_value(&trie->root, + &((ovs_be32 *)flow)[mf->flow_be32ofs], + &plens->be32, mf->n_bits); +@@ -2053,8 +2063,9 @@ minimask_get_prefix_len(const struct minimask *minimask, + * happened to be zeros. 
+ */ + static const ovs_be32 * +-minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf) ++minimatch_get_prefix(const struct minimatch *match, rcu_field_ptr *field) + { ++ struct mf_field *mf = ovsrcu_get_protected(struct mf_field *, field); + size_t u64_ofs = mf->flow_be32ofs / 2; + + return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs) +@@ -2068,7 +2079,7 @@ static void + trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen) + { + trie_insert_prefix(&trie->root, +- minimatch_get_prefix(&rule->match, trie->field), mlen); ++ minimatch_get_prefix(&rule->match, &trie->field), mlen); + } + + static void +@@ -2123,7 +2134,7 @@ static void + trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen) + { + trie_remove_prefix(&trie->root, +- minimatch_get_prefix(&rule->match, trie->field), mlen); ++ minimatch_get_prefix(&rule->match, &trie->field), mlen); + } + + /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask +diff --git a/lib/classifier.h b/lib/classifier.h +index d1bd4aa12a..f646a8f742 100644 +--- a/lib/classifier.h ++++ b/lib/classifier.h +@@ -314,13 +314,15 @@ extern "C" { + struct cls_subtable; + struct cls_match; + ++struct mf_field; ++typedef OVSRCU_TYPE(struct mf_field *) rcu_field_ptr; + struct trie_node; + typedef OVSRCU_TYPE(struct trie_node *) rcu_trie_ptr; + + /* Prefix trie for a 'field' */ + struct cls_trie { +- const struct mf_field *field; /* Trie field, or NULL. */ +- rcu_trie_ptr root; /* NULL if none. */ ++ rcu_field_ptr field; /* Trie field, or NULL. */ ++ rcu_trie_ptr root; /* NULL if none. */ + }; + + enum { +diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c +index 416cb769d2..47261c7551 100644 +--- a/lib/conntrack-tcp.c ++++ b/lib/conntrack-tcp.c +@@ -189,7 +189,7 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_, + } else if (src->state <= CT_DPIF_TCPS_SYN_SENT) { + src->state = CT_DPIF_TCPS_SYN_SENT; + conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIRST_PACKET, now); +- return CT_UPDATE_NEW; ++ return CT_UPDATE_VALID_NEW; + } + } + +diff --git a/lib/conntrack.c b/lib/conntrack.c +index ff5a89457c..0cbc8f6d2b 100644 +--- a/lib/conntrack.c ++++ b/lib/conntrack.c +@@ -1277,6 +1277,11 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, + const struct nat_action_info_t *nat_action_info, + ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper) + { ++ /* Reset ct_state whenever entering a new zone. 
*/ ++ if (pkt->md.ct_state && pkt->md.ct_zone != zone) { ++ pkt->md.ct_state = 0; ++ } ++ + bool create_new_conn = false; + conn_key_lookup(ct, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply); + struct conn *conn = ctx->conn; +@@ -1300,9 +1305,10 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, + conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply); + + if (!conn) { +- pkt->md.ct_state |= CS_TRACKED | CS_INVALID; ++ pkt->md.ct_state |= CS_INVALID; ++ write_ct_md(pkt, zone, NULL, NULL, NULL); + char *log_msg = xasprintf("Missing master conn %p", rev_conn); +- ct_print_conn_info(conn, log_msg, VLL_INFO, true, true); ++ ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true); + free(log_msg); + return; + } +diff --git a/lib/dpctl.c b/lib/dpctl.c +index db2b1f8961..09ae97f25c 100644 +--- a/lib/dpctl.c ++++ b/lib/dpctl.c +@@ -1031,7 +1031,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p) + memset(&dump_types, 0, sizeof dump_types); + error = populate_dump_types(types_list, &dump_types, dpctl_p); + if (error) { +- goto out_free; ++ goto out_dpifclose; + } + determine_dpif_flow_dump_types(&dump_types, &dpif_dump_types); + +diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c +index d393aab5e3..42e1c44ae8 100644 +--- a/lib/dpif-netdev.c ++++ b/lib/dpif-netdev.c +@@ -481,6 +481,12 @@ struct dp_netdev_flow_stats { + atomic_uint16_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */ + }; + ++/* Contained by struct dp_netdev_flow's 'last_attrs' member. */ ++struct dp_netdev_flow_attrs { ++ atomic_bool offloaded; /* True if flow is offloaded to HW. */ ++ ATOMIC(const char *) dp_layer; /* DP layer the flow is handled in. */ ++}; ++ + /* A flow in 'dp_netdev_pmd_thread's 'flow_table'. + * + * +@@ -541,6 +547,11 @@ struct dp_netdev_flow { + /* Statistics. */ + struct dp_netdev_flow_stats stats; + ++ /* Statistics and attributes received from the netdev offload provider. */ ++ atomic_int netdev_flow_get_result; ++ struct dp_netdev_flow_stats last_stats; ++ struct dp_netdev_flow_attrs last_attrs; ++ + /* Actions. */ + OVSRCU_TYPE(struct dp_netdev_actions *) actions; + +@@ -2149,7 +2160,11 @@ dp_netdev_pmd_find_dpcls(struct dp_netdev_pmd_thread *pmd, + } + + #define MAX_FLOW_MARK (UINT32_MAX - 1) +-#define INVALID_FLOW_MARK (UINT32_MAX) ++#define INVALID_FLOW_MARK 0 ++/* Zero flow mark is used to indicate the HW to remove the mark. A packet ++ * marked with zero mark is received in SW without a mark at all, so it ++ * cannot be used as a valid mark. ++ */ + + struct megaflow_to_mark_data { + const struct cmap_node node; +@@ -2175,7 +2190,7 @@ flow_mark_alloc(void) + + if (!flow_mark.pool) { + /* Haven't initiated yet, do it here */ +- flow_mark.pool = id_pool_create(0, MAX_FLOW_MARK); ++ flow_mark.pool = id_pool_create(1, MAX_FLOW_MARK); + } + + if (id_pool_alloc_id(flow_mark.pool, &mark)) { +@@ -2280,6 +2295,12 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd, + struct cmap_node *mark_node = CONST_CAST(struct cmap_node *, + &flow->mark_node); + ++ /* INVALID_FLOW_MARK may mean that the flow has been disassociated or ++ * never associated. 
*/ ++ if (OVS_UNLIKELY(mark == INVALID_FLOW_MARK)) { ++ return EINVAL; ++ } ++ + cmap_remove(&flow_mark.mark_to_flow, mark_node, hash_int(mark, 0)); + flow->mark = INVALID_FLOW_MARK; + +@@ -2433,6 +2454,7 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload) + mark = flow_mark_alloc(); + if (mark == INVALID_FLOW_MARK) { + VLOG_ERR("Failed to allocate flow mark!\n"); ++ return -1; + } + } + info.flow_mark = mark; +@@ -2512,6 +2534,7 @@ dp_netdev_flow_offload_main(void *data OVS_UNUSED) + VLOG_DBG("%s to %s netdev flow\n", + ret == 0 ? "succeed" : "failed", op); + dp_netdev_free_flow_offload(offload); ++ ovsrcu_quiesce(); + } + + return NULL; +@@ -3032,9 +3055,56 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd, + return NULL; + } + ++static void ++dp_netdev_flow_set_last_stats_attrs(struct dp_netdev_flow *netdev_flow, ++ const struct dpif_flow_stats *stats, ++ const struct dpif_flow_attrs *attrs, ++ int result) ++{ ++ struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats; ++ struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs; ++ ++ atomic_store_relaxed(&netdev_flow->netdev_flow_get_result, result); ++ if (result) { ++ return; ++ } ++ ++ atomic_store_relaxed(&last_stats->used, stats->used); ++ atomic_store_relaxed(&last_stats->packet_count, stats->n_packets); ++ atomic_store_relaxed(&last_stats->byte_count, stats->n_bytes); ++ atomic_store_relaxed(&last_stats->tcp_flags, stats->tcp_flags); ++ ++ atomic_store_relaxed(&last_attrs->offloaded, attrs->offloaded); ++ atomic_store_relaxed(&last_attrs->dp_layer, attrs->dp_layer); ++ ++} ++ ++static void ++dp_netdev_flow_get_last_stats_attrs(struct dp_netdev_flow *netdev_flow, ++ struct dpif_flow_stats *stats, ++ struct dpif_flow_attrs *attrs, ++ int *result) ++{ ++ struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats; ++ struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs; ++ ++ atomic_read_relaxed(&netdev_flow->netdev_flow_get_result, result); ++ if (*result) { ++ return; ++ } ++ ++ atomic_read_relaxed(&last_stats->used, &stats->used); ++ atomic_read_relaxed(&last_stats->packet_count, &stats->n_packets); ++ atomic_read_relaxed(&last_stats->byte_count, &stats->n_bytes); ++ atomic_read_relaxed(&last_stats->tcp_flags, &stats->tcp_flags); ++ ++ atomic_read_relaxed(&last_attrs->offloaded, &attrs->offloaded); ++ atomic_read_relaxed(&last_attrs->dp_layer, &attrs->dp_layer); ++} ++ + static bool + dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp, +- const struct dp_netdev_flow *netdev_flow, ++ struct dp_netdev_flow *netdev_flow, + struct dpif_flow_stats *stats, + struct dpif_flow_attrs *attrs) + { +@@ -3056,11 +3126,31 @@ dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp, + } + ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf); + /* Taking a global 'port_mutex' to fulfill thread safety +- * restrictions for the netdev-offload-dpdk module. */ +- ovs_mutex_lock(&dp->port_mutex); +- ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid, +- stats, attrs, &buf); +- ovs_mutex_unlock(&dp->port_mutex); ++ * restrictions for the netdev-offload-dpdk module. ++ * ++ * XXX: Main thread will try to pause/stop all revalidators during datapath ++ * reconfiguration via datapath purge callback (dp_purge_cb) while ++ * holding 'dp->port_mutex'. So we're not waiting for mutex here. 
++ * Otherwise, deadlock is possible, because revalidators might sleep ++ * waiting for the main thread to release the lock and the main thread ++ * will wait for them to stop processing. ++ * This workaround might make statistics less accurate, especially ++ * for the flow deletion case, since there will be no other attempt. */ ++ if (!ovs_mutex_trylock(&dp->port_mutex)) { ++ ret = netdev_flow_get(netdev, &match, &actions, ++ &netdev_flow->mega_ufid, stats, attrs, &buf); ++ /* Storing statistics and attributes from the last request for ++ * later use on mutex contention. */ ++ dp_netdev_flow_set_last_stats_attrs(netdev_flow, stats, attrs, ret); ++ ovs_mutex_unlock(&dp->port_mutex); ++ } else { ++ dp_netdev_flow_get_last_stats_attrs(netdev_flow, stats, attrs, &ret); ++ if (!ret && !attrs->dp_layer) { ++ /* Flow was never reported as 'offloaded' so it's harmless ++ * to continue to think so. */ ++ ret = EAGAIN; ++ } ++ } + netdev_close(netdev); + if (ret) { + return false; +@@ -3329,6 +3419,9 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, + /* Do not allocate extra space. */ + flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len); + memset(&flow->stats, 0, sizeof flow->stats); ++ atomic_init(&flow->netdev_flow_get_result, 0); ++ memset(&flow->last_stats, 0, sizeof flow->last_stats); ++ memset(&flow->last_attrs, 0, sizeof flow->last_attrs); + flow->dead = false; + flow->batch = NULL; + flow->mark = INVALID_FLOW_MARK; +@@ -4940,9 +5033,17 @@ reconfigure_datapath(struct dp_netdev *dp) + + /* Check for all the ports that need reconfiguration. We cache this in + * 'port->need_reconfigure', because netdev_is_reconf_required() can +- * change at any time. */ ++ * change at any time. ++ * Also mark for reconfiguration all ports which will likely change their ++ * 'dynamic_txqs' parameter. It's required to stop using them before ++ * changing this setting and it's simpler to mark ports here and allow ++ * 'pmd_remove_stale_ports' to remove them from threads. There will be ++ * no actual reconfiguration in 'port_reconfigure' because it's ++ * unnecessary. 
*/ + HMAP_FOR_EACH (port, node, &dp->ports) { +- if (netdev_is_reconf_required(port->netdev)) { ++ if (netdev_is_reconf_required(port->netdev) ++ || (port->dynamic_txqs ++ != (netdev_n_txq(port->netdev) < wanted_txqs))) { + port->need_reconfigure = true; + } + } +diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c +index 5b5c96d727..f9c732886f 100644 +--- a/lib/dpif-netlink.c ++++ b/lib/dpif-netlink.c +@@ -691,6 +691,7 @@ dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features) + + dpif_netlink_dp_init(&request); + request.cmd = OVS_DP_CMD_SET; ++ request.name = dpif_->base_name; + request.dp_ifindex = dpif->dp_ifindex; + request.user_features = dpif->user_features | new_features; + +@@ -2091,6 +2092,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) + info.tunnel_csum_on = csum_on; + info.recirc_id_shared_with_tc = (dpif->user_features + & OVS_DP_F_TC_RECIRC_SHARING); ++ info.tc_modify_flow_deleted = false; + err = netdev_flow_put(dev, &match, + CONST_CAST(struct nlattr *, put->actions), + put->actions_len, +@@ -2141,7 +2143,11 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) + out: + if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) { + /* Modified rule can't be offloaded, try and delete from HW */ +- int del_err = netdev_flow_del(dev, put->ufid, put->stats); ++ int del_err = 0; ++ ++ if (!info.tc_modify_flow_deleted) { ++ del_err = netdev_flow_del(dev, put->ufid, put->stats); ++ } + + if (!del_err) { + /* Delete from hw success, so old flow was offloaded. +diff --git a/lib/meta-flow.c b/lib/meta-flow.c +index 8b62e6d968..80063b933d 100644 +--- a/lib/meta-flow.c ++++ b/lib/meta-flow.c +@@ -2296,12 +2296,6 @@ mf_set(const struct mf_field *mf, + switch (mf->id) { + case MFF_CT_ZONE: + case MFF_CT_NW_PROTO: +- case MFF_CT_NW_SRC: +- case MFF_CT_NW_DST: +- case MFF_CT_IPV6_SRC: +- case MFF_CT_IPV6_DST: +- case MFF_CT_TP_SRC: +- case MFF_CT_TP_DST: + case MFF_RECIRC_ID: + case MFF_PACKET_TYPE: + case MFF_CONJ_ID: +@@ -2419,6 +2413,30 @@ mf_set(const struct mf_field *mf, + ntoh128(mask->be128)); + break; + ++ case MFF_CT_NW_SRC: ++ match_set_ct_nw_src_masked(match, value->be32, mask->be32); ++ break; ++ ++ case MFF_CT_NW_DST: ++ match_set_ct_nw_dst_masked(match, value->be32, mask->be32); ++ break; ++ ++ case MFF_CT_IPV6_SRC: ++ match_set_ct_ipv6_src_masked(match, &value->ipv6, &mask->ipv6); ++ break; ++ ++ case MFF_CT_IPV6_DST: ++ match_set_ct_ipv6_dst_masked(match, &value->ipv6, &mask->ipv6); ++ break; ++ ++ case MFF_CT_TP_SRC: ++ match_set_ct_tp_src_masked(match, value->be16, mask->be16); ++ break; ++ ++ case MFF_CT_TP_DST: ++ match_set_ct_tp_dst_masked(match, value->be16, mask->be16); ++ break; ++ + case MFF_ETH_DST: + match_set_dl_dst_masked(match, value->mac, mask->mac); + break; +diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml +index 90b405c737..2f9c5ee163 100644 +--- a/lib/meta-flow.xml ++++ b/lib/meta-flow.xml +@@ -2566,8 +2566,8 @@ actions=clone(load:0->NXM_OF_IN_PORT[],output:123) + + <dt><code>est</code> (0x02)</dt> + <dd> +- Part of an existing connection. Set to 1 if this is a committed +- connection. ++ Part of an existing connection. Set to 1 if packets of a committed ++ connection have been seen by conntrack from both directions. + </dd> + + <dt><code>rel</code> (0x04)</dt> +diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c +index 6187129c00..7ab81864db 100644 +--- a/lib/netdev-dpdk.c ++++ b/lib/netdev-dpdk.c +@@ -152,6 +152,16 @@ typedef uint16_t dpdk_port_t; + + #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? 
PATH_MAX : IFNAMSIZ) + ++/* List of required flags advertised by the hardware that will be used ++ * if TSO is enabled. Ideally this should include DEV_TX_OFFLOAD_SCTP_CKSUM. ++ * However, very few drivers support that at the moment, and SCTP is not as ++ * widely used a protocol as TCP and UDP, so it's optional. */ ++#define DPDK_TX_TSO_OFFLOAD_FLAGS (DEV_TX_OFFLOAD_TCP_TSO \ ++ | DEV_TX_OFFLOAD_TCP_CKSUM \ ++ | DEV_TX_OFFLOAD_UDP_CKSUM \ ++ | DEV_TX_OFFLOAD_IPV4_CKSUM) ++ ++ + static const struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = ETH_MQ_RX_RSS, +@@ -415,6 +425,7 @@ enum dpdk_hw_ol_features { + NETDEV_RX_HW_CRC_STRIP = 1 << 1, + NETDEV_RX_HW_SCATTER = 1 << 2, + NETDEV_TX_TSO_OFFLOAD = 1 << 3, ++ NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4, + }; + + /* +@@ -997,9 +1008,10 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) + } + + if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { +- conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO; +- conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM; +- conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM; ++ conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; ++ if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { ++ conf.txmode.offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM; ++ } + } + + /* Limit configured rss hash functions to only those supported +@@ -1100,12 +1112,10 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) + struct rte_ether_addr eth_addr; + int diag; + int n_rxq, n_txq; ++ uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS; + uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM | + DEV_RX_OFFLOAD_TCP_CKSUM | + DEV_RX_OFFLOAD_IPV4_CKSUM; +- uint32_t tx_tso_offload_capa = DEV_TX_OFFLOAD_TCP_TSO | +- DEV_TX_OFFLOAD_TCP_CKSUM | +- DEV_TX_OFFLOAD_IPV4_CKSUM; + + rte_eth_dev_info_get(dev->port_id, &info); + +@@ -1137,6 +1147,13 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) + if ((info.tx_offload_capa & tx_tso_offload_capa) + == tx_tso_offload_capa) { + dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; ++ if (info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM) { ++ dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD; ++ } else { ++ VLOG_WARN("%s: Tx SCTP checksum offload is not supported, " ++ "SCTP packets sent to this device will be dropped", ++ netdev_get_name(&dev->up)); ++ } + } else { + VLOG_WARN("%s: Tx TSO offload is not supported.", + netdev_get_name(&dev->up)); +@@ -5110,7 +5127,11 @@ netdev_dpdk_reconfigure(struct netdev *netdev) + if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { + netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; + netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; ++ netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; + netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; ++ if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { ++ netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; ++ } + } + + dev->tx_q = netdev_dpdk_alloc_txq(netdev->n_txq); +@@ -5186,6 +5207,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + int err; + uint64_t vhost_flags = 0; ++ uint64_t vhost_unsup_flags; + bool zc_enabled; + + ovs_mutex_lock(&dev->mutex); +@@ -5251,17 +5273,24 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) + if (userspace_tso_enabled()) { + netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; + netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; ++ netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; ++ netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; + netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; ++ vhost_unsup_flags = 
1ULL << VIRTIO_NET_F_HOST_ECN ++ | 1ULL << VIRTIO_NET_F_HOST_UFO; + } else { +- err = rte_vhost_driver_disable_features(dev->vhost_id, +- 1ULL << VIRTIO_NET_F_HOST_TSO4 +- | 1ULL << VIRTIO_NET_F_HOST_TSO6 +- | 1ULL << VIRTIO_NET_F_CSUM); +- if (err) { +- VLOG_ERR("rte_vhost_driver_disable_features failed for " +- "vhost user client port: %s\n", dev->up.name); +- goto unlock; +- } ++ /* This disables checksum offloading and all the features ++ * that depend on it (TSO, UFO, ECN) according to the virtio ++ * specification. */ ++ vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM; ++ } ++ ++ err = rte_vhost_driver_disable_features(dev->vhost_id, ++ vhost_unsup_flags); ++ if (err) { ++ VLOG_ERR("rte_vhost_driver_disable_features failed for " ++ "vhost user client port: %s\n", dev->up.name); ++ goto unlock; ++ } + + err = rte_vhost_driver_start(dev->vhost_id); +diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c +index c6f3d27409..8d779945a1 100644 +--- a/lib/netdev-linux.c ++++ b/lib/netdev-linux.c +@@ -231,6 +231,14 @@ struct rtnl_link_stats64 { + uint64_t tx_compressed; + }; + ++/* Linux 3.19 introduced virtio_types.h. It might be missing ++ * if we are using an old kernel. */ ++#ifndef HAVE_VIRTIO_TYPES ++typedef __u16 __bitwise__ __virtio16; ++typedef __u32 __bitwise__ __virtio32; ++typedef __u64 __bitwise__ __virtio64; ++#endif ++ + enum { + VALID_IFINDEX = 1 << 0, + VALID_ETHERADDR = 1 << 1, +@@ -659,10 +667,6 @@ netdev_linux_update_lag(struct rtnetlink_change *change) + { + struct linux_lag_slave *lag; + +- if (!rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) { +- return; +- } +- + if (change->slave && netdev_linux_kind_is_lag(change->slave)) { + lag = shash_find_data(&lag_shash, change->ifname); + +@@ -760,8 +764,11 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) + netdev_linux_update(netdev, nsid, &change); + ovs_mutex_unlock(&netdev->mutex); + } +- else if (!netdev_ && change.ifname) { +- /* Netdev is not present in OvS but its master could be. */ ++ ++ if (change.ifname && ++ rtnetlink_type_is_rtnlgrp_link(change.nlmsg_type)) { ++ ++ /* Need to try updating the LAG information. */ + ovs_mutex_lock(&lag_mutex); + netdev_linux_update_lag(&change); + ovs_mutex_unlock(&lag_mutex); +@@ -923,6 +930,8 @@ netdev_linux_common_construct(struct netdev *netdev_) + if (userspace_tso_enabled()) { + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; ++ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; ++ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; + } + +diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c +index f8c46bbaad..4538baf5e6 100644 +--- a/lib/netdev-offload-dpdk.c ++++ b/lib/netdev-offload-dpdk.c +@@ -565,8 +565,18 @@ parse_flow_match(struct flow_patterns *patterns, + uint8_t proto = 0; + + /* Eth */ +- if (!eth_addr_is_zero(match->wc.masks.dl_src) || +- !eth_addr_is_zero(match->wc.masks.dl_dst)) { ++ if (match->wc.masks.dl_type == OVS_BE16_MAX && is_ip_any(&match->flow) ++ && eth_addr_is_zero(match->wc.masks.dl_dst) ++ && eth_addr_is_zero(match->wc.masks.dl_src)) { ++ /* ++ * This is a temporary workaround to fix the ethernet pattern for partial ++ * hardware offload for X710 devices. This fix will be reverted once ++ * the issue is fixed within the i40e PMD driver. 
++ */ ++ add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL); ++ } else if (match->wc.masks.dl_type || ++ !eth_addr_is_zero(match->wc.masks.dl_src) || ++ !eth_addr_is_zero(match->wc.masks.dl_dst)) { + struct rte_flow_item_eth *spec, *mask; + + spec = xzalloc(sizeof *spec); +@@ -581,15 +591,6 @@ parse_flow_match(struct flow_patterns *patterns, + mask->type = match->wc.masks.dl_type; + + add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask); +- } else { +- /* +- * If user specifies a flow (like UDP flow) without L2 patterns, +- * OVS will at least set the dl_type. Normally, it's enough to +- * create an eth pattern just with it. Unluckily, some Intel's +- * NIC (such as XL710) doesn't support that. Below is a workaround, +- * which simply matches any L2 pkts. +- */ +- add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL); + } + + /* VLAN */ +diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c +index 550e440b3a..e188e63e56 100644 +--- a/lib/netdev-offload-tc.c ++++ b/lib/netdev-offload-tc.c +@@ -1727,7 +1727,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, + if (get_ufid_tc_mapping(ufid, &id) == 0) { + VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", + id.handle, id.prio); +- del_filter_and_ufid_mapping(&id, ufid); ++ info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id, ufid); + } + + prio = get_prio_for_tc_flower(&flower); +@@ -1907,6 +1907,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) + static struct ovsthread_once block_once = OVSTHREAD_ONCE_INITIALIZER; + enum tc_qdisc_hook hook = get_tc_qdisc_hook(netdev); + uint32_t block_id = 0; ++ struct tcf_id id; + int ifindex; + int error; + +@@ -1917,11 +1918,21 @@ netdev_tc_init_flow_api(struct netdev *netdev) + return -ifindex; + } + ++ block_id = get_block_id_from_netdev(netdev); ++ ++ /* Flush rules explicitly needed when we work with ingress_block, ++ * so we will not fail with reattaching block to bond iface, for ex. ++ */ ++ id = tc_make_tcf_id(ifindex, block_id, 0, hook); ++ tc_del_filter(&id); ++ + /* make sure there is no ingress/egress qdisc */ + tc_add_del_qdisc(ifindex, false, 0, hook); + + if (ovsthread_once_start(&block_once)) { + probe_tc_block_support(ifindex); ++ /* Need to re-fetch block id as it depends on feature availability. */ ++ block_id = get_block_id_from_netdev(netdev); + ovsthread_once_done(&block_once); + } + +@@ -1930,7 +1941,6 @@ netdev_tc_init_flow_api(struct netdev *netdev) + ovsthread_once_done(&multi_mask_once); + } + +- block_id = get_block_id_from_netdev(netdev); + error = tc_add_del_qdisc(ifindex, true, block_id, hook); + + if (error && error != EEXIST) { +diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h +index cd6dfdfff4..b4b882a56a 100644 +--- a/lib/netdev-offload.h ++++ b/lib/netdev-offload.h +@@ -74,6 +74,9 @@ struct offload_info { + * it will be in the pkt meta data. + */ + uint32_t flow_mark; ++ ++ bool tc_modify_flow_deleted; /* Indicate the tc modify flow put success ++ * to delete the original flow. 
*/ + }; + + int netdev_flow_flush(struct netdev *); +diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h +index 22f4cde333..6f509424bc 100644 +--- a/lib/netdev-provider.h ++++ b/lib/netdev-provider.h +@@ -40,7 +40,9 @@ struct netdev_tnl_build_header_params; + enum netdev_ol_flags { + NETDEV_TX_OFFLOAD_IPV4_CKSUM = 1 << 0, + NETDEV_TX_OFFLOAD_TCP_CKSUM = 1 << 1, +- NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 2, ++ NETDEV_TX_OFFLOAD_UDP_CKSUM = 1 << 2, ++ NETDEV_TX_OFFLOAD_SCTP_CKSUM = 1 << 3, ++ NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 4, + }; + + /* A network device (e.g. an Ethernet device). +diff --git a/lib/netdev.c b/lib/netdev.c +index f95b19af4d..8c44eee8e9 100644 +--- a/lib/netdev.c ++++ b/lib/netdev.c +@@ -791,6 +791,8 @@ static bool + netdev_send_prepare_packet(const uint64_t netdev_flags, + struct dp_packet *packet, char **errormsg) + { ++ uint64_t l4_mask; ++ + if (dp_packet_hwol_is_tso(packet) + && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { + /* Fall back to GSO in software. */ +@@ -798,11 +800,31 @@ netdev_send_prepare_packet(const uint64_t netdev_flags, + return false; + } + +- if (dp_packet_hwol_l4_mask(packet) +- && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { +- /* Fall back to L4 csum in software. */ +- VLOG_ERR_BUF(errormsg, "No L4 checksum support"); ++ l4_mask = dp_packet_hwol_l4_mask(packet); ++ if (l4_mask) { ++ if (dp_packet_hwol_l4_is_tcp(packet)) { ++ if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { ++ /* Fall back to TCP csum in software. */ ++ VLOG_ERR_BUF(errormsg, "No TCP checksum support"); ++ return false; ++ } ++ } else if (dp_packet_hwol_l4_is_udp(packet)) { ++ if (!(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { ++ /* Fall back to UDP csum in software. */ ++ VLOG_ERR_BUF(errormsg, "No UDP checksum support"); ++ return false; ++ } ++ } else if (dp_packet_hwol_l4_is_sctp(packet)) { ++ if (!(netdev_flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)) { ++ /* Fall back to SCTP csum in software. */ ++ VLOG_ERR_BUF(errormsg, "No SCTP checksum support"); ++ return false; ++ } ++ } else { ++ VLOG_ERR_BUF(errormsg, "No L4 checksum support: mask: %"PRIu64, ++ l4_mask); + return false; ++ } + } + + return true; +diff --git a/lib/odp-execute.c b/lib/odp-execute.c +index 42d3335f0f..97320a4dba 100644 +--- a/lib/odp-execute.c ++++ b/lib/odp-execute.c +@@ -761,10 +761,11 @@ odp_execute_check_pkt_len(void *dp, struct dp_packet *packet, bool steal, + + const struct nlattr *a; + struct dp_packet_batch pb; ++ uint32_t size = dp_packet_get_send_len(packet) ++ - dp_packet_l2_pad_size(packet); + + a = attrs[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]; +- bool is_greater = dp_packet_size(packet) > nl_attr_get_u16(a); +- if (is_greater) { ++ if (size > nl_attr_get_u16(a)) { + a = attrs[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER]; + } else { + a = attrs[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL]; +diff --git a/lib/odp-util.c b/lib/odp-util.c +index 746d1e97d4..6baa2a8a70 100644 +--- a/lib/odp-util.c ++++ b/lib/odp-util.c +@@ -6225,7 +6225,9 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms, + struct ovs_key_nd_extensions *nd_ext_key; + + if (data->igmp_group_ip4 != 0 || data->tcp_flags != 0) { +- nd_ext_key = nl_msg_put_unspec_uninit(buf, ++ /* 'struct ovs_key_nd_extensions' has padding, ++ * clear it. 
*/ ++ nd_ext_key = nl_msg_put_unspec_zero(buf, + OVS_KEY_ATTR_ND_EXTENSIONS, + sizeof *nd_ext_key); + nd_ext_key->nd_reserved = data->igmp_group_ip4; +@@ -6275,6 +6277,10 @@ odp_key_from_dp_packet(struct ofpbuf *buf, const struct dp_packet *packet) + + nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, md->skb_priority); + ++ if (md->dp_hash) { ++ nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, md->dp_hash); ++ } ++ + if (flow_tnl_dst_is_set(&md->tunnel)) { + tun_key_to_attr(buf, &md->tunnel, &md->tunnel, NULL, NULL); + } +@@ -7565,6 +7571,28 @@ struct offsetof_sizeof { + int size; + }; + ++ ++/* Performs bitwise OR over the fields in 'dst_' and 'src_' specified in ++ * 'offsetof_sizeof_arr' array. Result is stored in 'dst_'. */ ++static void ++or_masks(void *dst_, const void *src_, ++ struct offsetof_sizeof *offsetof_sizeof_arr) ++{ ++ int field, size, offset; ++ const uint8_t *src = src_; ++ uint8_t *dst = dst_; ++ ++ for (field = 0; ; field++) { ++ size = offsetof_sizeof_arr[field].size; ++ offset = offsetof_sizeof_arr[field].offset; ++ ++ if (!size) { ++ return; ++ } ++ or_bytes(dst + offset, src + offset, size); ++ } ++} ++ + /* Compares each of the fields in 'key0' and 'key1'. The fields are specified + * in 'offsetof_sizeof_arr', which is an array terminated by a 0-size field. + * Returns true if all of the fields are equal, false if at least one differs. +@@ -7643,9 +7671,10 @@ commit_set_ether_action(const struct flow *flow, struct flow *base_flow, + struct flow_wildcards *wc, + bool use_masked) + { +- struct ovs_key_ethernet key, base, mask; ++ struct ovs_key_ethernet key, base, mask, orig_mask; + struct offsetof_sizeof ovs_key_ethernet_offsetof_sizeof_arr[] = + OVS_KEY_ETHERNET_OFFSETOF_SIZEOF_ARR; ++ + if (flow->packet_type != htonl(PT_ETH)) { + return; + } +@@ -7653,11 +7682,13 @@ commit_set_ether_action(const struct flow *flow, struct flow *base_flow, + get_ethernet_key(flow, &key); + get_ethernet_key(base_flow, &base); + get_ethernet_key(&wc->masks, &mask); ++ memcpy(&orig_mask, &mask, sizeof mask); + + if (commit(OVS_KEY_ATTR_ETHERNET, use_masked, + &key, &base, &mask, sizeof key, + ovs_key_ethernet_offsetof_sizeof_arr, odp_actions)) { + put_ethernet_key(&base, base_flow); ++ or_masks(&mask, &orig_mask, ovs_key_ethernet_offsetof_sizeof_arr); + put_ethernet_key(&mask, &wc->masks); + } + } +@@ -7781,7 +7812,7 @@ commit_set_ipv4_action(const struct flow *flow, struct flow *base_flow, + struct ofpbuf *odp_actions, struct flow_wildcards *wc, + bool use_masked) + { +- struct ovs_key_ipv4 key, mask, base; ++ struct ovs_key_ipv4 key, mask, orig_mask, base; + struct offsetof_sizeof ovs_key_ipv4_offsetof_sizeof_arr[] = + OVS_KEY_IPV4_OFFSETOF_SIZEOF_ARR; + +@@ -7792,6 +7823,7 @@ commit_set_ipv4_action(const struct flow *flow, struct flow *base_flow, + get_ipv4_key(flow, &key, false); + get_ipv4_key(base_flow, &base, false); + get_ipv4_key(&wc->masks, &mask, true); ++ memcpy(&orig_mask, &mask, sizeof mask); + mask.ipv4_proto = 0; /* Not writeable. */ + mask.ipv4_frag = 0; /* Not writable. */ + +@@ -7803,9 +7835,8 @@ commit_set_ipv4_action(const struct flow *flow, struct flow *base_flow, + if (commit(OVS_KEY_ATTR_IPV4, use_masked, &key, &base, &mask, sizeof key, + ovs_key_ipv4_offsetof_sizeof_arr, odp_actions)) { + put_ipv4_key(&base, base_flow, false); +- if (mask.ipv4_proto != 0) { /* Mask was changed by commit(). 
*/ +- put_ipv4_key(&mask, &wc->masks, true); +- } ++ or_masks(&mask, &orig_mask, ovs_key_ipv4_offsetof_sizeof_arr); ++ put_ipv4_key(&mask, &wc->masks, true); + } + } + +@@ -7838,7 +7869,7 @@ commit_set_ipv6_action(const struct flow *flow, struct flow *base_flow, + struct ofpbuf *odp_actions, struct flow_wildcards *wc, + bool use_masked) + { +- struct ovs_key_ipv6 key, mask, base; ++ struct ovs_key_ipv6 key, mask, orig_mask, base; + struct offsetof_sizeof ovs_key_ipv6_offsetof_sizeof_arr[] = + OVS_KEY_IPV6_OFFSETOF_SIZEOF_ARR; + +@@ -7849,6 +7880,7 @@ commit_set_ipv6_action(const struct flow *flow, struct flow *base_flow, + get_ipv6_key(flow, &key, false); + get_ipv6_key(base_flow, &base, false); + get_ipv6_key(&wc->masks, &mask, true); ++ memcpy(&orig_mask, &mask, sizeof mask); + mask.ipv6_proto = 0; /* Not writeable. */ + mask.ipv6_frag = 0; /* Not writable. */ + mask.ipv6_label &= htonl(IPV6_LABEL_MASK); /* Not writable. */ +@@ -7861,9 +7893,8 @@ commit_set_ipv6_action(const struct flow *flow, struct flow *base_flow, + if (commit(OVS_KEY_ATTR_IPV6, use_masked, &key, &base, &mask, sizeof key, + ovs_key_ipv6_offsetof_sizeof_arr, odp_actions)) { + put_ipv6_key(&base, base_flow, false); +- if (mask.ipv6_proto != 0) { /* Mask was changed by commit(). */ +- put_ipv6_key(&mask, &wc->masks, true); +- } ++ or_masks(&mask, &orig_mask, ovs_key_ipv6_offsetof_sizeof_arr); ++ put_ipv6_key(&mask, &wc->masks, true); + } + } + +@@ -7894,17 +7925,19 @@ static enum slow_path_reason + commit_set_arp_action(const struct flow *flow, struct flow *base_flow, + struct ofpbuf *odp_actions, struct flow_wildcards *wc) + { +- struct ovs_key_arp key, mask, base; ++ struct ovs_key_arp key, mask, orig_mask, base; + struct offsetof_sizeof ovs_key_arp_offsetof_sizeof_arr[] = + OVS_KEY_ARP_OFFSETOF_SIZEOF_ARR; + + get_arp_key(flow, &key); + get_arp_key(base_flow, &base); + get_arp_key(&wc->masks, &mask); ++ memcpy(&orig_mask, &mask, sizeof mask); + + if (commit(OVS_KEY_ATTR_ARP, true, &key, &base, &mask, sizeof key, + ovs_key_arp_offsetof_sizeof_arr, odp_actions)) { + put_arp_key(&base, base_flow); ++ or_masks(&mask, &orig_mask, ovs_key_arp_offsetof_sizeof_arr); + put_arp_key(&mask, &wc->masks); + return SLOW_ACTION; + } +@@ -7931,7 +7964,7 @@ static enum slow_path_reason + commit_set_icmp_action(const struct flow *flow, struct flow *base_flow, + struct ofpbuf *odp_actions, struct flow_wildcards *wc) + { +- struct ovs_key_icmp key, mask, base; ++ struct ovs_key_icmp key, mask, orig_mask, base; + struct offsetof_sizeof ovs_key_icmp_offsetof_sizeof_arr[] = + OVS_KEY_ICMP_OFFSETOF_SIZEOF_ARR; + enum ovs_key_attr attr; +@@ -7947,10 +7980,12 @@ commit_set_icmp_action(const struct flow *flow, struct flow *base_flow, + get_icmp_key(flow, &key); + get_icmp_key(base_flow, &base); + get_icmp_key(&wc->masks, &mask); ++ memcpy(&orig_mask, &mask, sizeof mask); + + if (commit(attr, false, &key, &base, &mask, sizeof key, + ovs_key_icmp_offsetof_sizeof_arr, odp_actions)) { + put_icmp_key(&base, base_flow); ++ or_masks(&mask, &orig_mask, ovs_key_icmp_offsetof_sizeof_arr); + put_icmp_key(&mask, &wc->masks); + return SLOW_ACTION; + } +@@ -7998,17 +8033,19 @@ commit_set_nd_action(const struct flow *flow, struct flow *base_flow, + struct ofpbuf *odp_actions, + struct flow_wildcards *wc, bool use_masked) + { +- struct ovs_key_nd key, mask, base; ++ struct ovs_key_nd key, mask, orig_mask, base; + struct offsetof_sizeof ovs_key_nd_offsetof_sizeof_arr[] = + OVS_KEY_ND_OFFSETOF_SIZEOF_ARR; + + get_nd_key(flow, &key); + get_nd_key(base_flow, 
&base); + get_nd_key(&wc->masks, &mask); ++ memcpy(&orig_mask, &mask, sizeof mask); + + if (commit(OVS_KEY_ATTR_ND, use_masked, &key, &base, &mask, sizeof key, + ovs_key_nd_offsetof_sizeof_arr, odp_actions)) { + put_nd_key(&base, base_flow); ++ or_masks(&mask, &orig_mask, ovs_key_nd_offsetof_sizeof_arr); + put_nd_key(&mask, &wc->masks); + return SLOW_ACTION; + } +@@ -8022,18 +8059,20 @@ commit_set_nd_extensions_action(const struct flow *flow, + struct ofpbuf *odp_actions, + struct flow_wildcards *wc, bool use_masked) + { +- struct ovs_key_nd_extensions key, mask, base; ++ struct ovs_key_nd_extensions key, mask, orig_mask, base; + struct offsetof_sizeof ovs_key_nd_extensions_offsetof_sizeof_arr[] = + OVS_KEY_ND_EXTENSIONS_OFFSETOF_SIZEOF_ARR; + + get_nd_extensions_key(flow, &key); + get_nd_extensions_key(base_flow, &base); + get_nd_extensions_key(&wc->masks, &mask); ++ memcpy(&orig_mask, &mask, sizeof mask); + + if (commit(OVS_KEY_ATTR_ND_EXTENSIONS, use_masked, &key, &base, &mask, + sizeof key, ovs_key_nd_extensions_offsetof_sizeof_arr, + odp_actions)) { + put_nd_extensions_key(&base, base_flow); ++ or_masks(&mask, &orig_mask, ovs_key_nd_extensions_offsetof_sizeof_arr); + put_nd_extensions_key(&mask, &wc->masks); + return SLOW_ACTION; + } +@@ -8248,7 +8287,7 @@ commit_set_port_action(const struct flow *flow, struct flow *base_flow, + bool use_masked) + { + enum ovs_key_attr key_type; +- union ovs_key_tp key, mask, base; ++ union ovs_key_tp key, mask, orig_mask, base; + struct offsetof_sizeof ovs_key_tp_offsetof_sizeof_arr[] = + OVS_KEY_TCP_OFFSETOF_SIZEOF_ARR; + +@@ -8274,10 +8313,12 @@ commit_set_port_action(const struct flow *flow, struct flow *base_flow, + get_tp_key(flow, &key); + get_tp_key(base_flow, &base); + get_tp_key(&wc->masks, &mask); ++ memcpy(&orig_mask, &mask, sizeof mask); + + if (commit(key_type, use_masked, &key, &base, &mask, sizeof key, + ovs_key_tp_offsetof_sizeof_arr, odp_actions)) { + put_tp_key(&base, base_flow); ++ or_masks(&mask, &orig_mask, ovs_key_tp_offsetof_sizeof_arr); + put_tp_key(&mask, &wc->masks); + } + } +@@ -8301,7 +8342,7 @@ commit_set_priority_action(const struct flow *flow, struct flow *base_flow, + if (commit(OVS_KEY_ATTR_PRIORITY, use_masked, &key, &base, &mask, + sizeof key, ovs_key_prio_offsetof_sizeof_arr, odp_actions)) { + base_flow->skb_priority = base; +- wc->masks.skb_priority = mask; ++ wc->masks.skb_priority |= mask; + } + } + +@@ -8325,7 +8366,7 @@ commit_set_pkt_mark_action(const struct flow *flow, struct flow *base_flow, + sizeof key, ovs_key_pkt_mark_offsetof_sizeof_arr, + odp_actions)) { + base_flow->pkt_mark = base; +- wc->masks.pkt_mark = mask; ++ wc->masks.pkt_mark |= mask; + } + } + +diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c +index ddef3b0c87..ef8b2b4527 100644 +--- a/lib/ofp-actions.c ++++ b/lib/ofp-actions.c +@@ -6657,6 +6657,7 @@ parse_CT(char *arg, const struct ofpact_parse_params *pp) + } + + if (ofpbuf_oversized(pp->ofpacts)) { ++ free(error); + return xasprintf("input too big"); + } + +diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c +index ebc8120f0f..cde1e925ba 100644 +--- a/lib/ovs-rcu.c ++++ b/lib/ovs-rcu.c +@@ -30,6 +30,8 @@ + + VLOG_DEFINE_THIS_MODULE(ovs_rcu); + ++#define MIN_CBS 16 ++ + struct ovsrcu_cb { + void (*function)(void *aux); + void *aux; +@@ -37,7 +39,8 @@ struct ovsrcu_cb { + + struct ovsrcu_cbset { + struct ovs_list list_node; +- struct ovsrcu_cb cbs[16]; ++ struct ovsrcu_cb *cbs; ++ size_t n_allocated; + int n_cbs; + }; + +@@ -310,16 +313,19 @@ ovsrcu_postpone__(void (*function)(void *aux), void 
*aux) + cbset = perthread->cbset; + if (!cbset) { + cbset = perthread->cbset = xmalloc(sizeof *perthread->cbset); ++ cbset->cbs = xmalloc(MIN_CBS * sizeof *cbset->cbs); ++ cbset->n_allocated = MIN_CBS; + cbset->n_cbs = 0; + } + ++ if (cbset->n_cbs == cbset->n_allocated) { ++ cbset->cbs = x2nrealloc(cbset->cbs, &cbset->n_allocated, ++ sizeof *cbset->cbs); ++ } ++ + cb = &cbset->cbs[cbset->n_cbs++]; + cb->function = function; + cb->aux = aux; +- +- if (cbset->n_cbs >= ARRAY_SIZE(cbset->cbs)) { +- ovsrcu_flush_cbset(perthread); +- } + } + + static bool +@@ -341,6 +347,7 @@ ovsrcu_call_postponed(void) + for (cb = cbset->cbs; cb < &cbset->cbs[cbset->n_cbs]; cb++) { + cb->function(cb->aux); + } ++ free(cbset->cbs); + free(cbset); + } + +diff --git a/lib/ovs-router.c b/lib/ovs-router.c +index bfb2b7071b..09b81c6e5a 100644 +--- a/lib/ovs-router.c ++++ b/lib/ovs-router.c +@@ -505,7 +505,7 @@ ovs_router_flush(void) + ovs_mutex_lock(&mutex); + classifier_defer(&cls); + CLS_FOR_EACH(rt, cr, &cls) { +- if (rt->priority == rt->plen) { ++ if (rt->priority == rt->plen || rt->local) { + rt_entry_delete__(&rt->cr); + } + } +diff --git a/lib/ovsdb-idl-provider.h b/lib/ovsdb-idl-provider.h +index 30d1d08eba..00497d940c 100644 +--- a/lib/ovsdb-idl-provider.h ++++ b/lib/ovsdb-idl-provider.h +@@ -122,8 +122,12 @@ struct ovsdb_idl_table { + unsigned int change_seqno[OVSDB_IDL_CHANGE_MAX]; + struct ovs_list indexes; /* Contains "struct ovsdb_idl_index"s */ + struct ovs_list track_list; /* Tracked rows (ovsdb_idl_row.track_node). */ +- struct ovsdb_idl_condition condition; +- bool cond_changed; ++ struct ovsdb_idl_condition *ack_cond; /* Last condition acked by the ++ * server. */ ++ struct ovsdb_idl_condition *req_cond; /* Last condition requested to the ++ * server. */ ++ struct ovsdb_idl_condition *new_cond; /* Latest condition set by the IDL ++ * client. 
*/ + }; + + struct ovsdb_idl_class { +diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c +index 190143f363..5abe40f6d8 100644 +--- a/lib/ovsdb-idl.c ++++ b/lib/ovsdb-idl.c +@@ -240,6 +240,10 @@ static void ovsdb_idl_send_monitor_request(struct ovsdb_idl *, + struct ovsdb_idl_db *, + enum ovsdb_idl_monitor_method); + static void ovsdb_idl_db_clear(struct ovsdb_idl_db *db); ++static void ovsdb_idl_db_ack_condition(struct ovsdb_idl_db *db); ++static void ovsdb_idl_db_sync_condition(struct ovsdb_idl_db *db); ++static void ovsdb_idl_condition_move(struct ovsdb_idl_condition **dst, ++ struct ovsdb_idl_condition **src); + + struct ovsdb_idl { + struct ovsdb_idl_db server; +@@ -422,9 +426,11 @@ ovsdb_idl_db_init(struct ovsdb_idl_db *db, const struct ovsdb_idl_class *class, + = table->change_seqno[OVSDB_IDL_CHANGE_MODIFY] + = table->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0; + table->db = db; +- ovsdb_idl_condition_init(&table->condition); +- ovsdb_idl_condition_add_clause_true(&table->condition); +- table->cond_changed = false; ++ table->ack_cond = NULL; ++ table->req_cond = NULL; ++ table->new_cond = xmalloc(sizeof *table->new_cond); ++ ovsdb_idl_condition_init(table->new_cond); ++ ovsdb_idl_condition_add_clause_true(table->new_cond); + } + db->monitor_id = json_array_create_2(json_string_create("monid"), + json_string_create(class->database)); +@@ -556,12 +562,15 @@ ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *idl, bool shuffle) + static void + ovsdb_idl_db_destroy(struct ovsdb_idl_db *db) + { ++ struct ovsdb_idl_condition *null_cond = NULL; + ovs_assert(!db->txn); + ovsdb_idl_db_txn_abort_all(db); + ovsdb_idl_db_clear(db); + for (size_t i = 0; i < db->class_->n_tables; i++) { + struct ovsdb_idl_table *table = &db->tables[i]; +- ovsdb_idl_condition_destroy(&table->condition); ++ ovsdb_idl_condition_move(&table->ack_cond, &null_cond); ++ ovsdb_idl_condition_move(&table->req_cond, &null_cond); ++ ovsdb_idl_condition_move(&table->new_cond, &null_cond); + ovsdb_idl_destroy_indexes(table); + shash_destroy(&table->columns); + hmap_destroy(&table->rows); +@@ -610,7 +619,6 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db) + struct ovsdb_idl_table *table = &db->tables[i]; + struct ovsdb_idl_row *row, *next_row; + +- table->cond_changed = false; + if (hmap_is_empty(&table->rows)) { + continue; + } +@@ -634,7 +642,6 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db) + } + ovsdb_idl_row_destroy_postprocess(db); + +- db->cond_changed = false; + db->cond_seqno = 0; + ovsdb_idl_db_track_clear(db); + +@@ -692,6 +699,12 @@ ovsdb_idl_send_request(struct ovsdb_idl *idl, struct jsonrpc_msg *request) + static void + ovsdb_idl_restart_fsm(struct ovsdb_idl *idl) + { ++ /* Resync data DB table conditions to avoid missing updates due to ++ * conditions that were in flight or changed locally while the connection ++ * was down. ++ */ ++ ovsdb_idl_db_sync_condition(&idl->data); ++ + ovsdb_idl_send_schema_request(idl, &idl->server); + ovsdb_idl_transition(idl, IDL_S_SERVER_SCHEMA_REQUESTED); + idl->data.monitoring = OVSDB_IDL_NOT_MONITORING; +@@ -799,7 +812,9 @@ ovsdb_idl_process_response(struct ovsdb_idl *idl, struct jsonrpc_msg *msg) + * do, it's a "monitor_cond_change", which means that the conditional + * monitor clauses were updated. + * +- * If further condition changes were pending, send them now. */ ++ * Mark the last requested conditions as acked and if further ++ * condition changes were pending, send them now. 
*/ ++ ovsdb_idl_db_ack_condition(&idl->data); + ovsdb_idl_send_cond_change(idl); + idl->data.cond_seqno++; + break; +@@ -1495,30 +1510,60 @@ ovsdb_idl_condition_equals(const struct ovsdb_idl_condition *a, + } + + static void +-ovsdb_idl_condition_clone(struct ovsdb_idl_condition *dst, ++ovsdb_idl_condition_clone(struct ovsdb_idl_condition **dst, + const struct ovsdb_idl_condition *src) + { +- ovsdb_idl_condition_init(dst); ++ if (*dst) { ++ ovsdb_idl_condition_destroy(*dst); ++ } else { ++ *dst = xmalloc(sizeof **dst); ++ } ++ ovsdb_idl_condition_init(*dst); + +- dst->is_true = src->is_true; ++ (*dst)->is_true = src->is_true; + + const struct ovsdb_idl_clause *clause; + HMAP_FOR_EACH (clause, hmap_node, &src->clauses) { +- ovsdb_idl_condition_add_clause__(dst, clause, clause->hmap_node.hash); ++ ovsdb_idl_condition_add_clause__(*dst, clause, clause->hmap_node.hash); + } + } + ++static void ++ovsdb_idl_condition_move(struct ovsdb_idl_condition **dst, ++ struct ovsdb_idl_condition **src) ++{ ++ if (*dst) { ++ ovsdb_idl_condition_destroy(*dst); ++ free(*dst); ++ } ++ *dst = *src; ++ *src = NULL; ++} ++ + static unsigned int + ovsdb_idl_db_set_condition(struct ovsdb_idl_db *db, + const struct ovsdb_idl_table_class *tc, + const struct ovsdb_idl_condition *condition) + { ++ struct ovsdb_idl_condition *table_cond; + struct ovsdb_idl_table *table = ovsdb_idl_db_table_from_class(db, tc); + unsigned int seqno = db->cond_seqno; +- if (!ovsdb_idl_condition_equals(condition, &table->condition)) { +- ovsdb_idl_condition_destroy(&table->condition); +- ovsdb_idl_condition_clone(&table->condition, condition); +- db->cond_changed = table->cond_changed = true; ++ ++ /* Compare the new condition to the last known condition which can be ++ * either "new" (not sent yet), "requested" or "acked", in this order. ++ */ ++ if (table->new_cond) { ++ table_cond = table->new_cond; ++ } else if (table->req_cond) { ++ table_cond = table->req_cond; ++ } else { ++ table_cond = table->ack_cond; ++ } ++ ovs_assert(table_cond); ++ ++ if (!ovsdb_idl_condition_equals(condition, table_cond)) { ++ ovsdb_idl_condition_clone(&table->new_cond, condition); ++ db->cond_changed = true; + poll_immediate_wake(); + return seqno + 1; + } +@@ -1563,9 +1608,8 @@ ovsdb_idl_condition_to_json(const struct ovsdb_idl_condition *cnd) + } + + static struct json * +-ovsdb_idl_create_cond_change_req(struct ovsdb_idl_table *table) ++ovsdb_idl_create_cond_change_req(const struct ovsdb_idl_condition *cond) + { +- const struct ovsdb_idl_condition *cond = &table->condition; + struct json *monitor_cond_change_request = json_object_create(); + struct json *cond_json = ovsdb_idl_condition_to_json(cond); + +@@ -1585,8 +1629,12 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db) + for (size_t i = 0; i < db->class_->n_tables; i++) { + struct ovsdb_idl_table *table = &db->tables[i]; + +- if (table->cond_changed) { +- struct json *req = ovsdb_idl_create_cond_change_req(table); ++ /* Always use the most recent conditions set by the IDL client when ++ * requesting monitor_cond_change, i.e., table->new_cond. ++ */ ++ if (table->new_cond) { ++ struct json *req = ++ ovsdb_idl_create_cond_change_req(table->new_cond); + if (req) { + if (!monitor_cond_change_requests) { + monitor_cond_change_requests = json_object_create(); +@@ -1595,7 +1643,11 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db) + table->class_->name, + json_array_create_1(req)); + } +- table->cond_changed = false; ++ /* Mark the new condition as requested by moving it to req_cond. 
++ * If there's already requested condition that's a bug. ++ */ ++ ovs_assert(table->req_cond == NULL); ++ ovsdb_idl_condition_move(&table->req_cond, &table->new_cond); + } + } + +@@ -1610,6 +1662,73 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db) + return jsonrpc_create_request("monitor_cond_change", params, NULL); + } + ++/* Marks all requested table conditions in 'db' as acked by the server. ++ * It should be called when the server replies to monitor_cond_change ++ * requests. ++ */ ++static void ++ovsdb_idl_db_ack_condition(struct ovsdb_idl_db *db) ++{ ++ for (size_t i = 0; i < db->class_->n_tables; i++) { ++ struct ovsdb_idl_table *table = &db->tables[i]; ++ ++ if (table->req_cond) { ++ ovsdb_idl_condition_move(&table->ack_cond, &table->req_cond); ++ } ++ } ++} ++ ++/* Should be called when the IDL fsm is restarted and resyncs table conditions ++ * based on the state the DB is in: ++ * - if a non-zero last_id is available for the DB then upon reconnect ++ * the IDL should first request acked conditions to avoid missing updates ++ * about records that were added before the transaction with ++ * txn-id == last_id. If there were requested condition changes in flight ++ * (i.e., req_cond not NULL) and the IDL client didn't set new conditions ++ * (i.e., new_cond is NULL) then move req_cond to new_cond to trigger a ++ * follow up monitor_cond_change request. ++ * - if there's no last_id available for the DB then it's safe to use the ++ * latest conditions set by the IDL client even if they weren't acked yet. ++ */ ++static void ++ovsdb_idl_db_sync_condition(struct ovsdb_idl_db *db) ++{ ++ bool ack_all = uuid_is_zero(&db->last_id); ++ ++ db->cond_changed = false; ++ for (size_t i = 0; i < db->class_->n_tables; i++) { ++ struct ovsdb_idl_table *table = &db->tables[i]; ++ ++ /* When monitor_cond_since requests will be issued, the ++ * table->ack_cond condition will be added to the "where" clause". ++ * Follow up monitor_cond_change requests will use table->new_cond. ++ */ ++ if (ack_all) { ++ if (table->new_cond) { ++ ovsdb_idl_condition_move(&table->req_cond, &table->new_cond); ++ } ++ ++ if (table->req_cond) { ++ ovsdb_idl_condition_move(&table->ack_cond, &table->req_cond); ++ } ++ } else { ++ /* If there was no "unsent" condition but instead a ++ * monitor_cond_change request was in flight, move table->req_cond ++ * to table->new_cond and set db->cond_changed to trigger a new ++ * monitor_cond_change request. ++ * ++ * However, if a new condition has been set by the IDL client, ++ * monitor_cond_change will be sent anyway and will use the most ++ * recent table->new_cond so there's no need to update it here. ++ */ ++ if (table->req_cond && !table->new_cond) { ++ ovsdb_idl_condition_move(&table->new_cond, &table->req_cond); ++ db->cond_changed = true; ++ } ++ } ++ } ++} ++ + static void + ovsdb_idl_send_cond_change(struct ovsdb_idl *idl) + { +@@ -2064,13 +2183,15 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl, struct ovsdb_idl_db *db, + monitor_request = json_object_create(); + json_object_put(monitor_request, "columns", columns); + +- const struct ovsdb_idl_condition *cond = &table->condition; ++ /* Always use acked conditions when requesting ++ * monitor_cond/monitor_cond_since. 
++ */ ++ const struct ovsdb_idl_condition *cond = table->ack_cond; + if ((monitor_method == OVSDB_IDL_MM_MONITOR_COND || + monitor_method == OVSDB_IDL_MM_MONITOR_COND_SINCE) && +- !ovsdb_idl_condition_is_true(cond)) { ++ cond && !ovsdb_idl_condition_is_true(cond)) { + json_object_put(monitor_request, "where", + ovsdb_idl_condition_to_json(cond)); +- table->cond_changed = false; + } + json_object_put(monitor_requests, tc->name, + json_array_create_1(monitor_request)); +@@ -2078,8 +2199,6 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl, struct ovsdb_idl_db *db, + } + free_schema(schema); + +- db->cond_changed = false; +- + struct json *params = json_array_create_3( + json_string_create(db->class_->database), + json_clone(db->monitor_id), +diff --git a/lib/pvector.c b/lib/pvector.c +index aaeee92147..cc527fdc41 100644 +--- a/lib/pvector.c ++++ b/lib/pvector.c +@@ -33,7 +33,7 @@ pvector_impl_alloc(size_t size) + struct pvector_impl *impl; + + impl = xmalloc(sizeof *impl + size * sizeof impl->vector[0]); +- impl->size = 0; ++ atomic_init(&impl->size, 0); + impl->allocated = size; + + return impl; +@@ -117,18 +117,22 @@ pvector_insert(struct pvector *pvec, void *ptr, int priority) + { + struct pvector_impl *temp = pvec->temp; + struct pvector_impl *old = pvector_impl_get(pvec); ++ size_t size; + + ovs_assert(ptr != NULL); + ++ /* There is no possible concurrent writer. Insertions must be protected ++ * by mutex or be always excuted from the same thread. */ ++ atomic_read_relaxed(&old->size, &size); ++ + /* Check if can add to the end without reallocation. */ +- if (!temp && old->allocated > old->size && +- (!old->size || priority <= old->vector[old->size - 1].priority)) { +- old->vector[old->size].ptr = ptr; +- old->vector[old->size].priority = priority; ++ if (!temp && old->allocated > size && ++ (!size || priority <= old->vector[size - 1].priority)) { ++ old->vector[size].ptr = ptr; ++ old->vector[size].priority = priority; + /* Size increment must not be visible to the readers before the new + * entry is stored. */ +- atomic_thread_fence(memory_order_release); +- ++old->size; ++ atomic_store_explicit(&old->size, size + 1, memory_order_release); + } else { + if (!temp) { + temp = pvector_impl_dup(old); +diff --git a/lib/pvector.h b/lib/pvector.h +index b990ed9d59..0d3290dc37 100644 +--- a/lib/pvector.h ++++ b/lib/pvector.h +@@ -69,8 +69,8 @@ struct pvector_entry { + }; + + struct pvector_impl { +- size_t size; /* Number of entries in the vector. */ +- size_t allocated; /* Number of allocated entries. */ ++ atomic_size_t size; /* Number of entries in the vector. */ ++ size_t allocated; /* Number of allocated entries. */ + struct pvector_entry vector[]; + }; + +@@ -181,12 +181,17 @@ pvector_cursor_init(const struct pvector *pvec, + { + const struct pvector_impl *impl; + struct pvector_cursor cursor; ++ size_t size; + + impl = ovsrcu_get(struct pvector_impl *, &pvec->impl); + +- ovs_prefetch_range(impl->vector, impl->size * sizeof impl->vector[0]); ++ /* Use memory_order_acquire to ensure entry access can not be ++ * reordered to happen before size read. 
*/ ++ atomic_read_explicit(&CONST_CAST(struct pvector_impl *, impl)->size, ++ &size, memory_order_acquire); ++ ovs_prefetch_range(impl->vector, size * sizeof impl->vector[0]); + +- cursor.size = impl->size; ++ cursor.size = size; + cursor.vector = impl->vector; + cursor.entry_idx = -1; + +diff --git a/lib/tc.c b/lib/tc.c +index 12af0192b6..cc8c2d849e 100644 +--- a/lib/tc.c ++++ b/lib/tc.c +@@ -1647,8 +1647,10 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower) + } + + bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs); +- put_32aligned_u64(&stats->n_packets, bs->packets); +- put_32aligned_u64(&stats->n_bytes, bs->bytes); ++ if (bs->packets) { ++ put_32aligned_u64(&stats->n_packets, bs->packets); ++ put_32aligned_u64(&stats->n_bytes, bs->bytes); ++ } + + return 0; + } +diff --git a/lib/tc.h b/lib/tc.h +index d31c0953ed..24a4994fd1 100644 +--- a/lib/tc.h ++++ b/lib/tc.h +@@ -235,7 +235,7 @@ struct tc_action { + } ipv6; + }; + +- union { ++ struct { + ovs_be16 min; + ovs_be16 max; + } port; +diff --git a/lib/util.c b/lib/util.c +index 830e14516f..25635b27ff 100644 +--- a/lib/util.c ++++ b/lib/util.c +@@ -1395,6 +1395,19 @@ is_all_ones(const void *p, size_t n) + return is_all_byte(p, n, 0xff); + } + ++/* *dst |= *src for 'n' bytes. */ ++void ++or_bytes(void *dst_, const void *src_, size_t n) ++{ ++ const uint8_t *src = src_; ++ uint8_t *dst = dst_; ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ *dst++ |= *src++; ++ } ++} ++ + /* Copies 'n_bits' bits starting from bit 'src_ofs' in 'src' to the 'n_bits' + * starting from bit 'dst_ofs' in 'dst'. 'src' is 'src_len' bytes long and + * 'dst' is 'dst_len' bytes long. +diff --git a/lib/util.h b/lib/util.h +index 7ad8758fe6..067dcad157 100644 +--- a/lib/util.h ++++ b/lib/util.h +@@ -484,6 +484,7 @@ be64_is_superset(ovs_be64 super, ovs_be64 sub) + bool is_all_zeros(const void *, size_t); + bool is_all_ones(const void *, size_t); + bool is_all_byte(const void *, size_t, uint8_t byte); ++void or_bytes(void *dst, const void *src, size_t n); + void bitwise_copy(const void *src, unsigned int src_len, unsigned int src_ofs, + void *dst, unsigned int dst_len, unsigned int dst_ofs, + unsigned int n_bits); +diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c +index 51d656cba9..aee676d93e 100644 +--- a/ofproto/connmgr.c ++++ b/ofproto/connmgr.c +@@ -190,8 +190,8 @@ struct ofservice { + + static void ofservice_run(struct ofservice *); + static void ofservice_wait(struct ofservice *); +-static void ofservice_reconfigure(struct ofservice *, +- const struct ofproto_controller *) ++static int ofservice_reconfigure(struct ofservice *, ++ const struct ofproto_controller *) + OVS_REQUIRES(ofproto_mutex); + static void ofservice_create(struct connmgr *mgr, const char *target, + const struct ofproto_controller *) +@@ -602,7 +602,15 @@ connmgr_set_controllers(struct connmgr *mgr, struct shash *controllers) + target); + ofservice_destroy(ofservice); + } else { +- ofservice_reconfigure(ofservice, c); ++ if (ofservice_reconfigure(ofservice, c)) { ++ char *target_to_restore = xstrdup(target); ++ VLOG_INFO("%s: Changes to controller \"%s\" " ++ "expects re-initialization: Re-initializing now.", ++ mgr->name, target); ++ ofservice_destroy(ofservice); ++ ofservice_create(mgr, target_to_restore, c); ++ free(target_to_restore); ++ } + } + } + +@@ -2011,16 +2019,15 @@ ofservice_wait(struct ofservice *ofservice) + } + } + +-static void ++static int + ofservice_reconfigure(struct ofservice *ofservice, + const struct ofproto_controller *settings) + 
OVS_REQUIRES(ofproto_mutex) + { +- /* If the allowed OpenFlow versions change, close all of the existing +- * connections to allow them to reconnect and possibly negotiate a new +- * version. */ ++ /* If the allowed OpenFlow versions change, a full cleanup is needed ++ * for the ofservice and connections. */ + if (ofservice->s.allowed_versions != settings->allowed_versions) { +- ofservice_close_all(ofservice); ++ return -EINVAL; + } + + ofservice->s = *settings; +@@ -2029,6 +2036,8 @@ ofservice_reconfigure(struct ofservice *ofservice, + LIST_FOR_EACH (ofconn, ofservice_node, &ofservice->conns) { + ofconn_reconfigure(ofconn, settings); + } ++ ++ return 0; + } + + /* Finds and returns the ofservice within 'mgr' that has the given +diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h +index 147ef9c333..97699cb905 100644 +--- a/ofproto/ofproto-dpif-rid.h ++++ b/ofproto/ofproto-dpif-rid.h +@@ -22,6 +22,7 @@ + + #include "cmap.h" + #include "ofproto-dpif-mirror.h" ++#include "ofproto/ofproto-provider.h" + #include "openvswitch/list.h" + #include "openvswitch/ofp-actions.h" + #include "ovs-thread.h" +@@ -115,16 +116,25 @@ frozen_metadata_from_flow(struct frozen_metadata *md, + { + memset(md, 0, sizeof *md); + md->tunnel = flow->tunnel; ++ /* It is unsafe for frozen_state to reference tun_table because ++ * tun_table is protected by RCU while the lifecycle of frozen_state ++ * can span several RCU quiesce states. ++ * ++ * The latest valid tun_table can be found by ofproto_get_tun_tab() ++ * efficiently. */ ++ md->tunnel.metadata.tab = NULL; + md->metadata = flow->metadata; + memcpy(md->regs, flow->regs, sizeof md->regs); + md->in_port = flow->in_port.ofp_port; + } + + static inline void +-frozen_metadata_to_flow(const struct frozen_metadata *md, ++frozen_metadata_to_flow(struct ofproto *ofproto, ++ const struct frozen_metadata *md, + struct flow *flow) + { + flow->tunnel = md->tunnel; ++ flow->tunnel.metadata.tab = ofproto_get_tun_tab(ofproto); + flow->metadata = md->metadata; + memcpy(flow->regs, md->regs, sizeof flow->regs); + flow->in_port.ofp_port = md->in_port; +diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c +index 409286ab15..3a290e4918 100644 +--- a/ofproto/ofproto-dpif-upcall.c ++++ b/ofproto/ofproto-dpif-upcall.c +@@ -1545,7 +1545,8 @@ process_upcall(struct udpif *udpif, struct upcall *upcall, + flow_clear_conntrack(&frozen_flow); + } + +- frozen_metadata_to_flow(&state->metadata, &frozen_flow); ++ frozen_metadata_to_flow(&upcall->ofproto->up, &state->metadata, ++ &frozen_flow); + flow_get_metadata(&frozen_flow, &am->pin.up.base.flow_metadata); + + ofproto_dpif_send_async_msg(upcall->ofproto, am); +diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c +index 4407f9c97a..dd89cb47c0 100644 +--- a/ofproto/ofproto-dpif-xlate.c ++++ b/ofproto/ofproto-dpif-xlate.c +@@ -1516,15 +1516,32 @@ xlate_lookup_ofproto_(const struct dpif_backer *backer, + return NULL; + } + +- /* If recirculation was initiated due to bond (in_port = OFPP_NONE) +- * then frozen state is static and xport_uuid is not defined, so xport +- * cannot be restored from frozen state. 
*/ +- if (recirc_id_node->state.metadata.in_port != OFPP_NONE) { ++ ofp_port_t in_port = recirc_id_node->state.metadata.in_port; ++ if (in_port != OFPP_NONE && in_port != OFPP_CONTROLLER) { + struct uuid xport_uuid = recirc_id_node->state.xport_uuid; + xport = xport_lookup_by_uuid(xcfg, &xport_uuid); + if (xport && xport->xbridge && xport->xbridge->ofproto) { + goto out; + } ++ } else { ++ /* OFPP_NONE and OFPP_CONTROLLER are not real ports. They indicate ++ * that the packet originated from the controller via an OpenFlow ++ * "packet-out". The right thing to do is to find just the ++ * ofproto. There is no xport, which is OK. ++ * ++ * OFPP_NONE can also indicate that a bond caused recirculation. */ ++ struct uuid uuid = recirc_id_node->state.ofproto_uuid; ++ const struct xbridge *bridge = xbridge_lookup_by_uuid(xcfg, &uuid); ++ if (bridge && bridge->ofproto) { ++ if (errorp) { ++ *errorp = NULL; ++ } ++ *xportp = NULL; ++ if (ofp_in_port) { ++ *ofp_in_port = in_port; ++ } ++ return bridge->ofproto; ++ } + } + } + +@@ -7519,7 +7536,8 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) + + /* Restore pipeline metadata. May change flow's in_port and other + * metadata to the values that existed when freezing was triggered. */ +- frozen_metadata_to_flow(&state->metadata, flow); ++ frozen_metadata_to_flow(&ctx.xbridge->ofproto->up, ++ &state->metadata, flow); + + /* Restore stack, if any. */ + if (state->stack) { +@@ -7571,14 +7589,10 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) + ctx.error = XLATE_INVALID_TUNNEL_METADATA; + goto exit; + } +- } else if (!flow->tunnel.metadata.tab || xin->frozen_state) { ++ } else if (!flow->tunnel.metadata.tab) { + /* If the original flow did not come in on a tunnel, then it won't have + * FLOW_TNL_F_UDPIF set. However, we still need to have a metadata + * table in case we generate tunnel actions. */ +- /* If the translation is from a frozen state, we use the latest +- * TLV map to avoid segmentation fault in case the old TLV map is +- * replaced by a new one. +- * XXX: It is better to abort translation if the table is changed. 
*/ + flow->tunnel.metadata.tab = ofproto_get_tun_tab( + &ctx.xbridge->ofproto->up); + } +diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c +index 08830d8371..8594afad4a 100644 +--- a/ofproto/ofproto.c ++++ b/ofproto/ofproto.c +@@ -6077,8 +6077,8 @@ ofproto_rule_send_removed(struct rule *rule) + fr.hard_timeout = rule->hard_timeout; + ovs_mutex_unlock(&rule->mutex); + rule->ofproto->ofproto_class->rule_get_stats(rule, &stats, &used); +- fr.packet_count += stats.n_packets; +- fr.byte_count += stats.n_bytes; ++ fr.packet_count = stats.n_packets; ++ fr.byte_count = stats.n_bytes; + connmgr_send_flow_removed(connmgr, &fr); + ovs_mutex_unlock(&ofproto_mutex); + } +diff --git a/ovsdb/execution.c b/ovsdb/execution.c +index e45f3d6796..3a0dad5d0a 100644 +--- a/ovsdb/execution.c ++++ b/ovsdb/execution.c +@@ -712,7 +712,7 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser, + long long int timeout_msec = 0; + size_t i; + +- timeout = ovsdb_parser_member(parser, "timeout", OP_NUMBER | OP_OPTIONAL); ++ timeout = ovsdb_parser_member(parser, "timeout", OP_INTEGER | OP_OPTIONAL); + where = ovsdb_parser_member(parser, "where", OP_ARRAY); + columns_json = ovsdb_parser_member(parser, "columns", + OP_ARRAY | OP_OPTIONAL); +@@ -730,7 +730,7 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser, + } + if (!error) { + if (timeout) { +- timeout_msec = MIN(LLONG_MAX, json_real(timeout)); ++ timeout_msec = json_integer(timeout); + if (timeout_msec < 0) { + error = ovsdb_syntax_error(timeout, NULL, + "timeout must be nonnegative"); +diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c +index b6957d7300..fd7891a729 100644 +--- a/ovsdb/ovsdb-server.c ++++ b/ovsdb/ovsdb-server.c +@@ -540,7 +540,7 @@ close_db(struct server_config *config, struct db *db, char *comment) + + static struct ovsdb_error * OVS_WARN_UNUSED_RESULT + parse_txn(struct server_config *config, struct db *db, +- struct ovsdb_schema *schema, const struct json *txn_json, ++ const struct ovsdb_schema *schema, const struct json *txn_json, + const struct uuid *txnid) + { + if (schema) { +@@ -548,7 +548,9 @@ parse_txn(struct server_config *config, struct db *db, + * (first grabbing its storage), then replace it with the new schema. + * The transaction must also include the replacement data. + * +- * Only clustered database schema changes go through this path. */ ++ * Only clustered database schema changes and snapshot installs ++ * go through this path. ++ */ + ovs_assert(txn_json); + ovs_assert(ovsdb_storage_is_clustered(db->db->storage)); + +@@ -558,13 +560,17 @@ parse_txn(struct server_config *config, struct db *db, + return error; + } + +- ovsdb_jsonrpc_server_reconnect( +- config->jsonrpc, false, +- (db->db->schema +- ? xasprintf("database %s schema changed", db->db->name) +- : xasprintf("database %s connected to storage", db->db->name))); ++ if (!db->db->schema || ++ strcmp(schema->version, db->db->schema->version)) { ++ ovsdb_jsonrpc_server_reconnect( ++ config->jsonrpc, false, ++ (db->db->schema ++ ? xasprintf("database %s schema changed", db->db->name) ++ : xasprintf("database %s connected to storage", ++ db->db->name))); ++ } + +- ovsdb_replace(db->db, ovsdb_create(schema, NULL)); ++ ovsdb_replace(db->db, ovsdb_create(ovsdb_schema_clone(schema), NULL)); + + /* Force update to schema in _Server database. 
*/ + db->row_uuid = UUID_ZERO; +@@ -613,6 +619,7 @@ read_db(struct server_config *config, struct db *db) + } else { + error = parse_txn(config, db, schema, txn_json, &txnid); + json_destroy(txn_json); ++ ovsdb_schema_destroy(schema); + if (error) { + break; + } +diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c +index cfc96b32f8..2da117cb36 100644 +--- a/ovsdb/ovsdb.c ++++ b/ovsdb/ovsdb.c +@@ -414,7 +414,7 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage) + db->storage = storage; + ovs_list_init(&db->monitors); + ovs_list_init(&db->triggers); +- db->run_triggers = false; ++ db->run_triggers_now = db->run_triggers = false; + + shash_init(&db->tables); + if (schema) { +@@ -502,6 +502,10 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage) + } + + simap_increase(usage, "cells", cells); ++ ++ if (db->storage) { ++ ovsdb_storage_get_memory_usage(db->storage, usage); ++ } + } + + struct ovsdb_table * +diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h +index 32e5333163..5c30a83d92 100644 +--- a/ovsdb/ovsdb.h ++++ b/ovsdb/ovsdb.h +@@ -83,6 +83,7 @@ struct ovsdb { + /* Triggers. */ + struct ovs_list triggers; /* Contains "struct ovsdb_trigger"s. */ + bool run_triggers; ++ bool run_triggers_now; + + struct ovsdb_table *rbac_role; + +diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c +index 26d39a087f..9468fdaf4a 100644 +--- a/ovsdb/raft-private.c ++++ b/ovsdb/raft-private.c +@@ -137,6 +137,7 @@ raft_server_destroy(struct raft_server *s) + if (s) { + free(s->address); + free(s->nickname); ++ free(s->last_install_snapshot_request); + free(s); + } + } +diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h +index ac8656d42f..1f366b4ab3 100644 +--- a/ovsdb/raft-private.h ++++ b/ovsdb/raft-private.h +@@ -27,6 +27,7 @@ + + struct ds; + struct ovsdb_parser; ++struct raft_install_snapshot_request; + + /* Formatting server IDs and cluster IDs for use in human-readable logs. Do + * not use these in cases where the whole server or cluster ID is needed; use +@@ -83,6 +84,9 @@ struct raft_server { + bool replied; /* Reply to append_request was received from this + node during current election_timeout interval. + */ ++ /* Copy of the last install_snapshot_request sent to this server. */ ++ struct raft_install_snapshot_request *last_install_snapshot_request; ++ + /* For use in adding and removing servers: */ + struct uuid requester_sid; /* Nonzero if requested via RPC. */ + struct unixctl_conn *requester_conn; /* Only if requested via unixctl. 
*/ +diff --git a/ovsdb/raft-rpc.c b/ovsdb/raft-rpc.c +index 18c83fe9c2..dd14d81091 100644 +--- a/ovsdb/raft-rpc.c ++++ b/ovsdb/raft-rpc.c +@@ -544,8 +544,8 @@ raft_format_install_snapshot_request( + ds_put_format(s, " last_index=%"PRIu64, rq->last_index); + ds_put_format(s, " last_term=%"PRIu64, rq->last_term); + ds_put_format(s, " last_eid="UUID_FMT, UUID_ARGS(&rq->last_eid)); +- ds_put_cstr(s, " last_servers="); + ds_put_format(s, " election_timer=%"PRIu64, rq->election_timer); ++ ds_put_cstr(s, " last_servers="); + + struct hmap servers; + struct ovsdb_error *error = +diff --git a/ovsdb/raft.c b/ovsdb/raft.c +index 4789bc4f22..8df386fa19 100644 +--- a/ovsdb/raft.c ++++ b/ovsdb/raft.c +@@ -36,6 +36,7 @@ + #include "ovsdb/log.h" + #include "raft-rpc.h" + #include "random.h" ++#include "simap.h" + #include "socket-util.h" + #include "stream.h" + #include "timeval.h" +@@ -73,7 +74,8 @@ enum raft_failure_test { + FT_CRASH_BEFORE_SEND_EXEC_REQ, + FT_CRASH_AFTER_SEND_EXEC_REQ, + FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE, +- FT_DELAY_ELECTION ++ FT_DELAY_ELECTION, ++ FT_DONT_SEND_VOTE_REQUEST + }; + static enum raft_failure_test failure_test; + +@@ -298,6 +300,11 @@ struct raft { + bool had_leader; /* There has been leader elected since last + election initiated. This is to help setting + candidate_retrying. */ ++ ++ /* For all. */ ++ bool ever_had_leader; /* There has been leader elected since the raft ++ is initialized, meaning it is ever ++ connected. */ + }; + + /* All Raft structures. */ +@@ -932,6 +939,7 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js, + &conn->sid); + conn->incoming = incoming; + conn->js_seqno = jsonrpc_session_get_seqno(conn->js); ++ jsonrpc_session_set_probe_interval(js, 0); + } + + /* Starts the local server in an existing Raft cluster, using the local copy of +@@ -1007,6 +1015,21 @@ raft_get_sid(const struct raft *raft) + return &raft->sid; + } + ++/* Adds memory consumption info to 'usage' for later use by memory_report(). */ ++void ++raft_get_memory_usage(const struct raft *raft, struct simap *usage) ++{ ++ struct raft_conn *conn; ++ int cnt = 0; ++ ++ LIST_FOR_EACH (conn, list_node, &raft->conns) { ++ simap_increase(usage, "raft-backlog", ++ jsonrpc_session_get_backlog(conn->js)); ++ cnt++; ++ } ++ simap_increase(usage, "raft-connections", cnt); ++} ++ + /* Returns true if 'raft' has completed joining its cluster, has not left or + * initiated leaving the cluster, does not have failed disk storage, and is + * apparently connected to the leader in a healthy way (or is itself the +@@ -1024,7 +1047,8 @@ raft_is_connected(const struct raft *raft) + && !raft->joining + && !raft->leaving + && !raft->left +- && !raft->failed); ++ && !raft->failed ++ && raft->ever_had_leader); + VLOG_DBG("raft_is_connected: %s\n", ret? "true": "false"); + return ret; + } +@@ -1397,8 +1421,20 @@ raft_conn_run(struct raft *raft, struct raft_conn *conn) + jsonrpc_session_run(conn->js); + + unsigned int new_seqno = jsonrpc_session_get_seqno(conn->js); +- bool just_connected = (new_seqno != conn->js_seqno ++ bool reconnected = new_seqno != conn->js_seqno; ++ bool just_connected = (reconnected + && jsonrpc_session_is_connected(conn->js)); ++ ++ if (reconnected) { ++ /* Clear 'last_install_snapshot_request' since it might not reach the ++ * destination or server was restarted. 
*/ ++ struct raft_server *server = raft_find_server(raft, &conn->sid); ++ if (server) { ++ free(server->last_install_snapshot_request); ++ server->last_install_snapshot_request = NULL; ++ } ++ } ++ + conn->js_seqno = new_seqno; + if (just_connected) { + if (raft->joining) { +@@ -1641,6 +1677,7 @@ raft_start_election(struct raft *raft, bool leadership_transfer) + } + + ovs_assert(raft->role != RAFT_LEADER); ++ + raft->role = RAFT_CANDIDATE; + /* If there was no leader elected since last election, we know we are + * retrying now. */ +@@ -1684,7 +1721,9 @@ raft_start_election(struct raft *raft, bool leadership_transfer) + .leadership_transfer = leadership_transfer, + }, + }; +- raft_send(raft, &rq); ++ if (failure_test != FT_DONT_SEND_VOTE_REQUEST) { ++ raft_send(raft, &rq); ++ } + } + + /* Vote for ourselves. */ +@@ -2519,7 +2558,7 @@ static void + raft_set_leader(struct raft *raft, const struct uuid *sid) + { + raft->leader_sid = *sid; +- raft->had_leader = true; ++ raft->ever_had_leader = raft->had_leader = true; + raft->candidate_retrying = false; + } + +@@ -2960,6 +2999,15 @@ raft_update_leader(struct raft *raft, const struct uuid *sid) + }; + ignore(ovsdb_log_write_and_free(raft->log, raft_record_to_json(&r))); + } ++ if (raft->role == RAFT_CANDIDATE) { ++ /* Section 3.4: While waiting for votes, a candidate may ++ * receive an AppendEntries RPC from another server claiming to ++ * be leader. If the leader’s term (included in its RPC) is at ++ * least as large as the candidate’s current term, then the ++ * candidate recognizes the leader as legitimate and returns to ++ * follower state. */ ++ raft->role = RAFT_FOLLOWER; ++ } + return true; + } + +@@ -3260,6 +3308,31 @@ raft_send_install_snapshot_request(struct raft *raft, + .election_timer = raft->election_timer, /* use latest value */ + } + }; ++ ++ if (s->last_install_snapshot_request) { ++ struct raft_install_snapshot_request *old, *new; ++ ++ old = s->last_install_snapshot_request; ++ new = &rpc.install_snapshot_request; ++ if ( old->term == new->term ++ && old->last_index == new->last_index ++ && old->last_term == new->last_term ++ && old->last_servers == new->last_servers ++ && old->data == new->data ++ && old->election_timer == new->election_timer ++ && uuid_equals(&old->last_eid, &new->last_eid)) { ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); ++ ++ VLOG_WARN_RL(&rl, "not sending exact same install_snapshot_request" ++ " to server %s again", s->nickname); ++ return; ++ } ++ } ++ free(s->last_install_snapshot_request); ++ CONST_CAST(struct raft_server *, s)->last_install_snapshot_request ++ = xmemdup(&rpc.install_snapshot_request, ++ sizeof rpc.install_snapshot_request); ++ + raft_send(raft, &rpc); + } + +@@ -3992,8 +4065,9 @@ raft_handle_install_snapshot_reply( + VLOG_INFO_RL(&rl, "cluster "CID_FMT": installed snapshot on server %s " + " up to %"PRIu64":%"PRIu64, CID_ARGS(&raft->cid), + s->nickname, rpy->last_term, rpy->last_index); +- s->next_index = raft->log_end; +- raft_send_append_request(raft, s, 0, "snapshot installed"); ++ s->next_index = raft->log_start; ++ raft_send_append_request(raft, s, raft->log_end - s->next_index, ++ "snapshot installed"); + } + + /* Returns true if 'raft' has grown enough since the last snapshot that +@@ -4143,9 +4217,7 @@ raft_handle_execute_command_request__( + cmd->sid = rq->common.sid; + + enum raft_command_status status = cmd->status; +- if (status != RAFT_CMD_INCOMPLETE) { +- raft_command_unref(cmd); +- } ++ raft_command_unref(cmd); + return status; + } + +@@ -4667,6 
+4739,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, + raft_reset_election_timer(raft); + } + } ++ } else if (!strcmp(test, "dont-send-vote-request")) { ++ failure_test = FT_DONT_SEND_VOTE_REQUEST; + } else if (!strcmp(test, "clear")) { + failure_test = FT_NO_TEST; + unixctl_command_reply(conn, "test dismissed"); +diff --git a/ovsdb/raft.h b/ovsdb/raft.h +index 3d448995af..99d5307e54 100644 +--- a/ovsdb/raft.h ++++ b/ovsdb/raft.h +@@ -67,6 +67,7 @@ + struct json; + struct ovsdb_log; + struct raft; ++struct simap; + struct sset; + + #define RAFT_MAGIC "CLUSTER" +@@ -113,6 +114,7 @@ const struct uuid *raft_get_cid(const struct raft *); + const struct uuid *raft_get_sid(const struct raft *); + bool raft_is_connected(const struct raft *); + bool raft_is_leader(const struct raft *); ++void raft_get_memory_usage(const struct raft *, struct simap *usage); + + /* Joining a cluster. */ + bool raft_is_joining(const struct raft *); +diff --git a/ovsdb/storage.c b/ovsdb/storage.c +index e26252b066..7b4ad16f60 100644 +--- a/ovsdb/storage.c ++++ b/ovsdb/storage.c +@@ -26,6 +26,7 @@ + #include "ovsdb.h" + #include "raft.h" + #include "random.h" ++#include "simap.h" + #include "timeval.h" + #include "util.h" + +@@ -188,6 +189,15 @@ ovsdb_storage_get_applied_index(const struct ovsdb_storage *storage) + return storage->raft ? raft_get_applied_index(storage->raft) : 0; + } + ++void ++ovsdb_storage_get_memory_usage(const struct ovsdb_storage *storage, ++ struct simap *usage) ++{ ++ if (storage->raft) { ++ raft_get_memory_usage(storage->raft, usage); ++ } ++} ++ + void + ovsdb_storage_run(struct ovsdb_storage *storage) + { +diff --git a/ovsdb/storage.h b/ovsdb/storage.h +index 8a9bbab709..a223968912 100644 +--- a/ovsdb/storage.h ++++ b/ovsdb/storage.h +@@ -23,6 +23,7 @@ + struct json; + struct ovsdb_schema; + struct ovsdb_storage; ++struct simap; + struct uuid; + + struct ovsdb_error *ovsdb_storage_open(const char *filename, bool rw, +@@ -39,6 +40,8 @@ bool ovsdb_storage_is_leader(const struct ovsdb_storage *); + const struct uuid *ovsdb_storage_get_cid(const struct ovsdb_storage *); + const struct uuid *ovsdb_storage_get_sid(const struct ovsdb_storage *); + uint64_t ovsdb_storage_get_applied_index(const struct ovsdb_storage *); ++void ovsdb_storage_get_memory_usage(const struct ovsdb_storage *, ++ struct simap *usage); + + void ovsdb_storage_run(struct ovsdb_storage *); + void ovsdb_storage_wait(struct ovsdb_storage *); +diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c +index 369436bffb..8ffefcf7c9 100644 +--- a/ovsdb/transaction.c ++++ b/ovsdb/transaction.c +@@ -967,7 +967,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn) + { + if (!ovsdb_txn_is_empty(txn)) { + +- txn->db->run_triggers = true; ++ txn->db->run_triggers_now = txn->db->run_triggers = true; + ovsdb_monitors_commit(txn->db, txn); + ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs)); + ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit)); +diff --git a/ovsdb/trigger.c b/ovsdb/trigger.c +index 7e62e90ae3..0372302af4 100644 +--- a/ovsdb/trigger.c ++++ b/ovsdb/trigger.c +@@ -141,7 +141,7 @@ ovsdb_trigger_run(struct ovsdb *db, long long int now) + struct ovsdb_trigger *t, *next; + + bool run_triggers = db->run_triggers; +- db->run_triggers = false; ++ db->run_triggers_now = db->run_triggers = false; + + bool disconnect_all = false; + +@@ -160,7 +160,7 @@ ovsdb_trigger_run(struct ovsdb *db, long long int now) + void + ovsdb_trigger_wait(struct ovsdb *db, long long int now) + { +- if 
(db->run_triggers) { ++ if (db->run_triggers_now) { + poll_immediate_wake(); + } else { + long long int deadline = LLONG_MAX; +@@ -319,9 +319,16 @@ ovsdb_trigger_try(struct ovsdb_trigger *t, long long int now) + if (!strcmp(ovsdb_error_get_tag(error), "cluster error")) { + /* Temporary error. Transition back to "initialized" state to + * try again. */ ++ char *err_s = ovsdb_error_to_string(error); ++ VLOG_DBG("cluster error %s", err_s); ++ + jsonrpc_msg_destroy(t->reply); + t->reply = NULL; + t->db->run_triggers = true; ++ if (!strstr(err_s, "not leader")) { ++ t->db->run_triggers_now = true; ++ } ++ free(err_s); + ovsdb_error_destroy(error); + } else { + /* Permanent error. Transition to "completed" state to report +diff --git a/rhel/openvswitch-kmod-fedora.spec.in b/rhel/openvswitch-kmod-fedora.spec.in +index c94f2f5358..15eec6d4c0 100644 +--- a/rhel/openvswitch-kmod-fedora.spec.in ++++ b/rhel/openvswitch-kmod-fedora.spec.in +@@ -17,7 +17,8 @@ + # - 3.10.0 major revision 693 (RHEL 7.4) + # - 3.10.0 major revision 957 (RHEL 7.6) + # - 3.10.0 major revision 1062 (RHEL 7.7) +-# - 3.10.0 major revision 1101 (RHEL 7.8) ++# - 3.10.0 major revision 1101 (RHEL 7.8 Beta) ++# - 3.10.0 major revision 1127 (RHEL 7.8 GA) + # By default, build against the current running kernel version + #%define kernel 3.1.5-1.fc16.x86_64 + #define kernel %{kernel_source} +@@ -97,7 +98,7 @@ if grep -qs "suse" /etc/os-release; then + elif [ "$mainline_major" = "3" ] && [ "$mainline_minor" = "10" ] && + { [ "$major_rev" = "327" ] || [ "$major_rev" = "693" ] || \ + [ "$major_rev" = "957" ] || [ "$major_rev" == "1062" ] || \ +- [ "$major_rev" = "1101" ]; }; then ++ [ "$major_rev" = "1101" ] || [ "$major_rev" = "1127" ] ; }; then + # For RHEL 7.2, 7.4, 7.6, 7.7, and 7.8 + if [ -x "%{_datadir}/openvswitch/scripts/ovs-kmod-manage.sh" ]; then + %{_datadir}/openvswitch/scripts/ovs-kmod-manage.sh +diff --git a/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh b/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh +index a9b5cdd817..c70e135cd5 100644 +--- a/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh ++++ b/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh +@@ -19,7 +19,8 @@ + # - 3.10.0 major revision 693 (RHEL 7.4) + # - 3.10.0 major revision 957 (RHEL 7.6) + # - 3.10.0 major revision 1062 (RHEL 7.7) +-# - 3.10.0 major revision 1101 (RHEL 7.8) ++# - 3.10.0 major revision 1101 (RHEL 7.8 Beta) ++# - 3.10.0 major revision 1127 (RHEL 7.8 GA) + # - 4.4.x, x >= 73 (SLES 12 SP3) + # - 4.12.x, x >= 14 (SLES 12 SP4). 
+ # It is packaged in the openvswitch kmod RPM and run in the post-install +@@ -108,6 +109,11 @@ if [ "$mainline_major" = "3" ] && [ "$mainline_minor" = "10" ]; then + ver_offset=4 + installed_ver="$minor_rev" + elif [ "$major_rev" = "1101" ]; then ++# echo "rhel78" ++ comp_ver=10 ++ ver_offset=4 ++ installed_ver="$minor_rev" ++ elif [ "$major_rev" = "1127" ]; then + # echo "rhel78" + comp_ver=10 + ver_offset=4 +diff --git a/tests/automake.mk b/tests/automake.mk +index 9c7ebdce9b..3d90f97687 100644 +--- a/tests/automake.mk ++++ b/tests/automake.mk +@@ -152,7 +152,8 @@ SYSTEM_KMOD_TESTSUITE_AT = \ + SYSTEM_USERSPACE_TESTSUITE_AT = \ + tests/system-userspace-testsuite.at \ + tests/system-userspace-macros.at \ +- tests/system-userspace-packet-type-aware.at ++ tests/system-userspace-packet-type-aware.at \ ++ tests/system-route.at + + SYSTEM_AFXDP_TESTSUITE_AT = \ + tests/system-userspace-macros.at \ +diff --git a/tests/bridge.at b/tests/bridge.at +index d48463e263..904f1381c7 100644 +--- a/tests/bridge.at ++++ b/tests/bridge.at +@@ -103,3 +103,20 @@ AT_CHECK([ovs-appctl -t ovs-vswitchd version], [0], [ignore]) + OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) + OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + AT_CLEANUP ++ ++AT_SETUP([bridge - change ofproto versions]) ++dnl Start vswitch and add a version test bridge ++OVS_VSWITCHD_START( ++ [add-br vr_test0 -- \ ++ set bridge vr_test0 datapath-type=dummy \ ++ protocols=OpenFlow10]) ++ ++dnl set the version to include, say, OpenFlow14 ++AT_CHECK([ovs-vsctl set bridge vr_test0 protocols=OpenFlow10,OpenFlow14]) ++ ++dnl now try to use bundle action on a flow ++AT_CHECK([ovs-ofctl add-flow vr_test0 --bundle actions=normal]) ++ ++OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) ++OVS_APP_EXIT_AND_WAIT([ovsdb-server]) ++AT_CLEANUP +diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at +index 0aeb4e788f..1651e02d29 100644 +--- a/tests/dpif-netdev.at ++++ b/tests/dpif-netdev.at +@@ -371,7 +371,7 @@ m4_define([DPIF_NETDEV_FLOW_HW_OFFLOAD], + [AT_SETUP([dpif-netdev - partial hw offload - $1]) + OVS_VSWITCHD_START( + [add-port br0 p1 -- \ +- set interface p1 type=$1 ofport_request=1 options:pstream=punix:$OVS_RUNDIR/p1.sock options:ifindex=1 -- \ ++ set interface p1 type=$1 ofport_request=1 options:pstream=punix:$OVS_RUNDIR/p1.sock options:ifindex=1100 -- \ + set bridge br0 datapath-type=dummy \ + other-config:datapath-id=1234 fail-mode=secure], [], [], + [m4_if([$1], [dummy-pmd], [--dummy-numa="0,0,0,0,1,1,1,1"], [])]) +@@ -393,7 +393,7 @@ skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc + # Check that flow successfully offloaded. + OVS_WAIT_UNTIL([grep "succeed to add netdev flow" ovs-vswitchd.log]) + AT_CHECK([filter_hw_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl +-p1: flow put[[create]]: flow match: recirc_id=0,eth,ip,in_port=1,vlan_tci=0x0000,nw_frag=no, mark: 0 ++p1: flow put[[create]]: flow match: recirc_id=0,eth,ip,in_port=1,vlan_tci=0x0000,nw_frag=no, mark: 1 + ]) + # Check that datapath flow installed successfully. + AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl +@@ -404,7 +404,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), a + + # Check for succesfull packet matching with installed offloaded flow. 
+ AT_CHECK([filter_hw_packet_netdev_dummy < ovs-vswitchd.log | strip_xout], [0], [dnl +-p1: packet: ip,vlan_tci=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=64 matches with flow: recirc_id=0,eth,ip,vlan_tci=0x0000,nw_frag=no with mark: 0 ++p1: packet: ip,vlan_tci=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=64 matches with flow: recirc_id=0,eth,ip,vlan_tci=0x0000,nw_frag=no with mark: 1 + ]) + + ovs-appctl revalidator/wait +@@ -421,7 +421,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), p + # Check that flow successfully deleted from HW. + OVS_WAIT_UNTIL([grep "succeed to delete netdev flow" ovs-vswitchd.log]) + AT_CHECK([filter_hw_flow_del < ovs-vswitchd.log | strip_xout], [0], [dnl +-p1: flow del: mark: 0 ++p1: flow del: mark: 1 + ]) + OVS_VSWITCHD_STOP + AT_CLEANUP]) +@@ -434,7 +434,7 @@ m4_define([DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS], + [AT_SETUP([dpif-netdev - partial hw offload with packet modifications - $1]) + OVS_VSWITCHD_START( + [add-port br0 p1 -- \ +- set interface p1 type=$1 ofport_request=1 options:pcap=p1.pcap options:ifindex=1 -- \ ++ set interface p1 type=$1 ofport_request=1 options:pcap=p1.pcap options:ifindex=1101 -- \ + set bridge br0 datapath-type=dummy \ + other-config:datapath-id=1234 fail-mode=secure], [], [], + [m4_if([$1], [dummy-pmd], [--dummy-numa="0,0,0,0,1,1,1,1"], [])]) +@@ -460,7 +460,7 @@ packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type + # Check that flow successfully offloaded. + OVS_WAIT_UNTIL([grep "succeed to add netdev flow" ovs-vswitchd.log]) + AT_CHECK([filter_hw_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl +-p1: flow put[[create]]: flow match: recirc_id=0,eth,udp,in_port=1,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82, mark: 0 ++p1: flow put[[create]]: flow match: recirc_id=0,eth,udp,in_port=1,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82, mark: 1 + ]) + # Check that datapath flow installed successfully. + AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl +@@ -472,7 +472,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp= + # Check for succesfull packet matching with installed offloaded flow. + AT_CHECK([filter_hw_packet_netdev_dummy < ovs-vswitchd.log | strip_xout], [0], [dnl + p1: packet: udp,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=81,tp_dst=82 dnl +-matches with flow: recirc_id=0,eth,udp,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82 with mark: 0 ++matches with flow: recirc_id=0,eth,udp,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82 with mark: 1 + ]) + + ovs-appctl revalidator/wait +@@ -490,7 +490,7 @@ packets:1, bytes:64, used:0.0s, actions:set(ipv4(src=192.168.0.7)),set(udp(dst=3 + # Check that flow successfully deleted from HW. + OVS_WAIT_UNTIL([grep "succeed to delete netdev flow" ovs-vswitchd.log]) + AT_CHECK([filter_hw_flow_del < ovs-vswitchd.log | strip_xout], [0], [dnl +-p1: flow del: mark: 0 ++p1: flow del: mark: 1 + ]) + + # Check that ip address and udp port were correctly modified in output packets. 
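The lib/util.c and lib/odp-util.c hunks earlier in this patch add or_bytes()/or_masks() and the save-then-OR pattern around commit() (memcpy the mask to orig_mask, commit, then OR orig_mask back in) so that wildcard mask bits are never dropped when a set-field action is committed; the "megaflow - set dl_dst with match on dl_src" test further below exercises exactly that. As a rough, self-contained illustration of the offsetof/sizeof-table technique — all names here (demo_key, field_desc, demo_or_masks) are made up for the sketch and are not from the OVS tree:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-ins for the real OVS key struct and
 * struct offsetof_sizeof table. */
struct demo_key {
    uint8_t src[6];
    uint8_t dst[6];
};

struct field_desc {
    int offset;
    int size;               /* A 0-size entry terminates the array. */
};

#define DEMO_KEY_FIELDS { \
    { offsetof(struct demo_key, src), 6 }, \
    { offsetof(struct demo_key, dst), 6 }, \
    { 0, 0 } }

/* *dst |= *src for 'n' bytes (same idea as or_bytes() in lib/util.c). */
static void
demo_or_bytes(void *dst_, const void *src_, size_t n)
{
    const uint8_t *src = src_;
    uint8_t *dst = dst_;

    for (size_t i = 0; i < n; i++) {
        dst[i] |= src[i];
    }
}

/* ORs every field listed in 'fields' from 'src' into 'dst'
 * (same idea as or_masks() in lib/odp-util.c). */
static void
demo_or_masks(void *dst, const void *src, const struct field_desc *fields)
{
    for (int i = 0; fields[i].size; i++) {
        demo_or_bytes((uint8_t *) dst + fields[i].offset,
                      (const uint8_t *) src + fields[i].offset,
                      fields[i].size);
    }
}

int
main(void)
{
    struct field_desc fields[] = DEMO_KEY_FIELDS;
    struct demo_key mask, orig_mask;

    /* The flow matched on 'src' only... */
    memset(&mask, 0, sizeof mask);
    memset(mask.src, 0xff, sizeof mask.src);
    memcpy(&orig_mask, &mask, sizeof mask);

    /* ...then committing a set-field action rewrote the mask to cover
     * 'dst' only, wiping the 'src' bits (the over-wildcarding this
     * patch guards against). */
    memset(&mask, 0, sizeof mask);
    memset(mask.dst, 0xff, sizeof mask.dst);

    /* OR the saved mask back in so no match constraint is lost. */
    demo_or_masks(&mask, &orig_mask, fields);

    printf("src mask byte 0: %#x, dst mask byte 0: %#x\n",
           mask.src[0], mask.dst[0]);   /* Both print 0xff. */
    return 0;
}

The terminating zero-size entry mirrors how the OVS_KEY_*_OFFSETOF_SIZEOF_ARR arrays are terminated, which is what lets or_masks() walk an arbitrary key structure without knowing its layout.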
+diff --git a/tests/idltest.ovsschema b/tests/idltest.ovsschema +index bee79fc50f..d08f7e7ead 100644 +--- a/tests/idltest.ovsschema ++++ b/tests/idltest.ovsschema +@@ -54,6 +54,15 @@ + }, + "isRoot" : true + }, ++ "indexed": { ++ "columns": { ++ "i": { ++ "type": "integer" ++ } ++ }, ++ "indexes": [["i"]], ++ "isRoot" : true ++ }, + "simple": { + "columns": { + "b": { +diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at +index ff1cc93707..6415a8a04d 100644 +--- a/tests/ofproto-dpif.at ++++ b/tests/ofproto-dpif.at +@@ -5171,6 +5171,36 @@ AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 2 + OVS_VSWITCHD_STOP + AT_CLEANUP + ++# Checks for regression against a bug in which OVS dropped packets ++# with in_port=CONTROLLER when they were recirculated (because ++# CONTROLLER isn't a real port and could not be looked up). ++AT_SETUP([ofproto-dpif - packet-out recirculation]) ++OVS_VSWITCHD_START ++add_of_ports br0 1 2 ++ ++AT_DATA([flows.txt], [dnl ++table=0 ip actions=mod_dl_dst:83:83:83:83:83:83,ct(table=1) ++table=1 ip actions=ct(commit),output:2 ++]) ++AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) ++ ++packet=ffffffffffff00102030405008004500001c00000000401100000a000002ffffffff0035111100080000 ++AT_CHECK([ovs-ofctl packet-out br0 "in_port=controller packet=$packet actions=table"]) ++ ++# Dumps out the flow table, extracts the number of packets that have gone ++# through the (single) flow in table 1, and returns success if it's exactly 1. ++# ++# If this remains 0, then the recirculation isn't working properly since the ++# packet never goes through flow in table 1. ++check_flows () { ++ n=$(ovs-ofctl dump-flows br0 table=1 | sed -n 's/.*n_packets=\([[0-9]]\{1,\}\).*/\1/p') ++ echo "n_packets=$n" ++ test "$n" = 1 ++} ++OVS_WAIT_UNTIL([check_flows], [ovs dump-flows br0]) ++ ++OVS_VSWITCHD_STOP ++AT_CLEANUP + + AT_SETUP([ofproto-dpif - debug_slow action]) + OVS_VSWITCHD_START +@@ -8632,6 +8662,29 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(dst=50:54:00:00:00:0c),eth_ty + OVS_VSWITCHD_STOP + AT_CLEANUP + ++AT_SETUP([ofproto-dpif megaflow - set dl_dst with match on dl_src]) ++OVS_VSWITCHD_START ++AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) ++add_of_ports br0 1 2 ++AT_DATA([flows.txt], [dnl ++table=0 in_port=1,dl_src=50:54:00:00:00:09 actions=mod_dl_dst(50:54:00:00:00:0a),output(2) ++]) ++AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) ++AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) ++AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) ++AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.6,dst=10.0.0.5,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) ++sleep 1 ++dnl The first packet is essentially a no-op, as the new destination MAC is the ++dnl same as the original. The second entry actually updates the destination ++dnl MAC. The last one must be dropped as it doesn't match with dl_src. 
++AT_CHECK([strip_ufid < ovs-vswitchd.log | filter_flow_install | strip_used], [0], [dnl ++recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(frag=no), actions:2 ++recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(frag=no), actions:set(eth(dst=50:54:00:00:00:0a)),2 ++recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b),eth_type(0x0800),ipv4(frag=no), actions:drop ++]) ++OVS_VSWITCHD_STOP ++AT_CLEANUP ++ + m4_define([OFPROTO_DPIF_MEGAFLOW_DISABLED], + [AT_SETUP([ofproto-dpif megaflow - disabled$1]) + OVS_VSWITCHD_START([], [], [], [m4_if([$1], [], [], [--dummy-numa="0,0,0,0,1,1,1,1"])]) +@@ -10540,6 +10593,62 @@ udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10. + OVS_VSWITCHD_STOP + AT_CLEANUP + ++AT_SETUP([ofproto-dpif - conntrack - match masked ct fields]) ++OVS_VSWITCHD_START ++ ++add_of_ports br0 1 2 ++ ++AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg vconn:info ofproto_dpif:info]) ++ ++dnl Allow new connections on p1->p2. Allow only established connections p2->p1 ++AT_DATA([flows.txt], [dnl ++table=0,arp,action=normal ++table=0,ip,in_port=1,udp,nw_src=10.1.2.1/24,action=ct(commit) ++table=0,ip,in_port=1,udp6,ipv6_dst=2001:db8::1/64,action=ct(commit) ++table=0,ip,in_port=1,udp,tp_src=3/0x1,action=ct(commit) ++table=0,ip,in_port=2,actions=ct(table=1) ++table=0,ip6,in_port=2,actions=ct(table=1) ++table=1,priority=10,udp,ct_state=+trk+rpl,ct_nw_src=10.1.2.1/24,actions=controller ++table=1,priority=10,udp6,ct_state=+trk+rpl,ct_ipv6_dst=2001:db8::1/64,actions=controller ++table=1,priority=10,udp,ct_state=+trk+rpl,ct_tp_src=3/0x1,actions=controller ++table=1,priority=1,action=drop ++]) ++ ++AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) ++ ++AT_CAPTURE_FILE([ofctl_monitor.log]) ++AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl -P nxt_packet_in --detach --no-chdir --pidfile 2> ofctl_monitor.log]) ++ ++dnl Match ct_nw_src=10.1.2.1/24 ++AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.2.100,dst=10.1.2.200,proto=17,tos=0,ttl=64,frag=no),udp(src=6,dst=6)']) ++AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.2.200,dst=10.1.2.100,proto=17,tos=0,ttl=64,frag=no),udp(src=6,dst=6)']) ++ ++dnl Match ct_ipv6_dst=2001:db8::1/64 ++AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=2001:db8::2,label=0,proto=17,tclass=0x70,hlimit=128,frag=no),udp(src=1,dst=2)']) ++AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x86dd),ipv6(src=2001:db8::2,dst=2001:db8::1,label=0,proto=17,tclass=0x70,hlimit=128,frag=no),udp(src=2,dst=1)']) ++ ++dnl Match ct_tp_src=3/0x1 ++AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=1,dst=2)']) ++AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.1.2,dst=10.1.1.1,proto=17,tos=0,ttl=64,frag=no),udp(src=2,dst=1)']) ++ ++OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ++OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) ++ ++dnl Check this 
output. ++AT_CHECK([cat ofctl_monitor.log], [0], [dnl ++NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=106 ct_state=est|rpl|trk,ct_nw_src=10.1.2.100,ct_nw_dst=10.1.2.200,ct_nw_proto=17,ct_tp_src=6,ct_tp_dst=6,ip,in_port=2 (via action) data_len=106 (unbuffered) ++udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.2.200,nw_dst=10.1.2.100,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=6,tp_dst=6 udp_csum:221 ++dnl ++NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=126 ct_state=est|rpl|trk,ct_ipv6_src=2001:db8::1,ct_ipv6_dst=2001:db8::2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,ipv6,in_port=2 (via action) data_len=126 (unbuffered) ++udp6,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,ipv6_src=2001:db8::2,ipv6_dst=2001:db8::1,ipv6_label=0x00000,nw_tos=112,nw_ecn=0,nw_ttl=128,tp_src=2,tp_dst=1 udp_csum:bfe2 ++dnl ++NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=106 ct_state=est|rpl|trk,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,ip,in_port=2 (via action) data_len=106 (unbuffered) ++udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=2,tp_dst=1 udp_csum:553 ++]) ++ ++OVS_VSWITCHD_STOP ++AT_CLEANUP ++ + AT_SETUP([ofproto-dpif - conntrack - ofproto/trace]) + OVS_VSWITCHD_START + +diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at +index 55c7a6e179..c8babe3612 100644 +--- a/tests/ovs-vsctl.at ++++ b/tests/ovs-vsctl.at +@@ -966,6 +966,14 @@ AT_CHECK([RUN_OVS_VSCTL([--if-exists del-zone-tp netdev zone=1])]) + AT_CHECK([RUN_OVS_VSCTL([list-zone-tp netdev])], [0], [Zone:2, Timeout Policies: icmp_first=2 icmp_reply=3 + ]) + ++AT_CHECK( ++ [RUN_OVS_VSCTL_TOGETHER([--id=@n create CT_Zone external_ids:"test"="123"], ++ [--id=@m create Datapath datapath_version=0 ct_zones:"10"=@n], ++ [set Open_vSwitch . datapaths:"netdev"=@m])], ++ [0], [stdout]) ++AT_CHECK([RUN_OVS_VSCTL([list-zone-tp netdev])], [0], [Zone:10, Timeout Policies: system default ++]) ++ + AT_CHECK([RUN_OVS_VSCTL([-- --id=@m create Datapath datapath_version=0 'capabilities={recirc=true}' -- set Open_vSwitch . 
datapaths:"system"=@m])], [0], [stdout]) + AT_CHECK([RUN_OVS_VSCTL([list-dp-cap system])], [0], [recirc=true + ]) +diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at +index 3a0bd4579e..e0758e954c 100644 +--- a/tests/ovsdb-cluster.at ++++ b/tests/ovsdb-cluster.at +@@ -179,6 +179,41 @@ AT_KEYWORDS([ovsdb server negative unix cluster disconnect]) + ovsdb_test_cluster_disconnect 5 leader yes + AT_CLEANUP + ++AT_SETUP([OVSDB cluster - initial status should be disconnected]) ++AT_KEYWORDS([ovsdb server negative unix cluster disconnect]) ++ ++n=3 ++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` ++ordinal_schema > schema ++AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) ++cid=`ovsdb-tool db-cid s1.db` ++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` ++for i in `seq 2 $n`; do ++ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) ++done ++ ++on_exit 'kill `cat *.pid`' ++for i in `seq $n`; do ++ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) ++done ++for i in `seq $n`; do ++ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) ++done ++ ++# Stop all servers, and start the s1 only, to test initial connection status ++# when there is no leader yet. ++for i in `seq 1 $n`; do ++ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) ++done ++i=1 ++AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) ++ ++# The initial status should be disconnected. So wait should fail. ++AT_CHECK([ovsdb_client_wait --timeout=1 unix:s$i.ovsdb $schema_name connected], [142], [ignore], [ignore]) ++OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) ++ ++AT_CLEANUP ++ + + + AT_BANNER([OVSDB cluster election timer change]) +@@ -273,6 +308,88 @@ OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name | grep "Ele + + AT_CLEANUP + ++ ++AT_BANNER([OVSDB cluster install snapshot RPC]) ++ ++AT_SETUP([OVSDB cluster - install snapshot RPC]) ++AT_KEYWORDS([ovsdb server positive unix cluster snapshot]) ++ ++n=3 ++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` ++ordinal_schema > schema ++AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) ++cid=`ovsdb-tool db-cid s1.db` ++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` ++for i in `seq 2 $n`; do ++ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) ++done ++ ++on_exit 'kill `cat *.pid`' ++for i in `seq $n`; do ++ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) ++done ++for i in `seq $n`; do ++ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) ++done ++ ++AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest", ++ {"op": "insert", ++ "table": "indexed", ++ "row": {"i": 0}}]]'], [0], [ignore], [ignore]) ++ ++# Kill one follower (s2) and write some data to cluster, so that the follower is falling behind ++printf "\ns2: stopping\n" ++OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s2], [s2.pid]) ++ ++# Delete "i":0 and readd it to get a different UUID for it. 
++AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
++ {"op": "delete",
++ "table": "indexed",
++ "where": [["i", "==", 0]]}]]'], [0], [ignore], [ignore])
++
++AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
++ {"op": "insert",
++ "table": "indexed",
++ "row": {"i": 0}}]]'], [0], [ignore], [ignore])
++
++AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
++ {"op": "insert",
++ "table": "indexed",
++ "row": {"i": 1}}]]'], [0], [ignore], [ignore])
++
++# Compact leader online to generate snapshot
++AT_CHECK([ovs-appctl -t "`pwd`"/s1 ovsdb-server/compact])
++
++# Start the follower s2 again.
++AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s2.log --pidfile=s2.pid --unixctl=s2 --remote=punix:s2.ovsdb s2.db])
++AT_CHECK([ovsdb_client_wait unix:s2.ovsdb $schema_name connected])
++
++# A client transaction through s2. During this transaction, there will be an
++# install_snapshot RPC because s2 detects it is behind and s1 doesn't have the
++# pre_log_index requested by s2 because it is already compacted.
++# After the install_snapshot RPC process, the transaction through s2 should
++# succeed.
++AT_CHECK([ovsdb-client transact unix:s2.ovsdb '[["idltest",
++ {"op": "insert",
++ "table": "indexed",
++ "row": {"i": 2}}]]'], [0], [ignore], [ignore])
++
++# The snapshot should overwrite the in-memory contents of the DB on S2
++# without generating any constraint violations. All three records (0, 1, 2)
++# should be in the DB at this point.
++AT_CHECK([ovsdb-client --no-headings dump unix:s2.ovsdb idltest indexed | uuidfilt | sort -k 2], [0], [dnl
++<0> 0
++<1> 1
++<2> 2
++indexed table
++])
++
++for i in `seq $n`; do
++ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
++done
++
++AT_CLEANUP
++
+
+
+ OVS_START_SHELL_HELPERS
+@@ -436,6 +553,61 @@ AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
+ ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update
+ AT_CLEANUP
+
++
++AT_SETUP([OVSDB cluster - competing candidates])
++AT_KEYWORDS([ovsdb server negative unix cluster competing-candidates])
++
++n=3
++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
++ordinal_schema > schema
++AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
++cid=`ovsdb-tool db-cid s1.db`
++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
++for i in `seq 2 $n`; do
++ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
++done
++
++on_exit 'kill `cat *.pid`'
++for i in `seq $n`; do
++ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
++done
++for i in `seq $n`; do
++ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
++done
++
++# We need to simulate the situation when 2 candidates start an election with
++# the same term.
++#
++# Before triggering leader election, tell follower s2 not to send a vote request
++# (simulating a vote request lost or not handled in time), and tell follower s3
++# to delay its election timer to make sure s3 doesn't send a vote-request before
++# s2 enters term 2.
++AT_CHECK([ovs-appctl -t "`pwd`"/s2 cluster/failure-test dont-send-vote-request], [0], [ignore])
++AT_CHECK([ovs-appctl -t "`pwd`"/s3 cluster/failure-test delay-election], [0], [ignore])
++
++# Restart the leader, which will become a follower, and both old followers will
++# start an election as candidates. The new follower (the old leader) will vote
++# for one of them, and the other candidate should step back to follower again.
++kill -9 `cat s1.pid`
++AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s1.log --pidfile=s1.pid --unixctl=s1 --remote=punix:s1.ovsdb s1.db])
++
++# Tell s1 to delay its election timer so that it won't start an election before
++# s3 becomes a candidate.
++AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/failure-test delay-election], [0], [ignore])
++
++OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Term: 2"])
++
++for i in `seq $n`; do
++ OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "candidate"])
++ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
++done
++
++for i in `seq $n`; do
++ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
++done
++
++AT_CLEANUP
++
+
+ AT_BANNER([OVSDB - cluster tests])
+
+diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at
+index cc38d69c10..cc53da923b 100644
+--- a/tests/ovsdb-idl.at
++++ b/tests/ovsdb-idl.at
+@@ -954,6 +954,7 @@ AT_CHECK([sort stdout | uuidfilt], [0],
+
+ # Check that ovsdb-idl figured out that table link2 and column l2 are missing.
+ AT_CHECK([grep ovsdb_idl stderr | sort], [0], [dnl
++test-ovsdb|ovsdb_idl|idltest database lacks indexed table (database needs upgrade?)
+ test-ovsdb|ovsdb_idl|idltest database lacks link2 table (database needs upgrade?)
+ test-ovsdb|ovsdb_idl|idltest database lacks singleton table (database needs upgrade?)
+ test-ovsdb|ovsdb_idl|link1 table in idltest database lacks l2 column (database needs upgrade?)
+@@ -1814,3 +1815,59 @@ m4_define([OVSDB_CHECK_IDL_LEADER_ONLY_PY],
+
+ OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL connects to leader], 3, ['remote'])
+ OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL reconnects to leader], 3, ['remote' '+remotestop' 'remote'])
++
++# Same as OVSDB_CHECK_IDL, but uses the C IDL implementation over TCP
++# with multiple remotes.
++m4_define([OVSDB_CHECK_CLUSTER_IDL_C],
++ [AT_SETUP([$1 - C - tcp])
++ AT_KEYWORDS([ovsdb server idl positive tcp socket $5])
++ m4_define([LPBK],[127.0.0.1])
++ AT_CHECK([ovsdb_cluster_start_idltest $2 "ptcp:0:"LPBK])
++ PARSE_LISTENING_PORT([s1.log], [TCP_PORT_1])
++ PARSE_LISTENING_PORT([s2.log], [TCP_PORT_2])
++ PARSE_LISTENING_PORT([s3.log], [TCP_PORT_3])
++ remotes=tcp:LPBK:$TCP_PORT_1,tcp:LPBK:$TCP_PORT_2,tcp:LPBK:$TCP_PORT_3
++
++ m4_if([$3], [], [],
++ [AT_CHECK([ovsdb-client transact $remotes $3], [0], [ignore], [ignore])])
++ AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 idl tcp:LPBK:$TCP_PORT_1 $4],
++ [0], [stdout], [ignore])
++ AT_CHECK([sort stdout | uuidfilt]m4_if([$7],,, [[| $7]]),
++ [0], [$5])
++ AT_CLEANUP])
++
++# Checks that monitor_cond_since works fine when disconnects happen
++# with cond_change requests in flight (i.e., IDL is properly updated).
++OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect],
++ 3,
++ [['["idltest",
++ {"op": "insert",
++ "table": "simple",
++ "row": {"i": 1,
++ "r": 1.0,
++ "b": true}},
++ {"op": "insert",
++ "table": "simple",
++ "row": {"i": 2,
++ "r": 1.0,
++ "b": true}}]']],
++ [['condition simple []' \
++ 'condition simple [["i","==",2]]' \
++ 'condition simple [["i","==",1]]' \
++ '+reconnect' \
++ '["idltest",
++ {"op": "update",
++ "table": "simple",
++ "where": [["i", "==", 1]],
++ "row": {"r": 2.0 }}]']],
++ [[000: change conditions
++001: empty
++002: change conditions
++003: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
++004: change conditions
++005: reconnect
++006: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
++007: {"error":null,"result":[{"count":1}]}
++008: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
++009: done
++]])
+diff --git a/tests/system-route.at b/tests/system-route.at
+new file mode 100644
+index 0000000000..1714273e35
+--- /dev/null
++++ b/tests/system-route.at
+@@ -0,0 +1,28 @@
++AT_BANNER([system-route])
++
++dnl Add an interface, add/del ip address, check that OVS catches route updates.
++AT_SETUP([ovs-route - add/remove system route])
++AT_KEYWORDS([route])
++OVS_TRAFFIC_VSWITCHD_START()
++
++dnl Create tap port.
++AT_CHECK([ip tuntap add name p1-route mode tap])
++AT_CHECK([ip link set p1-route up])
++on_exit 'ip link del p1-route'
++
++dnl Add ip address.
++AT_CHECK([ip addr add 10.0.0.17/24 dev p1-route], [0], [stdout])
++
++dnl Check that OVS catches route updates.
++OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [0], [dnl
++Cached: 10.0.0.17/24 dev p1-route SRC 10.0.0.17
++Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local
++])
++
++dnl Delete ip address.
++AT_CHECK([ip addr del 10.0.0.17/24 dev p1-route], [0], [stdout])
++dnl Check that the routes were removed from OVS.
++OVS_WAIT_UNTIL([test `ovs-appctl ovs/route/show | grep -c 'p1-route'` -eq 0 ])
++
++OVS_TRAFFIC_VSWITCHD_STOP
++AT_CLEANUP
+diff --git a/tests/system-traffic.at b/tests/system-traffic.at
+index 4a39c929c2..3ed03d92b5 100644
+--- a/tests/system-traffic.at
++++ b/tests/system-traffic.at
+@@ -611,6 +611,16 @@ NS_CHECK_EXEC([at_ns0], [ping -q -c 3 10.1.1.100 | FORMAT_PING], [0], [dnl
+ 3 packets transmitted, 3 received, 0% packet loss, time 0ms
+ ])
+
++dnl Test that OVS handles TLV map modifications properly when it restores frozen state.
++NS_CHECK_EXEC([at_ns0], [ping 10.1.1.100 > /dev/null &]) ++ ++AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0x88,len=4}->tun_metadata1"]) ++sleep 1 ++AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0x99,len=4}->tun_metadata2"]) ++sleep 1 ++AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0xaa,len=4}->tun_metadata3"]) ++sleep 1 ++ + OVS_APP_EXIT_AND_WAIT([ovs-ofctl]) + OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP +diff --git a/tests/system-userspace-testsuite.at b/tests/system-userspace-testsuite.at +index b40da9579e..2e9659a675 100644 +--- a/tests/system-userspace-testsuite.at ++++ b/tests/system-userspace-testsuite.at +@@ -26,3 +26,4 @@ m4_include([tests/system-traffic.at]) + m4_include([tests/system-layer3-tunnels.at]) + m4_include([tests/system-interface.at]) + m4_include([tests/system-userspace-packet-type-aware.at]) ++m4_include([tests/system-route.at]) +diff --git a/tests/test-classifier.c b/tests/test-classifier.c +index 6d53d016de..2d98fad485 100644 +--- a/tests/test-classifier.c ++++ b/tests/test-classifier.c +@@ -512,8 +512,9 @@ verify_tries(struct classifier *cls) + int i; + + for (i = 0; i < cls->n_tries; i++) { +- n_rules += trie_verify(&cls->tries[i].root, 0, +- cls->tries[i].field->n_bits); ++ const struct mf_field * cls_field ++ = ovsrcu_get(struct mf_field *, &cls->tries[i].field); ++ n_rules += trie_verify(&cls->tries[i].root, 0, cls_field->n_bits); + } + assert(n_rules <= cls->n_rules); + } +diff --git a/utilities/bugtool/ovs-bugtool.in b/utilities/bugtool/ovs-bugtool.in +index e55bfc2ed5..47f3c4629f 100755 +--- a/utilities/bugtool/ovs-bugtool.in ++++ b/utilities/bugtool/ovs-bugtool.in +@@ -33,8 +33,7 @@ + # or func_output(). + # + +-import StringIO +-import commands ++from io import BytesIO + import fcntl + import getopt + import hashlib +@@ -48,7 +47,7 @@ import warnings + import zipfile + from select import select + from signal import SIGTERM +-from subprocess import PIPE, Popen ++from subprocess import PIPE, Popen, check_output + + from xml.dom.minidom import getDOMImplementation, parse + +@@ -348,7 +347,7 @@ def collect_data(): + cap = v['cap'] + if 'cmd_args' in v: + if 'output' not in v.keys(): +- v['output'] = StringIOmtime() ++ v['output'] = BytesIOmtime() + if v['repeat_count'] > 0: + if cap not in process_lists: + process_lists[cap] = [] +@@ -373,20 +372,23 @@ def collect_data(): + if 'filename' in v and v['filename'].startswith('/proc/'): + # proc files must be read into memory + try: +- f = open(v['filename'], 'r') ++ f = open(v['filename'], 'rb') + s = f.read() + f.close() + if check_space(cap, v['filename'], len(s)): +- v['output'] = StringIOmtime(s) ++ v['output'] = BytesIOmtime(s) + except: + pass + elif 'func' in v: + try: + s = v['func'](cap) + except Exception as e: +- s = str(e) ++ s = str(e).encode() + if check_space(cap, k, len(s)): +- v['output'] = StringIOmtime(s) ++ if isinstance(s, str): ++ v['output'] = BytesIOmtime(s.encode()) ++ else: ++ v['output'] = BytesIOmtime(s) + + + def main(argv=None): +@@ -704,7 +706,7 @@ exclude those logs from the archive. + + # permit the user to filter out data + # We cannot use iteritems, since we modify 'data' as we pass through +- for (k, v) in sorted(data.items()): ++ for (k, v) in data.items(): + cap = v['cap'] + if 'filename' in v: + key = k[0] +@@ -721,7 +723,7 @@ exclude those logs from the archive. 
+ + # include inventory + data['inventory.xml'] = {'cap': None, +- 'output': StringIOmtime(make_inventory(data, subdir))} ++ 'output': BytesIOmtime(make_inventory(data, subdir))} + + # create archive + if output_fd == -1: +@@ -782,7 +784,7 @@ def dump_scsi_hosts(cap): + + + def module_info(cap): +- output = StringIO.StringIO() ++ output = BytesIO() + modules = open(PROC_MODULES, 'r') + procs = [] + +@@ -806,7 +808,7 @@ def multipathd_topology(cap): + + + def dp_list(): +- output = StringIO.StringIO() ++ output = BytesIO() + procs = [ProcOutput([OVS_DPCTL, 'dump-dps'], + caps[CAP_NETWORK_STATUS][MAX_TIME], output)] + +@@ -828,7 +830,7 @@ def collect_ovsdb(): + if os.path.isfile(OPENVSWITCH_COMPACT_DB): + os.unlink(OPENVSWITCH_COMPACT_DB) + +- output = StringIO.StringIO() ++ output = BytesIO() + max_time = 5 + procs = [ProcOutput(['ovsdb-tool', 'compact', + OPENVSWITCH_CONF_DB, OPENVSWITCH_COMPACT_DB], +@@ -871,7 +873,7 @@ def fd_usage(cap): + + + def dump_rdac_groups(cap): +- output = StringIO.StringIO() ++ output = BytesIO() + procs = [ProcOutput([MPPUTIL, '-a'], caps[cap][MAX_TIME], output)] + + run_procs([procs]) +@@ -896,7 +898,7 @@ def load_plugins(just_capabilities=False, filter=None): + for node in nodelist: + if node.nodeType == node.TEXT_NODE: + rc += node.data +- return rc.encode() ++ return rc + + def getBoolAttr(el, attr, default=False): + ret = default +@@ -1037,7 +1039,7 @@ def make_tar(subdir, suffix, output_fd, output_file): + s = os.stat(v['filename']) + ti.mtime = s.st_mtime + ti.size = s.st_size +- tf.addfile(ti, open(v['filename'])) ++ tf.addfile(ti, open(v['filename'], 'rb')) + except: + pass + finally: +@@ -1095,12 +1097,12 @@ def make_inventory(inventory, subdir): + s.setAttribute('date', time.strftime('%c')) + s.setAttribute('hostname', platform.node()) + s.setAttribute('uname', ' '.join(platform.uname())) +- s.setAttribute('uptime', commands.getoutput(UPTIME)) ++ s.setAttribute('uptime', check_output(UPTIME).decode()) + document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(s) + + map(lambda k_v: inventory_entry(document, subdir, k_v[0], k_v[1]), + inventory.items()) +- return document.toprettyxml() ++ return document.toprettyxml().encode() + + + def inventory_entry(document, subdir, k, v): +@@ -1301,7 +1303,7 @@ class ProcOutput(object): + line = self.proc.stdout.readline() + else: + line = self.proc.stdout.read(self.bufsize) +- if line == '': ++ if line == b'': + # process exited + self.proc.stdout.close() + self.status = self.proc.wait() +@@ -1391,13 +1393,13 @@ def get_free_disk_space(path): + return s.f_frsize * s.f_bfree + + +-class StringIOmtime(StringIO.StringIO): +- def __init__(self, buf=''): +- StringIO.StringIO.__init__(self, buf) ++class BytesIOmtime(BytesIO): ++ def __init__(self, buf=b''): ++ BytesIO.__init__(self, buf) + self.mtime = time.time() + + def write(self, s): +- StringIO.StringIO.write(self, s) ++ BytesIO.write(self, s) + self.mtime = time.time() + + +diff --git a/utilities/ovs-dpctl-top.in b/utilities/ovs-dpctl-top.in +index f2cc3f7f2a..011cc64b74 100755 +--- a/utilities/ovs-dpctl-top.in ++++ b/utilities/ovs-dpctl-top.in +@@ -592,7 +592,7 @@ def flows_read(ihdl, flow_db): + + try: + flow_db.flow_line_add(line) +- except ValueError, arg: ++ except ValueError as arg: + logging.error(arg) + + return flow_db +@@ -958,6 +958,9 @@ class FlowDB: + change order of fields of the same flow. 
+ """ + ++ if not isinstance(line, str): ++ line = str(line) ++ + line = line.rstrip("\n") + (fields, stats, _) = flow_line_split(line) + +@@ -988,7 +991,7 @@ class FlowDB: + + self.flow_event(fields_dict, stats_old_dict, stats_dict) + +- except ValueError, arg: ++ except ValueError as arg: + logging.error(arg) + self._error_count += 1 + raise +@@ -1192,7 +1195,7 @@ def flows_top(args): + flows_read(ihdl, flow_db) + finally: + ihdl.close() +- except OSError, arg: ++ except OSError as arg: + logging.critical(arg) + break + +@@ -1220,7 +1223,7 @@ def flows_top(args): + + # repeat output + for (count, line) in lines: +- print line ++ print(line) + + + def flows_script(args): +@@ -1249,7 +1252,7 @@ def flows_script(args): + render = Render(console_width, Render.FIELD_SELECT_SCRIPT) + + for line in render.format(flow_db): +- print line ++ print(line) + + + def main(): +diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c +index bd3972636e..37cc72d401 100644 +--- a/utilities/ovs-vsctl.c ++++ b/utilities/ovs-vsctl.c +@@ -1344,9 +1344,13 @@ cmd_list_zone_tp(struct ctl_context *ctx) + + struct ovsrec_ct_timeout_policy *tp = zone->timeout_policy; + +- for (int j = 0; j < tp->n_timeouts; j++) { +- ds_put_format(&ctx->output, "%s=%"PRIu64" ", +- tp->key_timeouts[j], tp->value_timeouts[j]); ++ if (tp) { ++ for (int j = 0; j < tp->n_timeouts; j++) { ++ ds_put_format(&ctx->output, "%s=%"PRIu64" ", ++ tp->key_timeouts[j], tp->value_timeouts[j]); ++ } ++ } else { ++ ds_put_cstr(&ctx->output, "system default"); + } + ds_chomp(&ctx->output, ' '); + ds_put_char(&ctx->output, '\n'); +diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c +index e591c26a6c..ce348b9d16 100644 +--- a/vswitchd/bridge.c ++++ b/vswitchd/bridge.c +@@ -634,8 +634,10 @@ static void + get_timeout_policy_from_ovsrec(struct simap *tp, + const struct ovsrec_ct_timeout_policy *tp_cfg) + { +- for (size_t i = 0; i < tp_cfg->n_timeouts; i++) { +- simap_put(tp, tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]); ++ if (tp_cfg) { ++ for (size_t i = 0; i < tp_cfg->n_timeouts; i++) { ++ simap_put(tp, tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]); ++ } + } + } + diff --git a/SOURCES/ppc_64-power8-linuxapp-gcc-config b/SOURCES/ppc_64-power8-linuxapp-gcc-config index 7b21579..394713d 100644 --- a/SOURCES/ppc_64-power8-linuxapp-gcc-config +++ b/SOURCES/ppc_64-power8-linuxapp-gcc-config @@ -1,4 +1,4 @@ -# -*- cfg-sha: f7b9a8671f1828542f6b8389a63bc60574d9c9ab21d06d5e8adefbaf7c929bc3 +# -*- cfg-sha: ed6bcdfa02f885357548558116ba4f4693048c72eb35043c2de856708c6f7257 # SPDX-License-Identifier: BSD-3-Clause # Copyright (C) IBM Corporation 2014. # SPDX-License-Identifier: BSD-3-Clause @@ -10,7 +10,7 @@ CONFIG_RTE_VER_PREFIX="DPDK" # Version information completed when this file is processed for a build CONFIG_RTE_VER_YEAR=19 CONFIG_RTE_VER_MONTH=11 -CONFIG_RTE_VER_MINOR=1 +CONFIG_RTE_VER_MINOR=3 CONFIG_RTE_VER_SUFFIX="" CONFIG_RTE_VER_RELEASE=99 # RTE_EXEC_ENV values are the directories in mk/exec-env/ @@ -590,4 +590,3 @@ CONFIG_RTE_TOOLCHAIN_GCC=y # Note: Power doesn't have this support # Note: Initially, all of architecture we compile for. PMD drivers compilation are turned off on Power # Will turn on them only after architecture we compile for. 
successful testing on Power -CONFIG_RTE_LIBRTE_PMD_XENVIRT=n diff --git a/SOURCES/x86_64-native-linuxapp-gcc-config b/SOURCES/x86_64-native-linuxapp-gcc-config index 81175d1..30d033b 100644 --- a/SOURCES/x86_64-native-linuxapp-gcc-config +++ b/SOURCES/x86_64-native-linuxapp-gcc-config @@ -1,4 +1,4 @@ -# -*- cfg-sha: 59724fb7100d28a9ee24efa79c4206bcde839bc29bb98eea771474514e57e022 +# -*- cfg-sha: f4cf137e2d4d96b2fa1ea8a0f1029d8d6553993747fda3f9f37fd01138fae055 # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2010-2014 Intel Corporation # SPDX-License-Identifier: BSD-3-Clause @@ -10,7 +10,7 @@ CONFIG_RTE_VER_PREFIX="DPDK" # Version information completed when this file is processed for a build CONFIG_RTE_VER_YEAR=19 CONFIG_RTE_VER_MONTH=11 -CONFIG_RTE_VER_MINOR=1 +CONFIG_RTE_VER_MINOR=3 CONFIG_RTE_VER_SUFFIX="" CONFIG_RTE_VER_RELEASE=99 # RTE_EXEC_ENV values are the directories in mk/exec-env/ @@ -588,4 +588,3 @@ CONFIG_RTE_ARCH_X86_64=y CONFIG_RTE_ARCH_X86=y CONFIG_RTE_ARCH_64=y CONFIG_RTE_TOOLCHAIN_GCC=y -CONFIG_RTE_LIBRTE_PMD_XENVIRT=n diff --git a/SPECS/openvswitch2.13.spec b/SPECS/openvswitch2.13.spec index 8258d7e..21b0e82 100644 --- a/SPECS/openvswitch2.13.spec +++ b/SPECS/openvswitch2.13.spec @@ -62,14 +62,14 @@ Summary: Open vSwitch Group: System Environment/Daemons daemon/database/utilities URL: http://www.openvswitch.org/ Version: 2.13.0 -Release: 39%{?commit0:.%{date}git%{shortcommit0}}%{?commit1:dpdk%{shortcommit1}}%{?dist} +Release: 57%{?commit0:.%{date}git%{shortcommit0}}%{?commit1:dpdk%{shortcommit1}}%{?dist} # Nearly all of openvswitch is ASL 2.0. The bugtool is LGPLv2+, and the # lib/sflow*.[ch] files are SISSL # datapath/ is GPLv2 (although not built into any of the binary packages) License: ASL 2.0 and LGPLv2+ and SISSL -%define dpdkver %{?commit1}%{!?commit1:19.11.1} +%define dpdkver %{?commit1}%{!?commit1:19.11} %define dpdkdir dpdk %define dpdksver %(echo %{dpdkver} | cut -d. -f-2) # NOTE: DPDK does not currently build for s390x @@ -700,6 +700,78 @@ exit 0 %endif %changelog +* Wed Aug 26 2020 Open vSwitch CI <ovs-team@redhat.com> - 2.13.0-57 +- Merging upstream branch-2.13 + [2fe3a06bffcd907f8f6561ec0e56963de9766c97] + +* Tue Aug 18 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-56 +- dpdk: Updated configs to 19.11.3 + [4e4acaf40ab114e958b299cdff55c11240bfd4da] + +* Tue Aug 18 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-55 +- Merging 798524b5e3 version: 19.11.3 (#1868709) + [64c883ec66425ad67a70599c549008442e3217cd] + +* Thu Aug 13 2020 Open vSwitch CI <ovs-team@redhat.com> - 2.13.0-54 +- Merging upstream branch-2.13 + [5dddb2d4f863203ec3560fcfaf8f20844b053073] + +* Mon Aug 10 2020 Open vSwitch CI <ovs-team@redhat.com> - 2.13.0-53 +- Merging upstream branch-2.13 + [bb436c2999218e59e06f089b42e19d3778869c63] + +* Mon Aug 10 2020 Dumitru Ceara <dceara@redhat.com> - 2.13.0-52 +- ovsdb-server: Replace in-memory DB contents at raft install_snapshot. (#1867185) + [9f646ec051fa2a2bf980843b7c1859479e87c228] + +* Sat Aug 08 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-51 +- redhat: Add support to custom RPM releases. + [7eb5b56344c07f237b2883f655eeee9c1ea0535e] + +* Sat Aug 08 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-50 +- pkgtool: Use OVS static version in package NVR. + [a0b572aaa173f2a4b4f57b8b396706777bf83395] + +* Thu Jul 30 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.13.0-49 +- odp-util: Fix clearing match mask if set action is partially unnecessary. 
(#1862153) + [6d85fea8b4c7db954c051d0bad7bc9505c1fdf7c] + +* Thu Jul 16 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-48 +- redhat: Clean old changelog entries. + [6cf8d909e81a715a302a2c401ef60abcc726fc78] + +* Thu Jul 16 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-47 +- redhat: Update the documentation. + [c9571d2dad6b1e47ba1d398350d8cd101a93e6a7] + +* Thu Jul 16 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-46 +- redhat: Add merge script. + [752c59ba745c3c82bc7ca1e31caefbc4b6514b07] + +* Thu Jul 16 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-45 +- redhat: Use static references. + [f1025c1515c00e9ec8e1fbc3a5337c412a3ce0c8] + +* Wed Jul 15 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-44 +- Update DPDK configs to v19.11.2. + [98e6e9823b54d5f7f52aa531a5479289a4fc40d7] + +* Wed Jul 15 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-43 +- Merge DPDK tag 'v19.11.2' into fast-datapath-rhel-8 + [755e86c61ae905a1485850f9e44a3502a63f52fb] + +* Wed Jul 15 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-42 +- Merging upstream branch-2.13 to fast-datapath-rhel-8 + [735b3f94c2655e930b0ee86556eb01191518f7e8] + +* Sun Jul 12 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-41 +- redhat: Rename OVSCI job name. + [a61f1d1095e58fb7c2ad38d37b86f3012f5aecfe] + +* Wed Jul 08 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.13.0-40 +- redhat: pkgtool: use diff instead of format-patch + [da2129ac827efe85db1e0ceeff8996e5045a862b] + * Thu Jun 25 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.13.0-39 - bus/pci: fix VF memory access (#1851169) [2b22bcd9ad02d0180ad5c46a2cccf34a3afba600] @@ -844,720 +916,3 @@ exit 0 - vhost: protect log address translation in IOTLB update (#1806599) [0d4370404fa971cb07ca2bf9cb0cdf98ecc54d4b] -* Tue Feb 25 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.13.0-3 -- Remove Docutils, Pygments and Sphinx directories - [0857b41c11694061bc94122c3c026ff552745703] - -* Tue Feb 25 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.13.0-2 -- Update Red Hat build files to use upstream tarballs and one patch - [b14f867126d5d9cfbe24d54c89aa917384c8c133] - -* Thu Feb 20 2020 Flavio Leitner <fbl@redhat.com> - 2.13.0-1 -- Open vSwitch version 2.13.0 - [44ed4ed8d98d8c21e715a7014d89a2f14f56b96b] - -* Wed Jan 22 2020 Open vSwitch Bot <null@redhat.com> - 2.13.0-0.20200121git2a4f006 -- Snapshot of branch-2.13 2a4f006c79c0 - -* Fri Jan 17 2020 Open vSwitch Bot <null@redhat.com> - 2.13.0-0.20200117git8ae6a5f -- Snapshot of master 8ae6a5f98c3a - -* Tue Jan 14 2020 Open vSwitch Bot <null@redhat.com> - 2.13.0-0.20200114gitb9b7b98 -- Snapshot of master b9b7b989d105 - -* Tue Jan 14 2020 Open vSwitch Bot <null@redhat.com> - 2.13.0-0.20200114gitb9b7b98 -- Snapshot of master b9b7b989d105 - -* Tue Jan 14 2020 Open vSwitch Bot <null@redhat.com> - 2.13.0-0.20200114gitb9b7b98 -- Snapshot of master b9b7b989d105 -- Remove MLX{4,5} glue libraries, since Python3 is included in RHEL 7.6 that - ships the correct libibverbs library. 
- -* Tue Jan 14 2020 Open vSwitch Bot <null@redhat.com> - 2.13.0-0.20200113git67eb811 -- Snapshot of master 67eb8110171f - -* Mon Jan 13 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.13.0-0.20200109git2109841.1 -- Add a not-upstream-yet patch to remove dependency for python3-netifaces, - since it's not available on RHEL7 - -* Mon Jan 13 2020 Open vSwitch Bot <null@redhat.com> - 2.13.0-0.20200109git2109841 -- Snapshot of master 2109841b7984 - -* Thu Jan 09 2020 Open vSwitch Bot <null@redhat.com> - 2.13.0-0.20200109gitb926f57 -- Snapshot of master b926f577aaf1 - -* Tue Jan 07 2020 David Marchand <david.marchand@redhat.com> - 2.11.0-16 -- Backport DPDK interrupt fixes for qede (#1788515) - -* Mon Dec 23 2019 Eelco Chaudron <echaudro@redhat.com> - 2.12.0-15 - -- Backport "vhost: add device op when notification to guest is sent" (#1726579) -- Backport "netdev-dpdk: Add coverage counter to count vhost IRQs" (#1726579) - -* Mon Dec 23 2019 Eelco Chaudron <echaudro@redhat.com> - 2.12.0-14 -- Backport "net/i40e: downgrade error log" (#1719644) -- Backport "net/i40e: re-program promiscuous mode on VF interface" (#1733402) -- Backport "bridge: Allow manual notifications about interfaces' updates" (#1719644) -- Backport "netdev-dpdk: add support for the RTE_ETH_EVENT_INTR_RESET" (#1719644) - -* Thu Dec 19 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.12.0-13 -- Add --with ipsec flag to build OVS with IPSEC support - -* Tue Dec 10 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.12.0-12 -- Fix librte_pmd_mlx{4,5}_glue.so error in Execshield part of RPMDiff - by backporting the DPDK flags from dpdk spec file. - -* Fri Dec 06 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.12.0-11 -- Backport "ovs-tcpundump: allow multiple packet lengths" (#1780553) -- Backport "ovs-tcpundump: exit when getting version" (#1780555) -- Backport "ovs-check-dead-ifs: python3 print format" (#1780563) -- Backport "ovs-check-dead-ifs: unshadow pid variable" (#1780563) -- Backport "flake8: also check the ovs-check-dead-ifs script" (#1780563) - -* Wed Dec 04 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.12.0-10 -- Rebase internal DPDK to 18.11.5 (#1773780) (CVE-2019-14818) - -* Tue Nov 26 2019 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.12.0-9 -- Backport "jsonrpc: increase input buffer size from 512 to 4096" (#1720653) - -* Fri Nov 22 2019 Flavio Leitner <fbl@redhat.com> - 2.12.0-8 -- updated spec to conflict with previous versions. - -* Fri Nov 22 2019 Flavio Leitner <fbl@redhat.com> - 2.12.0-7 -- Backport "ofproto-dpif: Allow IPv6 ND Extensions only if supported" (#1773598) - [df5db2a7a0fe9a4b6f5eafaada20a9b834aebbac] - -* Wed Nov 13 2019 Numan Siddique <nusiddiq@redhat.com> - 2.12.0-6 -- Backport "ovsdb-server: Allow replication from older schema version servers" (#1771854) - -* Tue Nov 12 2019 David Marchand <david.marchand@redhat.com> - 2.12.0-5 -- Backport "netdev-dpdk: Track vhost tx contention." 
(#1771390) - -* Tue Nov 05 2019 David Marchand <david.marchand@redhat.com> - 2.12.0-4 -- Renumbered dpdk patches -- Backport IOVA fixes (#1769027) - -* Mon Oct 14 2019 Numan Siddique <nusiddiq@redhat.com> - 2.12.0-3 -- Backport "ovsdb-server: Don't drop all connections on read/write status change" (#1761573) - -* Tue Oct 08 2019 Flavio Leitner <fbl@redhat.com> - 2.12.0-2 -- updated to 2.12.0 plus patches till 093fd99a4c12d (#1758820) - -* Mon Oct 07 2019 Aaron Conole <aconole@redhat.com> - 2.12.0-1.20190723gitcbff264 -- Backport "vswitch: ratelimit the device add log" (#1737146) - -* Wed Jul 24 2019 Open vSwitch Bot <null@redhat.com> - 2.12.0-0.20190723gitcbff264 -- Snapshot of branch-2.12 cbff264a084a - -* Tue Jul 16 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-18 -- Increase CONFIG_RTE_MAX_ETHPORTS to 128 (#1730421) - -* Tue Jul 16 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-17 -- Backport "tunnel: Add layer 2 IPv6 GRE encapsulation support." and - "netdev-vport: Make ip6gre netdev type to use TC rules" (#1725623) - -* Fri Jul 12 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-16 -- Rebase internal DPDK to 18.11.2 (#1713698) - -* Tue Jul 09 2019 David Marchand <david.marchand@redhat.com> - 2.11.0-15 -- Backport "net/i40e: fix dropped packets statistics name" (#1728610) - -* Tue Jul 02 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-14 -- Backport "netdev-tc-offloads: Use correct hook qdisc at init tc flow" (#1721219) - -* Fri Jun 21 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-13 -- Backport "netdev-tc-offloads: Support match on priority tags" (#1722249) - -* Thu Jun 13 2019 Maxime Coquelin <maxime.coquelin@redhat.com> - 2.11.0-12 -- Backport Vhost performance regression fixes (#1672538) - -* Thu Jun 13 2019 Flavio Leitner <fbl@redhat.com> - 2.11.0-11 -- Backport "rhel: limit stack size to 2M." (#1720315) - -* Thu May 16 2019 Pablo Cascón <pablo.cascon@redhat.com> - 2.11.0-10 -- Backport "ovs-tc: support OvS internal port offload" and deps (#1702334) - -* Wed Apr 24 2019 Numan Siddique <nusiddiq@redhat.com> - 2.11.0-9 -- Backport "[OVN] Fragmentation support - check_pkt_larger action" (#1702564) - -* Thu Apr 11 2019 Kevin Traynor <ktraynor@redhat.com> - 2.11.0-8 -- Backport "net/qede: support IOVA VA mode" (#1684605) - -* Wed Apr 10 2019 David Marchand <david.marchand@redhat.com> - 2.11.0-7 -- Backport cpu affinity fixes (#1687320) - -* Tue Apr 09 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-6 -- Add missing dependencies for ovs-tcpdump (#1697978) - -* Tue Mar 26 2019 Flavio Leitner <fbl@redhat.com> - 2.11.0-5 -- fixed netlink msg corruption when updating netdev. 
(#1692812) - -* Tue Mar 12 2019 Davide Caratti <dcaratti@redhat.com> - 2.11.0-4 -- Backport "net/bnxt: support IOVA VA mode" (#1645523) - -* Tue Mar 12 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-3 -- Backport "ovs-ctl: Permit to specify additional options" (#1687775) -- Remove useless -fPIC from DPDK - -* Fri Mar 01 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-2 -- Backport "rhel: Use PIDFile on forking systemd service files" (#1684477) - -* Thu Feb 28 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-1 -- Update to official 2.11 release - -* Thu Jan 31 2019 Open vSwitch Bot <null@redhat.com> - 2.11.0-0.20190129gitd3a10db -- Snapshot of branch-2.11 d3a10db4fd38 - -* Sun Jan 27 2019 Open vSwitch Bot <null@redhat.com> - 2.11.0-0.20190126gitd4ff5b2 -- Snapshot of branch-2.11 d4ff5b2be7fc - -* Mon Jan 14 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-0.20190114gitadb3f0b -- Update to a snapshot of OVS 2.11 from master - -* Mon Jan 7 2019 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-42 -- Backport "OVN: add static IP support to IPAM" (#1664028) - -* Thu Jan 03 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-41 -- Backport some patches to improve offload indications (#1655990) - -* Wed Jan 02 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-40 -- Add "Requires: openvswitch = %%{version}-%%{release}" to python-openvswitch2.10 (#1662944) - -* Wed Jan 2 2019 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-39 -- Backport "OVN: add mac address only support to IPAM/MACAM" (#1662905) - -* Thu Dec 20 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-38 -- Backport "ovn-controller: Inject GARPs to logical switch pipeline to update neighbors" (#1643902) - -* Tue Dec 18 2018 David Marchand <david.marchand@redhat.com> - 2.10.0-37 -- Backport 'ovs-ctl: fix system-id.conf owner' (#1659391) -- Do not check /var/log/openvswitch owner/group (#1659391) - -* Tue Dec 18 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-36 -- Backport "ovn: Fix the invalid eth.dst and ip6.dst set by nd_ns action for certain cases." (#1656018) - -* Mon Dec 10 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-35 -- Backport "dpif-netdev: Add vlan to mask for flow_put operation" (#1649516) - -* Tue Nov 27 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-34 -- Backport "ovn: Avoid tunneling for VLAN packets redirected to a gateway chassis" (#1561880) - -* Fri Nov 23 2018 Eelco Chaudron <echaudro@redhat.com> - 2.10.0-33 -- Backport "mem: fix memory initialization time" (#1647498) - -* Thu Nov 22 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-32 -- Backport "tests: Use the default key length when generating RSA keys" - -* Wed Nov 14 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-31 -- Backport "net/qede: fix crash when configure fails" (#1648183) - -* Tue Nov 13 2018 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-30 -- Backport 'pinctrl: Fix dp_packet structure leak' and 'pinctrl: Fix crash on - buffered packets hmap double remove'. 
Moreover align 'ovn -- 3 HVs, 3 LS, 3 - lports/LS, 1 LR' test to upstream one (#1649008) - -* Tue Nov 13 2018 Eelco Chaudron <echaudro@redhat.com> - 2.10.0-29 -- Backup "netdev-dpdk: Bring link down when NETDEV_UP is not set" (#1645288) - -* Fri Nov 09 2018 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-28 -- OVN: configure L2 address according to the used IP address (#1648272) - -* Thu Nov 08 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-27 -- Backport "bond: Honor updelay and downdelay when LACP is in use" (#1646923) - -* Thu Nov 08 2018 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-26 -- OVN: introduce mac_prefix support to IPAM (#1647750) - -* Tue Nov 06 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-25 -- Backport "ofproto-dpif-xlate: Avoid deadlock on multicast snooping recursion" (#1643065) - -* Tue Nov 06 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-24 -- Re-enable "make check" - -* Fri Nov 02 2018 Kevin Traynor <ktraynor@redhat.com> - 2.10.0-23 -- Update to DPDK 17.11.4 (#1566069) - -* Thu Oct 25 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-22 -- Ship statically linked OVS binaries (#1643478) - -* Tue Oct 23 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-21 -- Backport connmgr: Fix vswitchd abort when a port is added and the controller is down (#1637926) - -* Mon Oct 22 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-20 -- Backport "ovn: Add DHCP support for option 252" (#1641740) - -* Wed Oct 17 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-19 -- Backport "net/i40e: fix VLAN offload setting issue" (#1637893) - -* Wed Oct 17 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-18 -- Backport "Python: Make Row's __getattr__ less error prone" (#1639963) - -* Fri Oct 12 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-17 -- OVN: ovn-ctl: Fix the wrong pidfile argument passed to ovsdb-servers (#1636714) - -* Fri Oct 12 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-16 -- OVN: Support processing DHCPv6 information request message type (#1636874) - -* Fri Oct 12 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-15 -- OVN: Fix IPv6 DAD failure for container ports (#1616129) - -* Thu Oct 11 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-14 -- OVN: Fix the issue in IPv6 Neigh Solicitation responder for router IPs (#1567735) - -* Tue Oct 09 2018 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-13 -- OVN: add buffering support for ip packets (#1637466) - -* Mon Oct 08 2018 Matteo Croce <mcroce@redhat.com> - 2.10.0-12 -- Fix null pointer (#1634015) -* Tue Oct 02 2018 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-11 -- OVN: add CT_LB action to ovn-trace (#1635344) - -* Mon Oct 01 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-10 -- Backport NFP PMD's non-root related commits for > 1TB of RAM (#1634820): - - net/nfp: support IOVA VA mode - - bus/pci: forbid IOVA mode if IOMMU address width too small - - net/nfp: check hugepages IOVAs based on DMA mask - - mem: use address hint for mapping hugepages - - bus/pci: use IOVAs check when setting IOVA mode - - mem: add function for checking memsegs IOVAs addresses - - mem: fix max DMA maskbit size - -* Thu Sep 27 2018 Matteo Croce <mcroce@redhat.com> - 2.10.0-9 -- Backport "Remove support for multiple queues per port" (#1634015) - -* Wed Sep 26 2018 Matteo Croce <mcroce@redhat.com> - 2.10.0-8 -- Backport EMC reorder fix (#1565205) - -* Wed Sep 26 2018 Matteo Croce <mcroce@redhat.com> - 2.10.0-7 -- Backport per-port socket netlink creation with 
EPOLLEXCLUSIVE (#1634015) - -* Fri Sep 21 2018 Kevin Traynor <ktraynor@redhat.com> - 2.10.0-6 -- Backport roundrobin rxq to pmd assignment (#1631797) - -* Fri Sep 14 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-5 -- Backport "ovs-save: Don't always include the default flow during restore" (#1628905) - -* Thu Sep 13 2018 Flavio Leitner <fbl@redhat.com> - 2.10.0-4 -- applied Fix translation of groups with no buckets (#1626488) - -* Thu Sep 13 2018 Flavio Leitner <fbl@redhat.com> - 2.10.0-3 -- Removed provides and obsoletes for openvswitch-dpdk (#1628603) - -* Tue Sep 11 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-2 -- Backported "net/mlx{4,5}: avoid stripping the glue library" (#1627700) - -* Tue Aug 21 2018 Flavio Leitner <fbl@redhat.com> - 2.10-1 -- Updated with 2.10.0 official tarball (#1618551) - -* Fri Aug 17 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 -- Sync'ed with fd-next (4452afaa58) -- vhost: flush IOTLB cache on new mem table handling (#1609643) -- OVN: introduce ovs-appctl command to monitor HVs sb (#1593804) - -* Thu Aug 16 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 -- Snapshot of branch-2.10 6bced903bb50 - -* Fri Aug 10 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 -- Snapshot of branch-2.10 58a7ce60b9f7 - -* Wed Aug 08 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 -- Snapshot of branch-2.10 faf64fb8861f - -* Tue Aug 07 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 -- Snapshot of branch master 7a78d1c1ad73 - -* Tue Jul 31 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 -- Sync'ed spec file with fd-next-57 (shared linking). - (DPDK patches not included) -- Fixed package dependencies (#1610603) - -* Fri Jul 27 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 -- Snapshot of branch master b1ca64f020f7 - -* Fri Jul 27 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 -- Replace macro %%{name} with 'openvswitch'. - -* Tue Jul 24 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 -- Snapshot of branch master 1ac690899592 - -* Tue Jul 24 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 -- Versioned conflict to be less than 2.10. - -* Thu Jul 19 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 -- Snapshot of branch master 3c921cc2b6b7 - -* Wed Jul 18 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 -- Fixed unbound requires and buildrequires. 
- -* Tue Jul 10 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 -- Snapshot of branch master 93c0ef12039c - -* Tue Jul 03 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 -- Snapshot of branch master 79d0dfa4e99a - -* Wed Jun 27 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 -- Snapshot of branch master e46148133067 - -* Wed Jun 27 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 -- Snapshot of branch master 61677bf976e9 - -* Tue Jun 26 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 -- snapshot of branch master - -* Mon Jun 11 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-47 -- Backport "net/mlx5: fix memory region cache lookup" (#1581230) -- Backport "net/mlx5: fix memory region boundary checks" (#1581230) - -* Mon Jun 11 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-46 -- Backport "net/qede: fix memory alloc for multiple port reconfig" (#1589866) - -* Thu Jun 07 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-45 -- Backport "net/qede: fix unicast filter routine return code" (#1578590) - -* Thu Jun 07 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-44 -- Backport "net/qede: fix L2-handles used for RSS hash update" (#1578981) - -* Tue May 29 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-43 -- Backport "net/nfp: fix lock file usage" (#1583670) - -* Mon May 28 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-42 -- Backport "net/nfp: configure default RSS reta table" (#1583161) - -* Mon May 28 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-41 -- Backport "netdev-dpdk: don't enable scatter for jumbo RX support for nfp" (#1578324) - -* Mon May 28 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-40 -- Backport "ovn pacemaker: Fix promotion issue when the master node is reset" (#1579025) - -* Thu May 24 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-39 -- Backport spec file modfications from "rhel: Use openvswitch user/group for - the log directory" - -* Wed May 23 2018 Maxime Coquelin <maxime.coquelin@redhat.com> - 2.9.0-38 -- Backport "vhost: improve dirty pages logging performance" (#1552465) - -* Wed May 16 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-37 -- Backport "ovn: Set proper Neighbour Adv flag when replying for NS request for - router IP" (#1567735) - -* Mon May 14 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-36 -- Enable QEDE PMDs (only on x86_64) (#1578003) - -* Thu May 10 2018 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.9.0-35 -- ovn-nbctl: Show gw chassis in decreasing prio order (#1576725) - -* Wed May 09 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-34 -- Fix hugetlbfs group when DPDK is enabled - -* Wed May 09 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-33 -- Backport "eal: abstract away the auxiliary vector" (#1560728) -- Re-enable DPDK on ppc64le - -* Wed May 09 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-32 -- Require the selinux policy module (#1555440) - -* Tue May 08 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-31 -- Backport fix QEDE PMD (#1494616) - -* Tue May 08 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-30 -- Backport "net/nfp: fix mbufs releasing when stop or close" (#1575067) - -* Sun May 06 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-29 -- Backport net/mlx4: fix broadcast Rx (#1568908) - -* Fri May 04 2018 Kevin Traynor <ktraynor@redhat.com> - 2.9.0-28 -- Backport mempool use after free fix and debug (#1575016) - -* Fri May 04 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-27 -- Fix the email address in the changelog. 
- -* Wed May 02 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-26 -- Backport fix for missing user during install/upgrade (#1559374) - -* Mon Apr 30 2018 Jakub Sitnicki <jkbs@redhat.com> - 2.9.0-25 -- Backport fix for Unicode encoding in Python IDL (#1547065) - -* Thu Apr 26 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-24 -- Backport the cisco enic patches - -* Thu Apr 26 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-23 -- Backport a fix for "Offload of Fragment Matching in OvS Userspace" (#1559111) - -* Thu Apr 26 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-22 -- Backport "ovn-controller: Handle Port_Binding's "requested-chassis" option" (#1559222) - -* Thu Apr 26 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-21 -- Backport "python: avoid useless JSON conversion to enhance performance" (#1551016) - -* Thu Apr 26 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-20 -- Backport "ovn: Set router lifetime value for IPv6 periodic RA" (#1567735) -- Remove useless libpcap-devel dependency - -* Mon Apr 23 2018 Kevin Traynor <ktraynor@redhat.com> - 2.9.0-19 -- Backport DPDK CVE-2018-1059 (#1544298) - -* Fri Apr 20 2018 Davide Caratti <dcaratti@redhat.com> - 2.9.0-18 -- Backport fix for PMD segfault when BNXT receives tunneled traffic (#1567634) - -* Mon Apr 16 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-17 -- Backport patches to make NFP detect the correct firmware (#1566712) -- Backport "rhel: Fix literal dollar sign usage in systemd service files" - -* Fri Mar 30 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-16 -- Backport "rhel: don't drop capabilities when running as root" -- Change owner of /etc/openvswitch during upgrade - -* Tue Mar 27 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-14 -- Disable DPDK on ppc64le - -* Sun Mar 25 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-13 -- Disable DPDK on aarch64 - -* Thu Mar 22 2018 Flavio Leitner <fbl@redhat.com> - 2.9.0-12 -- fixes i40e link status timeout trough direct register access (#1559612) - -* Thu Mar 22 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-11 -- Enable BNXT, MLX4, MLX5 and NFP (aligned from FDB) - -* Thu Mar 22 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-10 -- Backport "Offload of Fragment Matching in OvS Userspace" (#1559111) - -* Thu Mar 15 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-9 -- Avoid to unpack openvswitch 2 times and to overwrite all the patched files - Fixes 2.9.0-4 - -* Thu Mar 08 2018 Eric Garver <egarver@redhat.com> - 2.9.0-8 -- Backport "ofproto-dpif-xlate: translate action_set in clone action" (#1544892) - -* Thu Mar 08 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-7 -- Backport "ovn: Calculate UDP checksum for DNS over IPv6" (#1553023) - -* Tue Mar 06 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-6 -- Require the latest rhel selinux policy (#1549673) - -* Fri Mar 02 2018 Matteo Croce <mcroce@redhat.com> - 2.9.0-5 -- Backport vhost patches (#1541881) - -* Fri Mar 02 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-4 -- Don't require python-sphinx directly, but built it since python-sphinx is in - the optional repository that is not available on RHEV and TPS test fails. - -* Tue Feb 20 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-3 -- Don't verify the user and group of /etc/openvswitch and /etc/sysconfig/openvswitch - This is needed since we cannot change the user and group if you upgrade from - an old version that still uses root:root. 
-
-* Tue Feb 20 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-1
-- Update to OVS 2.9.0 + DPDK 17.11 (#1475436)
-- Backport of ofproto-dpif: Delete system tunnel interface when remove ovs bridge (#1505776)
-- Backport DPDK patches from FDB (vhost user async fix and enic fixes)
-- Backport 94cd8383e297 and 951d79e638ec to fix permissions (#1489465)
-- Use a static configuration file for DPDK
-
-* Fri Jan 12 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.7.3-3.git20180112
-- Rebase to latest OVS branch-2.7 fixes + DPDK 16.11.4 (#1533872)
-
-* Wed Oct 18 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.3-2.git20171010
-- Remove ovs-test and ovs-vlan-test from openvswitch-test package
-- Add an option to enable openvswitch-ovn-docker package (disabled by default)
-
-* Tue Oct 10 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.3-1.git20171010
-- Update to OVS 2.7.3 + branch-2.7 bugfixes (#1502742)
-
-* Mon Sep 18 2017 Kevin Traynor <ktraynor@redhat.com> - 2.7.2-10.git20170914
-- Backport of fix for i40e flow control get (#1491791)
-
-* Thu Sep 14 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.2-9.git20170914
-- Rebase to latest OVS branch fixes + DPDK 16.11.3
-
-* Wed Sep 06 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.2-8.git20170719
-- Backport of enic driver crash fix to dpdk-16.11 (#1489010)
-
-* Tue Aug 22 2017 Aaron Conole <aconole@redhat.com> - 2.7.2-7.git20170719
-- Re-enable Cisco enic PMD (#1482675)
-
-* Tue Aug 22 2017 Aaron Conole <aconole@redhat.com> - 2.7.2-6.git20170719
-- Update based on multi-arch
-
-* Tue Aug 22 2017 Aaron Conole <aconole@redhat.com> - 2.7.2-5.git20170719
-- Disable unsupported PMDs (#1482675)
-- software and hardware PMDs audited by the team
-
-* Thu Aug 03 2017 John W. Linville <linville@redhat.com> - 2.7.2-4.git20170719
-- Backport mmap fix for memory initialization on ppc64le to dpdk-16.11
-
-* Thu Aug 03 2017 John W. Linville <linville@redhat.com> - 2.7.2-3.git20170719
-- Backport support for vfio-pci based PMD in ppc64le to dpdk-16.11
-
-* Thu Aug 03 2017 John W. Linville <linville@redhat.com> - 2.7.2-2.git20170719
-- Backport support for Intel XL710 (i40e) pmd in ppc64le to dpdk-16.11
-
-* Wed Jul 19 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.2-1.git20170719
-- Update to OVS 2.7.2 + branch-2.7 bugfixes (#1472854)
-- Add a symlink of the OCF script in the OCF resources folder (#1472729)
-
-* Mon Jul 10 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.1-1.git20170710
-- Align to FDB openvswitch-2.7.1-1.git20170710.el7fdb (#1459286)
-
-* Wed Jun 07 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-20.git20161206
-- backport "mcast-snooping: Avoid segfault for vswitchd" (#1456356)
-- backport "mcast-snooping: Flush ports mdb when VLAN cfg changed." (#1456358)
-
-* Sun May 21 2017 Lance Richardson <lrichard@redhat.com> - 2.6.1-19.git20161206
-- backport patch to not automatically restard ovn svcs after upgrade (#1438901)
-
-* Tue May 09 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-18.git20161206
-- rconn: Avoid abort for ill-behaved remote (#1449109)
-
-* Fri May 05 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-17.git20161206
-- Fix race in "PMD - change numa node" test (#1447714)
-- Report only un-deleted groups in group stats replies. (#1447724)
-- Workaround some races in "ofproto - asynchronous message control" tests (#1448536)
-
-* Mon Apr 10 2017 Eric Garver <egarver@redhat.com> - 2.6.1-16.git20161206
-- Fix an issue using set_field action on nw_ecn (#1410715)
-
-* Fri Mar 31 2017 Kevin Traynor <ktraynor@redhat.com> - 2.6.1-15.git20161206
-- backport patch to fix uni-dir vhost perf drop (#1414919)
-
-* Wed Mar 29 2017 Lance Richardson <lrichard@redhat.com> - 2.6.1-14.git20161206
-- backport patch to correct port number in firewalld service file (#1390938)
-
-* Fri Mar 10 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-13.git20161206
-- backport patch to enable/disable libcap-ng support (--with libcapng)
-
-* Thu Mar 09 2017 Aaron Conole <aconole@redhat.com> - 2.6.1-12.git20161206
-- Fix an MTU issue with ovs mirror ports (#1426342)
-
-* Wed Mar 08 2017 Lance Richardson <lrichard@redhat.com> - 2.6.1-11.git20161206
-- update spec file to install firewalld service files (#1390938)
-
-* Thu Feb 16 2017 Aaron Conole <aconole@redhat.com> - 2.6.1-10.git20161206
-- vhostuser client mode support for ifup/ifdown (#1418957)
-
-* Thu Feb 16 2017 Lance Richardson <lrichard@redhat.com> - 2.6.1-9.git20161206
-- OVN-DHCP is not sending DHCP responses after a MAC change in north db (#1418261)
-
-* Thu Feb 16 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-8.git20161206
-- systemd service starts too fast (#1422227)
-
-* Fri Feb 10 2017 Lance Richardson <lrichard@redhat.com> - 2.6.1-7.git20161206
-- iptables should be easily configurable for OVN hosts and OVN central server (#1390938)
-
-* Thu Feb 09 2017 Aaron Conole <aconole@redhat.com> - 2.6.1-6.git20161206
-- ovn: IPAM has no reply to DHCP request for renewal (#1415449)
-
-* Tue Feb 07 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-5.git20161206
-- ovn-controller: Provide the option to set Encap.options:csum (#1418742)
-
-* Mon Feb 06 2017 Flavio Leitner <fbl@redhat.com> 2.5.0-23.git20160727
-- fixed broken service after a package upgrade (#1403958)
-
-* Wed Dec 21 2016 Lance Richardson <lrichard@redhat.com> 2.6.1-3.git20161206
-- ovsdb-idlc: Initialize nonnull string columns for inserted rows. (#1405094)
-
-* Fri Dec 09 2016 Lance Richardson <lrichard@redhat.com> 2.6.1-2.git20161206
-- OVN: Support IPAM with externally specified MAC (#1368043)
-
-* Tue Dec 06 2016 Kevin Traynor <ktraynor@redhat.com> 2.6.1-1.git20161206
-- Update to OVS 2.6.1 + branch-2.6 bugfixes (#1335865)
-- Update to use DPDK 16.11 (#1335865)
-- Enable OVN
-
-* Tue Nov 22 2016 Flavio Leitner <fbl@redhat.com> 2.5.0-22.git20160727
-- ifnotifier: do not wake up when there is no db connection (#1397504)
-
-* Tue Nov 22 2016 Flavio Leitner <fbl@redhat.com> 2.5.0-21.git20160727
-- Use instant sending instead of queue (#1397481)
-
-* Mon Nov 21 2016 Flavio Leitner <fbl@redhat.com> 2.5.0-20.git20160727
-- dpdk vhost: workaround stale vring base (#1376217)
-
-* Thu Oct 20 2016 Aaron Conole <aconole@redhat.com> - 2.5.0-19.git20160727
-- Applied tnl fix (#1346232)
-
-* Tue Oct 18 2016 Aaron Conole <aconole@redhat.com> - 2.5.0-18.git20160727
-- Applied the systemd backports
-
-* Tue Oct 18 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-17.git20160727
-- Fixed OVS to not require SSSE3 if DPDK is not used (#1378501)
-
-* Tue Oct 18 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-16.git20160727
-- Fixed a typo (#1385096)
-
-* Tue Oct 18 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-15.git20160727
-- Do not restart the service after a package upgrade (#1385096)
-
-* Mon Sep 26 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-14.git20160727
-- Permit running just the kernel datapath tests (#1375660)
-
-* Wed Sep 14 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-13.git20160727
-- Obsolete openvswitch-dpdk < 2.6.0 to provide migration path
-- Add spec option to run kernel datapath tests (#1375660)
-
-* Fri Sep 09 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-12.git20160727
-- Backport ovs-tcpdump support (#1335560)
-- Add ovs-pcap, ovs-tcpdump and ovs-tcpundump to -test package
-
-* Thu Sep 08 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-11.git20160727
-- Add openvswitch-dpdk provide for testing and depending on dpdk-enablement
-- Disable bnx2x driver, it's not stable
-- Build dpdk with -Wno-error to permit for newer compilers
-- Drop subpkgs conditional from spec, its not useful anymore
-
-* Fri Aug 26 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-10.git20160727
-- Fix adding ukeys for same flow by different pmds (#1364898)
-
-* Thu Jul 28 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-9.git20160727
-- Fixed ifup-ovs to support DPDK Bond (#1360426)
-
-* Thu Jul 28 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-8.git20160727
-- Fixed ifup-ovs to delete the ports first (#1359890)
-
-* Wed Jul 27 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-7.git20160727
-- pull bugfixes from upstream 2.5 branch (#1360431)
-
-* Tue Jul 26 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-6.git20160628
-- Removed redundant provides for openvswitch
-- Added epoch to the provides for -static package
-
-* Thu Jul 21 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-5.git20160628
-- Renamed to openvswitch (dpdk enabled)
-- Enabled sub-packages
-- Removed conflicts to openvswitch
-- Increased epoch to give this package preference over stable
-
-* Tue Jun 28 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-4.git20160628
-- pull bugfixes from upstream 2.5 branch (#1346313)
-
-* Wed Apr 27 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-4
-- Enable DPDK bnx2x driver (#1330589)
-- Add README.DPDK-PMDS document listing drivers included in this package
-
-* Thu Mar 17 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-3
-- Run testsuite by default on x86 arches (#1318786)
-  (this sync the spec with non-dpdk version though the testsuite
-  was already enabled here)
-
-* Thu Mar 17 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-2
-- eliminate debuginfo-artifacts (#1281913)
-
-* Thu Mar 17 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-1
-- Update to OVS to 2.5.0 and bundled DPDK to 2.2.0 (#1317889)
-
-* Mon Nov 23 2015 Panu Matilainen <pmatilai@redhat.com>
-- Provide openvswitch ver-rel (#1281894)
-
-* Thu Aug 13 2015 Flavio Leitner <fbl@redhat.com>
-- ExclusiveArch to x86_64 (dpdk)
-- Provides bundled(dpdk)
-- Re-enable testsuite
-
-* Fri Aug 07 2015 Panu Matilainen <pmatilai@redhat.com>
-- Enable building from pre-release snapshots, update to pre 2.4 version
-- Bundle a minimal, private build of DPDK 2.0 and link statically
-- Rename package to openvswitch-dpdk, conflict with regular openvswitch
-- Disable all sub-packages
-
-* Wed Jan 12 2011 Ralf Spenneberg <ralf@os-s.net>
-- First build on F14