From e4a31baed5e57de10942cf6d655515665e0defed Mon Sep 17 00:00:00 2001 From: Open vSwitch CI Date: Sep 06 2022 22:28:36 +0000 Subject: Import openvswitch2.16-2.16.0-98 from Fast DataPath --- diff --git a/SOURCES/openvswitch-2.16.0.patch b/SOURCES/openvswitch-2.16.0.patch index 0fc276b..670bb60 100644 --- a/SOURCES/openvswitch-2.16.0.patch +++ b/SOURCES/openvswitch-2.16.0.patch @@ -885,11 +885,79380 @@ index 6420b9d3e2..27359a297d 100644 libssl-dev, libtool, openssl, +diff --git a/dpdk/.ci/linux-build.sh b/dpdk/.ci/linux-build.sh +index d079801d78..26c30a2301 100755 +--- a/dpdk/.ci/linux-build.sh ++++ b/dpdk/.ci/linux-build.sh +@@ -4,7 +4,10 @@ on_error() { + if [ $? = 0 ]; then + exit + fi +- FILES_TO_PRINT="build/meson-logs/testlog.txt build/.ninja_log build/meson-logs/meson-log.txt" ++ FILES_TO_PRINT="build/meson-logs/testlog.txt" ++ FILES_TO_PRINT="$FILES_TO_PRINT build/.ninja_log" ++ FILES_TO_PRINT="$FILES_TO_PRINT build/meson-logs/meson-log.txt" ++ FILES_TO_PRINT="$FILES_TO_PRINT build/gdb.log" + + for pr_file in $FILES_TO_PRINT; do + if [ -e "$pr_file" ]; then +@@ -12,7 +15,9 @@ on_error() { + fi + done + } +-trap on_error EXIT ++# We capture the error logs as artifacts in Github Actions, no need to dump ++# them via a EXIT handler. ++[ -n "$GITHUB_WORKFLOW" ] || trap on_error EXIT + + install_libabigail() { + version=$1 +@@ -28,16 +33,36 @@ install_libabigail() { + rm ${version}.tar.gz + } + +-if [ "$AARCH64" = "1" ]; then ++configure_coredump() { ++ # No point in configuring coredump without gdb ++ which gdb >/dev/null || return 0 ++ ulimit -c unlimited ++ sudo sysctl -w kernel.core_pattern=/tmp/dpdk-core.%e.%p ++} ++ ++catch_coredump() { ++ ls /tmp/dpdk-core.*.* 2>/dev/null || return 0 ++ for core in /tmp/dpdk-core.*.*; do ++ binary=$(sudo readelf -n $core |grep $(pwd)/build/ 2>/dev/null |head -n1) ++ [ -x $binary ] || binary= ++ sudo gdb $binary -c $core \ ++ -ex 'info threads' \ ++ -ex 'thread apply all bt full' \ ++ -ex 'quit' ++ done |tee -a build/gdb.log ++ return 1 ++} ++ ++if [ "$AARCH64" = "true" ]; then + # convert the arch specifier + OPTS="$OPTS --cross-file config/arm/arm64_armv8_linux_gcc" + fi + +-if [ "$BUILD_DOCS" = "1" ]; then ++if [ "$BUILD_DOCS" = "true" ]; then + OPTS="$OPTS -Denable_docs=true" + fi + +-if [ "$BUILD_32BIT" = "1" ]; then ++if [ "$BUILD_32BIT" = "true" ]; then + OPTS="$OPTS -Dc_args=-m32 -Dc_link_args=-m32" + export PKG_CONFIG_LIBDIR="/usr/lib32/pkgconfig" + fi +@@ -48,16 +73,21 @@ else + OPTS="$OPTS -Dexamples=all" + fi + ++OPTS="$OPTS -Dmachine=default" + OPTS="$OPTS --default-library=$DEF_LIB" + OPTS="$OPTS --buildtype=debugoptimized" + meson build --werror $OPTS + ninja -C build + +-if [ "$AARCH64" != "1" ]; then +- devtools/test-null.sh ++if [ "$AARCH64" != "true" ]; then ++ failed= ++ configure_coredump ++ devtools/test-null.sh || failed="true" ++ catch_coredump ++ [ "$failed" != "true" ] + fi + +-if [ "$ABI_CHECKS" = "1" ]; then ++if [ "$ABI_CHECKS" = "true" ]; then + LIBABIGAIL_VERSION=${LIBABIGAIL_VERSION:-libabigail-1.6} + + if [ "$(cat libabigail/VERSION 2>/dev/null)" != "$LIBABIGAIL_VERSION" ]; then +@@ -83,10 +113,13 @@ if [ "$ABI_CHECKS" = "1" ]; then + if [ ! 
-d reference ]; then + refsrcdir=$(readlink -f $(pwd)/../dpdk-$REF_GIT_TAG) + git clone --single-branch -b $REF_GIT_TAG $REF_GIT_REPO $refsrcdir +- meson --werror $OPTS $refsrcdir $refsrcdir/build ++ meson $OPTS -Dexamples= $refsrcdir $refsrcdir/build + ninja -C $refsrcdir/build + DESTDIR=$(pwd)/reference ninja -C $refsrcdir/build install + devtools/gen-abi.sh reference ++ find reference/usr/local -name '*.a' -delete ++ rm -rf reference/usr/local/bin ++ rm -rf reference/usr/local/share + echo $REF_GIT_TAG > reference/VERSION + fi + +@@ -95,6 +128,10 @@ if [ "$ABI_CHECKS" = "1" ]; then + devtools/check-abi.sh reference install ${ABI_CHECKS_WARN_ONLY:-} + fi + +-if [ "$RUN_TESTS" = "1" ]; then +- sudo meson test -C build --suite fast-tests -t 3 ++if [ "$RUN_TESTS" = "true" ]; then ++ failed= ++ configure_coredump ++ sudo meson test -C build --suite fast-tests -t 3 || failed="true" ++ catch_coredump ++ [ "$failed" != "true" ] + fi +diff --git a/dpdk/.github/workflows/build.yml b/dpdk/.github/workflows/build.yml +new file mode 100644 +index 0000000000..fa79f05955 +--- /dev/null ++++ b/dpdk/.github/workflows/build.yml +@@ -0,0 +1,129 @@ ++name: build ++ ++on: ++ push: ++ schedule: ++ - cron: '0 0 * * 1' ++ ++defaults: ++ run: ++ shell: bash --noprofile --norc -exo pipefail {0} ++ ++jobs: ++ build: ++ name: ${{ join(matrix.config.*, '-') }} ++ runs-on: ${{ matrix.config.os }} ++ env: ++ AARCH64: ${{ matrix.config.cross == 'aarch64' }} ++ ABI_CHECKS: ${{ contains(matrix.config.checks, 'abi') }} ++ BUILD_32BIT: ${{ matrix.config.cross == 'i386' }} ++ BUILD_DOCS: ${{ contains(matrix.config.checks, 'doc') }} ++ CC: ccache ${{ matrix.config.compiler }} ++ DEF_LIB: ${{ matrix.config.library }} ++ LIBABIGAIL_VERSION: libabigail-1.8 ++ REF_GIT_TAG: v20.11 ++ RUN_TESTS: ${{ contains(matrix.config.checks, 'tests') }} ++ ++ strategy: ++ fail-fast: false ++ matrix: ++ config: ++ - os: ubuntu-18.04 ++ compiler: gcc ++ library: static ++ - os: ubuntu-18.04 ++ compiler: gcc ++ library: shared ++ checks: abi+doc+tests ++ - os: ubuntu-18.04 ++ compiler: clang ++ library: static ++ - os: ubuntu-18.04 ++ compiler: clang ++ library: shared ++ checks: doc+tests ++ - os: ubuntu-18.04 ++ compiler: gcc ++ library: static ++ cross: i386 ++ - os: ubuntu-18.04 ++ compiler: gcc ++ library: static ++ cross: aarch64 ++ - os: ubuntu-18.04 ++ compiler: gcc ++ library: shared ++ cross: aarch64 ++ ++ steps: ++ - name: Checkout sources ++ uses: actions/checkout@v2 ++ - name: Generate cache keys ++ id: get_ref_keys ++ run: | ++ echo -n '::set-output name=ccache::' ++ echo 'ccache-${{ matrix.config.os }}-${{ matrix.config.compiler }}-${{ matrix.config.cross }}-'$(date -u +%Y-w%W) ++ echo -n '::set-output name=libabigail::' ++ echo 'libabigail-${{ matrix.config.os }}' ++ echo -n '::set-output name=abi::' ++ echo 'abi-${{ matrix.config.os }}-${{ matrix.config.compiler }}-${{ matrix.config.cross }}-${{ env.LIBABIGAIL_VERSION }}-${{ env.REF_GIT_TAG }}' ++ - name: Retrieve ccache cache ++ uses: actions/cache@v2 ++ with: ++ path: ~/.ccache ++ key: ${{ steps.get_ref_keys.outputs.ccache }}-${{ github.ref }} ++ restore-keys: | ++ ${{ steps.get_ref_keys.outputs.ccache }}-refs/heads/main ++ - name: Retrieve libabigail cache ++ id: libabigail-cache ++ uses: actions/cache@v2 ++ if: env.ABI_CHECKS == 'true' ++ with: ++ path: libabigail ++ key: ${{ steps.get_ref_keys.outputs.libabigail }} ++ - name: Retrieve ABI reference cache ++ uses: actions/cache@v2 ++ if: env.ABI_CHECKS == 'true' ++ with: ++ path: reference ++ key: ${{ 
steps.get_ref_keys.outputs.abi }} ++ - name: Update APT cache ++ run: sudo apt update || true ++ - name: Install packages ++ run: sudo apt install -y ccache libnuma-dev python3-setuptools ++ python3-wheel python3-pip ninja-build libbsd-dev libpcap-dev ++ libibverbs-dev libcrypto++-dev libfdt-dev libjansson-dev ++ - name: Install libabigail build dependencies if no cache is available ++ if: env.ABI_CHECKS == 'true' && steps.libabigail-cache.outputs.cache-hit != 'true' ++ run: sudo apt install -y autoconf automake libtool pkg-config libxml2-dev ++ libdw-dev ++ - name: Install i386 cross compiling packages ++ if: env.BUILD_32BIT == 'true' ++ run: sudo apt install -y gcc-multilib ++ - name: Install aarch64 cross compiling packages ++ if: env.AARCH64 == 'true' ++ run: sudo apt install -y crossbuild-essential-arm64 ++ - name: Install test tools packages ++ if: env.AARCH64 != 'true' || env.RUN_TESTS == 'true' ++ run: sudo apt install -y gdb ++ - name: Install doc generation packages ++ if: env.BUILD_DOCS == 'true' ++ run: sudo apt install -y doxygen graphviz python3-sphinx ++ python3-sphinx-rtd-theme ++ - name: Run setup ++ run: | ++ .ci/linux-setup.sh ++ # Workaround on $HOME permissions as EAL checks them for plugin loading ++ chmod o-w $HOME ++ - name: Build and test ++ run: .ci/linux-build.sh ++ - name: Upload logs on failure ++ if: failure() ++ uses: actions/upload-artifact@v2 ++ with: ++ name: meson-logs-${{ join(matrix.config.*, '-') }} ++ path: | ++ build/meson-logs/testlog.txt ++ build/.ninja_log ++ build/meson-logs/meson-log.txt ++ build/gdb.log +diff --git a/dpdk/.travis.yml b/dpdk/.travis.yml +index 5e12db23b5..5aa7ad49f1 100644 +--- a/dpdk/.travis.yml ++++ b/dpdk/.travis.yml +@@ -2,6 +2,9 @@ + language: c + cache: + ccache: true ++ directories: ++ - libabigail ++ - reference + + dist: bionic + +@@ -18,6 +21,9 @@ _aarch64_packages: &aarch64_packages + - *required_packages + - [gcc-aarch64-linux-gnu, libc6-dev-arm64-cross, pkg-config-aarch64-linux-gnu] + ++_libabigail_build_packages: &libabigail_build_packages ++ - [autoconf, automake, libtool, pkg-config, libxml2-dev, libdw-dev] ++ + _build_32b_packages: &build_32b_packages + - *required_packages + - [gcc-multilib] +@@ -28,16 +34,21 @@ _doc_packages: &doc_packages + before_install: ./.ci/${TRAVIS_OS_NAME}-setup.sh + script: ./.ci/${TRAVIS_OS_NAME}-build.sh + ++env: ++ global: ++ - LIBABIGAIL_VERSION=libabigail-1.8 ++ - REF_GIT_TAG=v20.11 ++ + jobs: + include: + # x86_64 gcc jobs + - env: DEF_LIB="static" + arch: amd64 + compiler: gcc +- - env: DEF_LIB="shared" RUN_TESTS=1 ++ - env: DEF_LIB="shared" RUN_TESTS=true + arch: amd64 + compiler: gcc +- - env: DEF_LIB="shared" BUILD_DOCS=1 ++ - env: DEF_LIB="shared" BUILD_DOCS=true + arch: amd64 + compiler: gcc + addons: +@@ -45,14 +56,22 @@ jobs: + packages: + - *required_packages + - *doc_packages ++ - env: DEF_LIB="shared" ABI_CHECKS=true ++ arch: amd64 ++ compiler: gcc ++ addons: ++ apt: ++ packages: ++ - *required_packages ++ - *libabigail_build_packages + # x86_64 clang jobs + - env: DEF_LIB="static" + arch: amd64 + compiler: clang +- - env: DEF_LIB="shared" RUN_TESTS=1 ++ - env: DEF_LIB="shared" RUN_TESTS=true + arch: amd64 + compiler: clang +- - env: DEF_LIB="shared" BUILD_DOCS=1 ++ - env: DEF_LIB="shared" BUILD_DOCS=true + arch: amd64 + compiler: clang + addons: +@@ -61,7 +80,7 @@ jobs: + - *required_packages + - *doc_packages + # x86_64 cross-compiling 32-bits jobs +- - env: DEF_LIB="static" BUILD_32BIT=1 ++ - env: DEF_LIB="static" BUILD_32BIT=true + arch: amd64 + compiler: gcc + 
addons: +@@ -69,14 +88,14 @@ jobs: + packages: + - *build_32b_packages + # x86_64 cross-compiling aarch64 jobs +- - env: DEF_LIB="static" AARCH64=1 ++ - env: DEF_LIB="static" AARCH64=true + arch: amd64 + compiler: gcc + addons: + apt: + packages: + - *aarch64_packages +- - env: DEF_LIB="shared" AARCH64=1 ++ - env: DEF_LIB="shared" AARCH64=true + arch: amd64 + compiler: gcc + addons: +@@ -87,16 +106,16 @@ jobs: + - env: DEF_LIB="static" + arch: arm64 + compiler: gcc +- - env: DEF_LIB="shared" RUN_TESTS=1 ++ - env: DEF_LIB="shared" RUN_TESTS=true + arch: arm64 + compiler: gcc +- - env: DEF_LIB="shared" RUN_TESTS=1 ++ - env: DEF_LIB="shared" RUN_TESTS=true + dist: focal + arch: arm64-graviton2 + virt: vm + group: edge + compiler: gcc +- - env: DEF_LIB="shared" BUILD_DOCS=1 ++ - env: DEF_LIB="shared" BUILD_DOCS=true + arch: arm64 + compiler: gcc + addons: +@@ -104,14 +123,22 @@ jobs: + packages: + - *required_packages + - *doc_packages ++ - env: DEF_LIB="shared" ABI_CHECKS=true ++ arch: arm64 ++ compiler: gcc ++ addons: ++ apt: ++ packages: ++ - *required_packages ++ - *libabigail_build_packages + # aarch64 clang jobs + - env: DEF_LIB="static" + arch: arm64 + compiler: clang +- - env: DEF_LIB="shared" RUN_TESTS=1 ++ - env: DEF_LIB="shared" RUN_TESTS=true + arch: arm64 + compiler: clang +- - env: DEF_LIB="shared" RUN_TESTS=1 ++ - env: DEF_LIB="shared" RUN_TESTS=true + dist: focal + arch: arm64-graviton2 + virt: vm +diff --git a/dpdk/MAINTAINERS b/dpdk/MAINTAINERS +index eafe9f8c46..ee763f8fb2 100644 +--- a/dpdk/MAINTAINERS ++++ b/dpdk/MAINTAINERS +@@ -64,6 +64,8 @@ T: git://dpdk.org/next/dpdk-next-eventdev + Stable Branches + M: Luca Boccassi + M: Kevin Traynor ++M: Christian Ehrhardt ++M: Xueming Li + T: git://dpdk.org/dpdk-stable + + Security Issues +@@ -103,12 +105,14 @@ F: buildtools/gen-pmdinfo-cfile.sh + F: buildtools/list-dir-globs.py + F: buildtools/pkg-config/ + F: buildtools/symlink-drivers-solibs.sh ++F: buildtools/symlink-drivers-solibs.py + F: devtools/test-meson-builds.sh + + Public CI + M: Aaron Conole + M: Michael Santana + F: .travis.yml ++F: .github/workflows/build.yml + F: .ci/ + + ABI Policy & Versioning +diff --git a/dpdk/VERSION b/dpdk/VERSION +index 2dbbe00e67..677565909c 100644 +--- a/dpdk/VERSION ++++ b/dpdk/VERSION +@@ -1 +1 @@ +-20.11.1 ++20.11.6 +diff --git a/dpdk/app/meson.build b/dpdk/app/meson.build +index 87fc195dbf..50a53dbde8 100644 +--- a/dpdk/app/meson.build ++++ b/dpdk/app/meson.build +@@ -21,9 +21,6 @@ apps = [ + 'test-regex', + 'test-sad'] + +-# for BSD only +-lib_execinfo = cc.find_library('execinfo', required: false) +- + default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API'] + default_ldflags = [] + if get_option('default_library') == 'static' and not is_windows +@@ -53,7 +50,6 @@ foreach app:apps + dep_objs += get_variable(get_option('default_library') + + '_rte_' + d) + endforeach +- dep_objs += lib_execinfo + + link_libs = [] + if get_option('default_library') == 'static' +diff --git a/dpdk/app/pdump/main.c b/dpdk/app/pdump/main.c +index b34bf33531..36b14fa59f 100644 +--- a/dpdk/app/pdump/main.c ++++ b/dpdk/app/pdump/main.c +@@ -906,11 +906,21 @@ dump_packets_core(void *arg) + return 0; + } + ++static unsigned int ++get_next_core(unsigned int lcore) ++{ ++ lcore = rte_get_next_lcore(lcore, 1, 0); ++ if (lcore == RTE_MAX_LCORE) ++ rte_exit(EXIT_FAILURE, ++ "Max core limit %u reached for packet capture", lcore); ++ return lcore; ++} ++ + static inline void + dump_packets(void) + { + int i; +- uint32_t lcore_id = 0; ++ unsigned int lcore_id = 0; + 
+ if (!multiple_core_capture) { + printf(" core (%u), capture for (%d) tuples\n", +@@ -936,12 +946,12 @@ dump_packets(void) + return; + } + +- lcore_id = rte_get_next_lcore(lcore_id, 1, 0); ++ lcore_id = get_next_core(lcore_id); + + for (i = 0; i < num_tuples; i++) { + rte_eal_remote_launch(dump_packets_core, + &pdump_t[i], lcore_id); +- lcore_id = rte_get_next_lcore(lcore_id, 1, 0); ++ lcore_id = get_next_core(lcore_id); + + if (rte_eal_wait_lcore(lcore_id) < 0) + rte_exit(EXIT_FAILURE, "failed to wait\n"); +diff --git a/dpdk/app/proc-info/main.c b/dpdk/app/proc-info/main.c +index b9587f7ded..40bfbefad8 100644 +--- a/dpdk/app/proc-info/main.c ++++ b/dpdk/app/proc-info/main.c +@@ -611,7 +611,7 @@ metrics_display(int port_id) + + names = rte_malloc(NULL, sizeof(struct rte_metric_name) * len, 0); + if (names == NULL) { +- printf("Cannot allocate memory for metrcis names\n"); ++ printf("Cannot allocate memory for metrics names\n"); + rte_free(metrics); + return; + } +@@ -1089,7 +1089,7 @@ show_tm(void) + caplevel.n_nodes_max, + caplevel.n_nodes_nonleaf_max, + caplevel.n_nodes_leaf_max); +- printf("\t -- indetical: non leaf %u leaf %u\n", ++ printf("\t -- identical: non leaf %u leaf %u\n", + caplevel.non_leaf_nodes_identical, + caplevel.leaf_nodes_identical); + +@@ -1243,7 +1243,7 @@ show_ring(char *name) + printf(" - Name (%s) on socket (%d)\n" + " - flags:\n" + "\t -- Single Producer Enqueue (%u)\n" +- "\t -- Single Consmer Dequeue (%u)\n", ++ "\t -- Single Consumer Dequeue (%u)\n", + ptr->name, + ptr->memzone->socket_id, + ptr->flags & RING_F_SP_ENQ, +@@ -1403,10 +1403,10 @@ main(int argc, char **argv) + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + +- /* If no port mask was specified, then show non-owned ports */ ++ /* If no port mask was specified, then show all non-owned ports */ + if (enabled_port_mask == 0) { + RTE_ETH_FOREACH_DEV(i) +- enabled_port_mask = 1ul << i; ++ enabled_port_mask |= 1ul << i; + } + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { +diff --git a/dpdk/app/test-acl/main.c b/dpdk/app/test-acl/main.c +index 2cb2fe2579..d572c69385 100644 +--- a/dpdk/app/test-acl/main.c ++++ b/dpdk/app/test-acl/main.c +@@ -384,8 +384,8 @@ parse_cb_ipv4_trace(char *str, struct ipv4_5tuple *v) + } + + /* +- * Parses IPV6 address, exepcts the following format: +- * XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX (where X - is a hexedecimal digit). ++ * Parse IPv6 address, expects the following format: ++ * XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX (where X is a hexadecimal digit). 
+ */ + static int + parse_ipv6_addr(const char *in, const char **end, uint32_t v[IPV6_ADDR_U32], +@@ -961,7 +961,7 @@ print_usage(const char *prgname) + "should be either 1 or multiple of %zu, " + "but not greater then %u]\n" + "[--" OPT_MAX_SIZE +- "= " ++ "= " + "leave 0 for default behaviour]\n" + "[--" OPT_ITER_NUM "=]\n" + "[--" OPT_VERBOSE "=]\n" +diff --git a/dpdk/app/test-bbdev/test_bbdev_perf.c b/dpdk/app/test-bbdev/test_bbdev_perf.c +index 59b37ede4a..36589f3c3e 100644 +--- a/dpdk/app/test-bbdev/test_bbdev_perf.c ++++ b/dpdk/app/test-bbdev/test_bbdev_perf.c +@@ -372,14 +372,14 @@ check_dev_cap(const struct rte_bbdev_info *dev_info) + if (nb_harq_inputs > cap->num_buffers_hard_out) { + printf( + "Too many HARQ inputs defined: %u, max: %u\n", +- nb_hard_outputs, ++ nb_harq_inputs, + cap->num_buffers_hard_out); + return TEST_FAILED; + } + if (nb_harq_outputs > cap->num_buffers_hard_out) { + printf( + "Too many HARQ outputs defined: %u, max: %u\n", +- nb_hard_outputs, ++ nb_harq_outputs, + cap->num_buffers_hard_out); + return TEST_FAILED; + } +@@ -957,6 +957,9 @@ init_op_data_objs(struct rte_bbdev_op_data *bufs, + if ((op_type == DATA_INPUT) && large_input) { + /* Allocate a fake overused mbuf */ + data = rte_malloc(NULL, seg->length, 0); ++ TEST_ASSERT_NOT_NULL(data, ++ "rte malloc failed with %u bytes", ++ seg->length); + memcpy(data, seg->addr, seg->length); + m_head->buf_addr = data; + m_head->buf_iova = rte_malloc_virt2iova(data); +diff --git a/dpdk/app/test-compress-perf/comp_perf_options_parse.c b/dpdk/app/test-compress-perf/comp_perf_options_parse.c +index 04a8d2fbee..019eddb7bd 100644 +--- a/dpdk/app/test-compress-perf/comp_perf_options_parse.c ++++ b/dpdk/app/test-compress-perf/comp_perf_options_parse.c +@@ -620,7 +620,7 @@ comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv) + switch (opt) { + case 'h': + usage(argv[0]); +- rte_exit(EXIT_SUCCESS, "Displayed help\n"); ++ exit(EXIT_SUCCESS); + break; + /* long options */ + case 0: +diff --git a/dpdk/app/test-compress-perf/comp_perf_test_cyclecount.c b/dpdk/app/test-compress-perf/comp_perf_test_cyclecount.c +index 55559a7d5a..4a6e7aa094 100644 +--- a/dpdk/app/test-compress-perf/comp_perf_test_cyclecount.c ++++ b/dpdk/app/test-compress-perf/comp_perf_test_cyclecount.c +@@ -76,7 +76,6 @@ cperf_cyclecount_op_setup(struct rte_comp_op **ops, + + for (iter = 0; iter < num_iter; iter++) { + uint32_t remaining_ops = mem->total_bufs; +- uint32_t total_deq_ops = 0; + uint32_t total_enq_ops = 0; + uint16_t num_enq = 0; + uint16_t num_deq = 0; +@@ -136,7 +135,6 @@ cperf_cyclecount_op_setup(struct rte_comp_op **ops, + /* instead of the real dequeue operation */ + num_deq = num_ops; + +- total_deq_ops += num_deq; + rte_mempool_put_bulk(mem->op_pool, + (void **)ops, num_deq); + } +@@ -177,16 +175,17 @@ main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type) + + /* one array for both enqueue and dequeue */ + ops = rte_zmalloc_socket(NULL, +- 2 * mem->total_bufs * sizeof(struct rte_comp_op *), ++ (test_data->burst_sz + mem->total_bufs) * ++ sizeof(struct rte_comp_op *), + 0, rte_socket_id()); + + if (ops == NULL) { + RTE_LOG(ERR, USER1, +- "Can't allocate memory for ops strucures\n"); ++ "Can't allocate memory for ops structures\n"); + return -1; + } + +- deq_ops = &ops[mem->total_bufs]; ++ deq_ops = &ops[test_data->burst_sz]; + + if (type == RTE_COMP_COMPRESS) { + xform = (struct rte_comp_xform) { +@@ -275,7 +274,7 @@ main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type) + 
/* Allocate compression operations */ + if (ops_needed && rte_mempool_get_bulk( + mem->op_pool, +- (void **)ops, ++ (void **)&ops[ops_unused], + ops_needed) != 0) { + RTE_LOG(ERR, USER1, + "Could not allocate enough operations\n"); +diff --git a/dpdk/app/test-compress-perf/comp_perf_test_throughput.c b/dpdk/app/test-compress-perf/comp_perf_test_throughput.c +index 13922b658c..7574e33ac7 100644 +--- a/dpdk/app/test-compress-perf/comp_perf_test_throughput.c ++++ b/dpdk/app/test-compress-perf/comp_perf_test_throughput.c +@@ -72,7 +72,7 @@ main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type) + + if (ops == NULL) { + RTE_LOG(ERR, USER1, +- "Can't allocate memory for ops strucures\n"); ++ "Can't allocate memory for ops structures\n"); + return -1; + } + +diff --git a/dpdk/app/test-compress-perf/comp_perf_test_verify.c b/dpdk/app/test-compress-perf/comp_perf_test_verify.c +index 5e13257b79..d0dfa41b4b 100644 +--- a/dpdk/app/test-compress-perf/comp_perf_test_verify.c ++++ b/dpdk/app/test-compress-perf/comp_perf_test_verify.c +@@ -75,7 +75,7 @@ main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type) + + if (ops == NULL) { + RTE_LOG(ERR, USER1, +- "Can't allocate memory for ops strucures\n"); ++ "Can't allocate memory for ops structures\n"); + return -1; + } + +diff --git a/dpdk/app/test-compress-perf/main.c b/dpdk/app/test-compress-perf/main.c +index cc9951a9b1..ce9e80bedc 100644 +--- a/dpdk/app/test-compress-perf/main.c ++++ b/dpdk/app/test-compress-perf/main.c +@@ -67,7 +67,7 @@ comp_perf_check_capabilities(struct comp_test_data *test_data, uint8_t cdev_id) + + uint64_t comp_flags = cap->comp_feature_flags; + +- /* Huffman enconding */ ++ /* Huffman encoding */ + if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED && + (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) { + RTE_LOG(ERR, USER1, +@@ -168,7 +168,7 @@ comp_perf_initialize_compressdev(struct comp_test_data *test_data, + cdev_id = enabled_cdevs[i]; + + struct rte_compressdev_info cdev_info; +- uint8_t socket_id = rte_compressdev_socket_id(cdev_id); ++ int socket_id = rte_compressdev_socket_id(cdev_id); + + rte_compressdev_info_get(cdev_id, &cdev_info); + if (cdev_info.max_nb_queue_pairs && +@@ -194,6 +194,7 @@ comp_perf_initialize_compressdev(struct comp_test_data *test_data, + .max_nb_priv_xforms = NUM_MAX_XFORMS, + .max_nb_streams = 0 + }; ++ test_data->nb_qps = config.nb_queue_pairs; + + if (rte_compressdev_configure(cdev_id, &config) < 0) { + RTE_LOG(ERR, USER1, "Device configuration failed\n"); +diff --git a/dpdk/app/test-crypto-perf/cperf_options_parsing.c b/dpdk/app/test-crypto-perf/cperf_options_parsing.c +index 0466f7baf8..e84f56cfaa 100644 +--- a/dpdk/app/test-crypto-perf/cperf_options_parsing.c ++++ b/dpdk/app/test-crypto-perf/cperf_options_parsing.c +@@ -506,6 +506,12 @@ parse_test_name(struct cperf_options *opts, + { + char *test_name = (char *) rte_zmalloc(NULL, + sizeof(char) * (strlen(arg) + 3), 0); ++ if (test_name == NULL) { ++ RTE_LOG(ERR, USER1, "Failed to rte zmalloc with size: %zu\n", ++ strlen(arg) + 3); ++ return -1; ++ } ++ + snprintf(test_name, strlen(arg) + 3, "[%s]", arg); + opts->test_name = test_name; + +@@ -983,7 +989,7 @@ cperf_options_parse(struct cperf_options *options, int argc, char **argv) + switch (opt) { + case 'h': + usage(argv[0]); +- rte_exit(EXIT_SUCCESS, "Displayed help\n"); ++ exit(EXIT_SUCCESS); + break; + /* long options */ + case 0: +diff --git a/dpdk/app/test-crypto-perf/cperf_test_common.c b/dpdk/app/test-crypto-perf/cperf_test_common.c +index 
058e0ba564..12925c7f22 100644 +--- a/dpdk/app/test-crypto-perf/cperf_test_common.c ++++ b/dpdk/app/test-crypto-perf/cperf_test_common.c +@@ -194,7 +194,7 @@ cperf_alloc_common_memory(const struct cperf_options *options, + (mbuf_size * segments_nb); + params.dst_buf_offset = *dst_buf_offset; + /* Destination buffer will be one segment only */ +- obj_size += max_size; ++ obj_size += max_size + sizeof(struct rte_mbuf); + } + + *pool = rte_mempool_create_empty(pool_name, +diff --git a/dpdk/app/test-crypto-perf/cperf_test_pmd_cyclecount.c b/dpdk/app/test-crypto-perf/cperf_test_pmd_cyclecount.c +index 844659aeca..72f710edfb 100644 +--- a/dpdk/app/test-crypto-perf/cperf_test_pmd_cyclecount.c ++++ b/dpdk/app/test-crypto-perf/cperf_test_pmd_cyclecount.c +@@ -334,7 +334,7 @@ pmd_cyclecount_bench_burst_sz( + * queue, so we never get any failed enqs unless the driver won't accept + * the exact number of descriptors we requested, or the driver won't + * wrap around the end of the TX ring. However, since we're only +- * dequeueing once we've filled up the queue, we have to benchmark it ++ * dequeuing once we've filled up the queue, we have to benchmark it + * piecemeal and then average out the results. + */ + cur_op = 0; +diff --git a/dpdk/app/test-crypto-perf/cperf_test_vectors.c b/dpdk/app/test-crypto-perf/cperf_test_vectors.c +index 0af01ff911..167bf87bc4 100644 +--- a/dpdk/app/test-crypto-perf/cperf_test_vectors.c ++++ b/dpdk/app/test-crypto-perf/cperf_test_vectors.c +@@ -554,6 +554,10 @@ cperf_test_vector_get_dummy(struct cperf_options *options) + rte_free(t_vec); + return NULL; + } ++ ++ if (options->aead_aad_sz > sizeof(aad)) ++ options->aead_aad_sz = sizeof(aad); ++ + memcpy(t_vec->aad.data, aad, options->aead_aad_sz); + t_vec->aad.phys_addr = rte_malloc_virt2iova(t_vec->aad.data); + t_vec->aad.length = options->aead_aad_sz; +diff --git a/dpdk/app/test-eventdev/evt_options.c b/dpdk/app/test-eventdev/evt_options.c +index 0d04ea9f8d..d91178d37f 100644 +--- a/dpdk/app/test-eventdev/evt_options.c ++++ b/dpdk/app/test-eventdev/evt_options.c +@@ -218,7 +218,7 @@ evt_parse_plcores(struct evt_options *opt, const char *corelist) + { + int ret; + +- ret = parse_lcores_list(opt->plcores, corelist); ++ ret = parse_lcores_list(opt->plcores, RTE_MAX_LCORE, corelist); + if (ret == -E2BIG) + evt_err("duplicate lcores in plcores"); + +@@ -230,7 +230,7 @@ evt_parse_work_lcores(struct evt_options *opt, const char *corelist) + { + int ret; + +- ret = parse_lcores_list(opt->wlcores, corelist); ++ ret = parse_lcores_list(opt->wlcores, RTE_MAX_LCORE, corelist); + if (ret == -E2BIG) + evt_err("duplicate lcores in wlcores"); + +@@ -278,7 +278,7 @@ usage(char *program) + "\t--deq_tmo_nsec : global dequeue timeout\n" + "\t--prod_type_ethdev : use ethernet device as producer.\n" + "\t--prod_type_timerdev : use event timer device as producer.\n" +- "\t expity_nsec would be the timeout\n" ++ "\t expiry_nsec would be the timeout\n" + "\t in ns.\n" + "\t--prod_type_timerdev_burst : use timer device as producer\n" + "\t burst mode.\n" +diff --git a/dpdk/app/test-eventdev/parser.c b/dpdk/app/test-eventdev/parser.c +index 24f1855e9a..8818c37ff8 100644 +--- a/dpdk/app/test-eventdev/parser.c ++++ b/dpdk/app/test-eventdev/parser.c +@@ -310,7 +310,7 @@ parse_hex_string(char *src, uint8_t *dst, uint32_t *size) + } + + int +-parse_lcores_list(bool lcores[], const char *corelist) ++parse_lcores_list(bool lcores[], int lcores_num, const char *corelist) + { + int i, idx = 0; + int min, max; +@@ -332,6 +332,8 @@ parse_lcores_list(bool 
lcores[], const char *corelist) + if (*corelist == '\0') + return -1; + idx = strtoul(corelist, &end, 10); ++ if (idx < 0 || idx > lcores_num) ++ return -1; + + if (end == NULL) + return -1; +diff --git a/dpdk/app/test-eventdev/parser.h b/dpdk/app/test-eventdev/parser.h +index 673ff22d78..696b40a3e2 100644 +--- a/dpdk/app/test-eventdev/parser.h ++++ b/dpdk/app/test-eventdev/parser.h +@@ -46,5 +46,5 @@ int parse_hex_string(char *src, uint8_t *dst, uint32_t *size); + + int parse_tokenize_string(char *string, char *tokens[], uint32_t *n_tokens); + +-int parse_lcores_list(bool lcores[], const char *corelist); ++int parse_lcores_list(bool lcores[], int lcores_num, const char *corelist); + #endif +diff --git a/dpdk/app/test-eventdev/test_order_common.c b/dpdk/app/test-eventdev/test_order_common.c +index 04456d56db..f894c3d5bf 100644 +--- a/dpdk/app/test-eventdev/test_order_common.c ++++ b/dpdk/app/test-eventdev/test_order_common.c +@@ -253,7 +253,7 @@ void + order_opt_dump(struct evt_options *opt) + { + evt_dump_producer_lcores(opt); +- evt_dump("nb_wrker_lcores", "%d", evt_nr_active_lcores(opt->wlcores)); ++ evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores)); + evt_dump_worker_lcores(opt); + evt_dump("nb_evdev_ports", "%d", order_nb_event_ports(opt)); + } +diff --git a/dpdk/app/test-eventdev/test_perf_common.c b/dpdk/app/test-eventdev/test_perf_common.c +index 955edb7526..fb3acc79ba 100644 +--- a/dpdk/app/test-eventdev/test_perf_common.c ++++ b/dpdk/app/test-eventdev/test_perf_common.c +@@ -2,6 +2,8 @@ + * Copyright(c) 2017 Cavium, Inc + */ + ++#include ++ + #include "test_perf_common.h" + + int +@@ -17,7 +19,7 @@ perf_test_result(struct evt_test *test, struct evt_options *opt) + total += t->worker[i].processed_pkts; + for (i = 0; i < t->nb_workers; i++) + printf("Worker %d packets: "CLGRN"%"PRIx64" "CLNRM"percentage:" +- CLGRN" %3.2f\n"CLNRM, i, ++ CLGRN" %3.2f"CLNRM"\n", i, + t->worker[i].processed_pkts, + (((double)t->worker[i].processed_pkts)/total) + * 100); +@@ -95,11 +97,13 @@ perf_event_timer_producer(void *arg) + uint64_t timeout_ticks = opt->expiry_nsec / opt->timer_tick_nsec; + + memset(&tim, 0, sizeof(struct rte_event_timer)); +- timeout_ticks = opt->optm_timer_tick_nsec ? +- (timeout_ticks * opt->timer_tick_nsec) +- / opt->optm_timer_tick_nsec : timeout_ticks; ++ timeout_ticks = ++ opt->optm_timer_tick_nsec ++ ? ceil((double)(timeout_ticks * opt->timer_tick_nsec) / ++ opt->optm_timer_tick_nsec) ++ : timeout_ticks; + timeout_ticks += timeout_ticks ? 0 : 1; +- tim.ev.event_type = RTE_EVENT_TYPE_TIMER; ++ tim.ev.event_type = RTE_EVENT_TYPE_TIMER; + tim.ev.op = RTE_EVENT_OP_NEW; + tim.ev.sched_type = t->opt->sched_type_list[0]; + tim.ev.queue_id = p->queue_id; +@@ -159,11 +163,13 @@ perf_event_timer_producer_burst(void *arg) + uint64_t timeout_ticks = opt->expiry_nsec / opt->timer_tick_nsec; + + memset(&tim, 0, sizeof(struct rte_event_timer)); +- timeout_ticks = opt->optm_timer_tick_nsec ? +- (timeout_ticks * opt->timer_tick_nsec) +- / opt->optm_timer_tick_nsec : timeout_ticks; ++ timeout_ticks = ++ opt->optm_timer_tick_nsec ++ ? ceil((double)(timeout_ticks * opt->timer_tick_nsec) / ++ opt->optm_timer_tick_nsec) ++ : timeout_ticks; + timeout_ticks += timeout_ticks ? 
0 : 1; +- tim.ev.event_type = RTE_EVENT_TYPE_TIMER; ++ tim.ev.event_type = RTE_EVENT_TYPE_TIMER; + tim.ev.op = RTE_EVENT_OP_NEW; + tim.ev.sched_type = t->opt->sched_type_list[0]; + tim.ev.queue_id = p->queue_id; +diff --git a/dpdk/app/test-fib/main.c b/dpdk/app/test-fib/main.c +index b0a97b0d7e..17569ece07 100644 +--- a/dpdk/app/test-fib/main.c ++++ b/dpdk/app/test-fib/main.c +@@ -624,7 +624,7 @@ print_usage(void) + "(if -f is not specified)>]\n" + "[-r ]\n" +- "[-c ]\n" ++ "[-c ]\n" + "[-6 ]\n" + "[-s ]\n" + "[-a ]\n" + "[-w ]\n" + "[-u ]\n" +- "[-v ]\n", +@@ -711,6 +711,10 @@ parse_opts(int argc, char **argv) + print_usage(); + rte_exit(-EINVAL, "Invalid option -n\n"); + } ++ ++ if (config.nb_routes < config.print_fract) ++ config.print_fract = config.nb_routes; ++ + break; + case 'd': + distrib_string = optarg; +@@ -1240,6 +1244,10 @@ main(int argc, char **argv) + config.nb_routes = 0; + while (fgets(line, sizeof(line), fr) != NULL) + config.nb_routes++; ++ ++ if (config.nb_routes < config.print_fract) ++ config.print_fract = config.nb_routes; ++ + rewind(fr); + } + +diff --git a/dpdk/app/test-flow-perf/config.h b/dpdk/app/test-flow-perf/config.h +index 8f42bc589c..45f073f0d8 100644 +--- a/dpdk/app/test-flow-perf/config.h ++++ b/dpdk/app/test-flow-perf/config.h +@@ -31,7 +31,7 @@ + #define PORT_ID_DST 1 + #define TEID_VALUE 1 + +-/* Flow items/acctions max size */ ++/* Flow items/actions max size */ + #define MAX_ITEMS_NUM 32 + #define MAX_ACTIONS_NUM 32 + #define MAX_ATTRS_NUM 16 +diff --git a/dpdk/app/test-flow-perf/main.c b/dpdk/app/test-flow-perf/main.c +index e2fc5b7f65..899f6c93b6 100644 +--- a/dpdk/app/test-flow-perf/main.c ++++ b/dpdk/app/test-flow-perf/main.c +@@ -16,6 +16,7 @@ + * gives packet per second measurement. + */ + ++#include + #include + #include + #include +@@ -591,6 +592,7 @@ args_parse(int argc, char **argv) + { "raw-decap", 1, 0, 0 }, + { "vxlan-encap", 0, 0, 0 }, + { "vxlan-decap", 0, 0, 0 }, ++ { 0, 0, 0, 0 }, + }; + + RTE_ETH_FOREACH_DEV(i) +@@ -606,7 +608,7 @@ args_parse(int argc, char **argv) + case 0: + if (strcmp(lgopts[opt_idx].name, "help") == 0) { + usage(argv[0]); +- rte_exit(EXIT_SUCCESS, "Displayed help\n"); ++ exit(EXIT_SUCCESS); + } + + if (strcmp(lgopts[opt_idx].name, "group") == 0) { +@@ -614,7 +616,7 @@ args_parse(int argc, char **argv) + if (n >= 0) + flow_group = n; + else +- rte_exit(EXIT_SUCCESS, ++ rte_exit(EXIT_FAILURE, + "flow group should be >= 0\n"); + printf("group %d / ", flow_group); + } +@@ -634,7 +636,7 @@ args_parse(int argc, char **argv) + if (n > 0) + hairpin_queues_num = n; + else +- rte_exit(EXIT_SUCCESS, ++ rte_exit(EXIT_FAILURE, + "Hairpin queues should be > 0\n"); + + flow_actions[actions_idx++] = +@@ -647,7 +649,7 @@ args_parse(int argc, char **argv) + if (n > 0) + hairpin_queues_num = n; + else +- rte_exit(EXIT_SUCCESS, ++ rte_exit(EXIT_FAILURE, + "Hairpin queues should be > 0\n"); + + flow_actions[actions_idx++] = +@@ -671,11 +673,9 @@ args_parse(int argc, char **argv) + break; + } + /* Reached last item with no match */ +- if (i == (RTE_DIM(flow_options) - 1)) { +- fprintf(stderr, "Invalid encap item: %s\n", token); +- usage(argv[0]); +- rte_exit(EXIT_SUCCESS, "Invalid encap item\n"); +- } ++ if (i == (RTE_DIM(flow_options) - 1)) ++ rte_exit(EXIT_FAILURE, ++ "Invalid encap item: %s\n", token); + } + token = strtok(NULL, ","); + } +@@ -693,15 +693,13 @@ args_parse(int argc, char **argv) + for (i = 0; i < RTE_DIM(flow_options); i++) { + if (strcmp(flow_options[i].str, token) == 0) { + printf("%s,", token); +- encap_data 
|= flow_options[i].mask; ++ decap_data |= flow_options[i].mask; + break; + } + /* Reached last item with no match */ +- if (i == (RTE_DIM(flow_options) - 1)) { +- fprintf(stderr, "Invalid decap item: %s\n", token); +- usage(argv[0]); +- rte_exit(EXIT_SUCCESS, "Invalid decap item\n"); +- } ++ if (i == (RTE_DIM(flow_options) - 1)) ++ rte_exit(EXIT_FAILURE, ++ "Invalid decap item %s\n", token); + } + token = strtok(NULL, ","); + } +@@ -714,9 +712,9 @@ args_parse(int argc, char **argv) + if (n >= DEFAULT_RULES_BATCH) + rules_batch = n; + else { +- printf("\n\nrules_batch should be >= %d\n", ++ rte_exit(EXIT_FAILURE, ++ "rules_batch should be >= %d\n", + DEFAULT_RULES_BATCH); +- rte_exit(EXIT_SUCCESS, " "); + } + } + if (strcmp(lgopts[opt_idx].name, +@@ -725,7 +723,8 @@ args_parse(int argc, char **argv) + if (n >= (int) rules_batch) + rules_count = n; + else { +- printf("\n\nrules_count should be >= %d\n", ++ rte_exit(EXIT_FAILURE, ++ "rules_count should be >= %d\n", + rules_batch); + } + } +@@ -752,9 +751,9 @@ args_parse(int argc, char **argv) + } + break; + default: +- fprintf(stderr, "Invalid option: %s\n", argv[optind]); + usage(argv[0]); +- rte_exit(EXIT_SUCCESS, "Invalid option\n"); ++ rte_exit(EXIT_FAILURE, "Invalid option: %s\n", ++ argv[optind - 1]); + break; + } + } +@@ -853,7 +852,7 @@ destroy_flows(int port_id, struct rte_flow **flow_list) + memset(&error, 0x33, sizeof(error)); + if (rte_flow_destroy(port_id, flow_list[i], &error)) { + print_flow_error(error); +- rte_exit(EXIT_FAILURE, "Error in deleting flow"); ++ rte_exit(EXIT_FAILURE, "Error in deleting flow\n"); + } + + if (i && !((i + 1) % rules_batch)) { +@@ -924,7 +923,7 @@ flows_handler(void) + flow_list = rte_zmalloc("flow_list", + (sizeof(struct rte_flow *) * rules_count) + 1, 0); + if (flow_list == NULL) +- rte_exit(EXIT_FAILURE, "No Memory available!"); ++ rte_exit(EXIT_FAILURE, "No Memory available!\n"); + + for (port_id = 0; port_id < nr_ports; port_id++) { + /* If port outside portmask */ +@@ -947,7 +946,7 @@ flows_handler(void) + + if (flow == NULL) { + print_flow_error(error); +- rte_exit(EXIT_FAILURE, "error in creating flow"); ++ rte_exit(EXIT_FAILURE, "Error in creating flow\n"); + } + flow_list[flow_index++] = flow; + } +@@ -968,7 +967,7 @@ flows_handler(void) + + if (!flow) { + print_flow_error(error); +- rte_exit(EXIT_FAILURE, "error in creating flow"); ++ rte_exit(EXIT_FAILURE, "Error in creating flow\n"); + } + + flow_list[flow_index++] = flow; +@@ -1046,36 +1045,6 @@ do_tx(struct lcore_info *li, uint16_t cnt, uint16_t tx_port, + rte_pktmbuf_free(li->pkts[i]); + } + +-/* +- * Method to convert numbers into pretty numbers that easy +- * to read. The design here is to add comma after each three +- * digits and set all of this inside buffer. +- * +- * For example if n = 1799321, the output will be +- * 1,799,321 after this method which is easier to read. 
+- */ +-static char * +-pretty_number(uint64_t n, char *buf) +-{ +- char p[6][4]; +- int i = 0; +- int off = 0; +- +- while (n > 1000) { +- sprintf(p[i], "%03d", (int)(n % 1000)); +- n /= 1000; +- i += 1; +- } +- +- sprintf(p[i++], "%d", (int)n); +- +- while (i--) +- off += sprintf(buf + off, "%s,", p[i]); +- buf[strlen(buf) - 1] = '\0'; +- +- return buf; +-} +- + static void + packet_per_second_stats(void) + { +@@ -1087,7 +1056,7 @@ packet_per_second_stats(void) + old = rte_zmalloc("old", + sizeof(struct lcore_info) * MAX_LCORES, 0); + if (old == NULL) +- rte_exit(EXIT_FAILURE, "No Memory available!"); ++ rte_exit(EXIT_FAILURE, "No Memory available!\n"); + + memcpy(old, lcore_infos, + sizeof(struct lcore_info) * MAX_LCORES); +@@ -1097,7 +1066,6 @@ packet_per_second_stats(void) + uint64_t total_rx_pkts = 0; + uint64_t total_tx_drops = 0; + uint64_t tx_delta, rx_delta, drops_delta; +- char buf[3][32]; + int nr_valid_core = 0; + + sleep(1); +@@ -1122,10 +1090,8 @@ packet_per_second_stats(void) + tx_delta = li->tx_pkts - oli->tx_pkts; + rx_delta = li->rx_pkts - oli->rx_pkts; + drops_delta = li->tx_drops - oli->tx_drops; +- printf("%6d %16s %16s %16s\n", i, +- pretty_number(tx_delta, buf[0]), +- pretty_number(drops_delta, buf[1]), +- pretty_number(rx_delta, buf[2])); ++ printf("%6d %'16"PRId64" %'16"PRId64" %'16"PRId64"\n", ++ i, tx_delta, drops_delta, rx_delta); + + total_tx_pkts += tx_delta; + total_rx_pkts += rx_delta; +@@ -1136,10 +1102,9 @@ packet_per_second_stats(void) + } + + if (nr_valid_core > 1) { +- printf("%6s %16s %16s %16s\n", "total", +- pretty_number(total_tx_pkts, buf[0]), +- pretty_number(total_tx_drops, buf[1]), +- pretty_number(total_rx_pkts, buf[2])); ++ printf("%6s %'16"PRId64" %'16"PRId64" %'16"PRId64"\n", ++ "total", total_tx_pkts, total_tx_drops, ++ total_rx_pkts); + nr_lines += 1; + } + +@@ -1443,6 +1408,9 @@ main(int argc, char **argv) + if (argc > 1) + args_parse(argc, argv); + ++ /* For more fancy, localised integer formatting. 
*/ ++ setlocale(LC_NUMERIC, ""); ++ + init_port(); + + nb_lcores = rte_lcore_count(); +diff --git a/dpdk/app/test-pmd/5tswap.c b/dpdk/app/test-pmd/5tswap.c +index e8cef9623b..090798d68b 100644 +--- a/dpdk/app/test-pmd/5tswap.c ++++ b/dpdk/app/test-pmd/5tswap.c +@@ -185,9 +185,22 @@ pkt_burst_5tuple_swap(struct fwd_stream *fs) + get_end_cycles(fs, start_tsc); + } + ++static void ++stream_init_5tuple_swap(struct fwd_stream *fs) ++{ ++ bool rx_stopped, tx_stopped; ++ ++ rx_stopped = ports[fs->rx_port].rxq[fs->rx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ tx_stopped = ports[fs->tx_port].txq[fs->tx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ fs->disabled = rx_stopped || tx_stopped; ++} ++ + struct fwd_engine five_tuple_swap_fwd_engine = { + .fwd_mode_name = "5tswap", + .port_fwd_begin = NULL, + .port_fwd_end = NULL, ++ .stream_init = stream_init_5tuple_swap, + .packet_fwd = pkt_burst_5tuple_swap, + }; +diff --git a/dpdk/app/test-pmd/bpf_cmd.c b/dpdk/app/test-pmd/bpf_cmd.c +index 066619e115..6980291f07 100644 +--- a/dpdk/app/test-pmd/bpf_cmd.c ++++ b/dpdk/app/test-pmd/bpf_cmd.c +@@ -20,7 +20,7 @@ static const struct rte_bpf_xsym bpf_xsym[] = { + .name = RTE_STR(stdout), + .type = RTE_BPF_XTYPE_VAR, + .var = { +- .val = &stdout, ++ .val = (void *)(uintptr_t)&stdout, + .desc = { + .type = RTE_BPF_ARG_PTR, + .size = sizeof(stdout), +diff --git a/dpdk/app/test-pmd/cmdline.c b/dpdk/app/test-pmd/cmdline.c +index 2b9dd3e1f4..83bb041484 100644 +--- a/dpdk/app/test-pmd/cmdline.c ++++ b/dpdk/app/test-pmd/cmdline.c +@@ -547,7 +547,7 @@ static void cmd_help_long_parsed(void *parsed_result, + " Set the option to enable display of RX and TX bursts.\n" + + "set port (port_id) vf (vf_id) rx|tx on|off\n" +- " Enable/Disable a VF receive/tranmit from a port\n\n" ++ " Enable/Disable a VF receive/transmit from a port\n\n" + + "set port (port_id) vf (vf_id) rxmode (AUPE|ROPE|BAM" + "|MPE) (on|off)\n" +@@ -1225,7 +1225,7 @@ cmdline_parse_token_string_t cmd_operate_port_all_all = + cmdline_parse_inst_t cmd_operate_port = { + .f = cmd_operate_port_parsed, + .data = NULL, +- .help_str = "port start|stop|close all: Start/Stop/Close/Reset all ports", ++ .help_str = "port start|stop|close|reset all: Start/Stop/Close/Reset all ports", + .tokens = { + (void *)&cmd_operate_port_all_cmd, + (void *)&cmd_operate_port_all_port, +@@ -1272,7 +1272,7 @@ cmdline_parse_token_num_t cmd_operate_specific_port_id = + cmdline_parse_inst_t cmd_operate_specific_port = { + .f = cmd_operate_specific_port_parsed, + .data = NULL, +- .help_str = "port start|stop|close : Start/Stop/Close/Reset port_id", ++ .help_str = "port start|stop|close|reset : Start/Stop/Close/Reset port_id", + .tokens = { + (void *)&cmd_operate_specific_port_cmd, + (void *)&cmd_operate_specific_port_port, +@@ -1521,6 +1521,9 @@ parse_and_check_speed_duplex(char *speedstr, char *duplexstr, uint32_t *speed) + } + } + ++ if (*speed != ETH_LINK_SPEED_AUTONEG) ++ *speed |= ETH_LINK_SPEED_FIXED; ++ + return 0; + } + +@@ -1604,13 +1607,13 @@ cmd_config_speed_specific_parsed(void *parsed_result, + struct cmd_config_speed_specific *res = parsed_result; + uint32_t link_speed; + +- if (!all_ports_stopped()) { +- printf("Please stop all ports first\n"); ++ if (port_id_is_invalid(res->id, ENABLED_WARN)) + return; +- } + +- if (port_id_is_invalid(res->id, ENABLED_WARN)) ++ if (!port_is_stopped(res->id)) { ++ printf("Please stop port %d first\n", res->id); + return; ++ } + + if (parse_and_check_speed_duplex(res->value1, res->value2, + &link_speed) < 0) +@@ -2554,8 +2557,10 
@@ cmd_config_rxtx_queue_parsed(void *parsed_result, + __rte_unused void *data) + { + struct cmd_config_rxtx_queue *res = parsed_result; ++ struct rte_port *port; + uint8_t isrx; + uint8_t isstart; ++ uint8_t *state; + int ret = 0; + + if (test_done == 0) { +@@ -2603,8 +2608,15 @@ cmd_config_rxtx_queue_parsed(void *parsed_result, + else + ret = rte_eth_dev_tx_queue_stop(res->portid, res->qid); + +- if (ret == -ENOTSUP) +- printf("Function not supported in PMD driver\n"); ++ if (ret == -ENOTSUP) { ++ fprintf(stderr, "Function not supported in PMD\n"); ++ return; ++ } ++ ++ port = &ports[res->portid]; ++ state = isrx ? &port->rxq[res->qid].state : &port->txq[res->qid].state; ++ *state = isstart ? RTE_ETH_QUEUE_STATE_STARTED : ++ RTE_ETH_QUEUE_STATE_STOPPED; + } + + cmdline_parse_token_string_t cmd_config_rxtx_queue_port = +@@ -2673,11 +2685,11 @@ cmd_config_deferred_start_rxtx_queue_parsed(void *parsed_result, + + ison = !strcmp(res->state, "on"); + +- if (isrx && port->rx_conf[res->qid].rx_deferred_start != ison) { +- port->rx_conf[res->qid].rx_deferred_start = ison; ++ if (isrx && port->rxq[res->qid].conf.rx_deferred_start != ison) { ++ port->rxq[res->qid].conf.rx_deferred_start = ison; + needreconfig = 1; +- } else if (!isrx && port->tx_conf[res->qid].tx_deferred_start != ison) { +- port->tx_conf[res->qid].tx_deferred_start = ison; ++ } else if (!isrx && port->txq[res->qid].conf.tx_deferred_start != ison) { ++ port->txq[res->qid].conf.tx_deferred_start = ison; + needreconfig = 1; + } + +@@ -2796,7 +2808,7 @@ cmd_setup_rxtx_queue_parsed( + res->qid, + port->nb_rx_desc[res->qid], + socket_id, +- &port->rx_conf[res->qid], ++ &port->rxq[res->qid].conf, + mp); + if (ret) + printf("Failed to setup RX queue\n"); +@@ -2805,11 +2817,15 @@ cmd_setup_rxtx_queue_parsed( + if (!numa_support || socket_id == NUMA_NO_CONFIG) + socket_id = port->socket_id; + ++ if (port->nb_tx_desc[res->qid] < tx_pkt_nb_segs) { ++ printf("Failed to setup TX queue: not enough descriptors\n"); ++ return; ++ } + ret = rte_eth_tx_queue_setup(res->portid, + res->qid, + port->nb_tx_desc[res->qid], + socket_id, +- &port->tx_conf[res->qid]); ++ &port->txq[res->qid].conf); + if (ret) + printf("Failed to setup TX queue\n"); + } +@@ -3014,7 +3030,7 @@ showport_parse_reta_config(struct rte_eth_rss_reta_entry64 *conf, + return -1; + } + for (i = 0; i < ret; i++) +- conf[i].mask = (uint64_t)strtoul(str_fld[i], &end, 0); ++ conf[i].mask = (uint64_t)strtoull(str_fld[i], &end, 0); + + return 0; + } +@@ -3544,7 +3560,7 @@ parse_item_list(char* str, const char* item_name, unsigned int max_items, + return nb_item; + + /* +- * Then, check that all values in the list are differents. ++ * Then, check that all values in the list are different. + * No optimization here... + */ + for (i = 0; i < nb_item; i++) { +@@ -4561,8 +4577,8 @@ cmd_config_queue_tx_offloads(struct rte_port *port) + int k; + + /* Apply queue tx offloads configuration */ +- for (k = 0; k < port->dev_info.max_rx_queues; k++) +- port->tx_conf[k].offloads = ++ for (k = 0; k < port->dev_info.max_tx_queues; k++) ++ port->txq[k].conf.offloads = + port->dev_conf.txmode.offloads; + } + +@@ -5764,6 +5780,19 @@ static void cmd_set_bonding_mode_parsed(void *parsed_result, + { + struct cmd_set_bonding_mode_result *res = parsed_result; + portid_t port_id = res->port_id; ++ struct rte_port *port = &ports[port_id]; ++ ++ /* ++ * Bonding mode changed means resources of device changed, like whether ++ * started rte timer or not. Device should be restarted when resources ++ * of device changed. 
++ */ ++ if (port->port_status != RTE_PORT_STOPPED) { ++ fprintf(stderr, ++ "\t Error: Can't set bonding mode when port %d is not stopped\n", ++ port_id); ++ return; ++ } + + /* Set the bonding mode for the relevant port. */ + if (0 != rte_eth_bond_mode_set(port_id, res->value)) +@@ -6308,6 +6337,7 @@ static void cmd_create_bonded_device_parsed(void *parsed_result, + printf("Failed to enable promiscuous mode for port %u: %s - ignore\n", + port_id, rte_strerror(-ret)); + ++ ports[port_id].bond_flag = 1; + ports[port_id].need_setup = 0; + ports[port_id].port_status = RTE_PORT_STOPPED; + } +@@ -8203,6 +8233,7 @@ static void cmd_quit_parsed(__rte_unused void *parsed_result, + __rte_unused void *data) + { + cmdline_quit(cl); ++ cl_quit = 1; + } + + cmdline_parse_token_string_t cmd_quit_quit = +@@ -8719,6 +8750,7 @@ cmd_set_vf_rxmode_parsed(void *parsed_result, + } + + RTE_SET_USED(is_on); ++ RTE_SET_USED(vf_rxmode); + + #ifdef RTE_NET_IXGBE + if (ret == -ENOTSUP) +@@ -9096,7 +9128,7 @@ cmdline_parse_inst_t cmd_vf_rate_limit = { + + /* *** CONFIGURE TUNNEL UDP PORT *** */ + struct cmd_tunnel_udp_config { +- cmdline_fixed_string_t cmd; ++ cmdline_fixed_string_t rx_vxlan_port; + cmdline_fixed_string_t what; + uint16_t udp_port; + portid_t port_id; +@@ -9112,9 +9144,7 @@ cmd_tunnel_udp_config_parsed(void *parsed_result, + int ret; + + tunnel_udp.udp_port = res->udp_port; +- +- if (!strcmp(res->cmd, "rx_vxlan_port")) +- tunnel_udp.prot_type = RTE_TUNNEL_TYPE_VXLAN; ++ tunnel_udp.prot_type = RTE_TUNNEL_TYPE_VXLAN; + + if (!strcmp(res->what, "add")) + ret = rte_eth_dev_udp_tunnel_port_add(res->port_id, +@@ -9127,9 +9157,9 @@ cmd_tunnel_udp_config_parsed(void *parsed_result, + printf("udp tunneling add error: (%s)\n", strerror(-ret)); + } + +-cmdline_parse_token_string_t cmd_tunnel_udp_config_cmd = ++cmdline_parse_token_string_t cmd_tunnel_udp_config_rx_vxlan_port = + TOKEN_STRING_INITIALIZER(struct cmd_tunnel_udp_config, +- cmd, "rx_vxlan_port"); ++ rx_vxlan_port, "rx_vxlan_port"); + cmdline_parse_token_string_t cmd_tunnel_udp_config_what = + TOKEN_STRING_INITIALIZER(struct cmd_tunnel_udp_config, + what, "add#rm"); +@@ -9146,7 +9176,7 @@ cmdline_parse_inst_t cmd_tunnel_udp_config = { + .help_str = "rx_vxlan_port add|rm : " + "Add/Remove a tunneling UDP port filter", + .tokens = { +- (void *)&cmd_tunnel_udp_config_cmd, ++ (void *)&cmd_tunnel_udp_config_rx_vxlan_port, + (void *)&cmd_tunnel_udp_config_what, + (void *)&cmd_tunnel_udp_config_udp_port, + (void *)&cmd_tunnel_udp_config_port_id, +@@ -9552,7 +9582,7 @@ dump_socket_mem(FILE *f) + fprintf(f, + "Total : size(M) total: %.6lf alloc: %.6lf(%.3lf%%) free: %.6lf \tcount alloc: %-4u free: %u\n", + (double)total / (1024 * 1024), (double)alloc / (1024 * 1024), +- (double)alloc * 100 / (double)total, ++ total ? 
((double)alloc * 100 / (double)total) : 0, + (double)free / (1024 * 1024), + n_alloc, n_free); + if (last_allocs) +@@ -14030,7 +14060,7 @@ cmd_ddp_info_parsed( + free(proto); + #endif + if (ret == -ENOTSUP) +- printf("Function not supported in PMD driver\n"); ++ fprintf(stderr, "Function not supported in PMD\n"); + close_file(pkg); + } + +@@ -15393,7 +15423,7 @@ cmd_rx_offload_get_configuration_parsed( + + nb_rx_queues = dev_info.nb_rx_queues; + for (q = 0; q < nb_rx_queues; q++) { +- queue_offloads = port->rx_conf[q].offloads; ++ queue_offloads = port->rxq[q].conf.offloads; + printf(" Queue[%2d] :", q); + print_rx_offloads(queue_offloads); + printf("\n"); +@@ -15512,11 +15542,11 @@ cmd_config_per_port_rx_offload_parsed(void *parsed_result, + if (!strcmp(res->on_off, "on")) { + port->dev_conf.rxmode.offloads |= single_offload; + for (q = 0; q < nb_rx_queues; q++) +- port->rx_conf[q].offloads |= single_offload; ++ port->rxq[q].conf.offloads |= single_offload; + } else { + port->dev_conf.rxmode.offloads &= ~single_offload; + for (q = 0; q < nb_rx_queues; q++) +- port->rx_conf[q].offloads &= ~single_offload; ++ port->rxq[q].conf.offloads &= ~single_offload; + } + + cmd_reconfig_device_queue(port_id, 1, 1); +@@ -15620,9 +15650,9 @@ cmd_config_per_queue_rx_offload_parsed(void *parsed_result, + } + + if (!strcmp(res->on_off, "on")) +- port->rx_conf[queue_id].offloads |= single_offload; ++ port->rxq[queue_id].conf.offloads |= single_offload; + else +- port->rx_conf[queue_id].offloads &= ~single_offload; ++ port->rxq[queue_id].conf.offloads &= ~single_offload; + + cmd_reconfig_device_queue(port_id, 1, 1); + } +@@ -15804,7 +15834,7 @@ cmd_tx_offload_get_configuration_parsed( + + nb_tx_queues = dev_info.nb_tx_queues; + for (q = 0; q < nb_tx_queues; q++) { +- queue_offloads = port->tx_conf[q].offloads; ++ queue_offloads = port->txq[q].conf.offloads; + printf(" Queue[%2d] :", q); + print_tx_offloads(queue_offloads); + printf("\n"); +@@ -15927,11 +15957,11 @@ cmd_config_per_port_tx_offload_parsed(void *parsed_result, + if (!strcmp(res->on_off, "on")) { + port->dev_conf.txmode.offloads |= single_offload; + for (q = 0; q < nb_tx_queues; q++) +- port->tx_conf[q].offloads |= single_offload; ++ port->txq[q].conf.offloads |= single_offload; + } else { + port->dev_conf.txmode.offloads &= ~single_offload; + for (q = 0; q < nb_tx_queues; q++) +- port->tx_conf[q].offloads &= ~single_offload; ++ port->txq[q].conf.offloads &= ~single_offload; + } + + cmd_reconfig_device_queue(port_id, 1, 1); +@@ -16038,9 +16068,9 @@ cmd_config_per_queue_tx_offload_parsed(void *parsed_result, + } + + if (!strcmp(res->on_off, "on")) +- port->tx_conf[queue_id].offloads |= single_offload; ++ port->txq[queue_id].conf.offloads |= single_offload; + else +- port->tx_conf[queue_id].offloads &= ~single_offload; ++ port->txq[queue_id].conf.offloads &= ~single_offload; + + cmd_reconfig_device_queue(port_id, 1, 1); + } +@@ -16444,17 +16474,17 @@ cmd_set_port_fec_mode_parsed( + { + struct cmd_set_port_fec_mode *res = parsed_result; + uint16_t port_id = res->port_id; +- uint32_t mode; ++ uint32_t fec_capa; + int ret; + +- ret = parse_fec_mode(res->fec_value, &mode); ++ ret = parse_fec_mode(res->fec_value, &fec_capa); + if (ret < 0) { + printf("Unknown fec mode: %s for Port %d\n", res->fec_value, + port_id); + return; + } + +- ret = rte_eth_fec_set(port_id, mode); ++ ret = rte_eth_fec_set(port_id, fec_capa); + if (ret == -ENOTSUP) { + printf("Function not implemented\n"); + return; +@@ -16615,7 +16645,8 @@ cmd_show_rx_tx_desc_status_parsed(void 
*parsed_result, + rc = rte_eth_rx_descriptor_status(res->cmd_pid, res->cmd_qid, + res->cmd_did); + if (rc < 0) { +- printf("Invalid queueid = %d\n", res->cmd_qid); ++ printf("Invalid input: queue id = %d, desc id = %d\n", ++ res->cmd_qid, res->cmd_did); + return; + } + if (rc == RTE_ETH_RX_DESC_AVAIL) +@@ -16628,7 +16659,8 @@ cmd_show_rx_tx_desc_status_parsed(void *parsed_result, + rc = rte_eth_tx_descriptor_status(res->cmd_pid, res->cmd_qid, + res->cmd_did); + if (rc < 0) { +- printf("Invalid queueid = %d\n", res->cmd_qid); ++ printf("Invalid input: queue id = %d, desc id = %d\n", ++ res->cmd_qid, res->cmd_did); + return; + } + if (rc == RTE_ETH_TX_DESC_FULL) +@@ -16966,6 +16998,7 @@ cmdline_parse_ctx_t main_ctx[] = { + (cmdline_parse_inst_t *)&cmd_show_port_meter_cap, + (cmdline_parse_inst_t *)&cmd_add_port_meter_profile_srtcm, + (cmdline_parse_inst_t *)&cmd_add_port_meter_profile_trtcm, ++ (cmdline_parse_inst_t *)&cmd_add_port_meter_profile_trtcm_rfc4115, + (cmdline_parse_inst_t *)&cmd_del_port_meter_profile, + (cmdline_parse_inst_t *)&cmd_create_port_meter, + (cmdline_parse_inst_t *)&cmd_enable_port_meter, +diff --git a/dpdk/app/test-pmd/cmdline_flow.c b/dpdk/app/test-pmd/cmdline_flow.c +index de80924e7c..de0db3994b 100644 +--- a/dpdk/app/test-pmd/cmdline_flow.c ++++ b/dpdk/app/test-pmd/cmdline_flow.c +@@ -2003,7 +2003,7 @@ static const struct token token_list[] = { + }, + [TUNNEL_DESTROY] = { + .name = "destroy", +- .help = "destroy tunel", ++ .help = "destroy tunnel", + .next = NEXT(NEXT_ENTRY(TUNNEL_DESTROY_ID), + NEXT_ENTRY(PORT_ID)), + .args = ARGS(ARGS_ENTRY(struct buffer, port)), +@@ -2011,7 +2011,7 @@ static const struct token token_list[] = { + }, + [TUNNEL_DESTROY_ID] = { + .name = "id", +- .help = "tunnel identifier to testroy", ++ .help = "tunnel identifier to destroy", + .next = NEXT(NEXT_ENTRY(UNSIGNED)), + .args = ARGS(ARGS_ENTRY(struct tunnel_ops, id)), + .call = parse_tunnel, +@@ -5203,6 +5203,8 @@ parse_vc_action_nvgre_encap(struct context *ctx, const struct token *token, + .src_addr = nvgre_encap_conf.ipv4_src, + .dst_addr = nvgre_encap_conf.ipv4_dst, + }, ++ .item_nvgre.c_k_s_rsvd0_ver = RTE_BE16(0x2000), ++ .item_nvgre.protocol = RTE_BE16(RTE_ETHER_TYPE_TEB), + .item_nvgre.flow_id = 0, + }; + memcpy(action_nvgre_encap_data->item_eth.dst.addr_bytes, +@@ -6409,31 +6411,32 @@ parse_string(struct context *ctx, const struct token *token, + static int + parse_hex_string(const char *src, uint8_t *dst, uint32_t *size) + { +- char *c = NULL; +- uint32_t i, len; +- char tmp[3]; +- +- /* Check input parameters */ +- if ((src == NULL) || +- (dst == NULL) || +- (size == NULL) || +- (*size == 0)) ++ const uint8_t *head = dst; ++ uint32_t left; ++ ++ if (*size == 0) + return -1; + ++ left = *size; ++ + /* Convert chars to bytes */ +- for (i = 0, len = 0; i < *size; i += 2) { +- snprintf(tmp, 3, "%s", src + i); +- dst[len++] = strtoul(tmp, &c, 16); +- if (*c != 0) { +- len--; +- dst[len] = 0; +- *size = len; ++ while (left) { ++ char tmp[3], *end = tmp; ++ uint32_t read_lim = left & 1 ? 
1 : 2; ++ ++ snprintf(tmp, read_lim + 1, "%s", src); ++ *dst = strtoul(tmp, &end, 16); ++ if (*end) { ++ *dst = 0; ++ *size = (uint32_t)(dst - head); + return -1; + } ++ left -= read_lim; ++ src += read_lim; ++ dst++; + } +- dst[len] = 0; +- *size = len; +- ++ *dst = 0; ++ *size = (uint32_t)(dst - head); + return 0; + } + +@@ -6477,10 +6480,13 @@ parse_hex(struct context *ctx, const struct token *token, + hexlen -= 2; + } + if (hexlen > length) +- return -1; ++ goto error; + ret = parse_hex_string(str, hex_tmp, &hexlen); + if (ret < 0) + goto error; ++ /* Check the converted binary fits into data buffer. */ ++ if (hexlen > size) ++ goto error; + /* Let parse_int() fill length information first. */ + ret = snprintf(tmp, sizeof(tmp), "%u", hexlen); + if (ret < 0) +diff --git a/dpdk/app/test-pmd/cmdline_mtr.c b/dpdk/app/test-pmd/cmdline_mtr.c +index 3982787d20..875a97788c 100644 +--- a/dpdk/app/test-pmd/cmdline_mtr.c ++++ b/dpdk/app/test-pmd/cmdline_mtr.c +@@ -92,13 +92,13 @@ parse_dscp_table_entries(char *str, enum rte_color **dscp_table) + while (1) { + if (strcmp(token, "G") == 0 || + strcmp(token, "g") == 0) +- *dscp_table[i++] = RTE_COLOR_GREEN; ++ (*dscp_table)[i++] = RTE_COLOR_GREEN; + else if (strcmp(token, "Y") == 0 || + strcmp(token, "y") == 0) +- *dscp_table[i++] = RTE_COLOR_YELLOW; ++ (*dscp_table)[i++] = RTE_COLOR_YELLOW; + else if (strcmp(token, "R") == 0 || + strcmp(token, "r") == 0) +- *dscp_table[i++] = RTE_COLOR_RED; ++ (*dscp_table)[i++] = RTE_COLOR_RED; + else { + free(*dscp_table); + return -1; +diff --git a/dpdk/app/test-pmd/cmdline_tm.c b/dpdk/app/test-pmd/cmdline_tm.c +index 9978226573..845d3848b5 100644 +--- a/dpdk/app/test-pmd/cmdline_tm.c ++++ b/dpdk/app/test-pmd/cmdline_tm.c +@@ -69,7 +69,7 @@ print_err_msg(struct rte_tm_error *error) + [RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS] + = "num shared shapers field (node params)", + [RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE] +- = "wfq weght mode field (node params)", ++ = "wfq weight mode field (node params)", + [RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES] + = "num strict priorities field (node params)", + [RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN] +@@ -477,7 +477,7 @@ static void cmd_show_port_tm_level_cap_parsed(void *parsed_result, + cmdline_parse_inst_t cmd_show_port_tm_level_cap = { + .f = cmd_show_port_tm_level_cap_parsed, + .data = NULL, +- .help_str = "Show Port TM Hierarhical level Capabilities", ++ .help_str = "Show port TM hierarchical level capabilities", + .tokens = { + (void *)&cmd_show_port_tm_level_cap_show, + (void *)&cmd_show_port_tm_level_cap_port, +diff --git a/dpdk/app/test-pmd/config.c b/dpdk/app/test-pmd/config.c +index dab8afe5dd..f3ffc23ecd 100644 +--- a/dpdk/app/test-pmd/config.c ++++ b/dpdk/app/test-pmd/config.c +@@ -63,8 +63,6 @@ + + #define NS_PER_SEC 1E9 + +-static char *flowtype_to_str(uint16_t flow_type); +- + static const struct { + enum tx_pkt_split split; + const char *name; +@@ -183,14 +181,20 @@ nic_stats_display(portid_t port_id) + diff_ns; + uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx; + struct rte_eth_stats stats; +- + static const char *nic_stats_border = "########################"; ++ int ret; + + if (port_id_is_invalid(port_id, ENABLED_WARN)) { + print_valid_ports(); + return; + } +- rte_eth_stats_get(port_id, &stats); ++ ret = rte_eth_stats_get(port_id, &stats); ++ if (ret != 0) { ++ fprintf(stderr, ++ "%s: Error: failed to get stats (port %u): %d", ++ __func__, port_id, ret); ++ return; ++ } + printf("\n %s NIC statistics for port %-2d %s\n", + nic_stats_border, 
port_id, nic_stats_border); + +@@ -560,6 +564,19 @@ device_infos_display(const char *identifier) + }; + } + ++const char * ++rsstypes_to_str(uint64_t rss_type) ++{ ++ uint16_t i; ++ ++ for (i = 0; rss_type_table[i].str != NULL; i++) { ++ if (rss_type_table[i].rss_type == rss_type) ++ return rss_type_table[i].str; ++ } ++ ++ return NULL; ++} ++ + void + port_infos_display(portid_t port_id) + { +@@ -662,19 +679,20 @@ port_infos_display(portid_t port_id) + if (!dev_info.flow_type_rss_offloads) + printf("No RSS offload flow type is supported.\n"); + else { ++ uint64_t rss_offload_types = dev_info.flow_type_rss_offloads; + uint16_t i; +- char *p; + + printf("Supported RSS offload flow types:\n"); +- for (i = RTE_ETH_FLOW_UNKNOWN + 1; +- i < sizeof(dev_info.flow_type_rss_offloads) * CHAR_BIT; i++) { +- if (!(dev_info.flow_type_rss_offloads & (1ULL << i))) +- continue; +- p = flowtype_to_str(i); +- if (p) +- printf(" %s\n", p); +- else +- printf(" user defined %d\n", i); ++ for (i = 0; i < sizeof(rss_offload_types) * CHAR_BIT; i++) { ++ uint64_t rss_offload = UINT64_C(1) << i; ++ if ((rss_offload_types & rss_offload) != 0) { ++ const char *p = rsstypes_to_str(rss_offload); ++ if (p) ++ printf(" %s\n", p); ++ else ++ printf(" user defined %u\n", ++ i); ++ } + } + } + +@@ -797,10 +815,15 @@ port_eeprom_display(portid_t port_id) + return; + } + +- char buf[len_eeprom]; + einfo.offset = 0; + einfo.length = len_eeprom; +- einfo.data = buf; ++ einfo.data = calloc(1, len_eeprom); ++ if (!einfo.data) { ++ fprintf(stderr, ++ "Allocation of port %u eeprom data failed\n", ++ port_id); ++ return; ++ } + + ret = rte_eth_dev_get_eeprom(port_id, &einfo); + if (ret != 0) { +@@ -818,10 +841,12 @@ port_eeprom_display(portid_t port_id) + printf("Unable to get EEPROM: %d\n", ret); + break; + } ++ free(einfo.data); + return; + } + rte_hexdump(stdout, "hexdump", einfo.data, einfo.length); + printf("Finish -- Port: %d EEPROM length: %d bytes\n", port_id, len_eeprom); ++ free(einfo.data); + } + + void +@@ -856,10 +881,15 @@ port_module_eeprom_display(portid_t port_id) + return; + } + +- char buf[minfo.eeprom_len]; + einfo.offset = 0; + einfo.length = minfo.eeprom_len; +- einfo.data = buf; ++ einfo.data = calloc(1, minfo.eeprom_len); ++ if (!einfo.data) { ++ fprintf(stderr, ++ "Allocation of port %u eeprom data failed\n", ++ port_id); ++ return; ++ } + + ret = rte_eth_dev_get_module_eeprom(port_id, &einfo); + if (ret != 0) { +@@ -877,11 +907,13 @@ port_module_eeprom_display(portid_t port_id) + printf("Unable to get module EEPROM: %d\n", ret); + break; + } ++ free(einfo.data); + return; + } + + rte_hexdump(stdout, "hexdump", einfo.data, einfo.length); + printf("Finish -- Port: %d MODULE EEPROM length: %d bytes\n", port_id, einfo.length); ++ free(einfo.data); + } + + void +@@ -1473,6 +1505,15 @@ port_flow_tunnel_type(struct rte_flow_tunnel *tunnel) + case RTE_FLOW_ITEM_TYPE_VXLAN: + type = "vxlan"; + break; ++ case RTE_FLOW_ITEM_TYPE_GRE: ++ type = "gre"; ++ break; ++ case RTE_FLOW_ITEM_TYPE_NVGRE: ++ type = "nvgre"; ++ break; ++ case RTE_FLOW_ITEM_TYPE_GENEVE: ++ type = "geneve"; ++ break; + } + + return type; +@@ -1533,6 +1574,12 @@ void port_flow_tunnel_create(portid_t port_id, const struct tunnel_ops *ops) + + if (!strcmp(ops->type, "vxlan")) + type = RTE_FLOW_ITEM_TYPE_VXLAN; ++ else if (!strcmp(ops->type, "gre")) ++ type = RTE_FLOW_ITEM_TYPE_GRE; ++ else if (!strcmp(ops->type, "nvgre")) ++ type = RTE_FLOW_ITEM_TYPE_NVGRE; ++ else if (!strcmp(ops->type, "geneve")) ++ type = RTE_FLOW_ITEM_TYPE_GENEVE; + else { + 
printf("cannot offload \"%s\" tunnel type\n", ops->type); + return; +@@ -2032,6 +2079,7 @@ port_flow_validate(portid_t port_id, + { + struct rte_flow_error error; + struct port_flow_tunnel *pft = NULL; ++ int ret; + + /* Poisoning to make sure PMDs update it in case of error. */ + memset(&error, 0x11, sizeof(error)); +@@ -2045,10 +2093,11 @@ port_flow_validate(portid_t port_id, + if (pft->actions) + actions = pft->actions; + } +- if (rte_flow_validate(port_id, attr, pattern, actions, &error)) +- return port_flow_complain(&error); ++ ret = rte_flow_validate(port_id, attr, pattern, actions, &error); + if (tunnel_ops->enabled) + port_flow_tunnel_offload_cmd_release(port_id, tunnel_ops, pft); ++ if (ret) ++ return port_flow_complain(&error); + printf("Flow rule validated\n"); + return 0; + } +@@ -2115,6 +2164,9 @@ port_flow_create(portid_t port_id, + memset(&error, 0x22, sizeof(error)); + flow = rte_flow_create(port_id, attr, pattern, actions, &error); + if (!flow) { ++ if (tunnel_ops->enabled) ++ port_flow_tunnel_offload_cmd_release(port_id, ++ tunnel_ops, pft); + free(pf); + return port_flow_complain(&error); + } +@@ -2776,8 +2828,8 @@ rxtx_config_display(void) + nb_fwd_lcores, nb_fwd_ports); + + RTE_ETH_FOREACH_DEV(pid) { +- struct rte_eth_rxconf *rx_conf = &ports[pid].rx_conf[0]; +- struct rte_eth_txconf *tx_conf = &ports[pid].tx_conf[0]; ++ struct rte_eth_rxconf *rx_conf = &ports[pid].rxq[0].conf; ++ struct rte_eth_txconf *tx_conf = &ports[pid].txq[0].conf; + uint16_t *nb_rx_desc = &ports[pid].nb_rx_desc[0]; + uint16_t *nb_tx_desc = &ports[pid].nb_tx_desc[0]; + struct rte_eth_rxq_info rx_qinfo; +@@ -2894,7 +2946,7 @@ port_rss_reta_info(portid_t port_id, + } + + /* +- * Displays the RSS hash functions of a port, and, optionaly, the RSS hash ++ * Displays the RSS hash functions of a port, and, optionally, the RSS hash + * key of the port. 
+ */ + void +@@ -2949,7 +3001,9 @@ port_rss_hash_conf_show(portid_t port_id, int show_rss_key) + } + printf("RSS functions:\n "); + for (i = 0; rss_type_table[i].str; i++) { +- if (rss_hf & rss_type_table[i].rss_type) ++ if (rss_type_table[i].rss_type == 0) ++ continue; ++ if ((rss_hf & rss_type_table[i].rss_type) == rss_type_table[i].rss_type) + printf("%s ", rss_type_table[i].str); + } + printf("\n"); +@@ -2963,14 +3017,14 @@ port_rss_hash_conf_show(portid_t port_id, int show_rss_key) + + void + port_rss_hash_key_update(portid_t port_id, char rss_type[], uint8_t *hash_key, +- uint hash_key_len) ++ uint8_t hash_key_len) + { + struct rte_eth_rss_conf rss_conf; + int diag; + unsigned int i; + + rss_conf.rss_key = NULL; +- rss_conf.rss_key_len = hash_key_len; ++ rss_conf.rss_key_len = 0; + rss_conf.rss_hf = 0; + for (i = 0; rss_type_table[i].str; i++) { + if (!strcmp(rss_type_table[i].str, rss_type)) +@@ -2979,6 +3033,7 @@ port_rss_hash_key_update(portid_t port_id, char rss_type[], uint8_t *hash_key, + diag = rte_eth_dev_rss_hash_conf_get(port_id, &rss_conf); + if (diag == 0) { + rss_conf.rss_key = hash_key; ++ rss_conf.rss_key_len = hash_key_len; + diag = rte_eth_dev_rss_hash_update(port_id, &rss_conf); + } + if (diag == 0) +@@ -3153,6 +3208,21 @@ rss_fwd_config_setup(void) + } + } + ++static uint16_t ++get_fwd_port_total_tc_num(void) ++{ ++ struct rte_eth_dcb_info dcb_info; ++ uint16_t total_tc_num = 0; ++ unsigned int i; ++ ++ for (i = 0; i < nb_fwd_ports; i++) { ++ (void)rte_eth_dev_get_dcb_info(fwd_ports_ids[i], &dcb_info); ++ total_tc_num += dcb_info.nb_tcs; ++ } ++ ++ return total_tc_num; ++} ++ + /** + * For the DCB forwarding test, each core is assigned on each traffic class. + * +@@ -3172,12 +3242,42 @@ dcb_fwd_config_setup(void) + lcoreid_t lc_id; + uint16_t nb_rx_queue, nb_tx_queue; + uint16_t i, j, k, sm_id = 0; ++ uint16_t total_tc_num; ++ struct rte_port *port; + uint8_t tc = 0; ++ portid_t pid; ++ int ret; ++ ++ /* ++ * The fwd_config_setup() is called when the port is RTE_PORT_STARTED ++ * or RTE_PORT_STOPPED. ++ * ++ * Re-configure ports to get updated mapping between tc and queue in ++ * case the queue number of the port is changed. Skip for started ports ++ * since modifying queue number and calling dev_configure need to stop ++ * ports first. 
++ */ ++ for (pid = 0; pid < nb_fwd_ports; pid++) { ++ if (port_is_started(pid) == 1) ++ continue; ++ ++ port = &ports[pid]; ++ ret = rte_eth_dev_configure(pid, nb_rxq, nb_txq, ++ &port->dev_conf); ++ if (ret < 0) { ++ printf("Failed to re-configure port %d, ret = %d.\n", ++ pid, ret); ++ return; ++ } ++ } + + cur_fwd_config.nb_fwd_lcores = (lcoreid_t) nb_fwd_lcores; + cur_fwd_config.nb_fwd_ports = nb_fwd_ports; + cur_fwd_config.nb_fwd_streams = + (streamid_t) (nb_rxq * cur_fwd_config.nb_fwd_ports); ++ total_tc_num = get_fwd_port_total_tc_num(); ++ if (cur_fwd_config.nb_fwd_lcores > total_tc_num) ++ cur_fwd_config.nb_fwd_lcores = total_tc_num; + + /* reinitialize forwarding streams */ + init_fwd_streams(); +@@ -3299,6 +3399,10 @@ icmp_echo_config_setup(void) + void + fwd_config_setup(void) + { ++ struct rte_port *port; ++ portid_t pt_id; ++ unsigned int i; ++ + cur_fwd_config.fwd_eng = cur_fwd_eng; + if (strcmp(cur_fwd_eng->fwd_mode_name, "icmpecho") == 0) { + icmp_echo_config_setup(); +@@ -3306,9 +3410,24 @@ fwd_config_setup(void) + } + + if ((nb_rxq > 1) && (nb_txq > 1)){ +- if (dcb_config) ++ if (dcb_config) { ++ for (i = 0; i < nb_fwd_ports; i++) { ++ pt_id = fwd_ports_ids[i]; ++ port = &ports[pt_id]; ++ if (!port->dcb_flag) { ++ printf("In DCB mode, all forwarding ports must " ++ "be configured in this mode.\n"); ++ return; ++ } ++ } ++ if (nb_fwd_lcores == 1) { ++ printf("In DCB mode,the nb forwarding cores " ++ "should be larger than 1.\n"); ++ return; ++ } ++ + dcb_fwd_config_setup(); +- else ++ } else + rss_fwd_config_setup(); + } + else +@@ -3693,13 +3812,14 @@ set_tx_pkt_split(const char *name) + } + + int +-parse_fec_mode(const char *name, uint32_t *mode) ++parse_fec_mode(const char *name, uint32_t *fec_capa) + { + uint8_t i; + + for (i = 0; i < RTE_DIM(fec_mode_name); i++) { + if (strcmp(fec_mode_name[i].name, name) == 0) { +- *mode = RTE_ETH_FEC_MODE_TO_CAPA(fec_mode_name[i].mode); ++ *fec_capa = ++ RTE_ETH_FEC_MODE_TO_CAPA(fec_mode_name[i].mode); + return 0; + } + } +@@ -3842,13 +3962,15 @@ nb_segs_is_invalid(unsigned int nb_segs) + RTE_ETH_FOREACH_DEV(port_id) { + for (queue_id = 0; queue_id < nb_txq; queue_id++) { + ret = get_tx_ring_size(port_id, queue_id, &ring_size); +- +- if (ret) +- return true; +- ++ if (ret) { ++ /* Port may not be initialized yet, can't say ++ * the port is invalid in this stage. ++ */ ++ continue; ++ } + if (ring_size < nb_segs) { +- printf("nb segments per TX packets=%u >= " +- "TX queue(%u) ring_size=%u - ignored\n", ++ printf("nb segments per TX packets=%u >= TX " ++ "queue(%u) ring_size=%u - txpkts ignored\n", + nb_segs, queue_id, ring_size); + return true; + } +@@ -3864,12 +3986,26 @@ set_tx_pkt_segments(unsigned int *seg_lengths, unsigned int nb_segs) + uint16_t tx_pkt_len; + unsigned int i; + +- if (nb_segs_is_invalid(nb_segs)) ++ /* ++ * For single segment settings failed check is ignored. ++ * It is a very basic capability to send the single segment ++ * packets, suppose it is always supported. ++ */ ++ if (nb_segs > 1 && nb_segs_is_invalid(nb_segs)) { ++ printf("Tx segment size(%u) is not supported - txpkts ignored\n", ++ nb_segs); + return; ++ } ++ ++ if (nb_segs > RTE_MAX_SEGS_PER_PKT) { ++ printf("Tx segment size(%u) is bigger than max number of segment(%u)\n", ++ nb_segs, RTE_MAX_SEGS_PER_PKT); ++ return; ++ } + + /* + * Check that each segment length is greater or equal than +- * the mbuf data sise. ++ * the mbuf data size. 
+ * Check also that the total packet length is greater or equal than the + * size of an empty UDP/IP packet (sizeof(struct rte_ether_hdr) + + * 20 + 8). +@@ -4951,7 +5087,7 @@ mcast_addr_pool_remove(struct rte_port *port, uint32_t addr_idx) + { + port->mc_addr_nb--; + if (addr_idx == port->mc_addr_nb) { +- /* No need to recompact the set of multicast addressses. */ ++ /* No need to recompact the set of multicast addresses. */ + if (port->mc_addr_nb == 0) { + /* free the pool of multicast addresses. */ + free(port->mc_addr_pool); +@@ -4964,6 +5100,25 @@ mcast_addr_pool_remove(struct rte_port *port, uint32_t addr_idx) + sizeof(struct rte_ether_addr) * (port->mc_addr_nb - addr_idx)); + } + ++int ++mcast_addr_pool_destroy(portid_t port_id) ++{ ++ struct rte_port *port; ++ ++ if (port_id_is_invalid(port_id, ENABLED_WARN) || ++ port_id == (portid_t)RTE_PORT_ALL) ++ return -EINVAL; ++ port = &ports[port_id]; ++ ++ if (port->mc_addr_nb != 0) { ++ /* free the pool of multicast addresses. */ ++ free(port->mc_addr_pool); ++ port->mc_addr_pool = NULL; ++ port->mc_addr_nb = 0; ++ } ++ return 0; ++} ++ + static int + eth_port_multicast_addr_list_set(portid_t port_id) + { +@@ -5219,7 +5374,8 @@ show_macs(portid_t port_id) + + dev = &rte_eth_devices[port_id]; + +- rte_eth_dev_info_get(port_id, &dev_info); ++ if (eth_dev_info_get_print_err(port_id, &dev_info)) ++ return; + + for (i = 0; i < dev_info.max_mac_addrs; i++) { + addr = &dev->data->mac_addrs[i]; +diff --git a/dpdk/app/test-pmd/csumonly.c b/dpdk/app/test-pmd/csumonly.c +index d813d4fae0..ffec25f308 100644 +--- a/dpdk/app/test-pmd/csumonly.c ++++ b/dpdk/app/test-pmd/csumonly.c +@@ -252,8 +252,7 @@ parse_gtp(struct rte_udp_hdr *udp_hdr, + /* Parse a vxlan header */ + static void + parse_vxlan(struct rte_udp_hdr *udp_hdr, +- struct testpmd_offload_info *info, +- uint32_t pkt_type) ++ struct testpmd_offload_info *info) + { + struct rte_ether_hdr *eth_hdr; + +@@ -261,8 +260,7 @@ parse_vxlan(struct rte_udp_hdr *udp_hdr, + * default vxlan port (rfc7348) or that the rx offload flag is set + * (i40e only currently) + */ +- if (udp_hdr->dst_port != _htons(RTE_VXLAN_DEFAULT_PORT) && +- RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0) ++ if (udp_hdr->dst_port != _htons(RTE_VXLAN_DEFAULT_PORT)) + return; + + update_tunnel_outer(info); +@@ -480,17 +478,18 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, + + if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV4)) { + ipv4_hdr = l3_hdr; +- ipv4_hdr->hdr_checksum = 0; + + ol_flags |= PKT_TX_IPV4; + if (info->l4_proto == IPPROTO_TCP && tso_segsz) { + ol_flags |= PKT_TX_IP_CKSUM; + } else { +- if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) ++ if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) { + ol_flags |= PKT_TX_IP_CKSUM; +- else ++ } else { ++ ipv4_hdr->hdr_checksum = 0; + ipv4_hdr->hdr_checksum = + rte_ipv4_cksum(ipv4_hdr); ++ } + } + } else if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV6)) + ol_flags |= PKT_TX_IPV6; +@@ -501,10 +500,10 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, + udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info->l3_len); + /* do not recalculate udp cksum if it was 0 */ + if (udp_hdr->dgram_cksum != 0) { +- udp_hdr->dgram_cksum = 0; +- if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) ++ if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) { + ol_flags |= PKT_TX_UDP_CKSUM; +- else { ++ } else { ++ udp_hdr->dgram_cksum = 0; + udp_hdr->dgram_cksum = + get_udptcp_checksum(l3_hdr, udp_hdr, + info->ethertype); +@@ -514,12 +513,12 @@ process_inner_cksums(void 
*l3_hdr, const struct testpmd_offload_info *info, + ol_flags |= PKT_TX_UDP_SEG; + } else if (info->l4_proto == IPPROTO_TCP) { + tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + info->l3_len); +- tcp_hdr->cksum = 0; + if (tso_segsz) + ol_flags |= PKT_TX_TCP_SEG; +- else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) ++ else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) { + ol_flags |= PKT_TX_TCP_CKSUM; +- else { ++ } else { ++ tcp_hdr->cksum = 0; + tcp_hdr->cksum = + get_udptcp_checksum(l3_hdr, tcp_hdr, + info->ethertype); +@@ -529,13 +528,13 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, + } else if (info->l4_proto == IPPROTO_SCTP) { + sctp_hdr = (struct rte_sctp_hdr *) + ((char *)l3_hdr + info->l3_len); +- sctp_hdr->cksum = 0; + /* sctp payload must be a multiple of 4 to be + * offloaded */ + if ((tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) && + ((ipv4_hdr->total_length & 0x3) == 0)) { + ol_flags |= PKT_TX_SCTP_CKSUM; + } else { ++ sctp_hdr->cksum = 0; + /* XXX implement CRC32c, example available in + * RFC3309 */ + } +@@ -761,6 +760,28 @@ pkt_copy_split(const struct rte_mbuf *pkt) + return md[0]; + } + ++#if defined(RTE_LIB_GRO) || defined(RTE_LIB_GSO) ++/* ++ * Re-calculate IP checksum for merged/fragmented packets. ++ */ ++static void ++pkts_ip_csum_recalc(struct rte_mbuf **pkts_burst, const uint16_t nb_pkts, uint64_t tx_offloads) ++{ ++ int i; ++ struct rte_ipv4_hdr *ipv4_hdr; ++ for (i = 0; i < nb_pkts; i++) { ++ if ((pkts_burst[i]->ol_flags & PKT_TX_IPV4) && ++ (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) == 0) { ++ ipv4_hdr = rte_pktmbuf_mtod_offset(pkts_burst[i], ++ struct rte_ipv4_hdr *, ++ pkts_burst[i]->l2_len); ++ ipv4_hdr->hdr_checksum = 0; ++ ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); ++ } ++ } ++} ++#endif ++ + /* + * Receive a burst of packets, and for each packet: + * - parse packet, and try to recognize a supported packet type (1) +@@ -786,7 +807,7 @@ pkt_copy_split(const struct rte_mbuf *pkt) + * + * The testpmd command line for this forward engine sets the flags + * TESTPMD_TX_OFFLOAD_* in ports[tx_port].tx_ol_flags. They control +- * wether a checksum must be calculated in software or in hardware. The ++ * whether a checksum must be calculated in software or in hardware. The + * IP, UDP, TCP and SCTP flags always concern the inner layer. The + * OUTER_IP is only useful for tunnel packets. + */ +@@ -867,10 +888,6 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) + * and inner headers */ + + eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); +- rte_ether_addr_copy(&peer_eth_addrs[fs->peer_addr], +- ð_hdr->d_addr); +- rte_ether_addr_copy(&ports[fs->tx_port].eth_addr, +- ð_hdr->s_addr); + parse_ethernet(eth_hdr, &info); + l3_hdr = (char *)eth_hdr + info.l2_len; + +@@ -892,8 +909,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) + PKT_TX_TUNNEL_VXLAN_GPE; + goto tunnel_update; + } +- parse_vxlan(udp_hdr, &info, +- m->packet_type); ++ parse_vxlan(udp_hdr, &info); + if (info.is_tunnel) { + tx_ol_flags |= + PKT_TX_TUNNEL_VXLAN; +@@ -905,6 +921,12 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) + PKT_TX_TUNNEL_GENEVE; + goto tunnel_update; + } ++ /* Always keep last. */ ++ if (unlikely(RTE_ETH_IS_TUNNEL_PKT( ++ m->packet_type) != 0)) { ++ TESTPMD_LOG(DEBUG, "Unknown tunnel packet. 
UDP dst port: %hu", ++ udp_hdr->dst_port); ++ } + } else if (info.l4_proto == IPPROTO_GRE) { + struct simple_gre_hdr *gre_hdr; + +@@ -956,8 +978,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) + (tx_offloads & + DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) || + (tx_offloads & +- DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) || +- (tx_ol_flags & PKT_TX_OUTER_IPV6)) { ++ DEV_TX_OFFLOAD_OUTER_UDP_CKSUM)) { + m->outer_l2_len = info.outer_l2_len; + m->outer_l3_len = info.outer_l3_len; + m->l2_len = info.l2_len; +@@ -1069,6 +1090,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) + fs->gro_times = 0; + } + } ++ ++ pkts_ip_csum_recalc(pkts_burst, nb_rx, tx_offloads); + } + + if (gso_ports[fs->tx_port].enable == 0) +@@ -1098,6 +1121,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) + + tx_pkts_burst = gso_segments; + nb_rx = nb_segments; ++ ++ pkts_ip_csum_recalc(tx_pkts_burst, nb_rx, tx_offloads); + } + + nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue, +@@ -1136,9 +1161,22 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) + get_end_cycles(fs, start_tsc); + } + ++static void ++stream_init_checksum_forward(struct fwd_stream *fs) ++{ ++ bool rx_stopped, tx_stopped; ++ ++ rx_stopped = ports[fs->rx_port].rxq[fs->rx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ tx_stopped = ports[fs->tx_port].txq[fs->tx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ fs->disabled = rx_stopped || tx_stopped; ++} ++ + struct fwd_engine csum_fwd_engine = { + .fwd_mode_name = "csum", + .port_fwd_begin = NULL, + .port_fwd_end = NULL, ++ .stream_init = stream_init_checksum_forward, + .packet_fwd = pkt_burst_checksum_forward, + }; +diff --git a/dpdk/app/test-pmd/flowgen.c b/dpdk/app/test-pmd/flowgen.c +index cabfc688ff..506ff07086 100644 +--- a/dpdk/app/test-pmd/flowgen.c ++++ b/dpdk/app/test-pmd/flowgen.c +@@ -181,12 +181,12 @@ pkt_burst_flow_gen(struct fwd_stream *fs) + /* + * Retry if necessary + */ +- if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) { ++ if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) { + retry = 0; +- while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { ++ while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) { + rte_delay_us(burst_tx_delay_time); + nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, +- &pkts_burst[nb_tx], nb_rx - nb_tx); ++ &pkts_burst[nb_tx], nb_pkt - nb_tx); + } + } + fs->tx_packets += nb_tx; +@@ -206,9 +206,22 @@ pkt_burst_flow_gen(struct fwd_stream *fs) + get_end_cycles(fs, start_tsc); + } + ++static void ++flowgen_stream_init(struct fwd_stream *fs) ++{ ++ bool rx_stopped, tx_stopped; ++ ++ rx_stopped = ports[fs->rx_port].rxq[fs->rx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ tx_stopped = ports[fs->tx_port].txq[fs->tx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ fs->disabled = rx_stopped || tx_stopped; ++} ++ + struct fwd_engine flow_gen_engine = { + .fwd_mode_name = "flowgen", + .port_fwd_begin = NULL, + .port_fwd_end = NULL, ++ .stream_init = flowgen_stream_init, + .packet_fwd = pkt_burst_flow_gen, + }; +diff --git a/dpdk/app/test-pmd/icmpecho.c b/dpdk/app/test-pmd/icmpecho.c +index af6f7e7902..989609fa5b 100644 +--- a/dpdk/app/test-pmd/icmpecho.c ++++ b/dpdk/app/test-pmd/icmpecho.c +@@ -54,7 +54,7 @@ arp_op_name(uint16_t arp_op) + default: + break; + } +- return "Unkwown ARP op"; ++ return "Unknown ARP op"; + } + + static const char * +@@ -513,9 +513,22 @@ reply_to_icmp_echo_rqsts(struct fwd_stream *fs) + get_end_cycles(fs, start_tsc); + } + ++static void ++icmpecho_stream_init(struct fwd_stream *fs) ++{ ++ bool rx_stopped, tx_stopped; ++ ++ 
rx_stopped = ports[fs->rx_port].rxq[fs->rx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ tx_stopped = ports[fs->tx_port].txq[fs->tx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ fs->disabled = rx_stopped || tx_stopped; ++} ++ + struct fwd_engine icmp_echo_engine = { + .fwd_mode_name = "icmpecho", + .port_fwd_begin = NULL, + .port_fwd_end = NULL, ++ .stream_init = icmpecho_stream_init, + .packet_fwd = reply_to_icmp_echo_rqsts, + }; +diff --git a/dpdk/app/test-pmd/ieee1588fwd.c b/dpdk/app/test-pmd/ieee1588fwd.c +index e3b98e3e0c..5876aafa5b 100644 +--- a/dpdk/app/test-pmd/ieee1588fwd.c ++++ b/dpdk/app/test-pmd/ieee1588fwd.c +@@ -198,10 +198,11 @@ ieee1588_packet_fwd(struct fwd_stream *fs) + port_ieee1588_tx_timestamp_check(fs->rx_port); + } + +-static void ++static int + port_ieee1588_fwd_begin(portid_t pi) + { + rte_eth_timesync_enable(pi); ++ return 0; + } + + static void +@@ -210,9 +211,22 @@ port_ieee1588_fwd_end(portid_t pi) + rte_eth_timesync_disable(pi); + } + ++static void ++port_ieee1588_stream_init(struct fwd_stream *fs) ++{ ++ bool rx_stopped, tx_stopped; ++ ++ rx_stopped = ports[fs->rx_port].rxq[fs->rx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ tx_stopped = ports[fs->tx_port].txq[fs->tx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ fs->disabled = rx_stopped || tx_stopped; ++} ++ + struct fwd_engine ieee1588_fwd_engine = { + .fwd_mode_name = "ieee1588", + .port_fwd_begin = port_ieee1588_fwd_begin, + .port_fwd_end = port_ieee1588_fwd_end, ++ .stream_init = port_ieee1588_stream_init, + .packet_fwd = ieee1588_packet_fwd, + }; +diff --git a/dpdk/app/test-pmd/iofwd.c b/dpdk/app/test-pmd/iofwd.c +index 83d098adcb..de20d645b5 100644 +--- a/dpdk/app/test-pmd/iofwd.c ++++ b/dpdk/app/test-pmd/iofwd.c +@@ -89,9 +89,22 @@ pkt_burst_io_forward(struct fwd_stream *fs) + get_end_cycles(fs, start_tsc); + } + ++static void ++stream_init_forward(struct fwd_stream *fs) ++{ ++ bool rx_stopped, tx_stopped; ++ ++ rx_stopped = ports[fs->rx_port].rxq[fs->rx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ tx_stopped = ports[fs->tx_port].txq[fs->tx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ fs->disabled = rx_stopped || tx_stopped; ++} ++ + struct fwd_engine io_fwd_engine = { + .fwd_mode_name = "io", + .port_fwd_begin = NULL, + .port_fwd_end = NULL, ++ .stream_init = stream_init_forward, + .packet_fwd = pkt_burst_io_forward, + }; +diff --git a/dpdk/app/test-pmd/macfwd.c b/dpdk/app/test-pmd/macfwd.c +index 0568ea794d..f8f55023b8 100644 +--- a/dpdk/app/test-pmd/macfwd.c ++++ b/dpdk/app/test-pmd/macfwd.c +@@ -120,9 +120,22 @@ pkt_burst_mac_forward(struct fwd_stream *fs) + get_end_cycles(fs, start_tsc); + } + ++static void ++stream_init_mac_forward(struct fwd_stream *fs) ++{ ++ bool rx_stopped, tx_stopped; ++ ++ rx_stopped = ports[fs->rx_port].rxq[fs->rx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ tx_stopped = ports[fs->tx_port].txq[fs->tx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ fs->disabled = rx_stopped || tx_stopped; ++} ++ + struct fwd_engine mac_fwd_engine = { + .fwd_mode_name = "mac", + .port_fwd_begin = NULL, + .port_fwd_end = NULL, ++ .stream_init = stream_init_mac_forward, + .packet_fwd = pkt_burst_mac_forward, + }; +diff --git a/dpdk/app/test-pmd/macswap.c b/dpdk/app/test-pmd/macswap.c +index 310bca06af..ba9a148e1c 100644 +--- a/dpdk/app/test-pmd/macswap.c ++++ b/dpdk/app/test-pmd/macswap.c +@@ -98,9 +98,22 @@ pkt_burst_mac_swap(struct fwd_stream *fs) + get_end_cycles(fs, start_tsc); + } + ++static void ++stream_init_mac_swap(struct fwd_stream *fs) ++{ 
++ bool rx_stopped, tx_stopped; ++ ++ rx_stopped = ports[fs->rx_port].rxq[fs->rx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ tx_stopped = ports[fs->tx_port].txq[fs->tx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ fs->disabled = rx_stopped || tx_stopped; ++} ++ + struct fwd_engine mac_swap_engine = { + .fwd_mode_name = "macswap", + .port_fwd_begin = NULL, + .port_fwd_end = NULL, ++ .stream_init = stream_init_mac_swap, + .packet_fwd = pkt_burst_mac_swap, + }; +diff --git a/dpdk/app/test-pmd/meson.build b/dpdk/app/test-pmd/meson.build +index 7e9c7bdd69..4d584f7562 100644 +--- a/dpdk/app/test-pmd/meson.build ++++ b/dpdk/app/test-pmd/meson.build +@@ -25,20 +25,21 @@ sources = files('5tswap.c', + 'util.c') + + deps += ['ethdev', 'gro', 'gso', 'cmdline', 'metrics', 'meter', 'bus_pci'] +-if dpdk_conf.has('RTE_LIB_BITRATESTATS') +- deps += 'bitratestats' +-endif +-if dpdk_conf.has('RTE_LIB_PDUMP') +- deps += 'pdump' ++if dpdk_conf.has('RTE_CRYPTO_SCHEDULER') ++ deps += 'crypto_scheduler' + endif + if dpdk_conf.has('RTE_LIB_BITRATESTATS') + deps += 'bitratestats' + endif ++if dpdk_conf.has('RTE_LIB_BPF') ++ sources += files('bpf_cmd.c') ++ deps += 'bpf' ++endif + if dpdk_conf.has('RTE_LIB_LATENCYSTATS') + deps += 'latencystats' + endif +-if dpdk_conf.has('RTE_CRYPTO_SCHEDULER') +- deps += 'crypto_scheduler' ++if dpdk_conf.has('RTE_LIB_PDUMP') ++ deps += 'pdump' + endif + if dpdk_conf.has('RTE_NET_BOND') + deps += 'net_bond' +@@ -55,7 +56,3 @@ endif + if dpdk_conf.has('RTE_NET_DPAA') + deps += ['bus_dpaa', 'mempool_dpaa', 'net_dpaa'] + endif +-if dpdk_conf.has('RTE_LIB_BPF') +- sources += files('bpf_cmd.c') +- deps += 'bpf' +-endif +diff --git a/dpdk/app/test-pmd/noisy_vnf.c b/dpdk/app/test-pmd/noisy_vnf.c +index 382a4c2aae..a92e810190 100644 +--- a/dpdk/app/test-pmd/noisy_vnf.c ++++ b/dpdk/app/test-pmd/noisy_vnf.c +@@ -231,7 +231,7 @@ noisy_fwd_end(portid_t pi) + rte_free(noisy_cfg[pi]); + } + +-static void ++static int + noisy_fwd_begin(portid_t pi) + { + struct noisy_config *n; +@@ -273,11 +273,26 @@ noisy_fwd_begin(portid_t pi) + rte_exit(EXIT_FAILURE, + "--noisy-lkup-memory-size must be > 0\n"); + } ++ ++ return 0; ++} ++ ++static void ++stream_init_noisy_vnf(struct fwd_stream *fs) ++{ ++ bool rx_stopped, tx_stopped; ++ ++ rx_stopped = ports[fs->rx_port].rxq[fs->rx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ tx_stopped = ports[fs->tx_port].txq[fs->tx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++ fs->disabled = rx_stopped || tx_stopped; + } + + struct fwd_engine noisy_vnf_engine = { + .fwd_mode_name = "noisy", + .port_fwd_begin = noisy_fwd_begin, + .port_fwd_end = noisy_fwd_end, ++ .stream_init = stream_init_noisy_vnf, + .packet_fwd = pkt_burst_noisy_vnf, + }; +diff --git a/dpdk/app/test-pmd/parameters.c b/dpdk/app/test-pmd/parameters.c +index df5eb10d84..f9b5c189cb 100644 +--- a/dpdk/app/test-pmd/parameters.c ++++ b/dpdk/app/test-pmd/parameters.c +@@ -39,9 +39,6 @@ + #include + #include + #include +-#ifdef RTE_NET_BOND +-#include +-#endif + #include + + #include "testpmd.h" +@@ -49,29 +46,7 @@ + static void + usage(char* progname) + { +- printf("usage: %s [EAL options] -- " +-#ifdef RTE_LIB_CMDLINE +- "[--interactive|-i] " +- "[--cmdline-file=FILENAME] " +-#endif +- "[--help|-h] | [--auto-start|-a] | [" +- "--tx-first | --stats-period=PERIOD | " +- "--coremask=COREMASK --portmask=PORTMASK --numa " +- "--portlist=PORTLIST " +- "--mbuf-size= | --total-num-mbufs= | " +- "--nb-cores= | --nb-ports= | " +-#ifdef RTE_LIB_CMDLINE +- "--eth-peers-configfile= | " +- 
"--eth-peer=X,M:M:M:M:M:M | " +- "--tx-ip=SRC,DST | --tx-udp=PORT | " +-#endif +- "--pkt-filter-mode= |" +- "--rss-ip | --rss-udp | --rss-level-inner | --rss-level-outer |" +- "--rxpt= | --rxht= | --rxwt= |" +- " --rxfreet= | --txpt= | --txht= | --txwt= | --txfreet= | " +- "--txrst= | --tx-offloads= | | --rx-offloads= | " +- "--vxlan-gpe-port= | --geneve-parsed-port= | " +- "--record-core-cycles | --record-burst-stats]\n", ++ printf("\nUsage: %s [EAL options] -- [testpmd options]\n\n", + progname); + #ifdef RTE_LIB_CMDLINE + printf(" --interactive: run in interactive mode.\n"); +@@ -97,6 +72,7 @@ usage(char* progname) + printf(" --portlist=PORTLIST: list of forwarding ports\n"); + printf(" --numa: enable NUMA-aware allocation of RX/TX rings and of " + "RX memory buffers (mbufs).\n"); ++ printf(" --no-numa: disable NUMA-aware allocation.\n"); + printf(" --port-numa-config=(port,socket)[,(port,socket)]: " + "specify the socket on which the memory pool " + "used by the port will be allocated.\n"); +@@ -132,10 +108,11 @@ usage(char* progname) + "If the drop-queue doesn't exist, the packet is dropped. " + "By default drop-queue=127.\n"); + #ifdef RTE_LIB_LATENCYSTATS +- printf(" --latencystats=N: enable latency and jitter statistcs " ++ printf(" --latencystats=N: enable latency and jitter statistics " + "monitoring on forwarding lcore id N.\n"); + #endif + printf(" --disable-crc-strip: disable CRC stripping by hardware.\n"); ++ printf(" --enable-scatter: enable scattered Rx.\n"); + printf(" --enable-lro: enable large receive offload.\n"); + printf(" --enable-rx-cksum: enable rx hardware checksum offload.\n"); + printf(" --enable-rx-timestamp: enable rx hardware timestamp offload.\n"); +@@ -183,6 +160,8 @@ usage(char* progname) + printf(" --txpkts=X[,Y]*: set TX segment sizes" + " or total packet length.\n"); + printf(" --txonly-multi-flow: generate multiple flows in txonly mode\n"); ++ printf(" --tx-ip=src,dst: IP addresses in Tx-only mode\n"); ++ printf(" --tx-udp=src[,dst]: UDP ports in Tx-only mode\n"); + printf(" --disable-link-check: disable check on link status when " + "starting/stopping ports.\n"); + printf(" --disable-device-start: do not automatically start port\n"); +@@ -213,14 +192,14 @@ usage(char* progname) + printf(" --noisy-lkup-memory=N: allocate N MB of VNF memory\n"); + printf(" --noisy-lkup-num-writes=N: do N random writes per packet\n"); + printf(" --noisy-lkup-num-reads=N: do N random reads per packet\n"); +- printf(" --noisy-lkup-num-writes=N: do N random reads and writes per packet\n"); ++ printf(" --noisy-lkup-num-reads-writes=N: do N random reads and writes per packet\n"); + printf(" --no-iova-contig: mempool memory can be IOVA non contiguous. 
" + "valid only with --mp-alloc=anon\n"); + printf(" --rx-mq-mode=0xX: hexadecimal bitmask of RX mq mode can be " + "enabled\n"); + printf(" --record-core-cycles: enable measurement of CPU cycles.\n"); + printf(" --record-burst-stats: enable display of RX and TX bursts.\n"); +- printf(" --hairpin-mode=0xXX: bitmask set the hairpin port mode.\n " ++ printf(" --hairpin-mode=0xXX: bitmask set the hairpin port mode.\n" + " 0x10 - explicit Tx rule, 0x02 - hairpin ports paired\n" + " 0x01 - hairpin ports loop, 0x00 - hairpin port self\n"); + } +@@ -510,7 +489,6 @@ launch_args_parse(int argc, char** argv) + #endif + { "tx-first", 0, 0, 0 }, + { "stats-period", 1, 0, 0 }, +- { "ports", 1, 0, 0 }, + { "nb-cores", 1, 0, 0 }, + { "nb-ports", 1, 0, 0 }, + { "coremask", 1, 0, 0 }, +@@ -518,7 +496,7 @@ launch_args_parse(int argc, char** argv) + { "portlist", 1, 0, 0 }, + { "numa", 0, 0, 0 }, + { "no-numa", 0, 0, 0 }, +- { "mp-anon", 0, 0, 0 }, ++ { "mp-anon", 0, 0, 0 }, /* deprecated */ + { "port-numa-config", 1, 0, 0 }, + { "ring-numa-config", 1, 0, 0 }, + { "socket-num", 1, 0, 0 }, +@@ -630,7 +608,7 @@ launch_args_parse(int argc, char** argv) + case 0: /*long options */ + if (!strcmp(lgopts[opt_idx].name, "help")) { + usage(argv[0]); +- rte_exit(EXIT_SUCCESS, "Displayed help\n"); ++ exit(EXIT_SUCCESS); + } + #ifdef RTE_LIB_CMDLINE + if (!strcmp(lgopts[opt_idx].name, "interactive")) { +@@ -826,11 +804,12 @@ launch_args_parse(int argc, char** argv) + } + if (!strcmp(lgopts[opt_idx].name, "total-num-mbufs")) { + n = atoi(optarg); +- if (n > 1024) ++ if (n > MIN_TOTAL_NUM_MBUFS) + param_total_num_mbufs = (unsigned)n; + else + rte_exit(EXIT_FAILURE, +- "total-num-mbufs should be > 1024\n"); ++ "total-num-mbufs should be > %d\n", ++ MIN_TOTAL_NUM_MBUFS); + } + if (!strcmp(lgopts[opt_idx].name, "max-pkt-len")) { + n = atoi(optarg); +@@ -1359,7 +1338,7 @@ launch_args_parse(int argc, char** argv) + break; + case 'h': + usage(argv[0]); +- rte_exit(EXIT_SUCCESS, "Displayed help\n"); ++ exit(EXIT_SUCCESS); + break; + default: + usage(argv[0]); +diff --git a/dpdk/app/test-pmd/rxonly.c b/dpdk/app/test-pmd/rxonly.c +index c78fc4609a..83d0dcf670 100644 +--- a/dpdk/app/test-pmd/rxonly.c ++++ b/dpdk/app/test-pmd/rxonly.c +@@ -69,9 +69,17 @@ pkt_burst_receive(struct fwd_stream *fs) + get_end_cycles(fs, start_tsc); + } + ++static void ++stream_init_receive(struct fwd_stream *fs) ++{ ++ fs->disabled = ports[fs->rx_port].rxq[fs->rx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; ++} ++ + struct fwd_engine rx_only_engine = { + .fwd_mode_name = "rxonly", + .port_fwd_begin = NULL, + .port_fwd_end = NULL, ++ .stream_init = stream_init_receive, + .packet_fwd = pkt_burst_receive, + }; +diff --git a/dpdk/app/test-pmd/testpmd.c b/dpdk/app/test-pmd/testpmd.c +index 555852ae5e..c29f1f153c 100644 +--- a/dpdk/app/test-pmd/testpmd.c ++++ b/dpdk/app/test-pmd/testpmd.c +@@ -60,6 +60,9 @@ + #ifdef RTE_LIB_LATENCYSTATS + #include + #endif ++#ifdef RTE_NET_BOND ++#include ++#endif + + #include "testpmd.h" + +@@ -78,7 +81,13 @@ + #endif + + #define EXTMEM_HEAP_NAME "extmem" +-#define EXTBUF_ZONE_SIZE RTE_PGSIZE_2M ++/* ++ * Zone size with the malloc overhead (max of debug and release variants) ++ * must fit into the smallest supported hugepage size (2M), ++ * so that an IOVA-contiguous zone of this size can always be allocated ++ * if there are free 2M hugepages. ++ */ ++#define EXTBUF_ZONE_SIZE (RTE_PGSIZE_2M - 4 * RTE_CACHE_LINE_SIZE) + + uint16_t verbose_level = 0; /**< Silent by default. 
*/ + int testpmd_logtype; /**< Log type for testpmd logs */ +@@ -208,6 +217,7 @@ uint16_t stats_period; /**< Period to show statistics (disabled by default) */ + * option. Set flag to exit stats period loop after received SIGINT/SIGTERM. + */ + uint8_t f_quit; ++uint8_t cl_quit; /* Quit testpmd from cmdline. */ + + /* + * Configuration of packet segments used to scatter received packets +@@ -245,9 +255,6 @@ uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */ + /* current configuration is in DCB or not,0 means it is not in DCB mode */ + uint8_t dcb_config = 0; + +-/* Whether the dcb is in testing status */ +-uint8_t dcb_test = 0; +- + /* + * Configurable number of RX/TX queues. + */ +@@ -433,7 +440,7 @@ uint32_t bypass_timeout = RTE_PMD_IXGBE_BYPASS_TMT_OFF; + uint8_t latencystats_enabled; + + /* +- * Lcore ID to serive latency statistics. ++ * Lcore ID to service latency statistics. + */ + lcoreid_t latencystats_lcore_id = -1; + +@@ -929,12 +936,11 @@ setup_extbuf(uint32_t nb_mbufs, uint16_t mbuf_sz, unsigned int socket_id, + ext_num = 0; + break; + } +- mz = rte_memzone_reserve_aligned(mz_name, EXTBUF_ZONE_SIZE, +- socket_id, +- RTE_MEMZONE_IOVA_CONTIG | +- RTE_MEMZONE_1GB | +- RTE_MEMZONE_SIZE_HINT_ONLY, +- EXTBUF_ZONE_SIZE); ++ mz = rte_memzone_reserve(mz_name, EXTBUF_ZONE_SIZE, ++ socket_id, ++ RTE_MEMZONE_IOVA_CONTIG | ++ RTE_MEMZONE_1GB | ++ RTE_MEMZONE_SIZE_HINT_ONLY); + if (mz == NULL) { + /* + * The caller exits on external buffer creation +@@ -1401,23 +1407,70 @@ check_nb_hairpinq(queueid_t hairpinq) + return 0; + } + ++static void ++init_config_port_offloads(portid_t pid, uint32_t socket_id) ++{ ++ struct rte_port *port = &ports[pid]; ++ uint16_t data_size; ++ int ret; ++ int i; ++ ++ port->dev_conf.txmode = tx_mode; ++ port->dev_conf.rxmode = rx_mode; ++ ++ ret = eth_dev_info_get_print_err(pid, &port->dev_info); ++ if (ret != 0) ++ rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n"); ++ ++ ret = update_jumbo_frame_offload(pid); ++ if (ret != 0) ++ printf("Updating jumbo frame offload failed for port %u\n", ++ pid); ++ ++ if (!(port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)) ++ port->dev_conf.txmode.offloads &= ++ ~DEV_TX_OFFLOAD_MBUF_FAST_FREE; ++ ++ /* Apply Rx offloads configuration */ ++ for (i = 0; i < port->dev_info.max_rx_queues; i++) ++ port->rxq[i].conf.offloads = port->dev_conf.rxmode.offloads; ++ /* Apply Tx offloads configuration */ ++ for (i = 0; i < port->dev_info.max_tx_queues; i++) ++ port->txq[i].conf.offloads = port->dev_conf.txmode.offloads; ++ ++ /* set flag to initialize port/queue */ ++ port->need_reconfig = 1; ++ port->need_reconfig_queues = 1; ++ port->socket_id = socket_id; ++ port->tx_metadata = 0; ++ ++ /* ++ * Check for maximum number of segments per MTU. ++ * Accordingly update the mbuf data size. 
++ */ ++ if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX && ++ port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) { ++ data_size = rx_mode.max_rx_pkt_len / ++ port->dev_info.rx_desc_lim.nb_mtu_seg_max; ++ ++ if ((data_size + RTE_PKTMBUF_HEADROOM) > mbuf_data_size[0]) { ++ mbuf_data_size[0] = data_size + RTE_PKTMBUF_HEADROOM; ++ TESTPMD_LOG(WARNING, ++ "Configured mbuf size of the first segment %hu\n", ++ mbuf_data_size[0]); ++ } ++ } ++} ++ + static void + init_config(void) + { + portid_t pid; +- struct rte_port *port; + struct rte_mempool *mbp; + unsigned int nb_mbuf_per_pool; + lcoreid_t lc_id; +- uint8_t port_per_socket[RTE_MAX_NUMA_NODES]; + struct rte_gro_param gro_param; + uint32_t gso_types; +- uint16_t data_size; +- bool warning = 0; +- int k; +- int ret; +- +- memset(port_per_socket,0,RTE_MAX_NUMA_NODES); + + /* Configuration of logical cores. */ + fwd_lcores = rte_zmalloc("testpmd: fwd_lcores", +@@ -1439,30 +1492,12 @@ init_config(void) + } + + RTE_ETH_FOREACH_DEV(pid) { +- port = &ports[pid]; +- /* Apply default TxRx configuration for all ports */ +- port->dev_conf.txmode = tx_mode; +- port->dev_conf.rxmode = rx_mode; +- +- ret = eth_dev_info_get_print_err(pid, &port->dev_info); +- if (ret != 0) +- rte_exit(EXIT_FAILURE, +- "rte_eth_dev_info_get() failed\n"); ++ uint32_t socket_id; + +- ret = update_jumbo_frame_offload(pid); +- if (ret != 0) +- printf("Updating jumbo frame offload failed for port %u\n", +- pid); +- +- if (!(port->dev_info.tx_offload_capa & +- DEV_TX_OFFLOAD_MBUF_FAST_FREE)) +- port->dev_conf.txmode.offloads &= +- ~DEV_TX_OFFLOAD_MBUF_FAST_FREE; + if (numa_support) { +- if (port_numa[pid] != NUMA_NO_CONFIG) +- port_per_socket[port_numa[pid]]++; +- else { +- uint32_t socket_id = rte_eth_dev_socket_id(pid); ++ socket_id = port_numa[pid]; ++ if (port_numa[pid] == NUMA_NO_CONFIG) { ++ socket_id = rte_eth_dev_socket_id(pid); + + /* + * if socket_id is invalid, +@@ -1470,45 +1505,14 @@ init_config(void) + */ + if (check_socket_id(socket_id) < 0) + socket_id = socket_ids[0]; +- port_per_socket[socket_id]++; +- } +- } +- +- /* Apply Rx offloads configuration */ +- for (k = 0; k < port->dev_info.max_rx_queues; k++) +- port->rx_conf[k].offloads = +- port->dev_conf.rxmode.offloads; +- /* Apply Tx offloads configuration */ +- for (k = 0; k < port->dev_info.max_tx_queues; k++) +- port->tx_conf[k].offloads = +- port->dev_conf.txmode.offloads; +- +- /* set flag to initialize port/queue */ +- port->need_reconfig = 1; +- port->need_reconfig_queues = 1; +- port->tx_metadata = 0; +- +- /* Check for maximum number of segments per MTU. Accordingly +- * update the mbuf data size. +- */ +- if (port->dev_info.rx_desc_lim.nb_mtu_seg_max != UINT16_MAX && +- port->dev_info.rx_desc_lim.nb_mtu_seg_max != 0) { +- data_size = rx_mode.max_rx_pkt_len / +- port->dev_info.rx_desc_lim.nb_mtu_seg_max; +- +- if ((data_size + RTE_PKTMBUF_HEADROOM) > +- mbuf_data_size[0]) { +- mbuf_data_size[0] = data_size + +- RTE_PKTMBUF_HEADROOM; +- warning = 1; + } ++ } else { ++ socket_id = (socket_num == UMA_NO_CONFIG) ? ++ 0 : socket_num; + } ++ /* Apply default TxRx configuration for all ports */ ++ init_config_port_offloads(pid, socket_id); + } +- +- if (warning) +- TESTPMD_LOG(WARNING, +- "Configured mbuf size of the first segment %hu\n", +- mbuf_data_size[0]); + /* + * Create pools of mbuf. 
+ * If NUMA support is disabled, create a single pool of mbuf in +@@ -1595,25 +1599,11 @@ init_config(void) + void + reconfig(portid_t new_port_id, unsigned socket_id) + { +- struct rte_port *port; +- int ret; +- + /* Reconfiguration of Ethernet ports. */ +- port = &ports[new_port_id]; +- +- ret = eth_dev_info_get_print_err(new_port_id, &port->dev_info); +- if (ret != 0) +- return; +- +- /* set flag to initialize port/queue */ +- port->need_reconfig = 1; +- port->need_reconfig_queues = 1; +- port->socket_id = socket_id; +- ++ init_config_port_offloads(new_port_id, socket_id); + init_port_config(); + } + +- + int + init_fwd_streams(void) + { +@@ -1727,7 +1717,7 @@ pkt_burst_stats_display(const char *rx_tx, struct pkt_burst_stats *pbs) + pktnb_stats[0] = 0; + + /* Find the next 2 burst sizes with highest occurrences. */ +- for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST; nb_pkt++) { ++ for (nb_pkt = 1; nb_pkt < MAX_PKT_BURST + 1; nb_pkt++) { + nb_burst = pbs->pkt_burst_spread[nb_pkt]; + + if (nb_burst == 0) +@@ -1828,6 +1818,7 @@ fwd_stats_display(void) + struct rte_port *port; + streamid_t sm_id; + portid_t pt_id; ++ int ret; + int i; + + memset(ports_stats, 0, sizeof(ports_stats)); +@@ -1857,7 +1848,13 @@ fwd_stats_display(void) + pt_id = fwd_ports_ids[i]; + port = &ports[pt_id]; + +- rte_eth_stats_get(pt_id, &stats); ++ ret = rte_eth_stats_get(pt_id, &stats); ++ if (ret != 0) { ++ fprintf(stderr, ++ "%s: Error: failed to get stats (port %u): %d", ++ __func__, pt_id, ret); ++ continue; ++ } + stats.ipackets -= port->stats.ipackets; + stats.opackets -= port->stats.opackets; + stats.ibytes -= port->stats.ibytes; +@@ -1949,11 +1946,16 @@ fwd_stats_reset(void) + { + streamid_t sm_id; + portid_t pt_id; ++ int ret; + int i; + + for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) { + pt_id = fwd_ports_ids[i]; +- rte_eth_stats_get(pt_id, &ports[pt_id].stats); ++ ret = rte_eth_stats_get(pt_id, &ports[pt_id].stats); ++ if (ret != 0) ++ fprintf(stderr, ++ "%s: Error: failed to clear stats (port %u):%d", ++ __func__, pt_id, ret); + } + for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) { + struct fwd_stream *fs = fwd_streams[sm_id]; +@@ -1991,6 +1993,12 @@ flush_fwd_rx_queues(void) + for (rxp = 0; rxp < cur_fwd_config.nb_fwd_ports; rxp++) { + for (rxq = 0; rxq < nb_rxq; rxq++) { + port_id = fwd_ports_ids[rxp]; ++ ++ /* Polling stopped queues is prohibited. 
*/ ++ if (ports[port_id].rxq[rxq].state == ++ RTE_ETH_QUEUE_STATE_STOPPED) ++ continue; ++ + /** + * testpmd can stuck in the below do while loop + * if rte_eth_rx_burst() always returns nonzero +@@ -2036,7 +2044,8 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd) + nb_fs = fc->stream_nb; + do { + for (sm_id = 0; sm_id < nb_fs; sm_id++) +- (*pkt_fwd)(fsm[sm_id]); ++ if (!fsm[sm_id]->disabled) ++ (*pkt_fwd)(fsm[sm_id]); + #ifdef RTE_LIB_BITRATESTATS + if (bitrate_enabled != 0 && + bitrate_lcore_id == rte_lcore_id()) { +@@ -2092,16 +2101,10 @@ run_one_txonly_burst_on_core(void *fwd_arg) + static void + launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore) + { +- port_fwd_begin_t port_fwd_begin; + unsigned int i; + unsigned int lc_id; + int diag; + +- port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin; +- if (port_fwd_begin != NULL) { +- for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) +- (*port_fwd_begin)(fwd_ports_ids[i]); +- } + for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) { + lc_id = fwd_lcores_cpuids[i]; + if ((interactive == 0) || (lc_id != rte_lcore_id())) { +@@ -2123,9 +2126,8 @@ start_packet_forwarding(int with_tx_first) + { + port_fwd_begin_t port_fwd_begin; + port_fwd_end_t port_fwd_end; +- struct rte_port *port; ++ stream_init_t stream_init = cur_fwd_eng->stream_init; + unsigned int i; +- portid_t pt_id; + + if (strcmp(cur_fwd_eng->fwd_mode_name, "rxonly") == 0 && !nb_rxq) + rte_exit(EXIT_FAILURE, "rxq are 0, cannot use rxonly fwd mode\n"); +@@ -2149,26 +2151,37 @@ start_packet_forwarding(int with_tx_first) + return; + } + ++ fwd_config_setup(); + +- if(dcb_test) { +- for (i = 0; i < nb_fwd_ports; i++) { +- pt_id = fwd_ports_ids[i]; +- port = &ports[pt_id]; +- if (!port->dcb_flag) { +- printf("In DCB mode, all forwarding ports must " +- "be configured in this mode.\n"); ++ if (stream_init != NULL) ++ for (i = 0; i < cur_fwd_config.nb_fwd_streams; i++) ++ stream_init(fwd_streams[i]); ++ ++ port_fwd_begin = cur_fwd_config.fwd_eng->port_fwd_begin; ++ if (port_fwd_begin != NULL) { ++ for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) { ++ if (port_fwd_begin(fwd_ports_ids[i])) { ++ fprintf(stderr, ++ "Packet forwarding is not ready\n"); + return; + } + } +- if (nb_fwd_lcores == 1) { +- printf("In DCB mode,the nb forwarding cores " +- "should be larger than 1.\n"); +- return; ++ } ++ ++ if (with_tx_first) { ++ port_fwd_begin = tx_only_engine.port_fwd_begin; ++ if (port_fwd_begin != NULL) { ++ for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) { ++ if (port_fwd_begin(fwd_ports_ids[i])) { ++ fprintf(stderr, ++ "Packet forwarding is not ready\n"); ++ return; ++ } ++ } + } + } +- test_done = 0; + +- fwd_config_setup(); ++ test_done = 0; + + if(!no_flush_rx) + flush_fwd_rx_queues(); +@@ -2178,11 +2191,6 @@ start_packet_forwarding(int with_tx_first) + + fwd_stats_reset(); + if (with_tx_first) { +- port_fwd_begin = tx_only_engine.port_fwd_begin; +- if (port_fwd_begin != NULL) { +- for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) +- (*port_fwd_begin)(fwd_ports_ids[i]); +- } + while (with_tx_first--) { + launch_packet_forwarding( + run_one_txonly_burst_on_core); +@@ -2402,8 +2410,10 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, + { + union rte_eth_rxseg rx_useg[MAX_SEGS_BUFFER_SPLIT] = {}; + unsigned int i, mp_n; +- int ret; ++ int ret = -1; + ++ if (mbuf_data_size_n < 1) ++ goto exit; + if (rx_pkt_nb_segs <= 1 || + (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) == 0) { + rx_conf->rx_seg = NULL; +@@ -2411,7 +2421,7 @@ rx_queue_setup(uint16_t 
port_id, uint16_t rx_queue_id, + ret = rte_eth_rx_queue_setup(port_id, rx_queue_id, + nb_rx_desc, socket_id, + rx_conf, mp); +- return ret; ++ goto exit; + } + for (i = 0; i < rx_pkt_nb_segs; i++) { + struct rte_eth_rxseg_split *rx_seg = &rx_useg[i].split; +@@ -2420,7 +2430,7 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, + * Use last valid pool for the segments with number + * exceeding the pool index. + */ +- mp_n = (i > mbuf_data_size_n) ? mbuf_data_size_n - 1 : i; ++ mp_n = (i >= mbuf_data_size_n) ? mbuf_data_size_n - 1 : i; + mpx = mbuf_pool_find(socket_id, mp_n); + /* Handle zero as mbuf data buffer size. */ + rx_seg->length = rx_pkt_seg_lengths[i] ? +@@ -2436,9 +2446,45 @@ rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, + socket_id, rx_conf, NULL); + rx_conf->rx_seg = NULL; + rx_conf->rx_nseg = 0; ++exit: ++ ports[port_id].rxq[rx_queue_id].state = rx_conf->rx_deferred_start ? ++ RTE_ETH_QUEUE_STATE_STOPPED : ++ RTE_ETH_QUEUE_STATE_STARTED; + return ret; + } + ++static int ++change_bonding_slave_port_status(portid_t bond_pid, bool is_stop) ++{ ++#ifdef RTE_NET_BOND ++ ++ portid_t slave_pids[RTE_MAX_ETHPORTS]; ++ struct rte_port *port; ++ int num_slaves; ++ portid_t slave_pid; ++ int i; ++ ++ num_slaves = rte_eth_bond_slaves_get(bond_pid, slave_pids, ++ RTE_MAX_ETHPORTS); ++ if (num_slaves < 0) { ++ fprintf(stderr, "Failed to get slave list for port = %u\n", ++ bond_pid); ++ return num_slaves; ++ } ++ ++ for (i = 0; i < num_slaves; i++) { ++ slave_pid = slave_pids[i]; ++ port = &ports[slave_pid]; ++ port->port_status = ++ is_stop ? RTE_PORT_STOPPED : RTE_PORT_STARTED; ++ } ++#else ++ RTE_SET_USED(bond_pid); ++ RTE_SET_USED(is_stop); ++#endif ++ return 0; ++} ++ + int + start_port(portid_t pid) + { +@@ -2452,18 +2498,22 @@ start_port(portid_t pid) + int peer_pi; + queueid_t qi; + struct rte_port *port; +- struct rte_ether_addr mac_addr; + struct rte_eth_hairpin_cap cap; + + if (port_id_is_invalid(pid, ENABLED_WARN)) + return 0; + +- if(dcb_config) +- dcb_test = 1; + RTE_ETH_FOREACH_DEV(pi) { + if (pid != pi && pid != (portid_t)RTE_PORT_ALL) + continue; + ++ if (port_is_bonding_slave(pi)) { ++ fprintf(stderr, ++ "Please remove port %d from bonded device.\n", ++ pi); ++ continue; ++ } ++ + need_check_link_status = 0; + port = &ports[pi]; + if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_STOPPED, +@@ -2511,20 +2561,28 @@ start_port(portid_t pid) + port->need_reconfig_queues = 0; + /* setup tx queues */ + for (qi = 0; qi < nb_txq; qi++) { ++ struct rte_eth_txconf *conf = ++ &port->txq[qi].conf; ++ + if ((numa_support) && + (txring_numa[pi] != NUMA_NO_CONFIG)) + diag = rte_eth_tx_queue_setup(pi, qi, + port->nb_tx_desc[qi], + txring_numa[pi], +- &(port->tx_conf[qi])); ++ &(port->txq[qi].conf)); + else + diag = rte_eth_tx_queue_setup(pi, qi, + port->nb_tx_desc[qi], + port->socket_id, +- &(port->tx_conf[qi])); ++ &(port->txq[qi].conf)); + +- if (diag == 0) ++ if (diag == 0) { ++ port->txq[qi].state = ++ conf->tx_deferred_start ? 
++ RTE_ETH_QUEUE_STATE_STOPPED : ++ RTE_ETH_QUEUE_STATE_STARTED; + continue; ++ } + + /* Fail to setup tx queue, return */ + if (rte_atomic16_cmpset(&(port->port_status), +@@ -2556,7 +2614,7 @@ start_port(portid_t pid) + diag = rx_queue_setup(pi, qi, + port->nb_rx_desc[qi], + rxring_numa[pi], +- &(port->rx_conf[qi]), ++ &(port->rxq[qi].conf), + mp); + } else { + struct rte_mempool *mp = +@@ -2572,7 +2630,7 @@ start_port(portid_t pid) + diag = rx_queue_setup(pi, qi, + port->nb_rx_desc[qi], + port->socket_id, +- &(port->rx_conf[qi]), ++ &(port->rxq[qi].conf), + mp); + } + if (diag == 0) +@@ -2618,16 +2676,28 @@ start_port(portid_t pid) + "stopped\n", pi); + continue; + } ++ /* ++ * Starting a bonded port also starts all slaves under the ++ * bonded device. So if this port is bond device, we need ++ * to modify the port status of these slaves. ++ */ ++ if (port->bond_flag == 1) { ++ if (change_bonding_slave_port_status(pi, false) != 0) ++ continue; ++ } + + if (rte_atomic16_cmpset(&(port->port_status), + RTE_PORT_HANDLING, RTE_PORT_STARTED) == 0) + printf("Port %d can not be set into started\n", pi); + +- if (eth_macaddr_get_print_err(pi, &mac_addr) == 0) ++ if (eth_macaddr_get_print_err(pi, &port->eth_addr) == 0) + printf("Port %d: %02X:%02X:%02X:%02X:%02X:%02X\n", pi, +- mac_addr.addr_bytes[0], mac_addr.addr_bytes[1], +- mac_addr.addr_bytes[2], mac_addr.addr_bytes[3], +- mac_addr.addr_bytes[4], mac_addr.addr_bytes[5]); ++ port->eth_addr.addr_bytes[0], ++ port->eth_addr.addr_bytes[1], ++ port->eth_addr.addr_bytes[2], ++ port->eth_addr.addr_bytes[3], ++ port->eth_addr.addr_bytes[4], ++ port->eth_addr.addr_bytes[5]); + + /* at least one port started, need checking link status */ + need_check_link_status = 1; +@@ -2697,11 +2767,6 @@ stop_port(portid_t pid) + portid_t peer_pl[RTE_MAX_ETHPORTS]; + int peer_pi; + +- if (dcb_test) { +- dcb_test = 0; +- dcb_config = 0; +- } +- + if (port_id_is_invalid(pid, ENABLED_WARN)) + return; + +@@ -2748,6 +2813,17 @@ stop_port(portid_t pid) + if (rte_eth_dev_stop(pi) != 0) + RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n", + pi); ++ /* ++ * Stopping a bonded port also stops all slaves under the bonded ++ * device. So if this port is bond device, we need to modify the ++ * port status of these slaves. ++ */ ++ if (port->bond_flag == 1) { ++ if (change_bonding_slave_port_status(pi, true) != 0) { ++ RTE_LOG(ERR, EAL, "Fail to change bonding slave port status %u\n", ++ pi); ++ } ++ } + + if (rte_atomic16_cmpset(&(port->port_status), + RTE_PORT_HANDLING, RTE_PORT_STOPPED) == 0) +@@ -2782,11 +2858,39 @@ remove_invalid_ports(void) + nb_cfg_ports = nb_fwd_ports; + } + ++static void ++clear_bonding_slave_device(portid_t *slave_pids, uint16_t num_slaves) ++{ ++ struct rte_port *port; ++ portid_t slave_pid; ++ uint16_t i; ++ ++ for (i = 0; i < num_slaves; i++) { ++ slave_pid = slave_pids[i]; ++ if (port_is_started(slave_pid) == 1) { ++ if (rte_eth_dev_stop(slave_pid) != 0) ++ fprintf(stderr, "rte_eth_dev_stop failed for port %u\n", ++ slave_pid); ++ ++ port = &ports[slave_pid]; ++ port->port_status = RTE_PORT_STOPPED; ++ } ++ ++ clear_port_slave_flag(slave_pid); ++ ++ /* Close slave device when testpmd quit or is killed. 
*/ ++ if (cl_quit == 1 || f_quit == 1) ++ rte_eth_dev_close(slave_pid); ++ } ++} ++ + void + close_port(portid_t pid) + { + portid_t pi; + struct rte_port *port; ++ portid_t slave_pids[RTE_MAX_ETHPORTS]; ++ int num_slaves = 0; + + if (port_id_is_invalid(pid, ENABLED_WARN)) + return; +@@ -2814,8 +2918,21 @@ close_port(portid_t pid) + continue; + } + ++ mcast_addr_pool_destroy(pi); + port_flow_flush(pi); ++#ifdef RTE_NET_BOND ++ if (port->bond_flag == 1) ++ num_slaves = rte_eth_bond_slaves_get(pi, ++ slave_pids, RTE_MAX_ETHPORTS); ++#endif + rte_eth_dev_close(pi); ++ /* ++ * If this port is bonded device, all slaves under the ++ * device need to be removed or closed. ++ */ ++ if (port->bond_flag == 1 && num_slaves > 0) ++ clear_bonding_slave_device(slave_pids, ++ num_slaves); + } + + remove_invalid_ports(); +@@ -3298,51 +3415,51 @@ rxtx_port_config(struct rte_port *port) + uint64_t offloads; + + for (qid = 0; qid < nb_rxq; qid++) { +- offloads = port->rx_conf[qid].offloads; +- port->rx_conf[qid] = port->dev_info.default_rxconf; ++ offloads = port->rxq[qid].conf.offloads; ++ port->rxq[qid].conf = port->dev_info.default_rxconf; + if (offloads != 0) +- port->rx_conf[qid].offloads = offloads; ++ port->rxq[qid].conf.offloads = offloads; + + /* Check if any Rx parameters have been passed */ + if (rx_pthresh != RTE_PMD_PARAM_UNSET) +- port->rx_conf[qid].rx_thresh.pthresh = rx_pthresh; ++ port->rxq[qid].conf.rx_thresh.pthresh = rx_pthresh; + + if (rx_hthresh != RTE_PMD_PARAM_UNSET) +- port->rx_conf[qid].rx_thresh.hthresh = rx_hthresh; ++ port->rxq[qid].conf.rx_thresh.hthresh = rx_hthresh; + + if (rx_wthresh != RTE_PMD_PARAM_UNSET) +- port->rx_conf[qid].rx_thresh.wthresh = rx_wthresh; ++ port->rxq[qid].conf.rx_thresh.wthresh = rx_wthresh; + + if (rx_free_thresh != RTE_PMD_PARAM_UNSET) +- port->rx_conf[qid].rx_free_thresh = rx_free_thresh; ++ port->rxq[qid].conf.rx_free_thresh = rx_free_thresh; + + if (rx_drop_en != RTE_PMD_PARAM_UNSET) +- port->rx_conf[qid].rx_drop_en = rx_drop_en; ++ port->rxq[qid].conf.rx_drop_en = rx_drop_en; + + port->nb_rx_desc[qid] = nb_rxd; + } + + for (qid = 0; qid < nb_txq; qid++) { +- offloads = port->tx_conf[qid].offloads; +- port->tx_conf[qid] = port->dev_info.default_txconf; ++ offloads = port->txq[qid].conf.offloads; ++ port->txq[qid].conf = port->dev_info.default_txconf; + if (offloads != 0) +- port->tx_conf[qid].offloads = offloads; ++ port->txq[qid].conf.offloads = offloads; + + /* Check if any Tx parameters have been passed */ + if (tx_pthresh != RTE_PMD_PARAM_UNSET) +- port->tx_conf[qid].tx_thresh.pthresh = tx_pthresh; ++ port->txq[qid].conf.tx_thresh.pthresh = tx_pthresh; + + if (tx_hthresh != RTE_PMD_PARAM_UNSET) +- port->tx_conf[qid].tx_thresh.hthresh = tx_hthresh; ++ port->txq[qid].conf.tx_thresh.hthresh = tx_hthresh; + + if (tx_wthresh != RTE_PMD_PARAM_UNSET) +- port->tx_conf[qid].tx_thresh.wthresh = tx_wthresh; ++ port->txq[qid].conf.tx_thresh.wthresh = tx_wthresh; + + if (tx_rs_thresh != RTE_PMD_PARAM_UNSET) +- port->tx_conf[qid].tx_rs_thresh = tx_rs_thresh; ++ port->txq[qid].conf.tx_rs_thresh = tx_rs_thresh; + + if (tx_free_thresh != RTE_PMD_PARAM_UNSET) +- port->tx_conf[qid].tx_free_thresh = tx_free_thresh; ++ port->txq[qid].conf.tx_free_thresh = tx_free_thresh; + + port->nb_tx_desc[qid] = nb_txd; + } +@@ -3401,9 +3518,9 @@ update_jumbo_frame_offload(portid_t portid) + /* Apply JUMBO_FRAME offload configuration to Rx queue(s) */ + for (qid = 0; qid < port->dev_info.nb_rx_queues; qid++) { + if (on) +- port->rx_conf[qid].offloads |= 
DEV_RX_OFFLOAD_JUMBO_FRAME; ++ port->rxq[qid].conf.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; + else +- port->rx_conf[qid].offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME; ++ port->rxq[qid].conf.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME; + } + } + +@@ -3603,18 +3720,21 @@ init_port_dcb_config(portid_t pid, + + rte_port = &ports[pid]; + +- memset(&port_conf, 0, sizeof(struct rte_eth_conf)); +- /* Enter DCB configuration status */ +- dcb_config = 1; +- +- port_conf.rxmode = rte_port->dev_conf.rxmode; +- port_conf.txmode = rte_port->dev_conf.txmode; ++ /* retain the original device configuration. */ ++ memcpy(&port_conf, &rte_port->dev_conf, sizeof(struct rte_eth_conf)); + + /*set configuration of DCB in vt mode and DCB in non-vt mode*/ + retval = get_eth_dcb_conf(pid, &port_conf, dcb_mode, num_tcs, pfc_en); + if (retval < 0) + return retval; + port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER; ++ /* remove RSS HASH offload for DCB in vt mode */ ++ if (port_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_DCB) { ++ port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_RSS_HASH; ++ for (i = 0; i < nb_rxq; i++) ++ rte_port->rxq[i].conf.offloads &= ++ ~DEV_RX_OFFLOAD_RSS_HASH; ++ } + + /* re-configure the device . */ + retval = rte_eth_dev_configure(pid, nb_rxq, nb_rxq, &port_conf); +@@ -3673,6 +3793,9 @@ init_port_dcb_config(portid_t pid, + + rte_port->dcb_flag = 1; + ++ /* Enter DCB configuration status */ ++ dcb_config = 1; ++ + return 0; + } + +diff --git a/dpdk/app/test-pmd/testpmd.h b/dpdk/app/test-pmd/testpmd.h +index 2f8f5a92e4..d21d8501d5 100644 +--- a/dpdk/app/test-pmd/testpmd.h ++++ b/dpdk/app/test-pmd/testpmd.h +@@ -24,6 +24,8 @@ + #define RTE_PORT_CLOSED (uint16_t)2 + #define RTE_PORT_HANDLING (uint16_t)3 + ++extern uint8_t cl_quit; ++ + /* + * It is used to allocate the memory for hash key. + * The hash key size is NIC dependent. +@@ -64,6 +66,8 @@ + #define NUMA_NO_CONFIG 0xFF + #define UMA_NO_CONFIG 0xFF + ++#define MIN_TOTAL_NUM_MBUFS 1024 ++ + typedef uint8_t lcoreid_t; + typedef uint16_t portid_t; + typedef uint16_t queueid_t; +@@ -92,7 +96,7 @@ enum { + * that are recorded for each forwarding stream. + */ + struct pkt_burst_stats { +- unsigned int pkt_burst_spread[MAX_PKT_BURST]; ++ unsigned int pkt_burst_spread[MAX_PKT_BURST + 1]; + }; + + /** Information for a given RSS type. */ +@@ -126,6 +130,7 @@ struct fwd_stream { + portid_t tx_port; /**< forwarding port of received packets */ + queueid_t tx_queue; /**< TX queue to send forwarded packets */ + streamid_t peer_addr; /**< index of peer ethernet address of packets */ ++ bool disabled; /**< the stream is disabled and should not run */ + + unsigned int retry_enabled; + +@@ -137,6 +142,7 @@ struct fwd_stream { + uint64_t rx_bad_l4_csum ; /**< received packets has bad l4 checksum */ + uint64_t rx_bad_outer_l4_csum; + /**< received packets has bad outer l4 checksum */ ++ uint64_t ts_skew; /**< TX scheduling timestamp */ + unsigned int gro_times; /**< GRO operation times */ + uint64_t core_cycles; /**< used for RX and TX processing */ + struct pkt_burst_stats rx_burst_stats; +@@ -192,6 +198,18 @@ struct tunnel_ops { + uint32_t items:1; + }; + ++/** RX queue configuration and state. */ ++struct port_rxqueue { ++ struct rte_eth_rxconf conf; ++ uint8_t state; /**< RTE_ETH_QUEUE_STATE_* value. */ ++}; ++ ++/** TX queue configuration and state. */ ++struct port_txqueue { ++ struct rte_eth_txconf conf; ++ uint8_t state; /**< RTE_ETH_QUEUE_STATE_* value. */ ++}; ++ + /** + * The data structure associated with each port. 
+ */ +@@ -214,11 +232,12 @@ struct rte_port { + uint8_t dcb_flag; /**< enable dcb */ + uint16_t nb_rx_desc[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue rx desc number */ + uint16_t nb_tx_desc[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue tx desc number */ +- struct rte_eth_rxconf rx_conf[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue rx configuration */ +- struct rte_eth_txconf tx_conf[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue tx configuration */ ++ struct port_rxqueue rxq[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue Rx config and state */ ++ struct port_txqueue txq[RTE_MAX_QUEUES_PER_PORT+1]; /**< per queue Tx config and state */ + struct rte_ether_addr *mc_addr_pool; /**< pool of multicast addrs */ + uint32_t mc_addr_nb; /**< nb. of addr. in mc_addr_pool */ +- uint8_t slave_flag; /**< bonding slave port */ ++ uint8_t slave_flag : 1, /**< bonding slave port */ ++ bond_flag : 1; /**< port is bond device */ + struct port_flow *flow_list; /**< Associated flows. */ + struct port_shared_action *actions_list; + /**< Associated shared actions. */ +@@ -266,14 +285,16 @@ struct fwd_lcore { + * Forwards packets unchanged on the same port. + * Check that sent IEEE1588 PTP packets are timestamped by the hardware. + */ +-typedef void (*port_fwd_begin_t)(portid_t pi); ++typedef int (*port_fwd_begin_t)(portid_t pi); + typedef void (*port_fwd_end_t)(portid_t pi); ++typedef void (*stream_init_t)(struct fwd_stream *fs); + typedef void (*packet_fwd_t)(struct fwd_stream *fs); + + struct fwd_engine { + const char *fwd_mode_name; /**< Forwarding mode name. */ + port_fwd_begin_t port_fwd_begin; /**< NULL if nothing special to do. */ + port_fwd_end_t port_fwd_end; /**< NULL if nothing special to do. */ ++ stream_init_t stream_init; /**< NULL if nothing special to do. */ + packet_fwd_t packet_fwd; /**< Mandatory. 
*/ + }; + +@@ -423,7 +444,6 @@ extern uint64_t noisy_lkup_num_reads; + extern uint64_t noisy_lkup_num_reads_writes; + + extern uint8_t dcb_config; +-extern uint8_t dcb_test; + + extern uint32_t mbuf_data_size_n; + extern uint16_t mbuf_data_size[MAX_SEGS_BUFFER_SPLIT]; +@@ -821,6 +841,7 @@ int port_flow_create(portid_t port_id, + int port_shared_action_query(portid_t port_id, uint32_t id); + void update_age_action_context(const struct rte_flow_action *actions, + struct port_flow *pf); ++int mcast_addr_pool_destroy(portid_t port_id); + int port_flow_destroy(portid_t port_id, uint32_t n, const uint32_t *rule); + int port_flow_flush(portid_t port_id); + int port_flow_dump(portid_t port_id, const char *file_name); +@@ -879,7 +900,7 @@ void show_tx_pkt_segments(void); + void set_tx_pkt_times(unsigned int *tx_times); + void show_tx_pkt_times(void); + void set_tx_pkt_split(const char *name); +-int parse_fec_mode(const char *name, enum rte_eth_fec_mode *mode); ++int parse_fec_mode(const char *name, uint32_t *fec_capa); + void show_fec_capability(uint32_t num, struct rte_eth_fec_capa *speed_fec_capa); + void set_nb_pkt_per_burst(uint16_t pkt_burst); + char *list_pkt_forwarding_modes(void); +@@ -934,7 +955,7 @@ int set_vf_rate_limit(portid_t port_id, uint16_t vf, uint16_t rate, + + void port_rss_hash_conf_show(portid_t port_id, int show_rss_key); + void port_rss_hash_key_update(portid_t port_id, char rss_type[], +- uint8_t *hash_key, uint hash_key_len); ++ uint8_t *hash_key, uint8_t hash_key_len); + int rx_queue_id_is_invalid(queueid_t rxq_id); + int tx_queue_id_is_invalid(queueid_t txq_id); + void setup_gro(const char *onoff, portid_t port_id); +@@ -1006,6 +1027,8 @@ uint16_t tx_pkt_set_dynf(uint16_t port_id, __rte_unused uint16_t queue, + void add_tx_dynf_callback(portid_t portid); + void remove_tx_dynf_callback(portid_t portid); + int update_jumbo_frame_offload(portid_t portid); ++const char *rsstypes_to_str(uint64_t rss_type); ++ + + /* + * Work-around of a compilation error with ICC on invocations of the +diff --git a/dpdk/app/test-pmd/txonly.c b/dpdk/app/test-pmd/txonly.c +index d55ee7ca00..a7cd3bff0d 100644 +--- a/dpdk/app/test-pmd/txonly.c ++++ b/dpdk/app/test-pmd/txonly.c +@@ -40,6 +40,13 @@ + + #include "testpmd.h" + ++struct tx_timestamp { ++ rte_be32_t signature; ++ rte_be16_t pkt_idx; ++ rte_be16_t queue_idx; ++ rte_be64_t ts; ++}; ++ + /* use RFC863 Discard Protocol */ + uint16_t tx_udp_src_port = 9; + uint16_t tx_udp_dst_port = 9; +@@ -53,14 +60,10 @@ uint32_t tx_ip_dst_addr = (198U << 24) | (18 << 16) | (0 << 8) | 2; + static struct rte_ipv4_hdr pkt_ip_hdr; /**< IP header of transmitted packets. */ + RTE_DEFINE_PER_LCORE(uint8_t, _ip_var); /**< IP address variation */ + static struct rte_udp_hdr pkt_udp_hdr; /**< UDP header of tx packets. */ +-RTE_DEFINE_PER_LCORE(uint64_t, timestamp_qskew); +- /**< Timestamp offset per queue */ +-RTE_DEFINE_PER_LCORE(uint32_t, timestamp_idone); /**< Timestamp init done. */ + + static uint64_t timestamp_mask; /**< Timestamp dynamic flag mask */ + static int32_t timestamp_off; /**< Timestamp dynamic field offset */ + static bool timestamp_enable; /**< Timestamp enable */ +-static uint32_t timestamp_init_req; /**< Timestamp initialization request. 
*/ + static uint64_t timestamp_initial[RTE_MAX_ETHPORTS]; + + static void +@@ -168,14 +171,14 @@ update_pkt_header(struct rte_mbuf *pkt, uint32_t total_pkt_len) + sizeof(struct rte_ether_hdr) + + sizeof(struct rte_ipv4_hdr) + + sizeof(struct rte_udp_hdr))); +- /* updata udp pkt length */ ++ /* update UDP packet length */ + udp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_udp_hdr *, + sizeof(struct rte_ether_hdr) + + sizeof(struct rte_ipv4_hdr)); + pkt_len = (uint16_t) (pkt_data_len + sizeof(struct rte_udp_hdr)); + udp_hdr->dgram_len = RTE_CPU_TO_BE_16(pkt_len); + +- /* updata ip pkt length and csum */ ++ /* update IP packet length and checksum */ + ip_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_ipv4_hdr *, + sizeof(struct rte_ether_hdr)); + ip_hdr->hdr_checksum = 0; +@@ -188,7 +191,7 @@ static inline bool + pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp, + struct rte_ether_hdr *eth_hdr, const uint16_t vlan_tci, + const uint16_t vlan_tci_outer, const uint64_t ol_flags, +- const uint16_t idx, const struct fwd_stream *fs) ++ const uint16_t idx, struct fwd_stream *fs) + { + struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT]; + struct rte_mbuf *pkt_seg; +@@ -256,16 +259,10 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp, + update_pkt_header(pkt, pkt_len); + + if (unlikely(timestamp_enable)) { +- uint64_t skew = RTE_PER_LCORE(timestamp_qskew); +- struct { +- rte_be32_t signature; +- rte_be16_t pkt_idx; +- rte_be16_t queue_idx; +- rte_be64_t ts; +- } timestamp_mark; +- +- if (unlikely(timestamp_init_req != +- RTE_PER_LCORE(timestamp_idone))) { ++ uint64_t skew = fs->ts_skew; ++ struct tx_timestamp timestamp_mark; ++ ++ if (!skew) { + struct rte_eth_dev *dev = &rte_eth_devices[fs->tx_port]; + unsigned int txqs_n = dev->data->nb_tx_queues; + uint64_t phase = tx_pkt_times_inter * fs->tx_queue / +@@ -276,8 +273,7 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp, + */ + skew = timestamp_initial[fs->tx_port] + + tx_pkt_times_inter + phase; +- RTE_PER_LCORE(timestamp_qskew) = skew; +- RTE_PER_LCORE(timestamp_idone) = timestamp_init_req; ++ fs->ts_skew = skew; + } + timestamp_mark.pkt_idx = rte_cpu_to_be_16(idx); + timestamp_mark.queue_idx = rte_cpu_to_be_16(fs->tx_queue); +@@ -287,14 +283,14 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp, + pkt->ol_flags |= timestamp_mask; + *RTE_MBUF_DYNFIELD + (pkt, timestamp_off, uint64_t *) = skew; +- RTE_PER_LCORE(timestamp_qskew) = skew; ++ fs->ts_skew = skew; + timestamp_mark.ts = rte_cpu_to_be_64(skew); + } else if (tx_pkt_times_intra) { + skew += tx_pkt_times_intra; + pkt->ol_flags |= timestamp_mask; + *RTE_MBUF_DYNFIELD + (pkt, timestamp_off, uint64_t *) = skew; +- RTE_PER_LCORE(timestamp_qskew) = skew; ++ fs->ts_skew = skew; + timestamp_mark.ts = rte_cpu_to_be_64(skew); + } else { + timestamp_mark.ts = RTE_BE64(0); +@@ -423,22 +419,31 @@ pkt_burst_transmit(struct fwd_stream *fs) + get_end_cycles(fs, start_tsc); + } + +-static void ++static int + tx_only_begin(portid_t pi) + { +- uint16_t pkt_data_len; ++ uint16_t pkt_hdr_len, pkt_data_len; + int dynf; + +- pkt_data_len = (uint16_t) (tx_pkt_length - ( +- sizeof(struct rte_ether_hdr) + +- sizeof(struct rte_ipv4_hdr) + +- sizeof(struct rte_udp_hdr))); ++ pkt_hdr_len = (uint16_t)(sizeof(struct rte_ether_hdr) + ++ sizeof(struct rte_ipv4_hdr) + ++ sizeof(struct rte_udp_hdr)); ++ pkt_data_len = tx_pkt_length - pkt_hdr_len; ++ ++ if ((tx_pkt_split == TX_PKT_SPLIT_RND || txonly_multi_flow) && ++ tx_pkt_seg_lengths[0] < pkt_hdr_len) { ++ 
TESTPMD_LOG(ERR, ++ "Random segment number or multiple flow is enabled, " ++ "but tx_pkt_seg_lengths[0] %u < %u (needed)\n", ++ tx_pkt_seg_lengths[0], pkt_hdr_len); ++ return -EINVAL; ++ } ++ + setup_pkt_udp_ip_headers(&pkt_ip_hdr, &pkt_udp_hdr, pkt_data_len); + + timestamp_enable = false; + timestamp_mask = 0; + timestamp_off = -1; +- RTE_PER_LCORE(timestamp_qskew) = 0; + dynf = rte_mbuf_dynflag_lookup + (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL); + if (dynf >= 0) +@@ -451,15 +456,54 @@ tx_only_begin(portid_t pi) + timestamp_mask && + timestamp_off >= 0 && + !rte_eth_read_clock(pi, ×tamp_initial[pi]); +- if (timestamp_enable) +- timestamp_init_req++; ++ ++ if (timestamp_enable) { ++ pkt_hdr_len += sizeof(struct tx_timestamp); ++ ++ if (tx_pkt_split == TX_PKT_SPLIT_RND) { ++ if (tx_pkt_seg_lengths[0] < pkt_hdr_len) { ++ TESTPMD_LOG(ERR, ++ "Time stamp and random segment number are enabled, " ++ "but tx_pkt_seg_lengths[0] %u < %u (needed)\n", ++ tx_pkt_seg_lengths[0], pkt_hdr_len); ++ return -EINVAL; ++ } ++ } else { ++ uint16_t total = 0; ++ uint8_t i; ++ ++ for (i = 0; i < tx_pkt_nb_segs; i++) { ++ total += tx_pkt_seg_lengths[i]; ++ if (total >= pkt_hdr_len) ++ break; ++ } ++ ++ if (total < pkt_hdr_len) { ++ TESTPMD_LOG(ERR, ++ "Not enough Tx segment space for time stamp info, " ++ "total %u < %u (needed)\n", ++ total, pkt_hdr_len); ++ return -EINVAL; ++ } ++ } ++ } ++ + /* Make sure all settings are visible on forwarding cores.*/ + rte_wmb(); ++ return 0; ++} ++ ++static void ++tx_only_stream_init(struct fwd_stream *fs) ++{ ++ fs->disabled = ports[fs->tx_port].txq[fs->tx_queue].state == ++ RTE_ETH_QUEUE_STATE_STOPPED; + } + + struct fwd_engine tx_only_engine = { + .fwd_mode_name = "txonly", + .port_fwd_begin = tx_only_begin, + .port_fwd_end = NULL, ++ .stream_init = tx_only_stream_init, + .packet_fwd = pkt_burst_transmit, + }; +diff --git a/dpdk/app/test-pmd/util.c b/dpdk/app/test-pmd/util.c +index a9e431a8b2..2682259678 100644 +--- a/dpdk/app/test-pmd/util.c ++++ b/dpdk/app/test-pmd/util.c +@@ -274,7 +274,11 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[], + " - %s queue=0x%x", is_rx ? 
"Receive" : "Send", + (unsigned int) queue); + MKDUMPSTR(print_buf, buf_size, cur_len, "\n"); +- rte_get_rx_ol_flag_list(mb->ol_flags, buf, sizeof(buf)); ++ if (is_rx) ++ rte_get_rx_ol_flag_list(mb->ol_flags, buf, sizeof(buf)); ++ else ++ rte_get_tx_ol_flag_list(mb->ol_flags, buf, sizeof(buf)); ++ + MKDUMPSTR(print_buf, buf_size, cur_len, + " ol_flags: %s\n", buf); + if (rte_mbuf_check(mb, 1, &reason) < 0) +diff --git a/dpdk/app/test-regex/main.c b/dpdk/app/test-regex/main.c +index ac6152dea7..6a8eb07ae8 100644 +--- a/dpdk/app/test-regex/main.c ++++ b/dpdk/app/test-regex/main.c +@@ -105,12 +105,11 @@ args_parse(int argc, char **argv, char *rules_file, char *data_file, + *nb_iterations = atoi(optarg); + break; + case ARG_HELP: +- usage("RegEx test app"); ++ usage(argv[0]); + break; + default: +- fprintf(stderr, "Invalid option: %s\n", argv[optind]); +- usage("RegEx test app"); +- rte_exit(EXIT_FAILURE, "Invalid option\n"); ++ usage(argv[0]); ++ rte_exit(EXIT_FAILURE, "Invalid option: %s\n", argv[optind]); + break; + } + } +diff --git a/dpdk/app/test/autotest_test_funcs.py b/dpdk/app/test/autotest_test_funcs.py +index 0811066cb0..6c717bddb4 100644 +--- a/dpdk/app/test/autotest_test_funcs.py ++++ b/dpdk/app/test/autotest_test_funcs.py +@@ -13,13 +13,16 @@ + def default_autotest(child, test_name): + child.sendline(test_name) + result = child.expect(["Test OK", "Test Failed", +- "Command not found", pexpect.TIMEOUT], timeout=900) ++ "Command not found", pexpect.TIMEOUT, ++ "Test Skipped"], timeout=900) + if result == 1: + return -1, "Fail" + elif result == 2: + return -1, "Fail [Not found]" + elif result == 3: + return -1, "Fail [Timeout]" ++ elif result == 4: ++ return 0, "Skipped [Not Run]" + return 0, "Success" + + # autotest used to run dump commands +diff --git a/dpdk/app/test/meson.build b/dpdk/app/test/meson.build +index bdbc619476..c2f0d24c1d 100644 +--- a/dpdk/app/test/meson.build ++++ b/dpdk/app/test/meson.build +@@ -396,8 +396,6 @@ if cc.has_argument('-Wno-format-truncation') + cflags += '-Wno-format-truncation' + endif + +-# specify -D_GNU_SOURCE unconditionally +-cflags += '-D_GNU_SOURCE' + # Strict-aliasing rules are violated by uint8_t[] to context size casts. 
+ cflags += '-fno-strict-aliasing' + +@@ -424,7 +422,6 @@ foreach d:test_deps + def_lib = get_option('default_library') + test_dep_objs += get_variable(def_lib + '_rte_' + d) + endforeach +-test_dep_objs += cc.find_library('execinfo', required: false) + + link_libs = [] + if get_option('default_library') == 'static' +@@ -440,7 +437,7 @@ dpdk_test = executable('dpdk-test', + driver_install_path), + install: true) + +-has_hugepage = run_command('has-hugepage.sh').stdout().strip() != '0' ++has_hugepage = run_command('has-hugepage.sh', check: true).stdout().strip() != '0' + message('hugepage availability: @0@'.format(has_hugepage)) + + # some perf tests (eg: memcpy perf autotest)take very long +diff --git a/dpdk/app/test/packet_burst_generator.c b/dpdk/app/test/packet_burst_generator.c +index f203f9d09e..0fd7290b0e 100644 +--- a/dpdk/app/test/packet_burst_generator.c ++++ b/dpdk/app/test/packet_burst_generator.c +@@ -117,6 +117,7 @@ initialize_tcp_header(struct rte_tcp_hdr *tcp_hdr, uint16_t src_port, + memset(tcp_hdr, 0, sizeof(struct rte_tcp_hdr)); + tcp_hdr->src_port = rte_cpu_to_be_16(src_port); + tcp_hdr->dst_port = rte_cpu_to_be_16(dst_port); ++ tcp_hdr->data_off = (sizeof(struct rte_tcp_hdr) << 2) & 0xF0; + + return pkt_len; + } +@@ -141,8 +142,8 @@ uint16_t + initialize_ipv6_header(struct rte_ipv6_hdr *ip_hdr, uint8_t *src_addr, + uint8_t *dst_addr, uint16_t pkt_data_len) + { +- ip_hdr->vtc_flow = 0; +- ip_hdr->payload_len = pkt_data_len; ++ ip_hdr->vtc_flow = rte_cpu_to_be_32(0x60000000); /* Set version to 6. */ ++ ip_hdr->payload_len = rte_cpu_to_be_16(pkt_data_len); + ip_hdr->proto = IPPROTO_UDP; + ip_hdr->hop_limits = IP_DEFTTL; + +diff --git a/dpdk/app/test/process.h b/dpdk/app/test/process.h +index 27f1b1c0e6..a09a088477 100644 +--- a/dpdk/app/test/process.h ++++ b/dpdk/app/test/process.h +@@ -48,6 +48,7 @@ process_dup(const char *const argv[], int numargs, const char *env_value) + #ifdef RTE_LIB_PDUMP + #ifdef RTE_NET_RING + pthread_t thread; ++ int rc; + #endif + #endif + +@@ -126,8 +127,13 @@ process_dup(const char *const argv[], int numargs, const char *env_value) + /* parent process does a wait */ + #ifdef RTE_LIB_PDUMP + #ifdef RTE_NET_RING +- if ((strcmp(env_value, "run_pdump_server_tests") == 0)) +- pthread_create(&thread, NULL, &send_pkts, NULL); ++ if ((strcmp(env_value, "run_pdump_server_tests") == 0)) { ++ rc = pthread_create(&thread, NULL, &send_pkts, NULL); ++ if (rc != 0) { ++ rte_panic("Cannot start send pkts thread: %s\n", ++ strerror(rc)); ++ } ++ } + #endif + #endif + +diff --git a/dpdk/app/test/sample_packet_forward.c b/dpdk/app/test/sample_packet_forward.c +index 61384b3d9b..aa897274d8 100644 +--- a/dpdk/app/test/sample_packet_forward.c ++++ b/dpdk/app/test/sample_packet_forward.c +@@ -15,6 +15,35 @@ + + #include "sample_packet_forward.h" + ++/* ++ * heper function: configure and start test device ++ */ ++int ++test_dev_start(uint16_t port, struct rte_mempool *mp) ++{ ++ int32_t rc; ++ struct rte_eth_conf pconf; ++ ++ memset(&pconf, 0, sizeof(pconf)); ++ ++ rc = rte_eth_dev_configure(port, NUM_QUEUES, NUM_QUEUES, &pconf); ++ if (rc != 0) ++ return rc; ++ ++ rc = rte_eth_rx_queue_setup(port, 0, RING_SIZE, SOCKET_ID_ANY, ++ NULL, mp); ++ if (rc != 0) ++ return rc; ++ ++ rc = rte_eth_tx_queue_setup(port, 0, RING_SIZE, SOCKET_ID_ANY, ++ NULL); ++ if (rc != 0) ++ return rc; ++ ++ rc = rte_eth_dev_start(port); ++ return rc; ++} ++ + /* Sample test to create virtual rings and tx,rx portid from rings */ + int + test_ring_setup(struct rte_ring **ring, uint16_t 
*portid) +diff --git a/dpdk/app/test/sample_packet_forward.h b/dpdk/app/test/sample_packet_forward.h +index 6789217de3..af0b1d9924 100644 +--- a/dpdk/app/test/sample_packet_forward.h ++++ b/dpdk/app/test/sample_packet_forward.h +@@ -21,6 +21,9 @@ struct rte_ring; + /* Sample test to create virtual rings and tx,rx portid from rings */ + int test_ring_setup(struct rte_ring **ring, uint16_t *portid); + ++/* configure and start device created by test_ring_setup */ ++int test_dev_start(uint16_t port, struct rte_mempool *mp); ++ + /* Sample test to free the virtual rings */ + void test_ring_free(struct rte_ring *rxtx); + +diff --git a/dpdk/app/test/test.c b/dpdk/app/test/test.c +index 624dd48042..864523ed61 100644 +--- a/dpdk/app/test/test.c ++++ b/dpdk/app/test/test.c +@@ -134,8 +134,13 @@ main(int argc, char **argv) + goto out; + } + ++ argv += ret; ++ ++ prgname = argv[0]; ++ + #ifdef RTE_LIB_TIMER +- if (rte_timer_subsystem_init() < 0) { ++ ret = rte_timer_subsystem_init(); ++ if (ret < 0 && ret != -EALREADY) { + ret = -1; + goto out; + } +@@ -146,10 +151,6 @@ main(int argc, char **argv) + goto out; + } + +- argv += ret; +- +- prgname = argv[0]; +- + recursive_call = getenv(RECURSIVE_ENV_VAR); + if (recursive_call != NULL) { + ret = do_recursive_call(); +diff --git a/dpdk/app/test/test_atomic.c b/dpdk/app/test/test_atomic.c +index f10f555af8..e4b997827e 100644 +--- a/dpdk/app/test/test_atomic.c ++++ b/dpdk/app/test/test_atomic.c +@@ -88,7 +88,7 @@ + * + * - Invoke ``test_atomic_exchange`` on each lcore. Before doing + * anything else, the cores wait for a synchronization event. +- * Each core then does the follwoing for N iterations: ++ * Each core then does the following for N iterations: + * + * Generate a new token with a data integrity check + * Exchange the new token for previously generated token +@@ -591,7 +591,7 @@ test_atomic(void) + rte_atomic32_clear(&synchro); + + iterations = count128.val[0] - count128.val[1]; +- if (iterations != 4*N*(rte_lcore_count()-1)) { ++ if (iterations != (uint64_t)4*N*(rte_lcore_count()-1)) { + printf("128-bit compare and swap failed\n"); + return -1; + } +diff --git a/dpdk/app/test/test_barrier.c b/dpdk/app/test/test_barrier.c +index c27f8a0742..aba47dd78d 100644 +--- a/dpdk/app/test/test_barrier.c ++++ b/dpdk/app/test/test_barrier.c +@@ -6,12 +6,12 @@ + * This is a simple functional test for rte_smp_mb() implementation. + * I.E. make sure that LOAD and STORE operations that precede the + * rte_smp_mb() call are globally visible across the lcores +- * before the the LOAD and STORE operations that follows it. ++ * before the LOAD and STORE operations that follows it. + * The test uses simple implementation of Peterson's lock algorithm + * (https://en.wikipedia.org/wiki/Peterson%27s_algorithm) + * for two execution units to make sure that rte_smp_mb() prevents + * store-load reordering to happen. +- * Also when executed on a single lcore could be used as a approxiamate ++ * Also when executed on a single lcore could be used as a approximate + * estimation of number of cycles particular implementation of rte_smp_mb() + * will take. 
+ */ +@@ -66,7 +66,7 @@ struct plock_test { + struct lcore_plock_test { + struct plock_test *pt[2]; /* shared, lock-protected data */ + uint64_t sum[2]; /* local copy of the shared data */ +- uint64_t iter; /* number of iterations to perfom */ ++ uint64_t iter; /* number of iterations to perform */ + uint32_t lc; /* given lcore id */ + }; + +diff --git a/dpdk/app/test/test_bitratestats.c b/dpdk/app/test/test_bitratestats.c +index f4a92c9be6..1ff540f4c4 100644 +--- a/dpdk/app/test/test_bitratestats.c ++++ b/dpdk/app/test/test_bitratestats.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + + #include "sample_packet_forward.h" + #include "test.h" +@@ -159,12 +160,21 @@ test_bit_packet_forward(void) + printf("allocate mbuf pool Failed\n"); + return TEST_FAILED; + } ++ ret = test_dev_start(portid, mp); ++ if (ret < 0) { ++ printf("test_dev_start(%hu, %p) failed, error code: %d\n", ++ portid, mp, ret); ++ return TEST_FAILED; ++ } ++ + ret = test_packet_forward(pbuf, portid, QUEUE_ID); + if (ret < 0) + printf("send pkts Failed\n"); ++ ++ rte_eth_dev_stop(portid); + test_put_mbuf_to_pool(mp, pbuf); + +- return TEST_SUCCESS; ++ return (ret >= 0) ? TEST_SUCCESS : TEST_FAILED; + } + + static int +diff --git a/dpdk/app/test/test_bpf.c b/dpdk/app/test/test_bpf.c +index 7c3de96c62..782fd1b7ef 100644 +--- a/dpdk/app/test/test_bpf.c ++++ b/dpdk/app/test/test_bpf.c +@@ -22,7 +22,7 @@ + /* + * Basic functional tests for librte_bpf. + * The main procedure - load eBPF program, execute it and +- * compare restuls with expected values. ++ * compare results with expected values. + */ + + struct dummy_offset { +@@ -59,6 +59,9 @@ struct dummy_mbuf { + #define TEST_SHIFT_1 15 + #define TEST_SHIFT_2 33 + ++#define TEST_SHIFT32_MASK (CHAR_BIT * sizeof(uint32_t) - 1) ++#define TEST_SHIFT64_MASK (CHAR_BIT * sizeof(uint64_t) - 1) ++ + #define TEST_JCC_1 0 + #define TEST_JCC_2 -123 + #define TEST_JCC_3 5678 +@@ -548,15 +551,25 @@ static const struct ebpf_insn test_shift1_prog[] = { + .off = offsetof(struct dummy_vect8, out[1].u64), + }, + { +- .code = (BPF_ALU | BPF_RSH | BPF_X), +- .dst_reg = EBPF_REG_2, +- .src_reg = EBPF_REG_4, ++ .code = (BPF_ALU | BPF_AND | BPF_K), ++ .dst_reg = EBPF_REG_4, ++ .imm = TEST_SHIFT64_MASK, + }, + { + .code = (EBPF_ALU64 | BPF_LSH | BPF_X), + .dst_reg = EBPF_REG_3, + .src_reg = EBPF_REG_4, + }, ++ { ++ .code = (BPF_ALU | BPF_AND | BPF_K), ++ .dst_reg = EBPF_REG_4, ++ .imm = TEST_SHIFT32_MASK, ++ }, ++ { ++ .code = (BPF_ALU | BPF_RSH | BPF_X), ++ .dst_reg = EBPF_REG_2, ++ .src_reg = EBPF_REG_4, ++ }, + { + .code = (BPF_STX | BPF_MEM | EBPF_DW), + .dst_reg = EBPF_REG_1, +@@ -590,7 +603,7 @@ static const struct ebpf_insn test_shift1_prog[] = { + { + .code = (BPF_ALU | BPF_AND | BPF_K), + .dst_reg = EBPF_REG_2, +- .imm = sizeof(uint64_t) * CHAR_BIT - 1, ++ .imm = TEST_SHIFT64_MASK, + }, + { + .code = (EBPF_ALU64 | EBPF_ARSH | BPF_X), +@@ -600,7 +613,7 @@ static const struct ebpf_insn test_shift1_prog[] = { + { + .code = (BPF_ALU | BPF_AND | BPF_K), + .dst_reg = EBPF_REG_2, +- .imm = sizeof(uint32_t) * CHAR_BIT - 1, ++ .imm = TEST_SHIFT32_MASK, + }, + { + .code = (BPF_ALU | BPF_LSH | BPF_X), +@@ -666,8 +679,10 @@ test_shift1_check(uint64_t rc, const void *arg) + dve.out[0].u64 = r2; + dve.out[1].u64 = r3; + +- r2 = (uint32_t)r2 >> r4; ++ r4 &= TEST_SHIFT64_MASK; + r3 <<= r4; ++ r4 &= TEST_SHIFT32_MASK; ++ r2 = (uint32_t)r2 >> r4; + + dve.out[2].u64 = r2; + dve.out[3].u64 = r3; +@@ -676,9 +691,9 @@ test_shift1_check(uint64_t rc, const void *arg) + r3 = dvt->in[1].u64; + r4 = 
dvt->in[2].u32; + +- r2 &= sizeof(uint64_t) * CHAR_BIT - 1; ++ r2 &= TEST_SHIFT64_MASK; + r3 = (int64_t)r3 >> r2; +- r2 &= sizeof(uint32_t) * CHAR_BIT - 1; ++ r2 &= TEST_SHIFT32_MASK; + r4 = (uint32_t)r4 << r2; + + dve.out[4].u64 = r4; +@@ -2391,7 +2406,7 @@ static const struct ebpf_insn test_call5_prog[] = { + }, + }; + +-/* String comparision impelementation, return 0 if equal else difference */ ++/* String comparison implementation, return 0 if equal else difference */ + static uint32_t + dummy_func5(const char *s1, const char *s2) + { +@@ -2691,7 +2706,7 @@ test_ld_mbuf1_check(uint64_t rc, const void *arg) + } + + /* +- * same as ld_mbuf1, but then trancate the mbuf by 1B, ++ * same as ld_mbuf1, but then truncate the mbuf by 1B, + * so load of last 4B fail. + */ + static void +@@ -3207,7 +3222,7 @@ run_test(const struct bpf_test *tst) + printf("%s@%d: check_result(%s) failed, " + "error: %d(%s);\n", + __func__, __LINE__, tst->name, +- rv, strerror(ret)); ++ rv, strerror(rv)); + } + } + +diff --git a/dpdk/app/test/test_cmdline_ipaddr.c b/dpdk/app/test/test_cmdline_ipaddr.c +index b3f50d80d2..2a1ee120fc 100644 +--- a/dpdk/app/test/test_cmdline_ipaddr.c ++++ b/dpdk/app/test/test_cmdline_ipaddr.c +@@ -255,7 +255,7 @@ const char * ipaddr_invalid_strs[] = { + /** misc **/ + + /* too long */ +- "1234:1234:1234:1234:1234:1234:1234:1234:1234:1234:1234" ++ "1234:1234:1234:1234:1234:1234:1234:1234:1234:1234:1234", + "random invalid text", + "", + "\0", +diff --git a/dpdk/app/test/test_cmdline_lib.c b/dpdk/app/test/test_cmdline_lib.c +index bd72df0da2..7bd5f8cd37 100644 +--- a/dpdk/app/test/test_cmdline_lib.c ++++ b/dpdk/app/test/test_cmdline_lib.c +@@ -71,10 +71,12 @@ test_cmdline_parse_fns(void) + if (cmdline_complete(cl, "buffer", &i, NULL, sizeof(dst)) >= 0) + goto error; + ++ cmdline_free(cl); + return 0; + + error: + printf("Error: function accepted null parameter!\n"); ++ cmdline_free(cl); + return -1; + } + +@@ -140,32 +142,45 @@ static int + test_cmdline_socket_fns(void) + { + cmdline_parse_ctx_t ctx; ++ struct cmdline *cl; + +- if (cmdline_stdin_new(NULL, "prompt") != NULL) ++ cl = cmdline_stdin_new(NULL, "prompt"); ++ if (cl != NULL) + goto error; +- if (cmdline_stdin_new(&ctx, NULL) != NULL) ++ cl = cmdline_stdin_new(&ctx, NULL); ++ if (cl != NULL) + goto error; +- if (cmdline_file_new(NULL, "prompt", "/dev/null") != NULL) ++ cl = cmdline_file_new(NULL, "prompt", "/dev/null"); ++ if (cl != NULL) + goto error; +- if (cmdline_file_new(&ctx, NULL, "/dev/null") != NULL) ++ cl = cmdline_file_new(&ctx, NULL, "/dev/null"); ++ if (cl != NULL) + goto error; +- if (cmdline_file_new(&ctx, "prompt", NULL) != NULL) ++ cl = cmdline_file_new(&ctx, "prompt", NULL); ++ if (cl != NULL) + goto error; +- if (cmdline_file_new(&ctx, "prompt", "-/invalid/~/path") != NULL) { ++ cl = cmdline_file_new(&ctx, "prompt", "-/invalid/~/path"); ++ if (cl != NULL) { + printf("Error: succeeded in opening invalid file for reading!"); ++ cmdline_free(cl); + return -1; + } +- if (cmdline_file_new(&ctx, "prompt", "/dev/null") == NULL) { ++ cl = cmdline_file_new(&ctx, "prompt", "/dev/null"); ++ if (cl == NULL) { + printf("Error: failed to open /dev/null for reading!"); + return -1; + } ++ cmdline_free(cl); ++ cl = NULL; + + /* void functions */ + cmdline_stdin_exit(NULL); + ++ cmdline_free(cl); + return 0; + error: + printf("Error: function accepted null parameter!\n"); ++ cmdline_free(cl); + return -1; + } + +@@ -176,13 +191,14 @@ test_cmdline_fns(void) + struct cmdline *cl; + + memset(&ctx, 0, sizeof(ctx)); +- cl = 
cmdline_new(&ctx, "test", -1, -1); +- if (cl == NULL) ++ cl = cmdline_new(NULL, "prompt", 0, 0); ++ if (cl != NULL) + goto error; +- +- if (cmdline_new(NULL, "prompt", 0, 0) != NULL) ++ cl = cmdline_new(&ctx, NULL, 0, 0); ++ if (cl != NULL) + goto error; +- if (cmdline_new(&ctx, NULL, 0, 0) != NULL) ++ cl = cmdline_new(&ctx, "test", -1, -1); ++ if (cl == NULL) + goto error; + if (cmdline_in(NULL, "buffer", CMDLINE_TEST_BUFSIZE) >= 0) + goto error; +@@ -198,6 +214,7 @@ test_cmdline_fns(void) + cmdline_interact(NULL); + cmdline_quit(NULL); + ++ cmdline_free(cl); + return 0; + + error: +diff --git a/dpdk/app/test/test_cmdline_num.c b/dpdk/app/test/test_cmdline_num.c +index ec479cdb3a..9276de59bd 100644 +--- a/dpdk/app/test/test_cmdline_num.c ++++ b/dpdk/app/test/test_cmdline_num.c +@@ -200,8 +200,8 @@ const char * num_invalid_strs[] = { + "-0x1234580A", + "-0b0111010101", + /* too long (128+ chars) */ +- "0b1111000011110000111100001111000011110000111100001111000011110000" +- "1111000011110000111100001111000011110000111100001111000011110000", ++ ("0b1111000011110000111100001111000011110000111100001111000011110000" ++ "1111000011110000111100001111000011110000111100001111000011110000"), + "1E3", + "0A", + "-B", +diff --git a/dpdk/app/test/test_compressdev.c b/dpdk/app/test/test_compressdev.c +index 0571c17ecb..57c566aa92 100644 +--- a/dpdk/app/test/test_compressdev.c ++++ b/dpdk/app/test/test_compressdev.c +@@ -1256,7 +1256,7 @@ test_deflate_comp_run(const struct interim_data_params *int_data, + /* + * Store original operation index in private data, + * since ordering does not have to be maintained, +- * when dequeueing from compressdev, so a comparison ++ * when dequeuing from compressdev, so a comparison + * at the end of the test can be done. + */ + priv_data = (struct priv_op_data *) (ops[i] + 1); +@@ -1411,7 +1411,6 @@ test_deflate_comp_finalize(const struct interim_data_params *int_data, + /* from int_data: */ + unsigned int num_xforms = int_data->num_xforms; + struct rte_comp_xform **compress_xforms = int_data->compress_xforms; +- uint16_t *buf_idx = int_data->buf_idx; + unsigned int num_bufs = int_data->num_bufs; + + /* from test_priv_data: */ +@@ -1442,7 +1441,7 @@ test_deflate_comp_finalize(const struct interim_data_params *int_data, + + RTE_LOG(DEBUG, USER1, "Buffer %u compressed by %s from %u to" + " %u bytes (level = %d, huffman = %s)\n", +- buf_idx[priv_data->orig_idx], engine, ++ i, engine, + ops_processed[i]->consumed, ops_processed[i]->produced, + compress_xform->level, + huffman_type_strings[huffman_type]); +@@ -1734,7 +1733,6 @@ test_deflate_decomp_finalize(const struct interim_data_params *int_data, + static unsigned int step; + + /* from int_data: */ +- uint16_t *buf_idx = int_data->buf_idx; + unsigned int num_bufs = int_data->num_bufs; + const char * const *test_bufs = int_data->test_bufs; + struct rte_comp_xform **compress_xforms = int_data->compress_xforms; +@@ -1766,7 +1764,7 @@ test_deflate_decomp_finalize(const struct interim_data_params *int_data, + strlcpy(engine, "pmd", sizeof(engine)); + RTE_LOG(DEBUG, USER1, + "Buffer %u decompressed by %s from %u to %u bytes\n", +- buf_idx[priv_data->orig_idx], engine, ++ i, engine, + ops_processed[i]->consumed, ops_processed[i]->produced); + ops[i] = NULL; + } +@@ -2035,7 +2033,7 @@ test_deflate_comp_decomp(const struct interim_data_params *int_data, + test_priv_data.all_decomp_data = &all_decomp_data; + test_priv_data.decomp_produced_data_size = &decomp_produced_data_size; + +- test_priv_data.num_priv_xforms = 0; /* it's used 
for deompression only */ ++ test_priv_data.num_priv_xforms = 0; /* it's used for decompression only */ + + capa = rte_compressdev_capability_get(0, RTE_COMP_ALGO_DEFLATE); + if (capa == NULL) { +diff --git a/dpdk/app/test/test_crc.c b/dpdk/app/test/test_crc.c +index bf1d344359..8231f81e4a 100644 +--- a/dpdk/app/test/test_crc.c ++++ b/dpdk/app/test/test_crc.c +@@ -80,6 +80,8 @@ test_crc_calc(void) + + /* 32-bit ethernet CRC: Test 2 */ + test_data = rte_zmalloc(NULL, CRC32_VEC_LEN1, 0); ++ if (test_data == NULL) ++ return -7; + + for (i = 0; i < CRC32_VEC_LEN1; i += 12) + rte_memcpy(&test_data[i], crc32_vec1, 12); +diff --git a/dpdk/app/test/test_cryptodev.c b/dpdk/app/test/test_cryptodev.c +index 8189053c13..a87e5583ac 100644 +--- a/dpdk/app/test/test_cryptodev.c ++++ b/dpdk/app/test/test_cryptodev.c +@@ -135,10 +135,11 @@ setup_test_string(struct rte_mempool *mpool, + struct rte_mbuf *m = rte_pktmbuf_alloc(mpool); + size_t t_len = len - (blocksize ? (len % blocksize) : 0); + +- memset(m->buf_addr, 0, m->buf_len); + if (m) { +- char *dst = rte_pktmbuf_append(m, t_len); ++ char *dst; + ++ memset(m->buf_addr, 0, m->buf_len); ++ dst = rte_pktmbuf_append(m, t_len); + if (!dst) { + rte_pktmbuf_free(m); + return NULL; +@@ -177,6 +178,10 @@ post_process_raw_dp_op(void *user_data, uint32_t index __rte_unused, + RTE_CRYPTO_OP_STATUS_ERROR; + } + ++static struct crypto_testsuite_params testsuite_params = { NULL }; ++struct crypto_testsuite_params *p_testsuite_params = &testsuite_params; ++static struct crypto_unittest_params unittest_params; ++ + void + process_sym_raw_dp_op(uint8_t dev_id, uint16_t qp_id, + struct rte_crypto_op *op, uint8_t is_cipher, uint8_t is_auth, +@@ -191,6 +196,7 @@ process_sym_raw_dp_op(uint8_t dev_id, uint16_t qp_id, + struct rte_crypto_sgl sgl; + uint32_t max_len; + union rte_cryptodev_session_ctx sess; ++ uint64_t auth_end_iova; + uint32_t count = 0; + struct rte_crypto_raw_dp_ctx *ctx; + uint32_t cipher_offset = 0, cipher_len = 0, auth_offset = 0, +@@ -200,6 +206,8 @@ process_sym_raw_dp_op(uint8_t dev_id, uint16_t qp_id, + int ctx_service_size; + int32_t status = 0; + int enqueue_status, dequeue_status; ++ struct crypto_unittest_params *ut_params = &unittest_params; ++ int is_sgl = sop->m_src->nb_segs > 1; + + ctx_service_size = rte_cryptodev_get_raw_dp_ctx_size(dev_id); + if (ctx_service_size < 0) { +@@ -265,6 +273,28 @@ process_sym_raw_dp_op(uint8_t dev_id, uint16_t qp_id, + digest.va = (void *)sop->auth.digest.data; + digest.iova = sop->auth.digest.phys_addr; + ++ if (is_sgl) { ++ uint32_t remaining_off = auth_offset + auth_len; ++ struct rte_mbuf *sgl_buf = sop->m_src; ++ ++ while (remaining_off >= rte_pktmbuf_data_len(sgl_buf) ++ && sgl_buf->next != NULL) { ++ remaining_off -= rte_pktmbuf_data_len(sgl_buf); ++ sgl_buf = sgl_buf->next; ++ } ++ ++ auth_end_iova = (uint64_t)rte_pktmbuf_iova_offset( ++ sgl_buf, remaining_off); ++ } else { ++ auth_end_iova = rte_pktmbuf_iova(op->sym->m_src) + ++ auth_offset + auth_len; ++ } ++ /* Then check if digest-encrypted conditions are met */ ++ if ((auth_offset + auth_len < cipher_offset + cipher_len) && ++ (digest.iova == auth_end_iova) && is_sgl) ++ max_len = RTE_MAX(max_len, auth_offset + auth_len + ++ ut_params->auth_xform.auth.digest_length); ++ + } else if (is_cipher) { + cipher_offset = sop->cipher.data.offset; + cipher_len = sop->cipher.data.length; +@@ -487,9 +517,6 @@ process_crypto_request(uint8_t dev_id, struct rte_crypto_op *op) + return op; + } + +-static struct crypto_testsuite_params testsuite_params = { NULL }; 
+-static struct crypto_unittest_params unittest_params; +- + static int + testsuite_setup(void) + { +@@ -927,7 +954,6 @@ ut_teardown(void) + { + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; +- struct rte_cryptodev_stats stats; + + /* free crypto session structure */ + #ifdef RTE_LIB_SECURITY +@@ -974,8 +1000,6 @@ ut_teardown(void) + RTE_LOG(DEBUG, USER1, "CRYPTO_MBUFPOOL count %u\n", + rte_mempool_avail_count(ts_params->mbuf_pool)); + +- rte_cryptodev_stats_get(ts_params->valid_devs[0], &stats); +- + /* Stop the device */ + rte_cryptodev_stop(ts_params->valid_devs[0]); + } +@@ -1648,6 +1672,7 @@ test_AES_CBC_HMAC_SHA1_encrypt_digest(void) + { + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; ++ int status; + + /* Verify the capabilities */ + struct rte_cryptodev_sym_capability_idx cap_idx; +@@ -1694,12 +1719,17 @@ test_AES_CBC_HMAC_SHA1_encrypt_digest(void) + + ut_params->sess = rte_cryptodev_sym_session_create( + ts_params->session_mpool); ++ TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); + + /* Create crypto session*/ +- rte_cryptodev_sym_session_init(ts_params->valid_devs[0], ++ status = rte_cryptodev_sym_session_init(ts_params->valid_devs[0], + ut_params->sess, &ut_params->cipher_xform, + ts_params->session_priv_mpool); +- TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); ++ ++ if (status == -ENOTSUP) ++ return TEST_SKIPPED; ++ ++ TEST_ASSERT_EQUAL(status, 0, "Session init failed"); + + /* Generate crypto op data structure */ + ut_params->op = rte_crypto_op_alloc(ts_params->op_mpool, +@@ -2630,6 +2660,21 @@ create_wireless_algo_auth_cipher_operation( + iv_ptr += cipher_iv_len; + rte_memcpy(iv_ptr, auth_iv, auth_iv_len); + ++ /* Only copy over the offset data needed from src to dst in OOP, ++ * if the auth and cipher offsets are not aligned ++ */ ++ if (op_mode == OUT_OF_PLACE) { ++ if (cipher_offset > auth_offset) ++ rte_memcpy( ++ rte_pktmbuf_mtod_offset( ++ sym_op->m_dst, ++ uint8_t *, auth_offset >> 3), ++ rte_pktmbuf_mtod_offset( ++ sym_op->m_src, ++ uint8_t *, auth_offset >> 3), ++ ((cipher_offset >> 3) - (auth_offset >> 3))); ++ } ++ + if (cipher_algo == RTE_CRYPTO_CIPHER_SNOW3G_UEA2 || + cipher_algo == RTE_CRYPTO_CIPHER_KASUMI_F8 || + cipher_algo == RTE_CRYPTO_CIPHER_ZUC_EEA3) { +@@ -3665,9 +3710,9 @@ test_kasumi_decryption(const struct kasumi_test_data *tdata) + + /* Create KASUMI operation */ + retval = create_wireless_algo_cipher_operation(tdata->cipher_iv.data, +- tdata->cipher_iv.len, +- tdata->ciphertext.len, +- tdata->validCipherOffsetInBits.len); ++ tdata->cipher_iv.len, ++ RTE_ALIGN_CEIL(tdata->validCipherLenInBits.len, 8), ++ tdata->validCipherOffsetInBits.len); + if (retval < 0) + return retval; + +@@ -4672,16 +4717,20 @@ test_snow3g_auth_cipher(const struct snow3g_test_data *tdata, + + /* Validate obuf */ + if (verify) { +- TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( ++ TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT_OFFSET( + plaintext, + tdata->plaintext.data, +- tdata->plaintext.len >> 3, ++ (tdata->plaintext.len - tdata->cipher.offset_bits - ++ (tdata->digest.len << 3)), ++ tdata->cipher.offset_bits, + "SNOW 3G Plaintext data not as expected"); + } else { +- TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( ++ TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT_OFFSET( + ciphertext, + tdata->ciphertext.data, +- tdata->validDataLenInBits.len, ++ (tdata->validDataLenInBits.len - ++ tdata->cipher.offset_bits), ++ 
tdata->cipher.offset_bits, + "SNOW 3G Ciphertext data not as expected"); + + TEST_ASSERT_BUFFERS_ARE_EQUAL( +@@ -4883,16 +4932,20 @@ test_snow3g_auth_cipher_sgl(const struct snow3g_test_data *tdata, + + /* Validate obuf */ + if (verify) { +- TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( ++ TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT_OFFSET( + plaintext, + tdata->plaintext.data, +- tdata->plaintext.len >> 3, ++ (tdata->plaintext.len - tdata->cipher.offset_bits - ++ (tdata->digest.len << 3)), ++ tdata->cipher.offset_bits, + "SNOW 3G Plaintext data not as expected"); + } else { +- TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( ++ TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT_OFFSET( + ciphertext, + tdata->ciphertext.data, +- tdata->validDataLenInBits.len, ++ (tdata->validDataLenInBits.len - ++ tdata->cipher.offset_bits), ++ tdata->cipher.offset_bits, + "SNOW 3G Ciphertext data not as expected"); + + TEST_ASSERT_BUFFERS_ARE_EQUAL( +@@ -5485,7 +5538,7 @@ test_zuc_encryption(const struct wireless_test_data *tdata) + retval = create_wireless_algo_cipher_operation(tdata->cipher_iv.data, + tdata->cipher_iv.len, + tdata->plaintext.len, +- 0); ++ tdata->validCipherOffsetInBits.len); + if (retval < 0) + return retval; + +@@ -5585,7 +5638,7 @@ test_zuc_encryption_sgl(const struct wireless_test_data *tdata) + /* Create ZUC operation */ + retval = create_wireless_algo_cipher_operation(tdata->cipher_iv.data, + tdata->cipher_iv.len, tdata->plaintext.len, +- 0); ++ tdata->validCipherOffsetInBits.len); + if (retval < 0) + return retval; + +@@ -5696,8 +5749,8 @@ test_zuc_authentication(const struct wireless_test_data *tdata) + else + ut_params->op = process_crypto_request(ts_params->valid_devs[0], + ut_params->op); +- ut_params->obuf = ut_params->op->sym->m_src; + TEST_ASSERT_NOT_NULL(ut_params->op, "failed to retrieve obuf"); ++ ut_params->obuf = ut_params->op->sym->m_src; + ut_params->digest = rte_pktmbuf_mtod(ut_params->obuf, uint8_t *) + + plaintext_pad_len; + +@@ -5803,20 +5856,20 @@ test_zuc_auth_cipher(const struct wireless_test_data *tdata, + ciphertext = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, + ciphertext_pad_len); + memcpy(ciphertext, tdata->ciphertext.data, ciphertext_len); +- if (op_mode == OUT_OF_PLACE) +- rte_pktmbuf_append(ut_params->obuf, ciphertext_pad_len); + debug_hexdump(stdout, "ciphertext:", ciphertext, + ciphertext_len); + } else { ++ /* make sure enough space to cover partial digest verify case */ + plaintext = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, +- plaintext_pad_len); ++ ciphertext_pad_len); + memcpy(plaintext, tdata->plaintext.data, plaintext_len); +- if (op_mode == OUT_OF_PLACE) +- rte_pktmbuf_append(ut_params->obuf, plaintext_pad_len); + debug_hexdump(stdout, "plaintext:", plaintext, + plaintext_len); + } + ++ if (op_mode == OUT_OF_PLACE) ++ rte_pktmbuf_append(ut_params->obuf, ciphertext_pad_len); ++ + /* Create ZUC operation */ + retval = create_wireless_algo_auth_cipher_operation( + tdata->digest.data, tdata->digest.len, +@@ -6019,7 +6072,7 @@ test_zuc_auth_cipher_sgl(const struct wireless_test_data *tdata, + retval = create_wireless_algo_auth_cipher_operation( + tdata->digest.data, tdata->digest.len, + tdata->cipher_iv.data, tdata->cipher_iv.len, +- NULL, 0, ++ tdata->auth_iv.data, tdata->auth_iv.len, + (tdata->digest.offset_bytes == 0 ? + (verify ? 
ciphertext_pad_len : plaintext_pad_len) + : tdata->digest.offset_bytes), +@@ -6326,7 +6379,7 @@ test_snow3g_decryption_with_digest_test_case_1(void) + struct snow3g_hash_test_data snow3g_hash_data; + + /* +- * Function prepare data for hash veryfication test case. ++ * Function prepare data for hash verification test case. + * Digest is allocated in 4 last bytes in plaintext, pattern. + */ + snow3g_hash_test_vector_setup(&snow3g_test_case_7, &snow3g_hash_data); +@@ -6817,19 +6870,19 @@ test_mixed_auth_cipher(const struct mixed_cipher_auth_test_data *tdata, + ciphertext = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, + ciphertext_pad_len); + memcpy(ciphertext, tdata->ciphertext.data, ciphertext_len); +- if (op_mode == OUT_OF_PLACE) +- rte_pktmbuf_append(ut_params->obuf, ciphertext_pad_len); + debug_hexdump(stdout, "ciphertext:", ciphertext, + ciphertext_len); + } else { ++ /* make sure enough space to cover partial digest verify case */ + plaintext = (uint8_t *)rte_pktmbuf_append(ut_params->ibuf, +- plaintext_pad_len); ++ ciphertext_pad_len); + memcpy(plaintext, tdata->plaintext.data, plaintext_len); +- if (op_mode == OUT_OF_PLACE) +- rte_pktmbuf_append(ut_params->obuf, plaintext_pad_len); + debug_hexdump(stdout, "plaintext:", plaintext, plaintext_len); + } + ++ if (op_mode == OUT_OF_PLACE) ++ rte_pktmbuf_append(ut_params->obuf, ciphertext_pad_len); ++ + /* Create the operation */ + retval = create_wireless_algo_auth_cipher_operation( + tdata->digest_enc.data, tdata->digest_enc.len, +@@ -6901,27 +6954,30 @@ test_mixed_auth_cipher(const struct mixed_cipher_auth_test_data *tdata, + tdata->digest_enc.len); + } + +- /* Validate obuf */ +- if (verify) { +- TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( +- plaintext, +- tdata->plaintext.data, +- tdata->plaintext.len_bits >> 3, +- "Plaintext data not as expected"); +- } else { +- TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( +- ciphertext, +- tdata->ciphertext.data, +- tdata->validDataLen.len_bits, +- "Ciphertext data not as expected"); +- ++ if (!verify) { + TEST_ASSERT_BUFFERS_ARE_EQUAL( + ut_params->digest, + tdata->digest_enc.data, +- DIGEST_BYTE_LENGTH_SNOW3G_UIA2, ++ tdata->digest_enc.len, + "Generated auth tag not as expected"); + } + ++ if (tdata->cipher_algo != RTE_CRYPTO_CIPHER_NULL) { ++ if (verify) { ++ TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( ++ plaintext, ++ tdata->plaintext.data, ++ tdata->plaintext.len_bits >> 3, ++ "Plaintext data not as expected"); ++ } else { ++ TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( ++ ciphertext, ++ tdata->ciphertext.data, ++ tdata->validDataLen.len_bits, ++ "Ciphertext data not as expected"); ++ } ++ } ++ + TEST_ASSERT_EQUAL(ut_params->op->status, RTE_CRYPTO_OP_STATUS_SUCCESS, + "crypto op processing failed"); + +@@ -7118,19 +7174,7 @@ test_mixed_auth_cipher_sgl(const struct mixed_cipher_auth_test_data *tdata, + tdata->digest_enc.data, tdata->digest_enc.len); + } + +- /* Validate obuf */ +- if (verify) { +- TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( +- plaintext, +- tdata->plaintext.data, +- tdata->plaintext.len_bits >> 3, +- "Plaintext data not as expected"); +- } else { +- TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( +- ciphertext, +- tdata->ciphertext.data, +- tdata->validDataLen.len_bits, +- "Ciphertext data not as expected"); ++ if (!verify) { + TEST_ASSERT_BUFFERS_ARE_EQUAL( + digest, + tdata->digest_enc.data, +@@ -7138,6 +7182,22 @@ test_mixed_auth_cipher_sgl(const struct mixed_cipher_auth_test_data *tdata, + "Generated auth tag not as expected"); + } + ++ if (tdata->cipher_algo != RTE_CRYPTO_CIPHER_NULL) { ++ if (verify) { ++ 
TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( ++ plaintext, ++ tdata->plaintext.data, ++ tdata->plaintext.len_bits >> 3, ++ "Plaintext data not as expected"); ++ } else { ++ TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( ++ ciphertext, ++ tdata->ciphertext.data, ++ tdata->validDataLen.len_bits, ++ "Ciphertext data not as expected"); ++ } ++ } ++ + TEST_ASSERT_EQUAL(ut_params->op->status, RTE_CRYPTO_OP_STATUS_SUCCESS, + "crypto op processing failed"); + +@@ -7382,6 +7442,7 @@ create_aead_session(uint8_t dev_id, enum rte_crypto_aead_algorithm algo, + uint8_t iv_len) + { + uint8_t aead_key[key_len]; ++ int status; + + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; +@@ -7405,14 +7466,13 @@ create_aead_session(uint8_t dev_id, enum rte_crypto_aead_algorithm algo, + /* Create Crypto session*/ + ut_params->sess = rte_cryptodev_sym_session_create( + ts_params->session_mpool); ++ TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); + +- rte_cryptodev_sym_session_init(dev_id, ut_params->sess, ++ status = rte_cryptodev_sym_session_init(dev_id, ut_params->sess, + &ut_params->aead_xform, + ts_params->session_priv_mpool); + +- TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); +- +- return 0; ++ return status; + } + + static int +@@ -10133,6 +10193,7 @@ static int MD5_HMAC_create_session(struct crypto_testsuite_params *ts_params, + const struct HMAC_MD5_vector *test_case) + { + uint8_t key[64]; ++ int status; + + memcpy(key, test_case->key.data, test_case->key.len); + +@@ -10148,13 +10209,15 @@ static int MD5_HMAC_create_session(struct crypto_testsuite_params *ts_params, + + ut_params->sess = rte_cryptodev_sym_session_create( + ts_params->session_mpool); ++ TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); ++ if (ut_params->sess == NULL) ++ return TEST_FAILED; + +- rte_cryptodev_sym_session_init(ts_params->valid_devs[0], ++ status = rte_cryptodev_sym_session_init(ts_params->valid_devs[0], + ut_params->sess, &ut_params->auth_xform, + ts_params->session_priv_mpool); +- +- if (ut_params->sess == NULL) +- return TEST_FAILED; ++ if (status == -ENOTSUP) ++ return TEST_SKIPPED; + + ut_params->ibuf = rte_pktmbuf_alloc(ts_params->mbuf_pool); + +@@ -10367,6 +10430,7 @@ test_multi_session(void) + struct rte_cryptodev_sym_session **sessions; + + uint16_t i; ++ int status; + + /* Verify the capabilities */ + struct rte_cryptodev_sym_capability_idx cap_idx; +@@ -10388,22 +10452,25 @@ test_multi_session(void) + rte_cryptodev_info_get(ts_params->valid_devs[0], &dev_info); + + sessions = rte_malloc(NULL, +- (sizeof(struct rte_cryptodev_sym_session *) * +- MAX_NB_SESSIONS) + 1, 0); ++ sizeof(struct rte_cryptodev_sym_session *) * ++ (MAX_NB_SESSIONS + 1), 0); + + /* Create multiple crypto sessions*/ + for (i = 0; i < MAX_NB_SESSIONS; i++) { + + sessions[i] = rte_cryptodev_sym_session_create( + ts_params->session_mpool); +- +- rte_cryptodev_sym_session_init(ts_params->valid_devs[0], +- sessions[i], &ut_params->auth_xform, +- ts_params->session_priv_mpool); + TEST_ASSERT_NOT_NULL(sessions[i], + "Session creation failed at session number %u", + i); + ++ status = rte_cryptodev_sym_session_init( ++ ts_params->valid_devs[0], ++ sessions[i], &ut_params->auth_xform, ++ ts_params->session_priv_mpool); ++ if (status == -ENOTSUP) ++ return TEST_SKIPPED; ++ + /* Attempt to send a request on each session */ + TEST_ASSERT_SUCCESS( test_AES_CBC_HMAC_SHA512_decrypt_perform( + sessions[i], +@@ -10434,6 +10501,7 @@ test_multi_session(void) + 
} + } + ++ sessions[i] = NULL; + /* Next session create should fail */ + rte_cryptodev_sym_session_init(ts_params->valid_devs[0], + sessions[i], &ut_params->auth_xform, +@@ -10495,6 +10563,7 @@ test_multi_session_random_usage(void) + }, + + }; ++ int status; + + /* Verify the capabilities */ + struct rte_cryptodev_sym_capability_idx cap_idx; +@@ -10518,6 +10587,9 @@ test_multi_session_random_usage(void) + for (i = 0; i < MB_SESSION_NUMBER; i++) { + sessions[i] = rte_cryptodev_sym_session_create( + ts_params->session_mpool); ++ TEST_ASSERT_NOT_NULL(sessions[i], ++ "Session creation failed at session number %u", ++ i); + + rte_memcpy(&ut_paramz[i].ut_params, &unittest_params, + sizeof(struct crypto_unittest_params)); +@@ -10527,16 +10599,16 @@ test_multi_session_random_usage(void) + ut_paramz[i].cipher_key, ut_paramz[i].hmac_key); + + /* Create multiple crypto sessions*/ +- rte_cryptodev_sym_session_init( ++ status = rte_cryptodev_sym_session_init( + ts_params->valid_devs[0], + sessions[i], + &ut_paramz[i].ut_params.auth_xform, + ts_params->session_priv_mpool); + +- TEST_ASSERT_NOT_NULL(sessions[i], +- "Session creation failed at session number %u", +- i); ++ if (status == -ENOTSUP) ++ return TEST_SKIPPED; + ++ TEST_ASSERT_EQUAL(status, 0, "Session init failed"); + } + + srand(time(NULL)); +@@ -10648,6 +10720,7 @@ test_null_burst_operation(void) + { + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; ++ int status; + + unsigned i, burst_len = NULL_BURST_LENGTH; + +@@ -10675,12 +10748,17 @@ test_null_burst_operation(void) + + ut_params->sess = rte_cryptodev_sym_session_create( + ts_params->session_mpool); ++ TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); + + /* Create Crypto session*/ +- rte_cryptodev_sym_session_init(ts_params->valid_devs[0], ++ status = rte_cryptodev_sym_session_init(ts_params->valid_devs[0], + ut_params->sess, &ut_params->cipher_xform, + ts_params->session_priv_mpool); +- TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); ++ ++ if (status == -ENOTSUP) ++ return TEST_SKIPPED; ++ ++ TEST_ASSERT_EQUAL(status, 0, "Session init failed"); + + TEST_ASSERT_EQUAL(rte_crypto_op_bulk_alloc(ts_params->op_mpool, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, burst, burst_len), +@@ -10838,6 +10916,7 @@ static int create_gmac_session(uint8_t dev_id, + enum rte_crypto_auth_operation auth_op) + { + uint8_t auth_key[tdata->key.len]; ++ int status; + + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; +@@ -10858,14 +10937,13 @@ static int create_gmac_session(uint8_t dev_id, + + ut_params->sess = rte_cryptodev_sym_session_create( + ts_params->session_mpool); ++ TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); + +- rte_cryptodev_sym_session_init(dev_id, ut_params->sess, ++ status = rte_cryptodev_sym_session_init(dev_id, ut_params->sess, + &ut_params->auth_xform, + ts_params->session_priv_mpool); + +- TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); +- +- return 0; ++ return status; + } + + static int +@@ -10903,6 +10981,8 @@ test_AES_GMAC_authentication(const struct gmac_test_data *tdata) + retval = create_gmac_session(ts_params->valid_devs[0], + tdata, RTE_CRYPTO_AUTH_OP_GENERATE); + ++ if (retval == -ENOTSUP) ++ return TEST_SKIPPED; + if (retval < 0) + return retval; + +@@ -11032,6 +11112,8 @@ test_AES_GMAC_authentication_verify(const struct gmac_test_data *tdata) + retval = 
create_gmac_session(ts_params->valid_devs[0], + tdata, RTE_CRYPTO_AUTH_OP_VERIFY); + ++ if (retval == -ENOTSUP) ++ return TEST_SKIPPED; + if (retval < 0) + return retval; + +@@ -11159,6 +11241,8 @@ test_AES_GMAC_authentication_SGL(const struct gmac_test_data *tdata, + retval = create_gmac_session(ts_params->valid_devs[0], + tdata, RTE_CRYPTO_AUTH_OP_GENERATE); + ++ if (retval == -ENOTSUP) ++ return TEST_SKIPPED; + if (retval < 0) + return retval; + +@@ -11488,6 +11572,7 @@ create_auth_session(struct crypto_unittest_params *ut_params, + { + struct crypto_testsuite_params *ts_params = &testsuite_params; + uint8_t auth_key[reference->auth_key.len + 1]; ++ int status; + + memcpy(auth_key, reference->auth_key.data, reference->auth_key.len); + +@@ -11503,14 +11588,13 @@ create_auth_session(struct crypto_unittest_params *ut_params, + /* Create Crypto session*/ + ut_params->sess = rte_cryptodev_sym_session_create( + ts_params->session_mpool); ++ TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); + +- rte_cryptodev_sym_session_init(dev_id, ut_params->sess, ++ status = rte_cryptodev_sym_session_init(dev_id, ut_params->sess, + &ut_params->auth_xform, + ts_params->session_priv_mpool); + +- TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); +- +- return 0; ++ return status; + } + + static int +@@ -11523,6 +11607,7 @@ create_auth_cipher_session(struct crypto_unittest_params *ut_params, + struct crypto_testsuite_params *ts_params = &testsuite_params; + uint8_t cipher_key[reference->cipher_key.len + 1]; + uint8_t auth_key[reference->auth_key.len + 1]; ++ int status; + + memcpy(cipher_key, reference->cipher_key.data, + reference->cipher_key.len); +@@ -11556,14 +11641,13 @@ create_auth_cipher_session(struct crypto_unittest_params *ut_params, + /* Create Crypto session*/ + ut_params->sess = rte_cryptodev_sym_session_create( + ts_params->session_mpool); ++ TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); + +- rte_cryptodev_sym_session_init(dev_id, ut_params->sess, ++ status = rte_cryptodev_sym_session_init(dev_id, ut_params->sess, + &ut_params->auth_xform, + ts_params->session_priv_mpool); + +- TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); +- +- return 0; ++ return status; + } + + static int +@@ -11779,6 +11863,9 @@ test_authentication_verify_fail_when_data_corruption( + ts_params->valid_devs[0], + reference, + RTE_CRYPTO_AUTH_OP_VERIFY); ++ ++ if (retval == -ENOTSUP) ++ return TEST_SKIPPED; + if (retval < 0) + return retval; + +@@ -11951,6 +12038,9 @@ test_authenticated_decryption_fail_when_corruption( + reference, + RTE_CRYPTO_AUTH_OP_VERIFY, + RTE_CRYPTO_CIPHER_OP_DECRYPT); ++ ++ if (retval == -ENOTSUP) ++ return TEST_SKIPPED; + if (retval < 0) + return retval; + +@@ -12000,12 +12090,13 @@ test_authenticated_decryption_fail_when_corruption( + } + + static int +-test_authenticated_encryt_with_esn( ++test_authenticated_encrypt_with_esn( + struct crypto_testsuite_params *ts_params, + struct crypto_unittest_params *ut_params, + const struct test_crypto_vector *reference) + { + int retval; ++ int status; + + uint8_t *authciphertext, *plaintext, *auth_tag; + uint16_t plaintext_pad_len; +@@ -12063,13 +12154,17 @@ test_authenticated_encryt_with_esn( + /* Create Crypto session*/ + ut_params->sess = rte_cryptodev_sym_session_create( + ts_params->session_mpool); ++ TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); + +- rte_cryptodev_sym_session_init(ts_params->valid_devs[0], ++ status = 
rte_cryptodev_sym_session_init(ts_params->valid_devs[0], + ut_params->sess, + &ut_params->cipher_xform, + ts_params->session_priv_mpool); + +- TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); ++ if (status == -ENOTSUP) ++ return TEST_SKIPPED; ++ ++ TEST_ASSERT_EQUAL(status, 0, "Session init failed"); + + ut_params->ibuf = rte_pktmbuf_alloc(ts_params->mbuf_pool); + TEST_ASSERT_NOT_NULL(ut_params->ibuf, +@@ -12195,13 +12290,17 @@ test_authenticated_decrypt_with_esn( + /* Create Crypto session*/ + ut_params->sess = rte_cryptodev_sym_session_create( + ts_params->session_mpool); ++ TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); + +- rte_cryptodev_sym_session_init(ts_params->valid_devs[0], ++ retval = rte_cryptodev_sym_session_init(ts_params->valid_devs[0], + ut_params->sess, + &ut_params->auth_xform, + ts_params->session_priv_mpool); + +- TEST_ASSERT_NOT_NULL(ut_params->sess, "Session creation failed"); ++ if (retval == -ENOTSUP) ++ return TEST_SKIPPED; ++ ++ TEST_ASSERT_EQUAL(retval, 0, "Session init failed"); + + ut_params->ibuf = rte_pktmbuf_alloc(ts_params->mbuf_pool); + TEST_ASSERT_NOT_NULL(ut_params->ibuf, +@@ -12787,7 +12886,7 @@ auth_decryption_AES128CBC_HMAC_SHA1_fail_tag_corrupt(void) + static int + auth_encrypt_AES128CBC_HMAC_SHA1_esn_check(void) + { +- return test_authenticated_encryt_with_esn( ++ return test_authenticated_encrypt_with_esn( + &testsuite_params, + &unittest_params, + &aes128cbc_hmac_sha1_aad_test_vector); +@@ -13762,7 +13861,7 @@ static struct unit_test_suite cryptodev_ccp_testsuite = { + }; + + static int +-test_cryptodev_qat(void /*argv __rte_unused, int argc __rte_unused*/) ++test_cryptodev_qat(void) + { + gbl_driver_id = rte_cryptodev_driver_id_get( + RTE_STR(CRYPTODEV_NAME_QAT_SYM_PMD)); +@@ -13776,7 +13875,7 @@ test_cryptodev_qat(void /*argv __rte_unused, int argc __rte_unused*/) + } + + static int +-test_cryptodev_virtio(void /*argv __rte_unused, int argc __rte_unused*/) ++test_cryptodev_virtio(void) + { + gbl_driver_id = rte_cryptodev_driver_id_get( + RTE_STR(CRYPTODEV_NAME_VIRTIO_PMD)); +@@ -13790,7 +13889,7 @@ test_cryptodev_virtio(void /*argv __rte_unused, int argc __rte_unused*/) + } + + static int +-test_cryptodev_aesni_mb(void /*argv __rte_unused, int argc __rte_unused*/) ++test_cryptodev_aesni_mb(void) + { + gbl_driver_id = rte_cryptodev_driver_id_get( + RTE_STR(CRYPTODEV_NAME_AESNI_MB_PMD)); +@@ -13888,7 +13987,7 @@ test_cryptodev_null(void) + } + + static int +-test_cryptodev_sw_snow3g(void /*argv __rte_unused, int argc __rte_unused*/) ++test_cryptodev_sw_snow3g(void) + { + gbl_driver_id = rte_cryptodev_driver_id_get( + RTE_STR(CRYPTODEV_NAME_SNOW3G_PMD)); +@@ -13902,7 +14001,7 @@ test_cryptodev_sw_snow3g(void /*argv __rte_unused, int argc __rte_unused*/) + } + + static int +-test_cryptodev_sw_kasumi(void /*argv __rte_unused, int argc __rte_unused*/) ++test_cryptodev_sw_kasumi(void) + { + gbl_driver_id = rte_cryptodev_driver_id_get( + RTE_STR(CRYPTODEV_NAME_KASUMI_PMD)); +@@ -13916,7 +14015,7 @@ test_cryptodev_sw_kasumi(void /*argv __rte_unused, int argc __rte_unused*/) + } + + static int +-test_cryptodev_sw_zuc(void /*argv __rte_unused, int argc __rte_unused*/) ++test_cryptodev_sw_zuc(void) + { + gbl_driver_id = rte_cryptodev_driver_id_get( + RTE_STR(CRYPTODEV_NAME_ZUC_PMD)); +@@ -13960,7 +14059,7 @@ test_cryptodev_mrvl(void) + #ifdef RTE_CRYPTO_SCHEDULER + + static int +-test_cryptodev_scheduler(void /*argv __rte_unused, int argc __rte_unused*/) ++test_cryptodev_scheduler(void) + { + gbl_driver_id = 
rte_cryptodev_driver_id_get( + RTE_STR(CRYPTODEV_NAME_SCHEDULER_PMD)); +@@ -13983,7 +14082,7 @@ REGISTER_TEST_COMMAND(cryptodev_scheduler_autotest, test_cryptodev_scheduler); + #endif + + static int +-test_cryptodev_dpaa2_sec(void /*argv __rte_unused, int argc __rte_unused*/) ++test_cryptodev_dpaa2_sec(void) + { + gbl_driver_id = rte_cryptodev_driver_id_get( + RTE_STR(CRYPTODEV_NAME_DPAA2_SEC_PMD)); +@@ -13997,7 +14096,7 @@ test_cryptodev_dpaa2_sec(void /*argv __rte_unused, int argc __rte_unused*/) + } + + static int +-test_cryptodev_dpaa_sec(void /*argv __rte_unused, int argc __rte_unused*/) ++test_cryptodev_dpaa_sec(void) + { + gbl_driver_id = rte_cryptodev_driver_id_get( + RTE_STR(CRYPTODEV_NAME_DPAA_SEC_PMD)); +@@ -14049,7 +14148,7 @@ test_cryptodev_octeontx2(void) + } + + static int +-test_cryptodev_caam_jr(void /*argv __rte_unused, int argc __rte_unused*/) ++test_cryptodev_caam_jr(void) + { + gbl_driver_id = rte_cryptodev_driver_id_get( + RTE_STR(CRYPTODEV_NAME_CAAM_JR_PMD)); +@@ -14091,7 +14190,7 @@ test_cryptodev_bcmfs(void) + } + + static int +-test_cryptodev_qat_raw_api(void /*argv __rte_unused, int argc __rte_unused*/) ++test_cryptodev_qat_raw_api(void) + { + int ret; + +diff --git a/dpdk/app/test/test_cryptodev_aes_test_vectors.h b/dpdk/app/test/test_cryptodev_aes_test_vectors.h +index c192d75a7e..bb5f09fa81 100644 +--- a/dpdk/app/test/test_cryptodev_aes_test_vectors.h ++++ b/dpdk/app/test/test_cryptodev_aes_test_vectors.h +@@ -2650,7 +2650,7 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = { + .op_mask = BLOCKCIPHER_TEST_OP_ENCRYPT, + }, + { +- .test_descr = "AES-192-CBC Encryption Scater gather", ++ .test_descr = "AES-192-CBC Encryption Scatter gather", + .test_data = &aes_test_data_10, + .op_mask = BLOCKCIPHER_TEST_OP_ENCRYPT, + .feature_mask = BLOCKCIPHER_TEST_FEATURE_SG | +diff --git a/dpdk/app/test/test_cryptodev_asym.c b/dpdk/app/test/test_cryptodev_asym.c +index 85cd076059..af1310d9a2 100644 +--- a/dpdk/app/test/test_cryptodev_asym.c ++++ b/dpdk/app/test/test_cryptodev_asym.c +@@ -559,7 +559,7 @@ test_one_case(const void *test_case, int sessionless) + status = test_cryptodev_asym_op( + &testsuite_params, + &tc, test_msg, sessionless, i, +- RTE_RSA_KET_TYPE_QT); ++ RTE_RSA_KEY_TYPE_QT); + } + if (status) + break; +diff --git a/dpdk/app/test/test_cryptodev_blockcipher.c b/dpdk/app/test/test_cryptodev_blockcipher.c +index 135e57b9fa..8e168724be 100644 +--- a/dpdk/app/test/test_cryptodev_blockcipher.c ++++ b/dpdk/app/test/test_cryptodev_blockcipher.c +@@ -160,7 +160,7 @@ test_blockcipher_one_case(const struct blockcipher_test_case *t, + printf("Raw Data Path APIs do not support OOP, " + "Test Skipped.\n"); + snprintf(test_msg, BLOCKCIPHER_TEST_MSG_LEN, "SKIPPED"); +- status = TEST_SUCCESS; ++ status = TEST_SKIPPED; + goto error_exit; + } + } +diff --git a/dpdk/app/test/test_cryptodev_rsa_test_vectors.h b/dpdk/app/test/test_cryptodev_rsa_test_vectors.h +index 48a72e1492..04539a1ecf 100644 +--- a/dpdk/app/test/test_cryptodev_rsa_test_vectors.h ++++ b/dpdk/app/test/test_cryptodev_rsa_test_vectors.h +@@ -378,7 +378,7 @@ struct rte_crypto_asym_xform rsa_xform_crt = { + .data = rsa_e, + .length = sizeof(rsa_e) + }, +- .key_type = RTE_RSA_KET_TYPE_QT, ++ .key_type = RTE_RSA_KEY_TYPE_QT, + .qt = { + .p = { + .data = rsa_p, +diff --git a/dpdk/app/test/test_cryptodev_snow3g_test_vectors.h b/dpdk/app/test/test_cryptodev_snow3g_test_vectors.h +index bbe05662be..b49a07bcf2 100644 +--- a/dpdk/app/test/test_cryptodev_snow3g_test_vectors.h ++++ 
b/dpdk/app/test/test_cryptodev_snow3g_test_vectors.h +@@ -138,11 +138,11 @@ struct snow3g_test_data snow3g_test_case_2 = { + .len = 16 + }, + .cipher_iv = { +- .data = { ++ .data = { + 0xE2, 0x8B, 0xCF, 0x7B, 0xC0, 0x00, 0x00, 0x00, + 0xE2, 0x8B, 0xCF, 0x7B, 0xC0, 0x00, 0x00, 0x00 + }, +- .len = 16 ++ .len = 16 + }, + .plaintext = { + .data = { +@@ -359,8 +359,8 @@ struct snow3g_test_data snow3g_auth_cipher_test_case_1 = { + }, + .cipher_iv = { + .data = { +- 0x14, 0x79, 0x3E, 0x41, 0x03, 0x97, 0xE8, 0xFD, +- 0x94, 0x79, 0x3E, 0x41, 0x03, 0x97, 0x68, 0xFD ++ 0x72, 0xA4, 0xF2, 0x0F, 0x48, 0x00, 0x00, 0x00, ++ 0x72, 0xA4, 0xF2, 0x0F, 0x48, 0x00, 0x00, 0x00 + }, + .len = 16 + }, +@@ -383,13 +383,13 @@ struct snow3g_test_data snow3g_auth_cipher_test_case_1 = { + .len = 384 + }, + .ciphertext = { +- .data = { +- 0x95, 0x2E, 0x5A, 0xE1, 0x50, 0xB8, 0x59, 0x2A, +- 0x9B, 0xA0, 0x38, 0xA9, 0x8E, 0x2F, 0xED, 0xAB, +- 0xFD, 0xC8, 0x3B, 0x47, 0x46, 0x0B, 0x50, 0x16, +- 0xEC, 0x88, 0x45, 0xB6, 0x05, 0xC7, 0x54, 0xF8, +- 0xBD, 0x91, 0xAA, 0xB6, 0xA4, 0xDC, 0x64, 0xB4, +- 0xCB, 0xEB, 0x97, 0x06, 0x4C, 0xF7, 0x02, 0x3D ++ .data = { ++ 0x86, 0x4F, 0x4D, 0xE8, 0x86, 0xE6, 0x3E, 0x66, ++ 0x52, 0x97, 0xC7, 0x62, 0xAE, 0x8E, 0xA2, 0xDB, ++ 0x01, 0xD6, 0x33, 0xA9, 0xA4, 0xCE, 0x02, 0xD5, ++ 0xC2, 0xC5, 0x5F, 0x90, 0xE0, 0x89, 0x48, 0xD4, ++ 0x92, 0xF4, 0xE5, 0x9A, 0xDA, 0x13, 0x76, 0xFF, ++ 0x6E, 0x76, 0x6B, 0x71, 0x62, 0x28, 0xB2, 0xEC + }, + .len = 384 + }, +@@ -428,15 +428,15 @@ struct snow3g_test_data snow3g_test_case_7 = { + }, + .cipher_iv = { + .data = { +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 ++ 0xE2, 0x8B, 0xCF, 0x7B, 0xC0, 0x00, 0x00, 0x00, ++ 0xE2, 0x8B, 0xCF, 0x7B, 0xC0, 0x00, 0x00, 0x00 + }, + .len = 16 + }, + .auth_iv = { + .data = { +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, +- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ++ 0x36, 0xAF, 0x61, 0x44, 0x98, 0x38, 0xF0, 0x3A, ++ 0x36, 0xAF, 0x61, 0x44, 0x98, 0x38, 0xF0, 0x3A + }, + .len = 16 + }, +@@ -457,28 +457,28 @@ struct snow3g_test_data snow3g_test_case_7 = { + 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, + 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, + 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, +- 0x5A, 0x5A, 0x5A, 0x5A, 0xF1, 0x9E, 0x2B, 0x6F, ++ 0x5A, 0x5A, 0x5A, 0x5A, 0xBB, 0x2B, 0x8B, 0x15, + }, + .len = 128 << 3 + }, + .ciphertext = { + .data = { +- 0x5A, 0x5A, 0xE4, 0xAD, 0x29, 0xA2, 0x6A, 0xA6, +- 0x20, 0x1D, 0xCD, 0x08, 0x50, 0xD6, 0xE6, 0x47, +- 0xBC, 0x88, 0x08, 0x01, 0x17, 0xFA, 0x47, 0x5B, +- 0x90, 0x40, 0xBA, 0x0C, 0xB5, 0x58, 0xF3, 0x0C, +- 0xA0, 0xD4, 0x98, 0x83, 0x1B, 0xCE, 0x54, 0xE3, +- 0x29, 0x00, 0x3C, 0xA4, 0xAD, 0x74, 0xEE, 0x05, +- 0xA3, 0x6C, 0xD4, 0xAC, 0xC6, 0x30, 0x33, 0xC9, +- 0x37, 0x57, 0x41, 0x9B, 0xD4, 0x73, 0xB9, 0x77, +- 0x70, 0x8B, 0x63, 0xDD, 0x22, 0xB8, 0xE1, 0x85, +- 0xB2, 0x92, 0x7C, 0x37, 0xD3, 0x2E, 0xD9, 0xF4, +- 0x4A, 0x69, 0x25, 0x30, 0xE3, 0x5B, 0x8B, 0xF6, +- 0x0F, 0xDE, 0x0B, 0x92, 0xD5, 0x25, 0x52, 0x6D, +- 0x26, 0xEB, 0x2F, 0x8A, 0x3B, 0x8B, 0x38, 0xE2, +- 0x48, 0xD3, 0x4A, 0x98, 0xF7, 0x3A, 0xC2, 0x46, +- 0x69, 0x8D, 0x73, 0x3E, 0x57, 0x88, 0x2C, 0x80, +- 0xF0, 0xF2, 0x75, 0xB8, 0x7D, 0x27, 0xC6, 0xDA, ++ 0x5A, 0x5A, 0x8A, 0x35, 0xF7, 0x36, 0xDA, 0xD7, ++ 0xC4, 0x2C, 0x10, 0xEA, 0x92, 0x9C, 0x00, 0xF0, ++ 0xAE, 0x35, 0x5E, 0x8D, 0xB6, 0x88, 0x30, 0x66, ++ 0x74, 0x8B, 0xA2, 0x82, 0x5C, 0xA7, 0xF3, 0x54, ++ 0x75, 0x02, 0xA9, 0x90, 0x6B, 0x4B, 0x6A, 0x63, ++ 0xFF, 0x4B, 0x08, 0xFE, 0x11, 0x3C, 0x5A, 0x53, ++ 0xEE, 
0x68, 0x14, 0x41, 0x17, 0xCD, 0x7B, 0x27, ++ 0x88, 0xAF, 0x99, 0xE2, 0x9C, 0x86, 0x42, 0x12, ++ 0x97, 0x93, 0xF0, 0xE6, 0xE2, 0xB2, 0x2D, 0xDA, ++ 0x2C, 0x59, 0xB0, 0xA7, 0x09, 0xF6, 0x32, 0xC0, ++ 0x35, 0x9A, 0xD3, 0xBA, 0xDC, 0x8F, 0x2E, 0x18, ++ 0x97, 0x87, 0x44, 0xD6, 0x43, 0xFA, 0x86, 0x5A, ++ 0xB0, 0xA2, 0x5A, 0xB8, 0x5F, 0x57, 0xE3, 0x2F, ++ 0x73, 0x9C, 0x01, 0x3A, 0x02, 0x08, 0x8C, 0xEB, ++ 0xA0, 0x5D, 0x74, 0x58, 0x5A, 0xA1, 0x58, 0x17, ++ 0x5E, 0x86, 0x96, 0xE6, 0x9C, 0xEE, 0x8C, 0xA8 + + }, + .len = 128 << 3 +@@ -493,7 +493,7 @@ struct snow3g_test_data snow3g_test_case_7 = { + }, + .digest = { + .data = { +- 0x7D, 0x27, 0xC6, 0xDA ++ 0x9C, 0xEE, 0x8C, 0xA8 + }, + .len = 4, + .offset_bytes = 124 +@@ -520,15 +520,15 @@ struct snow3g_test_data snow3g_auth_cipher_test_case_2 = { + }, + .cipher_iv = { + .data = { +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 ++ 0xFA, 0x55, 0x6B, 0x26, 0x1C, 0x00, 0x00, 0x00, ++ 0xFA, 0x55, 0x6B, 0x26, 0x1C, 0x00, 0x00, 0x00 + }, + .len = 16 + }, + .auth_iv = { + .data = { +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, +- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ++ 0x3E, 0xDC, 0x87, 0xE2, 0xA4, 0xF2, 0xD8, 0xE2, ++ 0x3E, 0xDC, 0x87, 0xE2, 0xA4, 0xF2, 0xD8, 0xE2 + }, + .len = 16 + }, +@@ -556,22 +556,22 @@ struct snow3g_test_data snow3g_auth_cipher_test_case_2 = { + }, + .ciphertext = { + .data = { +- 0x5A, 0x5A, 0xE4, 0xAD, 0x29, 0xA2, 0x6A, 0xA6, +- 0x20, 0x1D, 0xCD, 0x08, 0x50, 0xD6, 0xE6, 0x47, +- 0xBC, 0x88, 0x08, 0x01, 0x17, 0xFA, 0x47, 0x5B, +- 0x90, 0x40, 0xBA, 0x0C, 0xB5, 0x58, 0xF3, 0x0C, +- 0xA0, 0xD4, 0x98, 0x83, 0x1B, 0xCE, 0x54, 0xE3, +- 0x29, 0x00, 0x3C, 0xA4, 0xAD, 0x74, 0xEE, 0x05, +- 0xA3, 0x6C, 0xD4, 0xAC, 0xC6, 0x30, 0x33, 0xC9, +- 0x37, 0x57, 0x41, 0x9B, 0xD4, 0x73, 0xB9, 0x77, +- 0x70, 0x8B, 0x63, 0xDD, 0x22, 0xB8, 0xE1, 0x85, +- 0xB2, 0x92, 0x7C, 0x37, 0xD3, 0x2E, 0xD9, 0xF4, +- 0x4A, 0x69, 0x25, 0x30, 0xE3, 0x5B, 0x8B, 0xF6, +- 0x0F, 0xDE, 0x0B, 0x92, 0xD5, 0x25, 0x52, 0x6D, +- 0x26, 0xEB, 0x2F, 0x8A, 0x3B, 0x8B, 0x38, 0xE2, +- 0x48, 0xD3, 0x4A, 0x98, 0xF7, 0x3A, 0xC2, 0x46, +- 0x69, 0x8D, 0x73, 0x3E, 0x57, 0x88, 0x2C, 0x80, +- 0xF0, 0xF2, 0x75, 0xB8, 0x7D, 0x27, 0xC6, 0xDA, ++ 0x5A, 0x5A, 0xCF, 0xCF, 0x3D, 0x11, 0xBF, 0xD9, ++ 0xC3, 0x7F, 0x7C, 0xA8, 0x1A, 0x9F, 0x9F, 0x34, ++ 0xC5, 0x6E, 0x1B, 0x2C, 0xE0, 0x81, 0x4B, 0x66, ++ 0x87, 0xCB, 0xD5, 0x61, 0x04, 0xED, 0xBC, 0x69, ++ 0x79, 0x86, 0x73, 0x48, 0x69, 0x4A, 0xBA, 0x55, ++ 0x44, 0x6C, 0xEF, 0xD9, 0x34, 0x61, 0x59, 0x67, ++ 0x80, 0x4E, 0x03, 0x95, 0x0A, 0xA1, 0x6C, 0xBA, ++ 0x74, 0xBD, 0xAF, 0x11, 0x4B, 0xE6, 0x98, 0x61, ++ 0x4E, 0xD4, 0x3E, 0xE4, 0x99, 0x55, 0x5C, 0x3A, ++ 0x8C, 0x3E, 0xC0, 0x01, 0x6E, 0x15, 0xE1, 0x0E, ++ 0x71, 0x4C, 0x89, 0x43, 0x8A, 0x48, 0x69, 0x6D, ++ 0x02, 0x10, 0xC6, 0x54, 0x37, 0x18, 0xAA, 0x10, ++ 0x90, 0x80, 0x0B, 0x69, 0x08, 0xB4, 0xF9, 0x4D, ++ 0xD1, 0x2E, 0x43, 0xD9, 0x92, 0xAF, 0x06, 0x4A, ++ 0xAF, 0x26, 0x25, 0x77, 0x37, 0xD0, 0xFC, 0x3C, ++ 0xA0, 0xCB, 0xAF, 0x06, 0x95, 0x26, 0x30, 0x38, + + }, + .len = 128 << 3 +@@ -586,7 +586,7 @@ struct snow3g_test_data snow3g_auth_cipher_test_case_2 = { + }, + .digest = { + .data = { +- 0x7D, 0x27, 0xC6, 0xDA ++ 0x95, 0x26, 0x30, 0x38 + }, + .len = 4, + .offset_bytes = 124 +@@ -613,15 +613,15 @@ struct snow3g_test_data snow3g_auth_cipher_test_case_3 = { + }, + .cipher_iv = { + .data = { +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 ++ 0x39, 0x8A, 0x59, 0xB4, 0x2C, 0x00, 0x00, 0x00, ++ 
0x39, 0x8A, 0x59, 0xB4, 0x2C, 0x00, 0x00, 0x00 + }, + .len = 16 + }, + .auth_iv = { + .data = { +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, +- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ++ 0x29, 0x6F, 0x39, 0x3C, 0x6B, 0x22, 0x77, 0x37, ++ 0x29, 0x6F, 0x39, 0x3C, 0x6B, 0x22, 0x77, 0x37 + }, + .len = 16 + }, +@@ -636,10 +636,10 @@ struct snow3g_test_data snow3g_auth_cipher_test_case_3 = { + }, + .ciphertext = { + .data = { +- 0x5A, 0x5A, 0xE4, 0xAD, 0x29, 0xA2, 0x6A, 0xA6, +- 0x20, 0x1D, 0xCD, 0x08, 0x50, 0xD6, 0xE6, 0x47, +- 0xBC, 0x88, 0x08, 0x01, 0x17, 0xFA, 0x47, 0x5B, +- 0x90, 0x40, 0xBA, 0x0C, 0xBA, 0x6D, 0x6A, 0x5E, ++ 0x5A, 0x5A, 0x93, 0xB0, 0x3F, 0xA4, 0xEB, 0xD4, ++ 0x51, 0x12, 0x3B, 0x95, 0x93, 0x12, 0xBF, 0xBE, ++ 0xF2, 0xFE, 0xA5, 0xAE, 0xE7, 0xF4, 0x80, 0x3E, ++ 0xB2, 0xD1, 0xFF, 0x5F, 0xD9, 0x32, 0x72, 0xFE, + }, + .len = 32 << 3 + }, +@@ -653,7 +653,7 @@ struct snow3g_test_data snow3g_auth_cipher_test_case_3 = { + }, + .digest = { + .data = { +- 0xBA, 0x6D, 0x6A, 0x5E ++ 0xD9, 0x32, 0x72, 0xFE + }, + .len = 4, + .offset_bytes = 28 +@@ -680,15 +680,15 @@ struct snow3g_test_data snow3g_auth_cipher_partial_digest_encryption = { + }, + .cipher_iv = { + .data = { +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 ++ 0x72, 0xA4, 0xF2, 0x0F, 0x48, 0x00, 0x00, 0x00, ++ 0x72, 0xA4, 0xF2, 0x0F, 0x48, 0x00, 0x00, 0x00 + }, + .len = 16 + }, + .auth_iv = { + .data = { +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, +- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ++ 0x14, 0x79, 0x3E, 0x41, 0x03, 0x97, 0xE8, 0xFD, ++ 0x14, 0x79, 0x3E, 0x41, 0x03, 0x97, 0xE8, 0xFD + }, + .len = 16 + }, +@@ -704,9 +704,9 @@ struct snow3g_test_data snow3g_auth_cipher_partial_digest_encryption = { + .ciphertext = { + .data = { + 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, +- 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0xE4, 0xAD, +- 0x29, 0xA2, 0x6A, 0xA6, 0x20, 0x1D, 0xCD, 0x08, +- 0x50, 0xD6, 0xE6, 0x47, 0xB3, 0xBD, 0xC3, 0x08 ++ 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0xA2, 0xB7, ++ 0xDF, 0xA7, 0x98, 0xA1, 0xD8, 0xD4, 0x9B, 0x6E, ++ 0x2C, 0x7A, 0x66, 0x15, 0xCC, 0x4C, 0xE5, 0xE0 + }, + .len = 32 << 3 + }, +@@ -720,7 +720,7 @@ struct snow3g_test_data snow3g_auth_cipher_partial_digest_encryption = { + }, + .digest = { + .data = { +- 0xB3, 0xBD, 0xC3, 0x08 ++ 0xCC, 0x4C, 0xE5, 0xE0 + }, + .len = 4, + .offset_bytes = 28 +diff --git a/dpdk/app/test/test_cryptodev_zuc_test_vectors.h b/dpdk/app/test/test_cryptodev_zuc_test_vectors.h +index cc2338e107..067fb5eb34 100644 +--- a/dpdk/app/test/test_cryptodev_zuc_test_vectors.h ++++ b/dpdk/app/test/test_cryptodev_zuc_test_vectors.h +@@ -558,13 +558,13 @@ static struct wireless_test_data zuc_test_case_cipher_200b_auth_200b = { + }, + .auth_iv = { + .data = { +- 0xFA, 0x55, 0x6B, 0x26, 0x1C, 0x00, 0x00, 0x00, +- 0xFA, 0x55, 0x6B, 0x26, 0x1C, 0x00, 0x00, 0x00 ++ 0xFA, 0x55, 0x6B, 0x26, 0x18, 0x00, 0x00, 0x00, ++ 0xFA, 0x55, 0x6B, 0x26, 0x18, 0x00, 0x00, 0x00 + }, + .len = 16 + }, + .digest = { +- .data = {0x01, 0xFE, 0x5E, 0x38}, ++ .data = {0x2F, 0x45, 0x7D, 0x7B}, + .len = 4 + }, + .validAuthLenInBits = { +@@ -631,13 +631,13 @@ static struct wireless_test_data zuc_test_case_cipher_800b_auth_120b = { + }, + .auth_iv = { + .data = { +- 0xFA, 0x55, 0x6B, 0x26, 0x1C, 0x00, 0x00, 0x00, +- 0xFA, 0x55, 0x6B, 0x26, 0x1C, 0x00, 0x00, 0x00 ++ 0xFA, 0x55, 0x6B, 0x26, 0x18, 0x00, 0x00, 0x00, ++ 0xFA, 0x55, 0x6B, 0x26, 0x18, 0x00, 0x00, 0x00 + }, + .len = 16 + }, + .digest = { +- .data = {0x9D, 0x42, 0x1C, 0xEA}, ++ .data = 
{0xCA, 0xBB, 0x8D, 0x94}, + .len = 4 + }, + .validAuthLenInBits = { +@@ -1056,15 +1056,15 @@ struct wireless_test_data zuc_auth_cipher_test_case_1 = { + }, + .cipher_iv = { + .data = { +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, +- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ++ 0x66, 0x03, 0x54, 0x92, 0x78, 0x00, 0x00, 0x00, ++ 0x66, 0x03, 0x54, 0x92, 0x78, 0x00, 0x00, 0x00 + }, + .len = 16 + }, + .auth_iv = { + .data = { +- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, +- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ++ 0xFA, 0x55, 0x6B, 0x26, 0x18, 0x00, 0x00, 0x00, ++ 0xFA, 0x55, 0x6B, 0x26, 0x18, 0x00, 0x00, 0x00 + }, + .len = 16 + }, +@@ -1091,22 +1091,22 @@ struct wireless_test_data zuc_auth_cipher_test_case_1 = { + }, + .ciphertext = { + .data = { +- 0x5A, 0x5A, 0xDB, 0x3D, 0xD5, 0xB7, 0xB9, 0x58, +- 0xA5, 0xD3, 0xE3, 0xF9, 0x18, 0x73, 0xB4, 0x74, +- 0x05, 0xF0, 0xE9, 0xB6, 0x5D, 0x9A, 0xE3, 0xFA, +- 0x5D, 0xFD, 0x24, 0x51, 0xAD, 0x73, 0xCA, 0x64, +- 0x91, 0xD5, 0xB3, 0x94, 0x10, 0x91, 0x89, 0xEA, +- 0x73, 0x6F, 0xB0, 0x2A, 0x0A, 0x63, 0x0F, 0x8D, +- 0x64, 0x87, 0xA3, 0x14, 0x6B, 0x93, 0x31, 0x0F, +- 0x14, 0xAD, 0xEA, 0x62, 0x80, 0x3F, 0x44, 0xDD, +- 0x4E, 0x30, 0xFA, 0xC8, 0x0E, 0x5F, 0x46, 0xE7, +- 0x60, 0xEC, 0xDF, 0x8B, 0x94, 0x7D, 0x2E, 0x63, +- 0x48, 0xD9, 0x69, 0x06, 0x13, 0xF2, 0x20, 0x49, +- 0x54, 0xA6, 0xD4, 0x98, 0xF4, 0xF6, 0x1D, 0x4A, +- 0xC9, 0xA5, 0xDA, 0x46, 0x3D, 0xD9, 0x02, 0x47, +- 0x1C, 0x20, 0x73, 0x35, 0x17, 0x1D, 0x81, 0x8D, +- 0x2E, 0xCD, 0x70, 0x37, 0x22, 0x55, 0x3C, 0xF3, +- 0xDA, 0x70, 0x42, 0x12, 0x0E, 0xAA, 0xC4, 0xAB ++ 0x5A, 0x5A, 0x94, 0xE7, 0xB8, 0xD7, 0x4E, 0xBB, ++ 0x4C, 0xC3, 0xD1, 0x16, 0xFC, 0x8C, 0xE4, 0x27, ++ 0x44, 0xEC, 0x04, 0x26, 0x60, 0x9C, 0xFF, 0x81, ++ 0xB6, 0x2B, 0x48, 0x1D, 0xEE, 0x26, 0xF7, 0x58, ++ 0x40, 0x38, 0x58, 0xEA, 0x22, 0x23, 0xE6, 0x34, ++ 0x9A, 0x69, 0x32, 0x68, 0xBD, 0xDD, 0x7D, 0xA3, ++ 0xC0, 0x04, 0x79, 0xF0, 0xF1, 0x58, 0x78, 0x5E, ++ 0xD0, 0xDF, 0x27, 0x9A, 0x53, 0x70, 0x5D, 0xFB, ++ 0x1B, 0xCA, 0xBA, 0x97, 0x12, 0x1F, 0x59, 0x6B, ++ 0x75, 0x7B, 0x94, 0xF6, 0xE7, 0xFA, 0x49, 0x6B, ++ 0x7D, 0x7F, 0x8F, 0x0F, 0x78, 0x56, 0x40, 0x52, ++ 0x84, 0x3E, 0xA9, 0xE8, 0x84, 0x6F, 0xEF, 0xFB, ++ 0x4A, 0x48, 0x3A, 0x4C, 0x81, 0x98, 0xDD, 0x17, ++ 0x89, 0x66, 0x3B, 0xC0, 0xEC, 0x71, 0xDB, 0xF6, ++ 0x44, 0xDF, 0xA7, 0x97, 0xB2, 0x9B, 0x84, 0xA7, ++ 0x2D, 0x2D, 0xC1, 0x93, 0x12, 0x37, 0xEA, 0xD2 + }, + .len = 128 << 3 + }, +@@ -1123,7 +1123,7 @@ struct wireless_test_data zuc_auth_cipher_test_case_1 = { + .len = 2 << 3 + }, + .digest = { +- .data = {0x0E, 0xAA, 0xC4, 0xAB}, ++ .data = {0x12, 0x37, 0xEA, 0xD2}, + .len = 4, + .offset_bytes = 124 + } +diff --git a/dpdk/app/test/test_debug.c b/dpdk/app/test/test_debug.c +index 834a7386f5..23b24db177 100644 +--- a/dpdk/app/test/test_debug.c ++++ b/dpdk/app/test/test_debug.c +@@ -4,6 +4,8 @@ + + #include + #include ++#include ++#include + #include + #include + +@@ -28,9 +30,14 @@ test_panic(void) + + pid = fork(); + +- if (pid == 0) ++ if (pid == 0) { ++ struct rlimit rl; ++ ++ /* No need to generate a coredump when panicking. 
*/ ++ rl.rlim_cur = rl.rlim_max = 0; ++ setrlimit(RLIMIT_CORE, &rl); + rte_panic("Test Debug\n"); +- else if (pid < 0){ ++ } else if (pid < 0) { + printf("Fork Failed\n"); + return -1; + } +diff --git a/dpdk/app/test/test_distributor_perf.c b/dpdk/app/test/test_distributor_perf.c +index b25f79a348..92e330f194 100644 +--- a/dpdk/app/test/test_distributor_perf.c ++++ b/dpdk/app/test/test_distributor_perf.c +@@ -108,7 +108,6 @@ static int + handle_work(void *arg) + { + struct rte_distributor *d = arg; +- unsigned int count = 0; + unsigned int num = 0; + int i; + unsigned int id = __atomic_fetch_add(&worker_idx, 1, __ATOMIC_RELAXED); +@@ -120,11 +119,9 @@ handle_work(void *arg) + num = rte_distributor_get_pkt(d, id, buf, buf, num); + while (!quit) { + worker_stats[id].handled_packets += num; +- count += num; + num = rte_distributor_get_pkt(d, id, buf, buf, num); + } + worker_stats[id].handled_packets += num; +- count += num; + rte_distributor_return_pkt(d, id, buf, num); + return 0; + } +@@ -188,13 +185,15 @@ quit_workers(struct rte_distributor *d, struct rte_mempool *p) + rte_mempool_get_bulk(p, (void *)bufs, num_workers); + + quit = 1; +- for (i = 0; i < num_workers; i++) ++ for (i = 0; i < num_workers; i++) { + bufs[i]->hash.usr = i << 1; +- rte_distributor_process(d, bufs, num_workers); ++ rte_distributor_process(d, &bufs[i], 1); ++ } + + rte_mempool_put_bulk(p, (void *)bufs, num_workers); + + rte_distributor_process(d, NULL, 0); ++ rte_distributor_flush(d); + rte_eal_mp_wait_lcore(); + quit = 0; + worker_idx = 0; +diff --git a/dpdk/app/test/test_eal_flags.c b/dpdk/app/test/test_eal_flags.c +index 932fbe3d08..b4880ee802 100644 +--- a/dpdk/app/test/test_eal_flags.c ++++ b/dpdk/app/test/test_eal_flags.c +@@ -124,6 +124,7 @@ process_hugefiles(const char * prefix, enum hugepage_action action) + case HUGEPAGE_CHECK_EXISTS: + { + /* file exists, return */ ++ closedir(hugepage_dir); + result = 1; + goto end; + } +diff --git a/dpdk/app/test/test_efd.c b/dpdk/app/test/test_efd.c +index 180dc4748e..97498d57aa 100644 +--- a/dpdk/app/test/test_efd.c ++++ b/dpdk/app/test/test_efd.c +@@ -98,7 +98,7 @@ static inline uint8_t efd_get_all_sockets_bitmask(void) + unsigned int next_lcore = rte_get_main_lcore(); + const int val_true = 1, val_false = 0; + for (i = 0; i < rte_lcore_count(); i++) { +- all_cpu_sockets_bitmask |= 1 << rte_lcore_to_socket_id(next_lcore); ++ all_cpu_sockets_bitmask |= 1ULL << rte_lcore_to_socket_id(next_lcore); + next_lcore = rte_get_next_lcore(next_lcore, val_false, val_true); + } + +diff --git a/dpdk/app/test/test_event_crypto_adapter.c b/dpdk/app/test/test_event_crypto_adapter.c +index 335211cd8c..c7517e215c 100644 +--- a/dpdk/app/test/test_event_crypto_adapter.c ++++ b/dpdk/app/test/test_event_crypto_adapter.c +@@ -208,10 +208,10 @@ test_op_forward_mode(uint8_t session_less) + + if (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA) { + /* Fill in private user data information */ +- rte_memcpy(&m_data.response_info, &response_info, +- sizeof(response_info)); +- rte_memcpy(&m_data.request_info, &request_info, +- sizeof(request_info)); ++ m_data.request_info.cdev_id = request_info.cdev_id; ++ m_data.request_info.queue_pair_id = ++ request_info.queue_pair_id; ++ m_data.response_info.event = response_info.event; + rte_cryptodev_sym_session_set_user_data(sess, + &m_data, sizeof(m_data)); + } +@@ -224,14 +224,12 @@ test_op_forward_mode(uint8_t session_less) + op->sess_type = RTE_CRYPTO_OP_SESSIONLESS; + first_xform = &cipher_xform; + sym_op->xform = first_xform; +- uint32_t len = 
IV_OFFSET + MAXIMUM_IV_LENGTH + +- (sizeof(struct rte_crypto_sym_xform) * 2); ++ uint32_t len = IV_OFFSET + MAXIMUM_IV_LENGTH; + op->private_data_offset = len; + /* Fill in private data information */ +- rte_memcpy(&m_data.response_info, &response_info, +- sizeof(response_info)); +- rte_memcpy(&m_data.request_info, &request_info, +- sizeof(request_info)); ++ m_data.request_info.cdev_id = request_info.cdev_id; ++ m_data.request_info.queue_pair_id = request_info.queue_pair_id; ++ m_data.response_info.event = response_info.event; + rte_memcpy((uint8_t *)op + len, &m_data, sizeof(m_data)); + } + +@@ -402,8 +400,7 @@ test_op_new_mode(uint8_t session_less) + + if (cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA) { + /* Fill in private user data information */ +- rte_memcpy(&m_data.response_info, &response_info, +- sizeof(m_data)); ++ m_data.response_info.event = response_info.event; + rte_cryptodev_sym_session_set_user_data(sess, + &m_data, sizeof(m_data)); + } +@@ -419,12 +416,10 @@ test_op_new_mode(uint8_t session_less) + op->sess_type = RTE_CRYPTO_OP_SESSIONLESS; + first_xform = &cipher_xform; + sym_op->xform = first_xform; +- uint32_t len = IV_OFFSET + MAXIMUM_IV_LENGTH + +- (sizeof(struct rte_crypto_sym_xform) * 2); ++ uint32_t len = IV_OFFSET + MAXIMUM_IV_LENGTH; + op->private_data_offset = len; + /* Fill in private data information */ +- rte_memcpy(&m_data.response_info, &response_info, +- sizeof(m_data)); ++ m_data.response_info.event = response_info.event; + rte_memcpy((uint8_t *)op + len, &m_data, sizeof(m_data)); + } + +@@ -516,7 +511,8 @@ configure_cryptodev(void) + NUM_MBUFS, MBUF_CACHE_SIZE, + DEFAULT_NUM_XFORMS * + sizeof(struct rte_crypto_sym_xform) + +- MAXIMUM_IV_LENGTH, ++ MAXIMUM_IV_LENGTH + ++ sizeof(union rte_event_crypto_metadata), + rte_socket_id()); + if (params.op_mpool == NULL) { + RTE_LOG(ERR, USER1, "Can't create CRYPTO_OP_POOL\n"); +diff --git a/dpdk/app/test/test_event_timer_adapter.c b/dpdk/app/test/test_event_timer_adapter.c +index ad3f4dcc20..efd86cad58 100644 +--- a/dpdk/app/test/test_event_timer_adapter.c ++++ b/dpdk/app/test/test_event_timer_adapter.c +@@ -3,6 +3,8 @@ + * Copyright(c) 2017-2018 Intel Corporation. 
+ */ + ++#include ++ + #include + #include + #include +@@ -46,7 +48,7 @@ static uint64_t global_info_bkt_tck_ns; + static volatile uint8_t arm_done; + + #define CALC_TICKS(tks) \ +- ((tks * global_bkt_tck_ns) / global_info_bkt_tck_ns) ++ ceil((double)(tks * global_bkt_tck_ns) / global_info_bkt_tck_ns) + + + static bool using_services; +@@ -964,8 +966,6 @@ adapter_create(void) + TEST_ASSERT_SUCCESS(rte_event_timer_adapter_free(adapter), + "Failed to free adapter"); + +- rte_mempool_free(eventdev_test_mempool); +- + return TEST_SUCCESS; + } + +diff --git a/dpdk/app/test/test_external_mem.c b/dpdk/app/test/test_external_mem.c +index 7eb81f6448..5edf88b9f6 100644 +--- a/dpdk/app/test/test_external_mem.c ++++ b/dpdk/app/test/test_external_mem.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -532,8 +533,8 @@ test_extmem_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, + static int + test_external_mem(void) + { ++ size_t pgsz = rte_mem_page_size(); + size_t len = EXTERNAL_MEM_SZ; +- size_t pgsz = RTE_PGSIZE_4K; + rte_iova_t iova[len / pgsz]; + void *addr; + int ret, n_pages; +diff --git a/dpdk/app/test/test_fib_perf.c b/dpdk/app/test/test_fib_perf.c +index dd2e54db8b..40d172645b 100644 +--- a/dpdk/app/test/test_fib_perf.c ++++ b/dpdk/app/test/test_fib_perf.c +@@ -345,7 +345,7 @@ test_fib_perf(void) + fib = rte_fib_create(__func__, SOCKET_ID_ANY, &config); + TEST_FIB_ASSERT(fib != NULL); + +- /* Measue add. */ ++ /* Measure add. */ + begin = rte_rdtsc(); + + for (i = 0; i < NUM_ROUTE_ENTRIES; i++) { +diff --git a/dpdk/app/test/test_flow_classify.c b/dpdk/app/test/test_flow_classify.c +index ef0b6fdd5c..951606f248 100644 +--- a/dpdk/app/test/test_flow_classify.c ++++ b/dpdk/app/test/test_flow_classify.c +@@ -828,6 +828,12 @@ test_flow_classify(void) + cls_params.name = "flow_classifier"; + cls_params.socket_id = 0; + cls->cls = rte_flow_classifier_create(&cls_params); ++ if (cls->cls == NULL) { ++ printf("Line %i: flow classifier create has failed!\n", ++ __LINE__); ++ rte_free(cls); ++ return TEST_FAILED; ++ } + + /* initialise ACL table params */ + table_acl_params.n_rule_fields = RTE_DIM(ipv4_defs); +diff --git a/dpdk/app/test/test_func_reentrancy.c b/dpdk/app/test/test_func_reentrancy.c +index 231c99a9eb..36e83bc587 100644 +--- a/dpdk/app/test/test_func_reentrancy.c ++++ b/dpdk/app/test/test_func_reentrancy.c +@@ -89,6 +89,10 @@ ring_clean(unsigned int lcore_id) + char ring_name[MAX_STRING_SIZE]; + int i; + ++ rp = rte_ring_lookup("fr_test_once"); ++ if (rp != NULL) ++ rte_ring_free(rp); ++ + for (i = 0; i < MAX_ITER_MULTI; i++) { + snprintf(ring_name, sizeof(ring_name), + "fr_test_%d_%d", lcore_id, i); +@@ -148,7 +152,10 @@ mempool_clean(unsigned int lcore_id) + char mempool_name[MAX_STRING_SIZE]; + int i; + +- /* verify all ring created successful */ ++ mp = rte_mempool_lookup("fr_test_once"); ++ if (mp != NULL) ++ rte_mempool_free(mp); ++ + for (i = 0; i < MAX_ITER_MULTI; i++) { + snprintf(mempool_name, sizeof(mempool_name), "fr_test_%d_%d", + lcore_id, i); +@@ -208,6 +215,10 @@ hash_clean(unsigned lcore_id) + struct rte_hash *handle; + int i; + ++ handle = rte_hash_find_existing("fr_test_once"); ++ if (handle != NULL) ++ rte_hash_free(handle); ++ + for (i = 0; i < MAX_ITER_MULTI; i++) { + snprintf(hash_name, sizeof(hash_name), "fr_test_%d_%d", lcore_id, i); + +@@ -242,7 +253,7 @@ hash_create_free(__rte_unused void *arg) + rte_atomic32_inc(&obj_count); + } + +- /* create mutiple times simultaneously */ ++ /* create multiple 
times simultaneously */ + for (i = 0; i < MAX_ITER_MULTI; i++) { + snprintf(hash_name, sizeof(hash_name), "fr_test_%d_%d", lcore_self, i); + hash_params.name = hash_name; +@@ -272,6 +283,10 @@ fbk_clean(unsigned lcore_id) + struct rte_fbk_hash_table *handle; + int i; + ++ handle = rte_fbk_hash_find_existing("fr_test_once"); ++ if (handle != NULL) ++ rte_fbk_hash_free(handle); ++ + for (i = 0; i < MAX_ITER_MULTI; i++) { + snprintf(fbk_name, sizeof(fbk_name), "fr_test_%d_%d", lcore_id, i); + +@@ -306,7 +321,7 @@ fbk_create_free(__rte_unused void *arg) + rte_atomic32_inc(&obj_count); + } + +- /* create mutiple fbk tables simultaneously */ ++ /* create multiple fbk tables simultaneously */ + for (i = 0; i < MAX_ITER_MULTI; i++) { + snprintf(fbk_name, sizeof(fbk_name), "fr_test_%d_%d", lcore_self, i); + fbk_params.name = fbk_name; +@@ -338,6 +353,10 @@ lpm_clean(unsigned int lcore_id) + struct rte_lpm *lpm; + int i; + ++ lpm = rte_lpm_find_existing("fr_test_once"); ++ if (lpm != NULL) ++ rte_lpm_free(lpm); ++ + for (i = 0; i < MAX_LPM_ITER_TIMES; i++) { + snprintf(lpm_name, sizeof(lpm_name), "fr_test_%d_%d", lcore_id, i); + +@@ -368,7 +387,7 @@ lpm_create_free(__rte_unused void *arg) + rte_atomic32_inc(&obj_count); + } + +- /* create mutiple fbk tables simultaneously */ ++ /* create multiple fbk tables simultaneously */ + for (i = 0; i < MAX_LPM_ITER_TIMES; i++) { + snprintf(lpm_name, sizeof(lpm_name), "fr_test_%d_%d", lcore_self, i); + lpm = rte_lpm_create(lpm_name, SOCKET_ID_ANY, &config); +@@ -418,11 +437,10 @@ struct test_case test_cases[] = { + static int + launch_test(struct test_case *pt_case) + { ++ unsigned int lcore_id; ++ unsigned int cores; ++ unsigned int count; + int ret = 0; +- unsigned lcore_id; +- unsigned cores_save = rte_lcore_count(); +- unsigned cores = RTE_MIN(cores_save, MAX_LCORES); +- unsigned count; + + if (pt_case->func == NULL) + return -1; +@@ -430,6 +448,7 @@ launch_test(struct test_case *pt_case) + rte_atomic32_set(&obj_count, 0); + rte_atomic32_set(&synchro, 0); + ++ cores = RTE_MIN(rte_lcore_count(), MAX_LCORES); + RTE_LCORE_FOREACH_WORKER(lcore_id) { + if (cores == 1) + break; +@@ -442,14 +461,12 @@ launch_test(struct test_case *pt_case) + if (pt_case->func(pt_case->arg) < 0) + ret = -1; + +- cores = cores_save; + RTE_LCORE_FOREACH_WORKER(lcore_id) { +- if (cores == 1) +- break; +- cores--; + if (rte_eal_wait_lcore(lcore_id) < 0) + ret = -1; ++ } + ++ RTE_LCORE_FOREACH(lcore_id) { + if (pt_case->clean != NULL) + pt_case->clean(lcore_id); + } +diff --git a/dpdk/app/test/test_hash.c b/dpdk/app/test/test_hash.c +index bd4d0cb722..b99e8de1db 100644 +--- a/dpdk/app/test/test_hash.c ++++ b/dpdk/app/test/test_hash.c +@@ -74,13 +74,17 @@ static uint32_t hashtest_key_lens[] = {0, 2, 4, 5, 6, 7, 8, 10, 11, 15, 16, 21, + } \ + } while (0) + +-/* 5-tuple key type */ ++/* ++ * 5-tuple key type. ++ * Should be packed to avoid holes with potentially ++ * undefined content in the middle. ++ */ + struct flow_key { + uint32_t ip_src; + uint32_t ip_dst; + uint16_t port_src; + uint16_t port_dst; +- uint8_t proto; ++ uint32_t proto; + } __rte_packed; + + /* +@@ -147,7 +151,7 @@ static struct flow_key keys[5] = { { + /* Parameters used for hash table in unit test functions. Name set later. 
*/ + static struct rte_hash_parameters ut_params = { + .entries = 64, +- .key_len = sizeof(struct flow_key), /* 13 */ ++ .key_len = sizeof(struct flow_key), + .hash_func = rte_jhash, + .hash_func_init_val = 0, + .socket_id = 0, +@@ -792,7 +796,7 @@ static int test_full_bucket(void) + struct rte_hash_parameters params_pseudo_hash = { + .name = "test4", + .entries = 64, +- .key_len = sizeof(struct flow_key), /* 13 */ ++ .key_len = sizeof(struct flow_key), + .hash_func = pseudo_hash, + .hash_func_init_val = 0, + .socket_id = 0, +@@ -895,7 +899,7 @@ static int test_extendable_bucket(void) + struct rte_hash_parameters params_pseudo_hash = { + .name = "test5", + .entries = 64, +- .key_len = sizeof(struct flow_key), /* 13 */ ++ .key_len = sizeof(struct flow_key), + .hash_func = pseudo_hash, + .hash_func_init_val = 0, + .socket_id = 0, +@@ -1606,6 +1610,17 @@ static struct rte_hash_parameters hash_params_ex = { + .socket_id = 0, + }; + ++/* ++ * Wrapper function around rte_jhash_32b. ++ * It is required because rte_jhash_32b() accepts the length ++ * as size of 4-byte units. ++ */ ++static inline uint32_t ++test_jhash_32b(const void *k, uint32_t length, uint32_t initval) ++{ ++ return rte_jhash_32b(k, length >> 2, initval); ++} ++ + /* + * add/delete key with jhash2 + */ +@@ -1618,7 +1633,7 @@ test_hash_add_delete_jhash2(void) + + hash_params_ex.name = "hash_test_jhash2"; + hash_params_ex.key_len = 4; +- hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b; ++ hash_params_ex.hash_func = (rte_hash_function)test_jhash_32b; + + handle = rte_hash_create(&hash_params_ex); + if (handle == NULL) { +@@ -1657,7 +1672,7 @@ test_hash_add_delete_2_jhash2(void) + + hash_params_ex.name = "hash_test_2_jhash2"; + hash_params_ex.key_len = 8; +- hash_params_ex.hash_func = (rte_hash_function)rte_jhash_32b; ++ hash_params_ex.hash_func = (rte_hash_function)test_jhash_32b; + + handle = rte_hash_create(&hash_params_ex); + if (handle == NULL) +@@ -1915,7 +1930,7 @@ test_hash_rcu_qsbr_dq_mode(uint8_t ext_bkt) + struct rte_hash_parameters params_pseudo_hash = { + .name = "test_hash_rcu_qsbr_dq_mode", + .entries = total_entries, +- .key_len = sizeof(struct flow_key), /* 13 */ ++ .key_len = sizeof(struct flow_key), + .hash_func = pseudo_hash, + .hash_func_init_val = 0, + .socket_id = 0, +@@ -2085,7 +2100,7 @@ test_hash_rcu_qsbr_sync_mode(uint8_t ext_bkt) + struct rte_hash_parameters params_pseudo_hash = { + .name = "test_hash_rcu_qsbr_sync_mode", + .entries = total_entries, +- .key_len = sizeof(struct flow_key), /* 13 */ ++ .key_len = sizeof(struct flow_key), + .hash_func = pseudo_hash, + .hash_func_init_val = 0, + .socket_id = 0, +@@ -2180,6 +2195,8 @@ test_hash_rcu_qsbr_sync_mode(uint8_t ext_bkt) + static int + test_hash(void) + { ++ RTE_BUILD_BUG_ON(sizeof(struct flow_key) % sizeof(uint32_t) != 0); ++ + if (test_add_delete() < 0) + return -1; + if (test_hash_add_delete_jhash2() < 0) +diff --git a/dpdk/app/test/test_hash_readwrite.c b/dpdk/app/test/test_hash_readwrite.c +index 4860768a64..f40fec74dd 100644 +--- a/dpdk/app/test/test_hash_readwrite.c ++++ b/dpdk/app/test/test_hash_readwrite.c +@@ -670,8 +670,12 @@ test_hash_rw_perf_main(void) + printf("Results summary:\n"); + printf("================\n"); + +- printf("single read: %u\n", htm_results.single_read); +- printf("single write: %u\n", htm_results.single_write); ++ printf("HTM:\n"); ++ printf(" single read: %u\n", htm_results.single_read); ++ printf(" single write: %u\n", htm_results.single_write); ++ printf("non HTM:\n"); ++ printf(" single read: %u\n", 
non_htm_results.single_read); ++ printf(" single write: %u\n", non_htm_results.single_write); + for (i = 0; i < NUM_TEST; i++) { + printf("+++ core_cnt: %u +++\n", core_cnt[i]); + printf("HTM:\n"); +diff --git a/dpdk/app/test/test_hash_readwrite_lf_perf.c b/dpdk/app/test/test_hash_readwrite_lf_perf.c +index 8120cf43be..32f9ec9250 100644 +--- a/dpdk/app/test/test_hash_readwrite_lf_perf.c ++++ b/dpdk/app/test/test_hash_readwrite_lf_perf.c +@@ -59,7 +59,7 @@ struct rwc_perf { + uint32_t w_ks_r_hit_nsp[2][NUM_TEST]; + uint32_t w_ks_r_hit_sp[2][NUM_TEST]; + uint32_t w_ks_r_miss[2][NUM_TEST]; +- uint32_t multi_rw[NUM_TEST - 1][2][NUM_TEST]; ++ uint32_t multi_rw[NUM_TEST][2][NUM_TEST]; + uint32_t w_ks_r_hit_extbkt[2][NUM_TEST]; + uint32_t writer_add_del[NUM_TEST]; + }; +diff --git a/dpdk/app/test/test_ipsec.c b/dpdk/app/test/test_ipsec.c +index d18220a885..39531ff667 100644 +--- a/dpdk/app/test/test_ipsec.c ++++ b/dpdk/app/test/test_ipsec.c +@@ -544,12 +544,14 @@ struct rte_ipv4_hdr ipv4_outer = { + }; + + static struct rte_mbuf * +-setup_test_string(struct rte_mempool *mpool, +- const char *string, size_t len, uint8_t blocksize) ++setup_test_string(struct rte_mempool *mpool, const char *string, ++ size_t string_len, size_t len, uint8_t blocksize) + { + struct rte_mbuf *m = rte_pktmbuf_alloc(mpool); + size_t t_len = len - (blocksize ? (len % blocksize) : 0); + ++ RTE_VERIFY(len <= string_len); ++ + if (m) { + memset(m->buf_addr, 0, m->buf_len); + char *dst = rte_pktmbuf_append(m, t_len); +@@ -654,7 +656,7 @@ create_crypto_session(struct ipsec_unitest_params *ut, + if (s == NULL) + return -ENOMEM; + +- /* initiliaze SA crypto session for device */ ++ /* initialize SA crypto session for device */ + rc = rte_cryptodev_sym_session_init(dev_id, s, + ut->crypto_xforms, qp->mp_session_private); + if (rc == 0) { +@@ -1355,7 +1357,8 @@ test_ipsec_crypto_outb_burst_null_null(int i) + /* Generate input mbuf data */ + for (j = 0; j < num_pkts && rc == 0; j++) { + ut_params->ibuf[j] = setup_test_string(ts_params->mbuf_pool, +- null_plain_data, test_cfg[i].pkt_sz, 0); ++ null_plain_data, sizeof(null_plain_data), ++ test_cfg[i].pkt_sz, 0); + if (ut_params->ibuf[j] == NULL) + rc = TEST_FAILED; + else { +@@ -1473,7 +1476,8 @@ test_ipsec_inline_crypto_inb_burst_null_null(int i) + /* Generate test mbuf data */ + ut_params->obuf[j] = setup_test_string( + ts_params->mbuf_pool, +- null_plain_data, test_cfg[i].pkt_sz, 0); ++ null_plain_data, sizeof(null_plain_data), ++ test_cfg[i].pkt_sz, 0); + if (ut_params->obuf[j] == NULL) + rc = TEST_FAILED; + } +@@ -1541,16 +1545,17 @@ test_ipsec_inline_proto_inb_burst_null_null(int i) + + /* Generate inbound mbuf data */ + for (j = 0; j < num_pkts && rc == 0; j++) { +- ut_params->ibuf[j] = setup_test_string( +- ts_params->mbuf_pool, +- null_plain_data, test_cfg[i].pkt_sz, 0); ++ ut_params->ibuf[j] = setup_test_string(ts_params->mbuf_pool, ++ null_plain_data, sizeof(null_plain_data), ++ test_cfg[i].pkt_sz, 0); + if (ut_params->ibuf[j] == NULL) + rc = TEST_FAILED; + else { + /* Generate test mbuf data */ + ut_params->obuf[j] = setup_test_string( + ts_params->mbuf_pool, +- null_plain_data, test_cfg[i].pkt_sz, 0); ++ null_plain_data, sizeof(null_plain_data), ++ test_cfg[i].pkt_sz, 0); + if (ut_params->obuf[j] == NULL) + rc = TEST_FAILED; + } +@@ -1650,7 +1655,8 @@ test_ipsec_inline_crypto_outb_burst_null_null(int i) + /* Generate test mbuf data */ + for (j = 0; j < num_pkts && rc == 0; j++) { + ut_params->ibuf[j] = setup_test_string(ts_params->mbuf_pool, +- null_plain_data, 
test_cfg[i].pkt_sz, 0); ++ null_plain_data, sizeof(null_plain_data), ++ test_cfg[i].pkt_sz, 0); + if (ut_params->ibuf[0] == NULL) + rc = TEST_FAILED; + +@@ -1728,15 +1734,17 @@ test_ipsec_inline_proto_outb_burst_null_null(int i) + /* Generate test mbuf data */ + for (j = 0; j < num_pkts && rc == 0; j++) { + ut_params->ibuf[j] = setup_test_string(ts_params->mbuf_pool, +- null_plain_data, test_cfg[i].pkt_sz, 0); ++ null_plain_data, sizeof(null_plain_data), ++ test_cfg[i].pkt_sz, 0); + if (ut_params->ibuf[0] == NULL) + rc = TEST_FAILED; + + if (rc == 0) { + /* Generate test tunneled mbuf data for comparison */ + ut_params->obuf[j] = setup_test_string( +- ts_params->mbuf_pool, +- null_plain_data, test_cfg[i].pkt_sz, 0); ++ ts_params->mbuf_pool, null_plain_data, ++ sizeof(null_plain_data), test_cfg[i].pkt_sz, ++ 0); + if (ut_params->obuf[j] == NULL) + rc = TEST_FAILED; + } +@@ -1805,7 +1813,8 @@ test_ipsec_lksd_proto_inb_burst_null_null(int i) + for (j = 0; j < num_pkts && rc == 0; j++) { + /* packet with sequence number 0 is invalid */ + ut_params->ibuf[j] = setup_test_string(ts_params->mbuf_pool, +- null_encrypted_data, test_cfg[i].pkt_sz, 0); ++ null_encrypted_data, sizeof(null_encrypted_data), ++ test_cfg[i].pkt_sz, 0); + if (ut_params->ibuf[j] == NULL) + rc = TEST_FAILED; + } +diff --git a/dpdk/app/test/test_ipsec_perf.c b/dpdk/app/test/test_ipsec_perf.c +index 92106bf374..7e07805ea3 100644 +--- a/dpdk/app/test/test_ipsec_perf.c ++++ b/dpdk/app/test/test_ipsec_perf.c +@@ -580,8 +580,8 @@ testsuite_teardown(void) + static int + test_libipsec_perf(void) + { +- struct ipsec_sa sa_out; +- struct ipsec_sa sa_in; ++ struct ipsec_sa sa_out = { .sa_prm = { 0 } }; ++ struct ipsec_sa sa_in = { .sa_prm = { 0 } }; + uint32_t i; + int ret; + +diff --git a/dpdk/app/test/test_kni.c b/dpdk/app/test/test_kni.c +index f53a53eff6..2f6dda0f53 100644 +--- a/dpdk/app/test/test_kni.c ++++ b/dpdk/app/test/test_kni.c +@@ -326,7 +326,7 @@ test_kni_register_handler_mp(void) + + /* Check with the invalid parameters */ + if (rte_kni_register_handlers(kni, NULL) == 0) { +- printf("Unexpectedly register successuflly " ++ printf("Unexpectedly register successfully " + "with NULL ops pointer\n"); + exit(-1); + } +@@ -475,7 +475,7 @@ test_kni_processing(uint16_t port_id, struct rte_mempool *mp) + + /** + * Check multiple processes support on +- * registerring/unregisterring handlers. ++ * registering/unregistering handlers. 
+ */ + if (test_kni_register_handler_mp() < 0) { + printf("fail to check multiple process support\n"); +@@ -562,8 +562,12 @@ test_kni(void) + } + closedir(dir); + +- /* Initialize KNI subsytem */ +- rte_kni_init(KNI_TEST_MAX_PORTS); ++ /* Initialize KNI subsystem */ ++ ret = rte_kni_init(KNI_TEST_MAX_PORTS); ++ if (ret < 0) { ++ printf("fail to initialize KNI subsystem\n"); ++ return -1; ++ } + + if (test_kni_allocate_lcores() < 0) { + printf("No enough lcores for kni processing\n"); +diff --git a/dpdk/app/test/test_kvargs.c b/dpdk/app/test/test_kvargs.c +index 2a2dae43a0..c639010de4 100644 +--- a/dpdk/app/test/test_kvargs.c ++++ b/dpdk/app/test/test_kvargs.c +@@ -11,7 +11,7 @@ + + #include "test.h" + +-/* incrementd in handler, to check it is properly called once per ++/* incremented in handler, to check it is properly called once per + * key/value association */ + static unsigned count; + +@@ -75,14 +75,14 @@ static int test_valid_kvargs(void) + goto fail; + } + count = 0; +- /* call check_handler() for all entries with key="unexistant_key" */ +- if (rte_kvargs_process(kvlist, "unexistant_key", check_handler, NULL) < 0) { ++ /* call check_handler() for all entries with key="nonexistent_key" */ ++ if (rte_kvargs_process(kvlist, "nonexistent_key", check_handler, NULL) < 0) { + printf("rte_kvargs_process() error\n"); + rte_kvargs_free(kvlist); + goto fail; + } + if (count != 0) { +- printf("invalid count value %d after rte_kvargs_process(unexistant_key)\n", ++ printf("invalid count value %d after rte_kvargs_process(nonexistent_key)\n", + count); + rte_kvargs_free(kvlist); + goto fail; +@@ -103,10 +103,10 @@ static int test_valid_kvargs(void) + rte_kvargs_free(kvlist); + goto fail; + } +- /* count all entries with key="unexistant_key" */ +- count = rte_kvargs_count(kvlist, "unexistant_key"); ++ /* count all entries with key="nonexistent_key" */ ++ count = rte_kvargs_count(kvlist, "nonexistent_key"); + if (count != 0) { +- printf("invalid count value %d after rte_kvargs_count(unexistant_key)\n", ++ printf("invalid count value %d after rte_kvargs_count(nonexistent_key)\n", + count); + rte_kvargs_free(kvlist); + goto fail; +@@ -124,7 +124,7 @@ static int test_valid_kvargs(void) + /* call check_handler() on all entries with key="check", it + * should fail as the value is not recognized by the handler */ + if (rte_kvargs_process(kvlist, "check", check_handler, NULL) == 0) { +- printf("rte_kvargs_process() is success bu should not\n"); ++ printf("rte_kvargs_process() is success but should not\n"); + rte_kvargs_free(kvlist); + goto fail; + } +diff --git a/dpdk/app/test/test_latencystats.c b/dpdk/app/test/test_latencystats.c +index 427339904d..db06c7d5c7 100644 +--- a/dpdk/app/test/test_latencystats.c ++++ b/dpdk/app/test/test_latencystats.c +@@ -6,6 +6,7 @@ + #include + #include + ++#include + #include + #include "rte_lcore.h" + #include "rte_metrics.h" +@@ -80,7 +81,7 @@ static int test_latencystats_get_names(void) + /* Success Test: Valid names and size */ + size = NUM_STATS; + ret = rte_latencystats_get_names(names, size); +- for (i = 0; i <= NUM_STATS; i++) { ++ for (i = 0; i < NUM_STATS; i++) { + if (strcmp(lat_stats_strings[i].name, names[i].name) == 0) + printf(" %s\n", names[i].name); + else +@@ -158,12 +159,21 @@ static int test_latency_packet_forward(void) + printf("allocate mbuf pool Failed\n"); + return TEST_FAILED; + } ++ ret = test_dev_start(portid, mp); ++ if (ret < 0) { ++ printf("test_dev_start(%hu, %p) failed, error code: %d\n", ++ portid, mp, ret); ++ return TEST_FAILED; ++ } ++ 
+ ret = test_packet_forward(pbuf, portid, QUEUE_ID); + if (ret < 0) + printf("send pkts Failed\n"); ++ ++ rte_eth_dev_stop(portid); + test_put_mbuf_to_pool(mp, pbuf); + +- return TEST_SUCCESS; ++ return (ret >= 0) ? TEST_SUCCESS : TEST_FAILED; + } + + static struct +diff --git a/dpdk/app/test/test_link_bonding.c b/dpdk/app/test/test_link_bonding.c +index e8b76bd850..868bf94301 100644 +--- a/dpdk/app/test/test_link_bonding.c ++++ b/dpdk/app/test/test_link_bonding.c +@@ -3041,7 +3041,7 @@ test_balance_tx_burst_slave_tx_fail(void) + first_tx_fail_idx = TEST_BAL_SLAVE_TX_FAIL_BURST_SIZE_1 - + TEST_BAL_SLAVE_TX_FAIL_PACKETS_COUNT; + +- /* copy mbuf referneces for expected transmission failures */ ++ /* copy mbuf references for expected transmission failures */ + for (i = 0; i < TEST_BAL_SLAVE_TX_FAIL_PACKETS_COUNT; i++) + expected_fail_pkts[i] = pkts_burst_1[i + first_tx_fail_idx]; + +diff --git a/dpdk/app/test/test_link_bonding_rssconf.c b/dpdk/app/test/test_link_bonding_rssconf.c +index 5dac60ca1e..514f09bf19 100644 +--- a/dpdk/app/test/test_link_bonding_rssconf.c ++++ b/dpdk/app/test/test_link_bonding_rssconf.c +@@ -466,15 +466,85 @@ test_rss(void) + + TEST_ASSERT_SUCCESS(test_propagate(), "Propagation test failed"); + +- TEST_ASSERT(slave_remove_and_add() == 1, "New slave should be synced"); ++ TEST_ASSERT(slave_remove_and_add() == 1, "remove and add slaves success."); + + remove_slaves_and_stop_bonded_device(); + + return TEST_SUCCESS; + } + ++ ++/** ++ * Test RSS configuration over bonded and slaves. ++ */ ++static int ++test_rss_config_lazy(void) ++{ ++ struct rte_eth_rss_conf bond_rss_conf = {0}; ++ struct slave_conf *port; ++ uint8_t rss_key[40]; ++ uint64_t rss_hf; ++ int retval; ++ uint16_t i; ++ uint8_t n; ++ ++ retval = rte_eth_dev_info_get(test_params.bond_port_id, ++ &test_params.bond_dev_info); ++ TEST_ASSERT((retval == 0), "Error during getting device (port %u) info: %s\n", ++ test_params.bond_port_id, strerror(-retval)); ++ ++ rss_hf = test_params.bond_dev_info.flow_type_rss_offloads; ++ if (rss_hf != 0) { ++ bond_rss_conf.rss_key = NULL; ++ bond_rss_conf.rss_hf = rss_hf; ++ retval = rte_eth_dev_rss_hash_update(test_params.bond_port_id, ++ &bond_rss_conf); ++ TEST_ASSERT(retval != 0, "Succeeded in setting bonded port hash function"); ++ } ++ ++ /* Set all keys to zero for all slaves */ ++ FOR_EACH_PORT(n, port) { ++ port = &test_params.slave_ports[n]; ++ retval = rte_eth_dev_rss_hash_conf_get(port->port_id, ++ &port->rss_conf); ++ TEST_ASSERT_SUCCESS(retval, "Cannot get slaves RSS configuration"); ++ memset(port->rss_key, 0, sizeof(port->rss_key)); ++ port->rss_conf.rss_key = port->rss_key; ++ port->rss_conf.rss_key_len = sizeof(port->rss_key); ++ retval = rte_eth_dev_rss_hash_update(port->port_id, ++ &port->rss_conf); ++ TEST_ASSERT(retval != 0, "Succeeded in setting slaves RSS keys"); ++ } ++ ++ /* Set RSS keys for bonded port */ ++ memset(rss_key, 1, sizeof(rss_key)); ++ bond_rss_conf.rss_hf = rss_hf; ++ bond_rss_conf.rss_key = rss_key; ++ bond_rss_conf.rss_key_len = sizeof(rss_key); ++ ++ retval = rte_eth_dev_rss_hash_update(test_params.bond_port_id, ++ &bond_rss_conf); ++ TEST_ASSERT(retval != 0, "Succeeded in setting bonded port RSS keys"); ++ ++ /* Test RETA propagation */ ++ for (i = 0; i < RXTX_QUEUE_COUNT; i++) { ++ FOR_EACH_PORT(n, port) { ++ port = &test_params.slave_ports[n]; ++ retval = reta_set(port->port_id, (i + 1) % RXTX_QUEUE_COUNT, ++ port->dev_info.reta_size); ++ TEST_ASSERT(retval != 0, "Succeeded in setting slaves RETA"); ++ } ++ ++ retval = 
reta_set(test_params.bond_port_id, i % RXTX_QUEUE_COUNT, ++ test_params.bond_dev_info.reta_size); ++ TEST_ASSERT(retval != 0, "Succeeded in setting bonded port RETA"); ++ } ++ ++ return TEST_SUCCESS; ++} ++ + /** +- * Test propagation logic, when RX_RSS mq_mode is turned off for bonding port ++ * Test RSS function logic, when RX_RSS mq_mode is turned off for bonding port + */ + static int + test_rss_lazy(void) +@@ -495,9 +565,7 @@ test_rss_lazy(void) + TEST_ASSERT_SUCCESS(rte_eth_dev_start(test_params.bond_port_id), + "Failed to start bonding port (%d).", test_params.bond_port_id); + +- TEST_ASSERT_SUCCESS(test_propagate(), "Propagation test failed"); +- +- TEST_ASSERT(slave_remove_and_add() == 0, "New slave shouldn't be synced"); ++ TEST_ASSERT_SUCCESS(test_rss_config_lazy(), "Succeeded in setting RSS hash when RX_RSS mq_mode is turned off"); + + remove_slaves_and_stop_bonded_device(); + +diff --git a/dpdk/app/test/test_lpm.c b/dpdk/app/test/test_lpm.c +index 258b2f67c7..bee8307caa 100644 +--- a/dpdk/app/test/test_lpm.c ++++ b/dpdk/app/test/test_lpm.c +@@ -179,7 +179,7 @@ test3(void) + status = rte_lpm_add(NULL, ip, depth, next_hop); + TEST_LPM_ASSERT(status < 0); + +- /*Create vaild lpm to use in rest of test. */ ++ /*Create valid lpm to use in rest of test. */ + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + +@@ -217,7 +217,7 @@ test4(void) + status = rte_lpm_delete(NULL, ip, depth); + TEST_LPM_ASSERT(status < 0); + +- /*Create vaild lpm to use in rest of test. */ ++ /*Create valid lpm to use in rest of test. */ + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + +@@ -255,7 +255,7 @@ test5(void) + status = rte_lpm_lookup(NULL, ip, &next_hop_return); + TEST_LPM_ASSERT(status < 0); + +- /*Create vaild lpm to use in rest of test. */ ++ /*Create valid lpm to use in rest of test. */ + lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + +diff --git a/dpdk/app/test/test_lpm6.c b/dpdk/app/test/test_lpm6.c +index 0d664546fa..17221f992a 100644 +--- a/dpdk/app/test/test_lpm6.c ++++ b/dpdk/app/test/test_lpm6.c +@@ -261,7 +261,7 @@ test4(void) + status = rte_lpm6_add(NULL, ip, depth, next_hop); + TEST_LPM_ASSERT(status < 0); + +- /*Create vaild lpm to use in rest of test. */ ++ /*Create valid lpm to use in rest of test. */ + lpm = rte_lpm6_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + +@@ -299,7 +299,7 @@ test5(void) + status = rte_lpm6_delete(NULL, ip, depth); + TEST_LPM_ASSERT(status < 0); + +- /*Create vaild lpm to use in rest of test. */ ++ /*Create valid lpm to use in rest of test. */ + lpm = rte_lpm6_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + +@@ -337,7 +337,7 @@ test6(void) + status = rte_lpm6_lookup(NULL, ip, &next_hop_return); + TEST_LPM_ASSERT(status < 0); + +- /*Create vaild lpm to use in rest of test. */ ++ /*Create valid lpm to use in rest of test. */ + lpm = rte_lpm6_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + +@@ -375,7 +375,7 @@ test7(void) + status = rte_lpm6_lookup_bulk_func(NULL, ip, next_hop_return, 10); + TEST_LPM_ASSERT(status < 0); + +- /*Create vaild lpm to use in rest of test. */ ++ /*Create valid lpm to use in rest of test. 
*/ + lpm = rte_lpm6_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + +@@ -413,7 +413,7 @@ test8(void) + status = rte_lpm6_delete_bulk_func(NULL, ip, depth, 10); + TEST_LPM_ASSERT(status < 0); + +- /*Create vaild lpm to use in rest of test. */ ++ /*Create valid lpm to use in rest of test. */ + lpm = rte_lpm6_create(__func__, SOCKET_ID_ANY, &config); + TEST_LPM_ASSERT(lpm != NULL); + +@@ -433,7 +433,7 @@ test8(void) + /* + * Call add, lookup and delete for a single rule with depth < 24. + * Check all the combinations for the first three bytes that result in a hit. +- * Delete the rule and check that the same test returs a miss. ++ * Delete the rule and check that the same test returns a miss. + */ + int32_t + test9(void) +@@ -1738,7 +1738,7 @@ test27(void) + * Call add, lookup and delete for a single rule with maximum 21bit next_hop + * size. + * Check that next_hop returned from lookup is equal to provisioned value. +- * Delete the rule and check that the same test returs a miss. ++ * Delete the rule and check that the same test returns a miss. + */ + int32_t + test28(void) +diff --git a/dpdk/app/test/test_lpm6_data.h b/dpdk/app/test/test_lpm6_data.h +index c3894f730e..da9b161f20 100644 +--- a/dpdk/app/test/test_lpm6_data.h ++++ b/dpdk/app/test/test_lpm6_data.h +@@ -22,7 +22,7 @@ struct ips_tbl_entry { + * in previous test_lpm6_routes.h . Because this table has only 1000 + * lines, keeping it doesn't make LPM6 test case so large and also + * make the algorithm to generate rule table unnecessary and the +- * algorithm to genertate test input IPv6 and associated expected ++ * algorithm to generate test input IPv6 and associated expected + * next_hop much simple. + */ + +diff --git a/dpdk/app/test/test_malloc.c b/dpdk/app/test/test_malloc.c +index afff0de9f0..6d9249f831 100644 +--- a/dpdk/app/test/test_malloc.c ++++ b/dpdk/app/test/test_malloc.c +@@ -603,7 +603,7 @@ test_realloc_numa(void) + } + } + +- /* Print warnign if only a single socket, but don't fail the test */ ++ /* Print warning if only a single socket, but don't fail the test */ + if (socket_count < 2) + printf("WARNING: realloc_socket test needs memory on multiple sockets!\n"); + +@@ -971,7 +971,7 @@ test_alloc_socket(void) + } + } + +- /* Print warnign if only a single socket, but don't fail the test */ ++ /* Print warning if only a single socket, but don't fail the test */ + if (socket_count < 2) { + printf("WARNING: alloc_socket test needs memory on multiple sockets!\n"); + } +diff --git a/dpdk/app/test/test_mbuf.c b/dpdk/app/test/test_mbuf.c +index a40f7d4883..f0dd693e94 100644 +--- a/dpdk/app/test/test_mbuf.c ++++ b/dpdk/app/test/test_mbuf.c +@@ -1148,7 +1148,7 @@ test_refcnt_mbuf(void) + + rte_eal_mp_wait_lcore(); + +- /* check that we porcessed all references */ ++ /* check that we processed all references */ + tref = 0; + main_lcore = rte_get_main_lcore(); + +@@ -1174,6 +1174,8 @@ test_refcnt_mbuf(void) + } + + #include ++#include ++#include + #include + + /* use fork() to test mbuf errors panic */ +@@ -1186,9 +1188,14 @@ verify_mbuf_check_panics(struct rte_mbuf *buf) + pid = fork(); + + if (pid == 0) { ++ struct rlimit rl; ++ ++ /* No need to generate a coredump when panicking. 
*/ ++ rl.rlim_cur = rl.rlim_max = 0; ++ setrlimit(RLIMIT_CORE, &rl); + rte_mbuf_sanity_check(buf, 1); /* should panic */ + exit(0); /* return normally if it doesn't panic */ +- } else if (pid < 0){ ++ } else if (pid < 0) { + printf("Fork Failed\n"); + return -1; + } +@@ -2023,8 +2030,6 @@ test_pktmbuf_read_from_offset(struct rte_mempool *pktmbuf_pool) + NULL); + if (data_copy == NULL) + GOTO_FAIL("%s: Error in reading packet data!\n", __func__); +- if (strlen(data_copy) != MBUF_TEST_DATA_LEN2 - 5) +- GOTO_FAIL("%s: Incorrect data length!\n", __func__); + for (off = 0; off < MBUF_TEST_DATA_LEN2 - 5; off++) { + if (data_copy[off] != (char)0xcc) + GOTO_FAIL("Data corrupted at offset %u", off); +@@ -2046,8 +2051,6 @@ test_pktmbuf_read_from_offset(struct rte_mempool *pktmbuf_pool) + data_copy = rte_pktmbuf_read(m, hdr_len, 0, NULL); + if (data_copy == NULL) + GOTO_FAIL("%s: Error in reading packet data!\n", __func__); +- if (strlen(data_copy) != MBUF_TEST_DATA_LEN2) +- GOTO_FAIL("%s: Corrupted data content!\n", __func__); + for (off = 0; off < MBUF_TEST_DATA_LEN2; off++) { + if (data_copy[off] != (char)0xcc) + GOTO_FAIL("Data corrupted at offset %u", off); +@@ -2298,16 +2301,16 @@ test_pktmbuf_read_from_chain(struct rte_mempool *pktmbuf_pool) + + /* Define a free call back function to be used for external buffer */ + static void +-ext_buf_free_callback_fn(void *addr __rte_unused, void *opaque) ++ext_buf_free_callback_fn(void *addr, void *opaque) + { +- void *ext_buf_addr = opaque; ++ bool *freed = opaque; + +- if (ext_buf_addr == NULL) { ++ if (addr == NULL) { + printf("External buffer address is invalid\n"); + return; + } +- rte_free(ext_buf_addr); +- ext_buf_addr = NULL; ++ rte_free(addr); ++ *freed = true; + printf("External buffer freed via callback\n"); + } + +@@ -2331,6 +2334,7 @@ test_pktmbuf_ext_shinfo_init_helper(struct rte_mempool *pktmbuf_pool) + void *ext_buf_addr = NULL; + uint16_t buf_len = EXT_BUF_TEST_DATA_LEN + + sizeof(struct rte_mbuf_ext_shared_info); ++ bool freed = false; + + /* alloc a mbuf */ + m = rte_pktmbuf_alloc(pktmbuf_pool); +@@ -2346,7 +2350,7 @@ test_pktmbuf_ext_shinfo_init_helper(struct rte_mempool *pktmbuf_pool) + GOTO_FAIL("%s: External buffer allocation failed\n", __func__); + + ret_shinfo = rte_pktmbuf_ext_shinfo_init_helper(ext_buf_addr, &buf_len, +- ext_buf_free_callback_fn, ext_buf_addr); ++ ext_buf_free_callback_fn, &freed); + if (ret_shinfo == NULL) + GOTO_FAIL("%s: Shared info initialization failed!\n", __func__); + +@@ -2356,7 +2360,7 @@ test_pktmbuf_ext_shinfo_init_helper(struct rte_mempool *pktmbuf_pool) + if (rte_mbuf_refcnt_read(m) != 1) + GOTO_FAIL("%s: Invalid refcnt in mbuf\n", __func__); + +- buf_iova = rte_mempool_virt2iova(ext_buf_addr); ++ buf_iova = rte_mem_virt2iova(ext_buf_addr); + rte_pktmbuf_attach_extbuf(m, ext_buf_addr, buf_iova, buf_len, + ret_shinfo); + if (m->ol_flags != EXT_ATTACHED_MBUF) +@@ -2379,26 +2383,35 @@ test_pktmbuf_ext_shinfo_init_helper(struct rte_mempool *pktmbuf_pool) + + if (rte_mbuf_ext_refcnt_read(ret_shinfo) != 2) + GOTO_FAIL("%s: Invalid ext_buf ref_cnt\n", __func__); ++ if (freed) ++ GOTO_FAIL("%s: extbuf should not be freed\n", __func__); + + /* test to manually update ext_buf_ref_cnt from 2 to 3*/ + rte_mbuf_ext_refcnt_update(ret_shinfo, 1); + if (rte_mbuf_ext_refcnt_read(ret_shinfo) != 3) + GOTO_FAIL("%s: Update ext_buf ref_cnt failed\n", __func__); ++ if (freed) ++ GOTO_FAIL("%s: extbuf should not be freed\n", __func__); + + /* reset the ext_refcnt before freeing the external buffer */ + 
rte_mbuf_ext_refcnt_set(ret_shinfo, 2); + if (rte_mbuf_ext_refcnt_read(ret_shinfo) != 2) + GOTO_FAIL("%s: set ext_buf ref_cnt failed\n", __func__); ++ if (freed) ++ GOTO_FAIL("%s: extbuf should not be freed\n", __func__); + + /* detach the external buffer from mbufs */ + rte_pktmbuf_detach_extbuf(m); + /* check if ref cnt is decremented */ + if (rte_mbuf_ext_refcnt_read(ret_shinfo) != 1) + GOTO_FAIL("%s: Invalid ext_buf ref_cnt\n", __func__); ++ if (freed) ++ GOTO_FAIL("%s: extbuf should not be freed\n", __func__); + + rte_pktmbuf_detach_extbuf(clone); +- if (rte_mbuf_ext_refcnt_read(ret_shinfo) != 0) +- GOTO_FAIL("%s: Invalid ext_buf ref_cnt\n", __func__); ++ if (!freed) ++ GOTO_FAIL("%s: extbuf should be freed\n", __func__); ++ freed = false; + + rte_pktmbuf_free(m); + m = NULL; +@@ -2570,6 +2583,16 @@ test_mbuf_dyn(struct rte_mempool *pktmbuf_pool) + .align = 3, + .flags = 0, + }; ++ const struct rte_mbuf_dynfield dynfield_fail_flag = { ++ .name = "test-dynfield", ++ .size = sizeof(uint8_t), ++ .align = __alignof__(uint8_t), ++ .flags = 1, ++ }; ++ const struct rte_mbuf_dynflag dynflag_fail_flag = { ++ .name = "test-dynflag", ++ .flags = 1, ++ }; + const struct rte_mbuf_dynflag dynflag = { + .name = "test-dynflag", + .flags = 0, +@@ -2631,6 +2654,14 @@ test_mbuf_dyn(struct rte_mempool *pktmbuf_pool) + if (ret != -1) + GOTO_FAIL("dynamic field creation should fail (not avail)"); + ++ ret = rte_mbuf_dynfield_register(&dynfield_fail_flag); ++ if (ret != -1) ++ GOTO_FAIL("dynamic field creation should fail (invalid flag)"); ++ ++ ret = rte_mbuf_dynflag_register(&dynflag_fail_flag); ++ if (ret != -1) ++ GOTO_FAIL("dynamic flag creation should fail (invalid flag)"); ++ + flag = rte_mbuf_dynflag_register(&dynflag); + if (flag == -1) + GOTO_FAIL("failed to register dynamic flag, flag=%d: %s", +@@ -2677,6 +2708,70 @@ test_mbuf_dyn(struct rte_mempool *pktmbuf_pool) + return -1; + } + ++/* check that m->nb_segs and m->next are reset on mbuf free */ ++static int ++test_nb_segs_and_next_reset(void) ++{ ++ struct rte_mbuf *m0 = NULL, *m1 = NULL, *m2 = NULL; ++ struct rte_mempool *pool = NULL; ++ ++ pool = rte_pktmbuf_pool_create("test_mbuf_reset", ++ 3, 0, 0, MBUF_DATA_SIZE, SOCKET_ID_ANY); ++ if (pool == NULL) ++ GOTO_FAIL("Failed to create mbuf pool"); ++ ++ /* alloc mbufs */ ++ m0 = rte_pktmbuf_alloc(pool); ++ m1 = rte_pktmbuf_alloc(pool); ++ m2 = rte_pktmbuf_alloc(pool); ++ if (m0 == NULL || m1 == NULL || m2 == NULL) ++ GOTO_FAIL("Failed to allocate mbuf"); ++ ++ /* append data in all of them */ ++ if (rte_pktmbuf_append(m0, 500) == NULL || ++ rte_pktmbuf_append(m1, 500) == NULL || ++ rte_pktmbuf_append(m2, 500) == NULL) ++ GOTO_FAIL("Failed to append data in mbuf"); ++ ++ /* chain them in one mbuf m0 */ ++ rte_pktmbuf_chain(m1, m2); ++ rte_pktmbuf_chain(m0, m1); ++ if (m0->nb_segs != 3 || m0->next != m1 || m1->next != m2 || ++ m2->next != NULL) { ++ m1 = m2 = NULL; ++ GOTO_FAIL("Failed to chain mbufs"); ++ } ++ ++ /* split m0 chain in two, between m1 and m2 */ ++ m0->nb_segs = 2; ++ m1->next = NULL; ++ m2->nb_segs = 1; ++ ++ /* free the 2 mbuf chains m0 and m2 */ ++ rte_pktmbuf_free(m0); ++ rte_pktmbuf_free(m2); ++ ++ /* realloc the 3 mbufs */ ++ m0 = rte_mbuf_raw_alloc(pool); ++ m1 = rte_mbuf_raw_alloc(pool); ++ m2 = rte_mbuf_raw_alloc(pool); ++ if (m0 == NULL || m1 == NULL || m2 == NULL) ++ GOTO_FAIL("Failed to reallocate mbuf"); ++ ++ /* ensure that m->next and m->nb_segs are reset allocated mbufs */ ++ if (m0->nb_segs != 1 || m0->next != NULL || ++ m1->nb_segs != 1 || m1->next != NULL || ++ 
m2->nb_segs != 1 || m2->next != NULL) ++ GOTO_FAIL("nb_segs or next was not reset properly"); ++ ++ return 0; ++ ++fail: ++ if (pool != NULL) ++ rte_mempool_free(pool); ++ return -1; ++} ++ + static int + test_mbuf(void) + { +@@ -2867,6 +2962,11 @@ test_mbuf(void) + goto err; + } + ++ /* test reset of m->nb_segs and m->next on mbuf free */ ++ if (test_nb_segs_and_next_reset() < 0) { ++ printf("test_nb_segs_and_next_reset() failed\n"); ++ goto err; ++ } + + ret = 0; + err: +diff --git a/dpdk/app/test/test_member.c b/dpdk/app/test/test_member.c +index 40aa4c8627..af9d50915c 100644 +--- a/dpdk/app/test/test_member.c ++++ b/dpdk/app/test/test_member.c +@@ -459,7 +459,7 @@ static int test_member_multimatch(void) + MAX_MATCH, set_ids_cache); + /* + * For cache mode, keys overwrite when signature same. +- * the mutimatch should work like single match. ++ * the multimatch should work like single match. + */ + TEST_ASSERT(ret_ht == M_MATCH_CNT && ret_vbf == M_MATCH_CNT && + ret_cache == 1, +diff --git a/dpdk/app/test/test_memory.c b/dpdk/app/test/test_memory.c +index 7d5ae99bab..140ac3f3cf 100644 +--- a/dpdk/app/test/test_memory.c ++++ b/dpdk/app/test/test_memory.c +@@ -6,6 +6,7 @@ + #include + + #include ++#include + #include + #include + #include +@@ -53,7 +54,7 @@ check_seg_fds(const struct rte_memseg_list *msl, const struct rte_memseg *ms, + /* ENOTSUP means segment is valid, but there is not support for + * segment fd API (e.g. on FreeBSD). + */ +- if (errno == ENOTSUP) ++ if (rte_errno == ENOTSUP) + return 1; + /* all other errors are treated as failures */ + return -1; +@@ -62,7 +63,7 @@ check_seg_fds(const struct rte_memseg_list *msl, const struct rte_memseg *ms, + /* we're able to get memseg fd - try getting its offset */ + ret = rte_memseg_get_fd_offset_thread_unsafe(ms, &offset); + if (ret < 0) { +- if (errno == ENOTSUP) ++ if (rte_errno == ENOTSUP) + return 1; + return -1; + } +diff --git a/dpdk/app/test/test_mempool.c b/dpdk/app/test/test_mempool.c +index 084842fdaa..2cd42c3eff 100644 +--- a/dpdk/app/test/test_mempool.c ++++ b/dpdk/app/test/test_mempool.c +@@ -287,7 +287,7 @@ static int test_mempool_single_consumer(void) + } + + /* +- * test function for mempool test based on singple consumer and single producer, ++ * test function for mempool test based on single consumer and single producer, + * can run on one lcore only + */ + static int +@@ -305,7 +305,7 @@ my_mp_init(struct rte_mempool *mp, __rte_unused void *arg) + } + + /* +- * it tests the mempool operations based on singple producer and single consumer ++ * it tests the mempool operations based on single producer and single consumer + */ + static int + test_mempool_sp_sc(void) +@@ -552,7 +552,7 @@ test_mempool(void) + GOTO_ERR(ret, err); + + /* test to initialize mempool objects and memory */ +- nb_objs = rte_mempool_obj_iter(mp_stack_mempool_iter, rte_pktmbuf_init, ++ nb_objs = rte_mempool_obj_iter(mp_stack_mempool_iter, my_obj_init, + NULL); + if (nb_objs == 0) + GOTO_ERR(ret, err); +@@ -633,7 +633,7 @@ test_mempool(void) + if (test_mempool_basic_ex(mp_nocache) < 0) + GOTO_ERR(ret, err); + +- /* mempool operation test based on single producer and single comsumer */ ++ /* mempool operation test based on single producer and single consumer */ + if (test_mempool_sp_sc() < 0) + GOTO_ERR(ret, err); + +diff --git a/dpdk/app/test/test_mempool_perf.c b/dpdk/app/test/test_mempool_perf.c +index d7d0aaa334..7c0c280fce 100644 +--- a/dpdk/app/test/test_mempool_perf.c ++++ b/dpdk/app/test/test_mempool_perf.c +@@ -89,7 +89,7 @@ static 
rte_atomic32_t synchro; + static unsigned n_get_bulk; + static unsigned n_put_bulk; + +-/* number of objects retrived from mempool before putting them back */ ++/* number of objects retrieved from mempool before putting them back */ + static unsigned n_keep; + + /* number of enqueues / dequeues */ +diff --git a/dpdk/app/test/test_memzone.c b/dpdk/app/test/test_memzone.c +index 0343b0326e..5caeaf76f5 100644 +--- a/dpdk/app/test/test_memzone.c ++++ b/dpdk/app/test/test_memzone.c +@@ -522,7 +522,7 @@ test_memzone_reserve_max(void) + } + + if (mz->len != maxlen) { +- printf("Memzone reserve with 0 size did not return bigest block\n"); ++ printf("Memzone reserve with 0 size did not return biggest block\n"); + printf("Expected size = %zu, actual size = %zu\n", + maxlen, mz->len); + rte_dump_physmem_layout(stdout); +@@ -585,7 +585,7 @@ test_memzone_reserve_max_aligned(void) + + if (mz->len < minlen || mz->len > maxlen) { + printf("Memzone reserve with 0 size and alignment %u did not return" +- " bigest block\n", align); ++ " biggest block\n", align); + printf("Expected size = %zu-%zu, actual size = %zu\n", + minlen, maxlen, mz->len); + rte_dump_physmem_layout(stdout); +@@ -1033,7 +1033,7 @@ test_memzone_basic(void) + if (mz != memzone1) + return -1; + +- printf("test duplcate zone name\n"); ++ printf("test duplicate zone name\n"); + mz = rte_memzone_reserve(TEST_MEMZONE_NAME("testzone1"), 100, + SOCKET_ID_ANY, 0); + if (mz != NULL) +diff --git a/dpdk/app/test/test_meter.c b/dpdk/app/test/test_meter.c +index f6fe6494ab..15d5a4839b 100644 +--- a/dpdk/app/test/test_meter.c ++++ b/dpdk/app/test/test_meter.c +@@ -444,7 +444,7 @@ tm_test_srtcm_color_aware_check(void) + * if using blind check + */ + +- /* previouly have a green, test points should keep unchanged */ ++ /* previously have a green, test points should keep unchanged */ + in[0] = in[1] = in[2] = in[3] = RTE_COLOR_GREEN; + out[0] = RTE_COLOR_GREEN; + out[1] = RTE_COLOR_YELLOW; +@@ -551,7 +551,7 @@ tm_test_trtcm_color_aware_check(void) + * if using blind check + */ + +- /* previouly have a green, test points should keep unchanged */ ++ /* previously have a green, test points should keep unchanged */ + in[0] = in[1] = in[2] = in[3] = RTE_COLOR_GREEN; + out[0] = RTE_COLOR_GREEN; + out[1] = RTE_COLOR_YELLOW; +@@ -648,7 +648,7 @@ tm_test_trtcm_rfc4115_color_aware_check(void) + * if using blind check + */ + +- /* previouly have a green, test points should keep unchanged */ ++ /* previously have a green, test points should keep unchanged */ + in[0] = in[1] = in[2] = in[3] = RTE_COLOR_GREEN; + out[0] = RTE_COLOR_GREEN; + out[1] = RTE_COLOR_YELLOW; +diff --git a/dpdk/app/test/test_metrics.c b/dpdk/app/test/test_metrics.c +index e736019ae4..11222133d0 100644 +--- a/dpdk/app/test/test_metrics.c ++++ b/dpdk/app/test/test_metrics.c +@@ -121,7 +121,7 @@ test_metrics_update_value(void) + err = rte_metrics_update_value(RTE_METRICS_GLOBAL, KEY, VALUE); + TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__); + +- /* Successful Test: Valid port_id otherthan RTE_METRICS_GLOBAL, key ++ /* Successful Test: Valid port_id other than RTE_METRICS_GLOBAL, key + * and value + */ + err = rte_metrics_update_value(9, KEY, VALUE); +diff --git a/dpdk/app/test/test_pdump.c b/dpdk/app/test/test_pdump.c +index 21fcc1bc4c..03b26dd26d 100644 +--- a/dpdk/app/test/test_pdump.c ++++ b/dpdk/app/test/test_pdump.c +@@ -147,11 +147,19 @@ send_pkts(void *empty) + ret = test_get_mbuf_from_pool(&mp, pbuf, poolname); + if (ret < 0) + printf("get_mbuf_from_pool failed\n"); +- do { ++ ++ ret = 
test_dev_start(portid, mp); ++ if (ret < 0) ++ printf("test_dev_start(%hu, %p) failed, error code: %d\n", ++ portid, mp, ret); ++ ++ while (ret >= 0 && flag_for_send_pkts) { + ret = test_packet_forward(pbuf, portid, QUEUE_ID); + if (ret < 0) + printf("send pkts Failed\n"); +- } while (flag_for_send_pkts); ++ }; ++ ++ rte_eth_dev_stop(portid); + test_put_mbuf_to_pool(mp, pbuf); + return empty; + } +diff --git a/dpdk/app/test/test_pmd_perf.c b/dpdk/app/test/test_pmd_perf.c +index 3a248d512c..6ce02e3787 100644 +--- a/dpdk/app/test/test_pmd_perf.c ++++ b/dpdk/app/test/test_pmd_perf.c +@@ -456,6 +456,7 @@ main_loop(__rte_unused void *args) + #define PACKET_SIZE 64 + #define FRAME_GAP 12 + #define MAC_PREAMBLE 8 ++#define MAX_RETRY_COUNT 5 + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; + unsigned lcore_id; + unsigned i, portid, nb_rx = 0, nb_tx = 0; +@@ -463,6 +464,8 @@ main_loop(__rte_unused void *args) + int pkt_per_port; + uint64_t diff_tsc; + uint64_t packets_per_second, total_packets; ++ int retry_cnt = 0; ++ int free_pkt = 0; + + lcore_id = rte_lcore_id(); + conf = &lcore_conf[lcore_id]; +@@ -480,10 +483,19 @@ main_loop(__rte_unused void *args) + nb_tx = RTE_MIN(MAX_PKT_BURST, num); + nb_tx = rte_eth_tx_burst(portid, 0, + &tx_burst[idx], nb_tx); ++ if (nb_tx == 0) ++ retry_cnt++; + num -= nb_tx; + idx += nb_tx; ++ if (retry_cnt == MAX_RETRY_COUNT) { ++ retry_cnt = 0; ++ break; ++ } + } + } ++ for (free_pkt = idx; free_pkt < (MAX_TRAFFIC_BURST * conf->nb_ports); ++ free_pkt++) ++ rte_pktmbuf_free(tx_burst[free_pkt]); + printf("Total packets inject to prime ports = %u\n", idx); + + packets_per_second = (link_mbps * 1000 * 1000) / +@@ -753,7 +765,7 @@ test_pmd_perf(void) + "rte_eth_dev_start: err=%d, port=%d\n", + ret, portid); + +- /* always eanble promiscuous */ ++ /* always enable promiscuous */ + ret = rte_eth_promiscuous_enable(portid); + if (ret != 0) + rte_exit(EXIT_FAILURE, +diff --git a/dpdk/app/test/test_power_cpufreq.c b/dpdk/app/test/test_power_cpufreq.c +index 731c6b4dc8..77aec04697 100644 +--- a/dpdk/app/test/test_power_cpufreq.c ++++ b/dpdk/app/test/test_power_cpufreq.c +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + + #include "test.h" + +@@ -34,37 +35,86 @@ test_power_caps(void) + #define TEST_POWER_LCORE_INVALID ((unsigned)RTE_MAX_LCORE) + #define TEST_POWER_FREQS_NUM_MAX ((unsigned)RTE_MAX_LCORE_FREQS) + +-#define TEST_POWER_SYSFILE_CUR_FREQ \ ++/* macros used for rounding frequency to nearest 100000 */ ++#define TEST_FREQ_ROUNDING_DELTA 50000 ++#define TEST_ROUND_FREQ_TO_N_100000 100000 ++ ++#define TEST_POWER_SYSFILE_CPUINFO_FREQ \ + "/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_cur_freq" ++#define TEST_POWER_SYSFILE_SCALING_FREQ \ ++ "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" + + static uint32_t total_freq_num; + static uint32_t freqs[TEST_POWER_FREQS_NUM_MAX]; + + static int +-check_cur_freq(unsigned lcore_id, uint32_t idx) ++check_cur_freq(unsigned int lcore_id, uint32_t idx, bool turbo) + { + #define TEST_POWER_CONVERT_TO_DECIMAL 10 ++#define MAX_LOOP 100 + FILE *f; + char fullpath[PATH_MAX]; + char buf[BUFSIZ]; ++ enum power_management_env env; + uint32_t cur_freq; ++ uint32_t freq_conv; + int ret = -1; ++ int i; + + if (snprintf(fullpath, sizeof(fullpath), +- TEST_POWER_SYSFILE_CUR_FREQ, lcore_id) < 0) { ++ TEST_POWER_SYSFILE_CPUINFO_FREQ, lcore_id) < 0) { + return 0; + } + f = fopen(fullpath, "r"); + if (f == NULL) { +- return 0; ++ if (snprintf(fullpath, sizeof(fullpath), ++ TEST_POWER_SYSFILE_SCALING_FREQ, lcore_id) < 0) { ++ return 
0; ++ } ++ f = fopen(fullpath, "r"); ++ if (f == NULL) { ++ return 0; ++ } ++ } ++ for (i = 0; i < MAX_LOOP; i++) { ++ fflush(f); ++ if (fgets(buf, sizeof(buf), f) == NULL) ++ goto fail_all; ++ ++ cur_freq = strtoul(buf, NULL, TEST_POWER_CONVERT_TO_DECIMAL); ++ freq_conv = cur_freq; ++ ++ env = rte_power_get_env(); ++ ++ if (env == PM_ENV_PSTATE_CPUFREQ) { ++ /* convert the frequency to nearest 100000 value ++ * Ex: if cur_freq=1396789 then freq_conv=1400000 ++ * Ex: if cur_freq=800030 then freq_conv=800000 ++ */ ++ unsigned int freq_conv = 0; ++ freq_conv = (cur_freq + TEST_FREQ_ROUNDING_DELTA) ++ / TEST_ROUND_FREQ_TO_N_100000; ++ freq_conv = freq_conv * TEST_ROUND_FREQ_TO_N_100000; ++ } ++ ++ if (turbo) ++ ret = (freqs[idx] <= freq_conv ? 0 : -1); ++ else ++ ret = (freqs[idx] == freq_conv ? 0 : -1); ++ ++ if (ret == 0) ++ break; ++ ++ if (fseek(f, 0, SEEK_SET) < 0) { ++ printf("Fail to set file position indicator to 0\n"); ++ goto fail_all; ++ } ++ ++ /* wait for the value to be updated */ ++ rte_delay_ms(10); + } +- if (fgets(buf, sizeof(buf), f) == NULL) { +- goto fail_get_cur_freq; +- } +- cur_freq = strtoul(buf, NULL, TEST_POWER_CONVERT_TO_DECIMAL); +- ret = (freqs[idx] == cur_freq ? 0 : -1); + +-fail_get_cur_freq: ++fail_all: + fclose(f); + + return ret; +@@ -143,7 +193,7 @@ check_power_get_freq(void) + } + + /* Check the current frequency */ +- ret = check_cur_freq(TEST_POWER_LCORE_ID, count); ++ ret = check_cur_freq(TEST_POWER_LCORE_ID, count, false); + if (ret < 0) + return -1; + +@@ -193,7 +243,7 @@ check_power_set_freq(void) + } + + /* Check the current frequency */ +- ret = check_cur_freq(TEST_POWER_LCORE_ID, total_freq_num - 1); ++ ret = check_cur_freq(TEST_POWER_LCORE_ID, total_freq_num - 1, false); + if (ret < 0) + return -1; + +@@ -206,6 +256,8 @@ check_power_freq_down(void) + { + int ret; + ++ rte_power_freq_enable_turbo(TEST_POWER_LCORE_ID); ++ + /* test with an invalid lcore id */ + ret = rte_power_freq_down(TEST_POWER_LCORE_INVALID); + if (ret >= 0) { +@@ -229,7 +281,7 @@ check_power_freq_down(void) + } + + /* Check the current frequency */ +- ret = check_cur_freq(TEST_POWER_LCORE_ID, total_freq_num - 1); ++ ret = check_cur_freq(TEST_POWER_LCORE_ID, total_freq_num - 1, false); + if (ret < 0) + return -1; + +@@ -248,7 +300,7 @@ check_power_freq_down(void) + } + + /* Check the current frequency */ +- ret = check_cur_freq(TEST_POWER_LCORE_ID, 1); ++ ret = check_cur_freq(TEST_POWER_LCORE_ID, 1, false); + if (ret < 0) + return -1; + +@@ -284,7 +336,7 @@ check_power_freq_up(void) + } + + /* Check the current frequency */ +- ret = check_cur_freq(TEST_POWER_LCORE_ID, total_freq_num - 2); ++ ret = check_cur_freq(TEST_POWER_LCORE_ID, total_freq_num - 2, false); + if (ret < 0) + return -1; + +@@ -303,7 +355,7 @@ check_power_freq_up(void) + } + + /* Check the current frequency */ +- ret = check_cur_freq(TEST_POWER_LCORE_ID, 0); ++ ret = check_cur_freq(TEST_POWER_LCORE_ID, 0, true); + if (ret < 0) + return -1; + +@@ -331,7 +383,7 @@ check_power_freq_max(void) + } + + /* Check the current frequency */ +- ret = check_cur_freq(TEST_POWER_LCORE_ID, 0); ++ ret = check_cur_freq(TEST_POWER_LCORE_ID, 0, true); + if (ret < 0) + return -1; + +@@ -359,7 +411,7 @@ check_power_freq_min(void) + } + + /* Check the current frequency */ +- ret = check_cur_freq(TEST_POWER_LCORE_ID, total_freq_num - 1); ++ ret = check_cur_freq(TEST_POWER_LCORE_ID, total_freq_num - 1, false); + if (ret < 0) + return -1; + +@@ -391,9 +443,15 @@ check_power_turbo(void) + TEST_POWER_LCORE_ID); + return -1; + } ++ ret = 
rte_power_freq_max(TEST_POWER_LCORE_ID); ++ if (ret < 0) { ++ printf("Fail to scale up the freq to max on lcore %u\n", ++ TEST_POWER_LCORE_ID); ++ return -1; ++ } + + /* Check the current frequency */ +- ret = check_cur_freq(TEST_POWER_LCORE_ID, 0); ++ ret = check_cur_freq(TEST_POWER_LCORE_ID, 0, true); + if (ret < 0) + return -1; + +@@ -410,9 +468,15 @@ check_power_turbo(void) + TEST_POWER_LCORE_ID); + return -1; + } ++ ret = rte_power_freq_max(TEST_POWER_LCORE_ID); ++ if (ret < 0) { ++ printf("Fail to scale up the freq to max on lcore %u\n", ++ TEST_POWER_LCORE_ID); ++ return -1; ++ } + + /* Check the current frequency */ +- ret = check_cur_freq(TEST_POWER_LCORE_ID, 1); ++ ret = check_cur_freq(TEST_POWER_LCORE_ID, 1, false); + if (ret < 0) + return -1; + +@@ -596,7 +660,7 @@ test_power_cpufreq(void) + /* test of exit power management for an invalid lcore */ + ret = rte_power_exit(TEST_POWER_LCORE_INVALID); + if (ret == 0) { +- printf("Unpectedly exit power management successfully for " ++ printf("Unexpectedly exit power management successfully for " + "lcore %u\n", TEST_POWER_LCORE_INVALID); + rte_power_unset_env(); + return -1; +diff --git a/dpdk/app/test/test_prefetch.c b/dpdk/app/test/test_prefetch.c +index 5489885b51..7b4a8e4144 100644 +--- a/dpdk/app/test/test_prefetch.c ++++ b/dpdk/app/test/test_prefetch.c +@@ -20,7 +20,7 @@ + static int + test_prefetch(void) + { +- int a; ++ int a = 0; + + rte_prefetch0(&a); + rte_prefetch1(&a); +diff --git a/dpdk/app/test/test_rcu_qsbr.c b/dpdk/app/test/test_rcu_qsbr.c +index ab37a068cd..70404e89e6 100644 +--- a/dpdk/app/test/test_rcu_qsbr.c ++++ b/dpdk/app/test/test_rcu_qsbr.c +@@ -408,7 +408,7 @@ test_rcu_qsbr_synchronize_reader(void *arg) + + /* + * rte_rcu_qsbr_synchronize: Wait till all the reader threads have entered +- * the queiscent state. ++ * the quiescent state. 
+ */ + static int + test_rcu_qsbr_synchronize(void) +@@ -443,7 +443,7 @@ test_rcu_qsbr_synchronize(void) + rte_rcu_qsbr_synchronize(t[0], RTE_MAX_LCORE - 1); + rte_rcu_qsbr_thread_offline(t[0], RTE_MAX_LCORE - 1); + +- /* Test if the API returns after unregisterng all the threads */ ++ /* Test if the API returns after unregistering all the threads */ + for (i = 0; i < RTE_MAX_LCORE; i++) + rte_rcu_qsbr_thread_unregister(t[0], i); + rte_rcu_qsbr_synchronize(t[0], RTE_QSBR_THRID_INVALID); +diff --git a/dpdk/app/test/test_reciprocal_division_perf.c b/dpdk/app/test/test_reciprocal_division_perf.c +index a7be8aa71a..4f625873e5 100644 +--- a/dpdk/app/test/test_reciprocal_division_perf.c ++++ b/dpdk/app/test/test_reciprocal_division_perf.c +@@ -71,10 +71,12 @@ test_reciprocal_division_perf(void) + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); +- printf("Cycles per division(normal) : %3.2f\n", +- ((double)tot_cyc_n)/i); +- printf("Cycles per division(reciprocal) : %3.2f\n\n", +- ((double)tot_cyc_r)/i); ++ if (i != 0) { ++ printf("Cycles per division(normal) : %3.2f\n", ++ ((double)tot_cyc_n)/i); ++ printf("Cycles per division(reciprocal) : %3.2f\n\n", ++ ((double)tot_cyc_r)/i); ++ } + + tot_cyc_n = 0; + tot_cyc_r = 0; +@@ -111,11 +113,12 @@ test_reciprocal_division_perf(void) + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); +- printf("Cycles per division(normal) : %3.2f\n", +- ((double)tot_cyc_n)/i); +- printf("Cycles per division(reciprocal) : %3.2f\n\n", +- ((double)tot_cyc_r)/i); +- ++ if (i != 0) { ++ printf("Cycles per division(normal) : %3.2f\n", ++ ((double)tot_cyc_n)/i); ++ printf("Cycles per division(reciprocal) : %3.2f\n\n", ++ ((double)tot_cyc_r)/i); ++ } + tot_cyc_n = 0; + tot_cyc_r = 0; + +@@ -152,10 +155,12 @@ test_reciprocal_division_perf(void) + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); +- printf("Cycles per division(normal) : %3.2f\n", +- ((double)tot_cyc_n)/i); +- printf("Cycles per division(reciprocal) : %3.2f\n\n", +- ((double)tot_cyc_r)/i); ++ if (i != 0) { ++ printf("Cycles per division(normal) : %3.2f\n", ++ ((double)tot_cyc_n)/i); ++ printf("Cycles per division(reciprocal) : %3.2f\n\n", ++ ((double)tot_cyc_r)/i); ++ } + + tot_cyc_n = 0; + tot_cyc_r = 0; +@@ -190,10 +195,12 @@ test_reciprocal_division_perf(void) + tot_cyc_n); + printf("Total number of cycles reciprocal division : %"PRIu64"\n", + tot_cyc_r); +- printf("Cycles per division(normal) : %3.2f\n", +- ((double)tot_cyc_n)/i); +- printf("Cycles per division(reciprocal) : %3.2f\n", +- ((double)tot_cyc_r)/i); ++ if (i != 0) { ++ printf("Cycles per division(normal) : %3.2f\n", ++ ((double)tot_cyc_n)/i); ++ printf("Cycles per division(reciprocal) : %3.2f\n", ++ ((double)tot_cyc_r)/i); ++ } + + return result; + } +diff --git a/dpdk/app/test/test_red.c b/dpdk/app/test/test_red.c +index e973f3131e..33a9f4ebb7 100644 +--- a/dpdk/app/test/test_red.c ++++ b/dpdk/app/test/test_red.c +@@ -1049,7 +1049,7 @@ static struct test_queue ft6_tqueue = { + static struct test_config func_test6_config = { + .ifname = "functional test 6 interface", + .msg = "functional test 6 : use several queues (each with its own run-time data),\n" +- " use several RED configurations (such that each configuration is sharte_red by multiple queues),\n" ++ " use several RED configurations (such that each configuration is shared by multiple queues),\n" + " increase average queue size to target level,\n" + " 
dequeue all packets until queue is empty,\n" + " confirm that average queue size is computed correctly while queue is empty\n" +@@ -1566,10 +1566,10 @@ static void ovfl_check_avg(uint32_t avg) + } + + static struct test_config ovfl_test1_config = { +- .ifname = "queue avergage overflow test interface", ++ .ifname = "queue average overflow test interface", + .msg = "overflow test 1 : use one RED configuration,\n" + " increase average queue size to target level,\n" +- " check maximum number of bits requirte_red to represent avg_s\n\n", ++ " check maximum number of bits required to represent avg_s\n\n", + .htxt = "avg queue size " + "wq_log2 " + "fraction bits " +@@ -1757,12 +1757,12 @@ test_invalid_parameters(void) + printf("%i: rte_red_config_init should have failed!\n", __LINE__); + return -1; + } +- /* min_treshold == max_treshold */ ++ /* min_threshold == max_threshold */ + if (rte_red_config_init(&config, 0, 1, 1, 0) == 0) { + printf("%i: rte_red_config_init should have failed!\n", __LINE__); + return -1; + } +- /* min_treshold > max_treshold */ ++ /* min_threshold > max_threshold */ + if (rte_red_config_init(&config, 0, 2, 1, 0) == 0) { + printf("%i: rte_red_config_init should have failed!\n", __LINE__); + return -1; +diff --git a/dpdk/app/test/test_ring.h b/dpdk/app/test/test_ring.h +index c8bfec8399..45c263f3ff 100644 +--- a/dpdk/app/test/test_ring.h ++++ b/dpdk/app/test/test_ring.h +@@ -97,7 +97,7 @@ test_ring_copy_from(struct rte_ring_zc_data *zcd, void *dst, int esize, + } + } + +-static __rte_always_inline unsigned int ++static inline unsigned int + test_ring_enqueue(struct rte_ring *r, void **obj, int esize, unsigned int n, + unsigned int api_type) + { +@@ -158,7 +158,7 @@ test_ring_enqueue(struct rte_ring *r, void **obj, int esize, unsigned int n, + } + } + +-static __rte_always_inline unsigned int ++static inline unsigned int + test_ring_dequeue(struct rte_ring *r, void **obj, int esize, unsigned int n, + unsigned int api_type) + { +@@ -222,7 +222,7 @@ test_ring_dequeue(struct rte_ring *r, void **obj, int esize, unsigned int n, + /* This function is placed here as it is required for both + * performance and functional tests. + */ +-static __rte_always_inline void * ++static inline void * + test_ring_calloc(unsigned int rsize, int esize) + { + unsigned int sz; +diff --git a/dpdk/app/test/test_security.c b/dpdk/app/test/test_security.c +index 060cf1ffa8..059731b65d 100644 +--- a/dpdk/app/test/test_security.c ++++ b/dpdk/app/test/test_security.c +@@ -237,7 +237,7 @@ + * increases .called counter. Function returns value stored in .ret field + * of the structure. + * In case of some parameters in some functions the expected value is unknown +- * and cannot be detrmined prior to call. Such parameters are stored ++ * and cannot be determined prior to call. Such parameters are stored + * in structure and can be compared or analyzed later in test case code. + * + * Below structures and functions follow the rules just described. 
+diff --git a/dpdk/app/test/test_service_cores.c b/dpdk/app/test/test_service_cores.c +index 37d7172d53..0aee8c04e3 100644 +--- a/dpdk/app/test/test_service_cores.c ++++ b/dpdk/app/test/test_service_cores.c +@@ -66,7 +66,7 @@ static int32_t dummy_mt_unsafe_cb(void *args) + rte_delay_ms(250); + rte_atomic32_clear((rte_atomic32_t *)atomic_lock); + } else { +- /* 2nd thread will fail to take lock, so set pass flag */ ++ /* 2nd thread will fail to take lock, so clear pass flag */ + *pass_test = 0; + } + +@@ -314,10 +314,16 @@ service_attr_get(void) + TEST_ASSERT_EQUAL(1, cycles_gt_zero, + "attr_get() failed to get cycles (expected > zero)"); + +- rte_service_lcore_stop(slcore_id); ++ TEST_ASSERT_EQUAL(0, rte_service_map_lcore_set(id, slcore_id, 0), ++ "Disabling valid service and core failed"); ++ TEST_ASSERT_EQUAL(0, rte_service_lcore_stop(slcore_id), ++ "Failed to stop service lcore"); + + wait_slcore_inactive(slcore_id); + ++ TEST_ASSERT_EQUAL(0, rte_service_lcore_may_be_active(slcore_id), ++ "Service lcore not stopped after waiting."); ++ + TEST_ASSERT_EQUAL(0, rte_service_attr_get(id, attr_calls, &attr_value), + "Valid attr_get() call didn't return success"); + TEST_ASSERT_EQUAL(1, (attr_value > 0), +diff --git a/dpdk/app/test/test_stack.c b/dpdk/app/test/test_stack.c +index 02422a32d6..00efb38e2a 100644 +--- a/dpdk/app/test/test_stack.c ++++ b/dpdk/app/test/test_stack.c +@@ -373,7 +373,11 @@ test_stack(void) + static int + test_lf_stack(void) + { ++#if defined(RTE_STACK_LF_SUPPORTED) + return __test_stack(RTE_STACK_F_LF); ++#else ++ return TEST_SKIPPED; ++#endif + } + + REGISTER_TEST_COMMAND(stack_autotest, test_stack); +diff --git a/dpdk/app/test/test_stack_perf.c b/dpdk/app/test/test_stack_perf.c +index 3590625c49..4ee40d5d19 100644 +--- a/dpdk/app/test/test_stack_perf.c ++++ b/dpdk/app/test/test_stack_perf.c +@@ -349,7 +349,11 @@ test_stack_perf(void) + static int + test_lf_stack_perf(void) + { ++#if defined(RTE_STACK_LF_SUPPORTED) + return __test_stack_perf(RTE_STACK_F_LF); ++#else ++ return TEST_SKIPPED; ++#endif + } + + REGISTER_TEST_COMMAND(stack_perf_autotest, test_stack_perf); +diff --git a/dpdk/app/test/test_table_pipeline.c b/dpdk/app/test/test_table_pipeline.c +index aabf4375db..915c451fed 100644 +--- a/dpdk/app/test/test_table_pipeline.c ++++ b/dpdk/app/test/test_table_pipeline.c +@@ -364,7 +364,7 @@ setup_pipeline(int test_type) + .action = RTE_PIPELINE_ACTION_PORT, + {.port_id = port_out_id[i^1]}, + }; +- printf("Setting secont table to output to port\n"); ++ printf("Setting second table to output to port\n"); + + /* Add the default action for the table. 
*/ + ret = rte_pipeline_table_default_entry_add(p, +diff --git a/dpdk/app/test/test_table_tables.c b/dpdk/app/test/test_table_tables.c +index 1aa269f95d..494fb6ffaa 100644 +--- a/dpdk/app/test/test_table_tables.c ++++ b/dpdk/app/test/test_table_tables.c +@@ -28,7 +28,8 @@ table_test table_tests[] = { + APP_METADATA_OFFSET(0)); \ + key = RTE_MBUF_METADATA_UINT8_PTR(mbuf, \ + APP_METADATA_OFFSET(32)); \ +- memset(key, 0, 32); \ ++ if (mbuf->priv_size + mbuf->buf_len >= 64) \ ++ memset(key, 0, 32); \ + k32 = (uint32_t *) key; \ + k32[0] = (value); \ + *signature = pipeline_test_hash(key, NULL, 0, 0); \ +@@ -289,10 +290,10 @@ test_table_lpm(void) + struct rte_mbuf *mbufs[RTE_PORT_IN_BURST_SIZE_MAX]; + void *table; + char *entries[RTE_PORT_IN_BURST_SIZE_MAX]; +- char entry; ++ uint64_t entry; + void *entry_ptr; + int key_found; +- uint32_t entry_size = 1; ++ uint32_t entry_size = sizeof(entry); + + /* Initialize params and create tables */ + struct rte_table_lpm_params lpm_params = { +@@ -354,7 +355,7 @@ test_table_lpm(void) + struct rte_table_lpm_key lpm_key; + lpm_key.ip = 0xadadadad; + +- table = rte_table_lpm_ops.f_create(&lpm_params, 0, 1); ++ table = rte_table_lpm_ops.f_create(&lpm_params, 0, entry_size); + if (table == NULL) + return -9; + +@@ -455,10 +456,10 @@ test_table_lpm_ipv6(void) + struct rte_mbuf *mbufs[RTE_PORT_IN_BURST_SIZE_MAX]; + void *table; + char *entries[RTE_PORT_IN_BURST_SIZE_MAX]; +- char entry; ++ uint64_t entry; + void *entry_ptr; + int key_found; +- uint32_t entry_size = 1; ++ uint32_t entry_size = sizeof(entry); + + /* Initialize params and create tables */ + struct rte_table_lpm_ipv6_params lpm_params = { +diff --git a/dpdk/app/test/test_timer.c b/dpdk/app/test/test_timer.c +index a10b2fe9da..631b737d16 100644 +--- a/dpdk/app/test/test_timer.c ++++ b/dpdk/app/test/test_timer.c +@@ -432,7 +432,7 @@ timer_basic_cb(struct rte_timer *tim, void *arg) + return; + } + +- /* Explicitelly stop timer 0. Once stop() called, we can even ++ /* Explicitly stop timer 0. 
Once stop() called, we can even + * erase the content of the structure: it is not referenced + * anymore by any code (in case of dynamic structure, it can + * be freed) */ +diff --git a/dpdk/app/test/test_timer_secondary.c b/dpdk/app/test/test_timer_secondary.c +index 1e8f1d4549..16a9f1878b 100644 +--- a/dpdk/app/test/test_timer_secondary.c ++++ b/dpdk/app/test/test_timer_secondary.c +@@ -125,9 +125,9 @@ test_timer_secondary(void) + + mz = rte_memzone_reserve(TEST_INFO_MZ_NAME, sizeof(*test_info), + SOCKET_ID_ANY, 0); +- test_info = mz->addr; +- TEST_ASSERT_NOT_NULL(test_info, "Couldn't allocate memory for " ++ TEST_ASSERT_NOT_NULL(mz, "Couldn't allocate memory for " + "test data"); ++ test_info = mz->addr; + + test_info->tim_mempool = rte_mempool_create("test_timer_mp", + NUM_TIMERS, sizeof(struct rte_timer), 0, 0, +@@ -171,9 +171,9 @@ test_timer_secondary(void) + int i; + + mz = rte_memzone_lookup(TEST_INFO_MZ_NAME); +- test_info = mz->addr; +- TEST_ASSERT_NOT_NULL(test_info, "Couldn't lookup memzone for " ++ TEST_ASSERT_NOT_NULL(mz, "Couldn't lookup memzone for " + "test info"); ++ test_info = mz->addr; + + for (i = 0; i < NUM_TIMERS; i++) { + rte_mempool_get(test_info->tim_mempool, (void **)&tim); +diff --git a/dpdk/app/test/test_trace_perf.c b/dpdk/app/test/test_trace_perf.c +index e1ad8e6f55..46ae7d8074 100644 +--- a/dpdk/app/test/test_trace_perf.c ++++ b/dpdk/app/test/test_trace_perf.c +@@ -79,7 +79,6 @@ signal_workers_to_finish(struct test_data *data) + + for (workers = 0; workers < data->nb_workers; workers++) { + data->ldata[workers].done = 1; +- rte_smp_wmb(); + } + } + +@@ -102,7 +101,6 @@ worker_fn_##func(void *arg) \ + { \ + struct lcore_data *ldata = arg; \ + ldata->started = 1; \ +- rte_smp_wmb(); \ + __worker_##func(ldata); \ + return 0; \ + } +@@ -137,11 +135,12 @@ run_test(const char *str, lcore_function_t f, struct test_data *data, size_t sz) + + wait_till_workers_are_ready(data); + rte_delay_ms(100); /* Wait for some time to accumulate the stats */ +- measure_perf(str, data); + signal_workers_to_finish(data); + + RTE_LCORE_FOREACH_WORKER(id) + rte_eal_wait_lcore(id); ++ ++ measure_perf(str, data); + } + + static int +diff --git a/dpdk/buildtools/binutils-avx512-check.sh b/dpdk/buildtools/binutils-avx512-check.sh +index a7e068140f..2a833b64b7 100755 +--- a/dpdk/buildtools/binutils-avx512-check.sh ++++ b/dpdk/buildtools/binutils-avx512-check.sh +@@ -3,7 +3,7 @@ + # Copyright(c) 2020 Intel Corporation + + AS=${AS:-as} +-OBJFILE=$(mktemp -t dpdk.binutils-check.XXXXXX.o) ++OBJFILE=$(mktemp -t dpdk.binutils-check.XXXXXX) + trap 'rm -f "$OBJFILE"' EXIT + # from https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90028 + GATHER_PARAMS='0x8(,%ymm1,1),%ymm0{%k2}' +diff --git a/dpdk/buildtools/call-sphinx-build.py b/dpdk/buildtools/call-sphinx-build.py +index 26b199220a..39a60d09fa 100755 +--- a/dpdk/buildtools/call-sphinx-build.py ++++ b/dpdk/buildtools/call-sphinx-build.py +@@ -7,7 +7,7 @@ + import os + from os.path import join + from subprocess import run, PIPE, STDOUT +-from distutils.version import StrictVersion ++from packaging.version import Version + + # assign parameters to variables + (sphinx, version, src, dst, *extra_args) = sys.argv[1:] +@@ -19,7 +19,7 @@ + ver = run([sphinx, '--version'], stdout=PIPE, + stderr=STDOUT).stdout.decode().split()[-1] + sphinx_cmd = [sphinx] + extra_args +-if StrictVersion(ver) >= StrictVersion('1.7'): ++if Version(ver) >= Version('1.7'): + sphinx_cmd += ['-j', 'auto'] + + # find all the files sphinx will process so we can write them as 
dependencies +diff --git a/dpdk/buildtools/check-symbols.sh b/dpdk/buildtools/check-symbols.sh +index e407553a34..83b3a0182f 100755 +--- a/dpdk/buildtools/check-symbols.sh ++++ b/dpdk/buildtools/check-symbols.sh +@@ -18,7 +18,7 @@ then + exit 0 + fi + +-DUMPFILE=$(mktemp -t dpdk.${0##*/}.XXX.objdump) ++DUMPFILE=$(mktemp -t dpdk.${0##*/}.objdump.XXXXXX) + trap 'rm -f "$DUMPFILE"' EXIT + objdump -t $OBJFILE >$DUMPFILE + +diff --git a/dpdk/buildtools/list-dir-globs.py b/dpdk/buildtools/list-dir-globs.py +index 80b5e801f2..f5f7d73485 100755 +--- a/dpdk/buildtools/list-dir-globs.py ++++ b/dpdk/buildtools/list-dir-globs.py +@@ -16,4 +16,4 @@ + for path in sys.argv[1].split(','): + for p in iglob(os.path.join(root, path)): + if os.path.isdir(p): +- print(os.path.relpath(p)) ++ print(os.path.relpath(p).replace('\\', '/')) +diff --git a/dpdk/buildtools/map-list-symbol.sh b/dpdk/buildtools/map-list-symbol.sh +index 5509b4a7fa..3bf9bd66f8 100755 +--- a/dpdk/buildtools/map-list-symbol.sh ++++ b/dpdk/buildtools/map-list-symbol.sh +@@ -44,7 +44,7 @@ for file in $@; do + ret = 1; + } + } +- /^.*{/ { ++ /^.*\{/ { + if ("'$section'" == "all" || $1 == "'$section'") { + current_section = $1; + } +diff --git a/dpdk/buildtools/meson.build b/dpdk/buildtools/meson.build +index 04808dabc1..cfad51f52a 100644 +--- a/dpdk/buildtools/meson.build ++++ b/dpdk/buildtools/meson.build +@@ -3,17 +3,17 @@ + + pkgconf = find_program('pkg-config', 'pkgconf', required: false) + pmdinfo = find_program('gen-pmdinfo-cfile.sh') +-list_dir_globs = find_program('list-dir-globs.py') + check_symbols = find_program('check-symbols.sh') + ldflags_ibverbs_static = find_program('options-ibverbs-static.sh') + binutils_avx512_check = find_program('binutils-avx512-check.sh') + +-# set up map-to-win script using python, either built-in or external + python3 = import('python').find_installation(required: false) + if python3.found() + py3 = [python3] + else + py3 = ['meson', 'runpython'] + endif ++echo = py3 + ['-c', 'import sys; print(*sys.argv[1:])'] ++list_dir_globs = py3 + files('list-dir-globs.py') + map_to_win_cmd = py3 + files('map_to_win.py') + sphinx_wrapper = py3 + files('call-sphinx-build.py') +diff --git a/dpdk/buildtools/pmdinfogen/pmdinfogen.c b/dpdk/buildtools/pmdinfogen/pmdinfogen.c +index a68d1ea999..f7133267be 100644 +--- a/dpdk/buildtools/pmdinfogen/pmdinfogen.c ++++ b/dpdk/buildtools/pmdinfogen/pmdinfogen.c +@@ -428,7 +428,7 @@ static void output_pmd_info_string(struct elf_info *info, char *outfile) + + int main(int argc, char **argv) + { +- struct elf_info info = {0}; ++ struct elf_info info; + int rc = 1; + + if (argc < 3) { +@@ -437,6 +437,7 @@ int main(int argc, char **argv) + basename(argv[0])); + exit(127); + } ++ memset(&info, 0, sizeof(struct elf_info)); + use_stdin = !strcmp(argv[1], "-"); + use_stdout = !strcmp(argv[2], "-"); + parse_elf(&info, argv[1]); +diff --git a/dpdk/buildtools/symlink-drivers-solibs.py b/dpdk/buildtools/symlink-drivers-solibs.py +new file mode 100644 +index 0000000000..9c999508a9 +--- /dev/null ++++ b/dpdk/buildtools/symlink-drivers-solibs.py +@@ -0,0 +1,49 @@ ++#!/usr/bin/env python3 ++# SPDX-License-Identifier: BSD-3-Clause ++# Copyright(c) 2021 Intel Corporation ++ ++import os ++import sys ++import glob ++import shutil ++ ++# post-install script for meson/ninja builds to symlink the PMDs stored in ++# $libdir/dpdk/pmds-*/ to $libdir. This is needed as some PMDs depend on ++# others, e.g. PCI device PMDs depending on the PCI bus driver. 
++ ++# parameters to script are paths relative to install prefix: ++# 1. directory for installed regular libs e.g. lib64 ++# 2. subdirectory of libdir where the PMDs are ++# 3. directory for installed regular binaries e.g. bin ++ ++os.chdir(os.environ['MESON_INSTALL_DESTDIR_PREFIX']) ++ ++lib_dir = sys.argv[1] ++pmd_subdir = sys.argv[2] ++bin_dir = sys.argv[3] ++pmd_dir = os.path.join(lib_dir, pmd_subdir) ++ ++# copy Windows PMDs to avoid any issues with symlinks since the ++# build could be a cross-compilation under WSL, Msys or Cygnus. ++# the filenames are dependent upon the specific toolchain in use. ++ ++def copy_pmd_files(pattern, to_dir): ++ for file in glob.glob(os.path.join(pmd_dir, pattern)): ++ to = os.path.join(to_dir, os.path.basename(file)) ++ shutil.copy2(file, to) ++ print(to + ' -> ' + file) ++ ++copy_pmd_files('*rte_*.dll', bin_dir) ++copy_pmd_files('*rte_*.pdb', bin_dir) ++copy_pmd_files('*rte_*.lib', lib_dir) ++copy_pmd_files('*rte_*.dll.a', lib_dir) ++ ++# symlink shared objects ++ ++os.chdir(lib_dir) ++for file in glob.glob(os.path.join(pmd_subdir, 'librte_*.so*')): ++ to = os.path.basename(file) ++ if os.path.exists(to): ++ os.remove(to) ++ os.symlink(file, to) ++ print(to + ' -> ' + file) +diff --git a/dpdk/config/arm/arm64_armada_linux_gcc b/dpdk/config/arm/arm64_armada_linux_gcc +index fa40c0398f..d78c99e8e8 100644 +--- a/dpdk/config/arm/arm64_armada_linux_gcc ++++ b/dpdk/config/arm/arm64_armada_linux_gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'aarch64-linux-gnu-gcc' +-cpp = 'aarch64-linux-gnu-cpp' ++cpp = 'aarch64-linux-gnu-g++' + ar = 'aarch64-linux-gnu-ar' + as = 'aarch64-linux-gnu-as' + strip = 'aarch64-linux-gnu-strip' +diff --git a/dpdk/config/arm/arm64_armv8_linux_gcc b/dpdk/config/arm/arm64_armv8_linux_gcc +index 88f0ff9dae..057d70bbdd 100644 +--- a/dpdk/config/arm/arm64_armv8_linux_gcc ++++ b/dpdk/config/arm/arm64_armv8_linux_gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'aarch64-linux-gnu-gcc' +-cpp = 'aarch64-linux-gnu-cpp' ++cpp = 'aarch64-linux-gnu-g++' + ar = 'aarch64-linux-gnu-gcc-ar' + strip = 'aarch64-linux-gnu-strip' + pkgconfig = 'aarch64-linux-gnu-pkg-config' +diff --git a/dpdk/config/arm/arm64_bluefield_linux_gcc b/dpdk/config/arm/arm64_bluefield_linux_gcc +index 86797d23cd..616e633495 100644 +--- a/dpdk/config/arm/arm64_bluefield_linux_gcc ++++ b/dpdk/config/arm/arm64_bluefield_linux_gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'aarch64-linux-gnu-gcc' +-cpp = 'aarch64-linux-gnu-cpp' ++cpp = 'aarch64-linux-gnu-g++' + ar = 'aarch64-linux-gnu-gcc-ar' + strip = 'aarch64-linux-gnu-strip' + pkgconfig = 'aarch64-linux-gnu-pkg-config' +diff --git a/dpdk/config/arm/arm64_dpaa_linux_gcc b/dpdk/config/arm/arm64_dpaa_linux_gcc +index 1a46821543..0108bb952a 100644 +--- a/dpdk/config/arm/arm64_dpaa_linux_gcc ++++ b/dpdk/config/arm/arm64_dpaa_linux_gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'aarch64-linux-gnu-gcc' +-cpp = 'aarch64-linux-gnu-cpp' ++cpp = 'aarch64-linux-gnu-g++' + ar = 'aarch64-linux-gnu-ar' + as = 'aarch64-linux-gnu-as' + strip = 'aarch64-linux-gnu-strip' +diff --git a/dpdk/config/arm/arm64_emag_linux_gcc b/dpdk/config/arm/arm64_emag_linux_gcc +index 8edcd3e976..3bb5134224 100644 +--- a/dpdk/config/arm/arm64_emag_linux_gcc ++++ b/dpdk/config/arm/arm64_emag_linux_gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'aarch64-linux-gnu-gcc' +-cpp = 'aarch64-linux-gnu-cpp' ++cpp = 'aarch64-linux-gnu-g++' + ar = 'aarch64-linux-gnu-gcc-ar' + strip = 'aarch64-linux-gnu-strip' + pkgconfig = 'aarch64-linux-gnu-pkg-config' +diff --git a/dpdk/config/arm/arm64_graviton2_linux_gcc 
b/dpdk/config/arm/arm64_graviton2_linux_gcc +index 022e063039..421d06c77f 100644 +--- a/dpdk/config/arm/arm64_graviton2_linux_gcc ++++ b/dpdk/config/arm/arm64_graviton2_linux_gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'aarch64-linux-gnu-gcc' +-cpp = 'aarch64-linux-gnu-cpp' ++cpp = 'aarch64-linux-gnu-g++' + ar = 'aarch64-linux-gnu-gcc-ar' + strip = 'aarch64-linux-gnu-strip' + pkgconfig = 'aarch64-linux-gnu-pkg-config' +diff --git a/dpdk/config/arm/arm64_n1sdp_linux_gcc b/dpdk/config/arm/arm64_n1sdp_linux_gcc +index 022e063039..421d06c77f 100644 +--- a/dpdk/config/arm/arm64_n1sdp_linux_gcc ++++ b/dpdk/config/arm/arm64_n1sdp_linux_gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'aarch64-linux-gnu-gcc' +-cpp = 'aarch64-linux-gnu-cpp' ++cpp = 'aarch64-linux-gnu-g++' + ar = 'aarch64-linux-gnu-gcc-ar' + strip = 'aarch64-linux-gnu-strip' + pkgconfig = 'aarch64-linux-gnu-pkg-config' +diff --git a/dpdk/config/arm/arm64_octeontx2_linux_gcc b/dpdk/config/arm/arm64_octeontx2_linux_gcc +index 365bd7cbdd..0d7a66c97e 100644 +--- a/dpdk/config/arm/arm64_octeontx2_linux_gcc ++++ b/dpdk/config/arm/arm64_octeontx2_linux_gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'aarch64-linux-gnu-gcc' +-cpp = 'aarch64-linux-gnu-cpp' ++cpp = 'aarch64-linux-gnu-g++' + ar = 'aarch64-linux-gnu-gcc-ar' + strip = 'aarch64-linux-gnu-strip' + pkgconfig = 'aarch64-linux-gnu-pkg-config' +diff --git a/dpdk/config/arm/arm64_stingray_linux_gcc b/dpdk/config/arm/arm64_stingray_linux_gcc +index 86797d23cd..616e633495 100644 +--- a/dpdk/config/arm/arm64_stingray_linux_gcc ++++ b/dpdk/config/arm/arm64_stingray_linux_gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'aarch64-linux-gnu-gcc' +-cpp = 'aarch64-linux-gnu-cpp' ++cpp = 'aarch64-linux-gnu-g++' + ar = 'aarch64-linux-gnu-gcc-ar' + strip = 'aarch64-linux-gnu-strip' + pkgconfig = 'aarch64-linux-gnu-pkg-config' +diff --git a/dpdk/config/arm/arm64_thunderx2_linux_gcc b/dpdk/config/arm/arm64_thunderx2_linux_gcc +index 2b41acc615..24346ffe71 100644 +--- a/dpdk/config/arm/arm64_thunderx2_linux_gcc ++++ b/dpdk/config/arm/arm64_thunderx2_linux_gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'aarch64-linux-gnu-gcc' +-cpp = 'aarch64-linux-gnu-cpp' ++cpp = 'aarch64-linux-gnu-g++' + ar = 'aarch64-linux-gnu-gcc-ar' + strip = 'aarch64-linux-gnu-strip' + pkgconfig = 'aarch64-linux-gnu-pkg-config' +diff --git a/dpdk/config/arm/arm64_thunderx_linux_gcc b/dpdk/config/arm/arm64_thunderx_linux_gcc +index 6572ab615d..cbf60bac73 100644 +--- a/dpdk/config/arm/arm64_thunderx_linux_gcc ++++ b/dpdk/config/arm/arm64_thunderx_linux_gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'aarch64-linux-gnu-gcc' +-cpp = 'aarch64-linux-gnu-cpp' ++cpp = 'aarch64-linux-gnu-g++' + ar = 'aarch64-linux-gnu-gcc-ar' + strip = 'aarch64-linux-gnu-strip' + pkgconfig = 'aarch64-linux-gnu-pkg-config' +diff --git a/dpdk/config/arm/meson.build b/dpdk/config/arm/meson.build +index 42b4e43c74..bb72f40c65 100644 +--- a/dpdk/config/arm/meson.build ++++ b/dpdk/config/arm/meson.build +@@ -137,12 +137,14 @@ dpdk_conf.set('RTE_FORCE_INTRINSICS', 1) + if dpdk_conf.get('RTE_ARCH_32') + dpdk_conf.set('RTE_CACHE_LINE_SIZE', 64) + dpdk_conf.set('RTE_ARCH_ARMv7', 1) ++ dpdk_conf.set('RTE_ARCH', 'armv7') + # the minimum architecture supported, armv7-a, needs the following, + # mk/machine/armv7a/rte.vars.mk sets it too + machine_args += '-mfpu=neon' + else + dpdk_conf.set('RTE_CACHE_LINE_SIZE', 128) + dpdk_conf.set('RTE_ARCH_ARM64', 1) ++ dpdk_conf.set('RTE_ARCH', 'armv8') + + machine = [] + cmd_generic = ['generic', '', '', 'default', ''] +@@ -156,7 +158,7 @@ else + # 'Primary Part number', 
'Revision'] + detect_vendor = find_program(join_paths( + meson.current_source_dir(), 'armv8_machine.py')) +- cmd = run_command(detect_vendor.path()) ++ cmd = run_command(detect_vendor.path(), check: false) + if cmd.returncode() == 0 + cmd_output = cmd.stdout().to_lower().strip().split(' ') + endif +diff --git a/dpdk/config/meson.build b/dpdk/config/meson.build +index 2f150de3b8..bd50f189e0 100644 +--- a/dpdk/config/meson.build ++++ b/dpdk/config/meson.build +@@ -23,7 +23,7 @@ is_ms_linker = is_windows and (cc.get_id() == 'clang') + pver = meson.project_version().split('.') + major_version = '@0@.@1@'.format(pver.get(0), pver.get(1)) + abi_version = run_command(find_program('cat', 'more'), +- abi_version_file).stdout().strip() ++ abi_version_file, check: true).stdout().strip() + + # Libraries have the abi_version as the filename extension + # and have the soname be all but the final part of the abi_version. +@@ -61,6 +61,10 @@ if not is_windows + meson.add_install_script('../buildtools/symlink-drivers-solibs.sh', + get_option('libdir'), + pmd_subdir_opt) ++elif meson.version().version_compare('>=0.55.0') ++ # 0.55.0 is required to use external program with add_install_script ++ meson.add_install_script(py3, '../buildtools/symlink-drivers-solibs.py', ++ get_option('libdir'), pmd_subdir_opt, get_option('bindir')) + endif + + # set the machine type and cflags for it +@@ -125,11 +129,8 @@ if cc.find_library('m', required : false).found() + dpdk_extra_ldflags += '-lm' + endif + +-# for linux link against dl, for bsd execinfo + if is_linux + link_lib = 'dl' +-elif is_freebsd +- link_lib = 'execinfo' + else + link_lib = '' + endif +@@ -159,6 +160,12 @@ if fdt_dep.found() and cc.has_header('fdt.h') + dpdk_extra_ldflags += '-lfdt' + endif + ++libexecinfo = cc.find_library('libexecinfo', required: false) ++if libexecinfo.found() and cc.has_header('execinfo.h') ++ add_project_link_arguments('-lexecinfo', language: 'c') ++ dpdk_extra_ldflags += '-lexecinfo' ++endif ++ + # check for libbsd + libbsd = dependency('libbsd', required: false, method: 'pkg-config') + if libbsd.found() +@@ -187,10 +194,9 @@ endif + add_project_arguments('-include', 'rte_config.h', language: 'c') + + # enable extra warnings and disable any unwanted warnings ++# -Wall is added by default at warning level 1, and -Wextra ++# at warning level 2 (DPDK default) + warning_flags = [ +- # -Wall is added by meson by default, so add -Wextra only +- '-Wextra', +- + # additional warnings in alphabetical order + '-Wcast-qual', + '-Wdeprecated', +@@ -271,7 +277,8 @@ if is_freebsd + endif + + if is_windows +- # VirtualAlloc2() is available since Windows 10 / Server 2016. ++ # VirtualAlloc2() is available since Windows 10 / Server 2019. ++ # It's essential for EAL, so we don't support older versions. + add_project_arguments('-D_WIN32_WINNT=0x0A00', language: 'c') + + # Use MinGW-w64 stdio, because DPDK assumes ANSI-compliant formatting. +@@ -279,16 +286,11 @@ if is_windows + add_project_arguments('-D__USE_MINGW_ANSI_STDIO', language: 'c') + endif + +- add_project_link_arguments('-lws2_32', language: 'c') +- +- # Contrary to docs, VirtualAlloc2() is exported by mincore.lib +- # in Windows SDK, while MinGW exports it by advapi32.a. 
+- if is_ms_linker +- add_project_link_arguments('-lmincore', language: 'c') ++ # Disable secure CRT deprecated warnings for clang ++ if cc.get_id() == 'clang' ++ add_project_arguments('-D_CRT_SECURE_NO_WARNINGS', language: 'c') + endif + +- add_project_link_arguments('-ladvapi32', '-lsetupapi', language: 'c') +- add_project_link_arguments('-ldbghelp', language: 'c') + endif + + if get_option('b_lto') +diff --git a/dpdk/config/ppc/meson.build b/dpdk/config/ppc/meson.build +index 0d8da87e6f..dc4a3d8c13 100644 +--- a/dpdk/config/ppc/meson.build ++++ b/dpdk/config/ppc/meson.build +@@ -1,5 +1,6 @@ + # SPDX-License-Identifier: BSD-3-Clause + # Copyright(c) 2018 Luca Boccassi ++# Copyright(c) 2021 IBM Corporation + + if not dpdk_conf.get('RTE_ARCH_64') + error('Only 64-bit compiles are supported for this platform type') +@@ -17,7 +18,25 @@ if not power9_supported + dpdk_conf.set('RTE_MACHINE','power8') + endif + +-# overrides specific to ppc64 +-dpdk_conf.set('RTE_MAX_LCORE', 1536) +-dpdk_conf.set('RTE_MAX_NUMA_NODES', 32) ++# Suppress the gcc warning "note: the layout of aggregates containing ++# vectors with 4-byte alignment has changed in GCC 5". ++if (cc.get_id() == 'gcc' and cc.version().version_compare('>=10.0') and ++ cc.version().version_compare('<12.0') and cc.has_argument('-Wno-psabi')) ++ add_project_arguments('-Wno-psabi', language: 'c') ++endif ++ ++# Certain POWER9 systems can scale as high as 1536 LCORES, but setting such a ++# high value can waste memory, cause timeouts in time limited autotests, and is ++# unlikely to be used in many production situations. Similarly, keeping the ++# default 64 LCORES seems too small as most POWER9 dual socket systems will have ++# at least 128 LCORES available. Set RTE_MAX_LCORE to 128 for POWER systems as ++# a compromise. ++dpdk_conf.set('RTE_MAX_LCORE', 128) ++ ++# POWER systems do not allocate NUMA nodes sequentially. A dual socket system ++# will have CPUs associated with NUMA nodes 0 & 8, so ensure that the second ++# NUMA node will be supported by setting RTE_MAX_NUMA_NODES to 16. High end ++# systems can scale even higher with as many as 32 NUMA nodes. 
++dpdk_conf.set('RTE_MAX_NUMA_NODES', 16) ++ + dpdk_conf.set('RTE_CACHE_LINE_SIZE', 128) +diff --git a/dpdk/config/ppc/ppc64le-power8-linux-gcc b/dpdk/config/ppc/ppc64le-power8-linux-gcc +index 51f7ceebf3..784c33df9e 100644 +--- a/dpdk/config/ppc/ppc64le-power8-linux-gcc ++++ b/dpdk/config/ppc/ppc64le-power8-linux-gcc +@@ -1,6 +1,6 @@ + [binaries] + c = 'powerpc64le-linux-gcc' +-cpp = 'powerpc64le-linux-cpp' ++cpp = 'powerpc64le-linux-g++' + ar = 'powerpc64le-linux-gcc-ar' + strip = 'powerpc64le-linux-strip' + +diff --git a/dpdk/config/x86/meson.build b/dpdk/config/x86/meson.build +index 31bfa63b1c..918a29008b 100644 +--- a/dpdk/config/x86/meson.build ++++ b/dpdk/config/x86/meson.build +@@ -2,9 +2,10 @@ + # Copyright(c) 2017-2020 Intel Corporation + + # get binutils version for the workaround of Bug 97 +-if not is_windows +- binutils_ok = run_command(binutils_avx512_check) +- if binutils_ok.returncode() != 0 and cc.has_argument('-mno-avx512f') ++binutils_ok = true ++if not is_windows and (is_linux or cc.get_id() == 'gcc') ++ binutils_ok = run_command(binutils_avx512_check, check: false).returncode() == 0 ++ if not binutils_ok and cc.has_argument('-mno-avx512f') + machine_args += '-mno-avx512f' + warning('Binutils error with AVX512 assembly, disabling AVX512 support') + endif +diff --git a/dpdk/devtools/check-forbidden-tokens.awk b/dpdk/devtools/check-forbidden-tokens.awk +index 61ba707c9b..026844141c 100755 +--- a/dpdk/devtools/check-forbidden-tokens.awk ++++ b/dpdk/devtools/check-forbidden-tokens.awk +@@ -20,6 +20,9 @@ BEGIN { + # state machine assumes the comments structure is enforced by + # checkpatches.pl + (in_file) { ++ if ($0 ~ "^@@") { ++ in_comment = 0 ++ } + # comment start + if (index($0,comment_start) > 0) { + in_comment = 1 +diff --git a/dpdk/devtools/check-git-log.sh b/dpdk/devtools/check-git-log.sh +index 9988bf863d..885d444b3d 100755 +--- a/dpdk/devtools/check-git-log.sh ++++ b/dpdk/devtools/check-git-log.sh +@@ -111,12 +111,12 @@ IFS=' + ' + words="$selfdir/words-case.txt" + for word in $(cat $words); do +- bad=$(echo "$headlines" | grep -iw $word | grep -v $word) ++ bad=$(echo "$headlines" | grep -iw $word | grep -vw $word) + if [ "$word" = "Tx" ]; then + bad=$(echo $bad | grep -v 'OCTEON\ TX') + fi + for bad_line in $bad; do +- bad_word=$(echo $bad_line | cut -d":" -f2 | grep -io $word) ++ bad_word=$(echo $bad_line | cut -d":" -f2 | grep -iwo $word) + [ -z "$bad_word" ] || { printf "Wrong headline case:\n\ + \"$bad_line\": $bad_word --> $word\n" && failure=true;} + done +diff --git a/dpdk/devtools/check-maintainers.sh b/dpdk/devtools/check-maintainers.sh +index df3f740b6e..71697bb352 100755 +--- a/dpdk/devtools/check-maintainers.sh ++++ b/dpdk/devtools/check-maintainers.sh +@@ -15,10 +15,10 @@ files () # [ ...] + if [ -z "$1" ] ; then + return + fi +- if [ -d .git ] ; then ++ if [ -r .git ] ; then + git ls-files "$1" + else +- find "$1" -type f | ++ find $1 -type f | + sed 's,^\./,,' + fi | + # if not ended by / +diff --git a/dpdk/devtools/check-symbol-change.sh b/dpdk/devtools/check-symbol-change.sh +index 8fcd0ce1a1..8992214ac8 100755 +--- a/dpdk/devtools/check-symbol-change.sh ++++ b/dpdk/devtools/check-symbol-change.sh +@@ -25,7 +25,7 @@ build_map_changes() + + # Triggering this rule, which starts a line and ends it + # with a { identifies a versioned section. The section name is +- # the rest of the line with the + and { symbols remvoed. ++ # the rest of the line with the + and { symbols removed. 
+ # Triggering this rule sets in_sec to 1, which actives the + # symbol rule below + /^.*{/ { +@@ -35,7 +35,7 @@ build_map_changes() + } + } + +- # This rule idenfies the end of a section, and disables the ++ # This rule identifies the end of a section, and disables the + # symbol rule + /.*}/ {in_sec=0} + +@@ -100,7 +100,7 @@ check_for_rule_violations() + # Just inform the user of this occurrence, but + # don't flag it as an error + echo -n "INFO: symbol $symname is added but " +- echo -n "patch has insuficient context " ++ echo -n "patch has insufficient context " + echo -n "to determine the section name " + echo -n "please ensure the version is " + echo "EXPERIMENTAL" +diff --git a/dpdk/devtools/check-symbol-maps.sh b/dpdk/devtools/check-symbol-maps.sh +index 0e097eed89..e07682a479 100755 +--- a/dpdk/devtools/check-symbol-maps.sh ++++ b/dpdk/devtools/check-symbol-maps.sh +@@ -20,8 +20,7 @@ find_orphan_symbols () + else + symsrc=$sym + fi +- if ! grep -q -r --exclude=$(basename $map) \ +- -w $symsrc $(dirname $map) ; then ++ if [ -z "$(grep -rlw $symsrc $(dirname $map) | grep -v $map)" ] ; then + echo "$map: $sym" + fi + done +diff --git a/dpdk/devtools/checkpatches.sh b/dpdk/devtools/checkpatches.sh +index 78a408ef98..db4c7d8301 100755 +--- a/dpdk/devtools/checkpatches.sh ++++ b/dpdk/devtools/checkpatches.sh +@@ -118,8 +118,7 @@ check_forbidden_additions() { # + -f $(dirname $(readlink -f $0))/check-forbidden-tokens.awk \ + "$1" || res=1 + +- # svg figures must be included with wildcard extension +- # because of png conversion for pdf docs ++ # SVG must be included with wildcard extension to allow conversion + awk -v FOLDERS='doc' \ + -v EXPRESSIONS='::[[:space:]]*[^[:space:]]*\\.svg' \ + -v RET_ON_FAIL=1 \ +diff --git a/dpdk/devtools/test-null.sh b/dpdk/devtools/test-null.sh +index 4ba57a6829..6cd34f64f1 100755 +--- a/dpdk/devtools/test-null.sh ++++ b/dpdk/devtools/test-null.sh +@@ -27,6 +27,7 @@ else + fi + + (sleep 1 && echo stop) | +-$testpmd -c $coremask --no-huge -m 20 \ ++# testpmd only needs 20M, make it x2 (default number of cores) for NUMA systems ++$testpmd -c $coremask --no-huge -m 40 \ + $libs -a 0:0.0 --vdev net_null1 --vdev net_null2 $eal_options -- \ + --no-mlockall --total-num-mbufs=2048 $testpmd_options -ia +diff --git a/dpdk/doc/api/doxy-api-index.md b/dpdk/doc/api/doxy-api-index.md +index 748514e243..0661c5c1f6 100644 +--- a/dpdk/doc/api/doxy-api-index.md ++++ b/dpdk/doc/api/doxy-api-index.md +@@ -9,212 +9,212 @@ API + The public API headers are grouped by topics: + + - **device**: +- [dev] (@ref rte_dev.h), +- [ethdev] (@ref rte_ethdev.h), +- [ethctrl] (@ref rte_eth_ctrl.h), +- [rte_flow] (@ref rte_flow.h), +- [rte_tm] (@ref rte_tm.h), +- [rte_mtr] (@ref rte_mtr.h), +- [bbdev] (@ref rte_bbdev.h), +- [cryptodev] (@ref rte_cryptodev.h), +- [security] (@ref rte_security.h), +- [compressdev] (@ref rte_compressdev.h), +- [compress] (@ref rte_comp.h), +- [regexdev] (@ref rte_regexdev.h), +- [eventdev] (@ref rte_eventdev.h), +- [event_eth_rx_adapter] (@ref rte_event_eth_rx_adapter.h), +- [event_eth_tx_adapter] (@ref rte_event_eth_tx_adapter.h), +- [event_timer_adapter] (@ref rte_event_timer_adapter.h), +- [event_crypto_adapter] (@ref rte_event_crypto_adapter.h), +- [rawdev] (@ref rte_rawdev.h), +- [metrics] (@ref rte_metrics.h), +- [bitrate] (@ref rte_bitrate.h), +- [latency] (@ref rte_latencystats.h), +- [devargs] (@ref rte_devargs.h), +- [PCI] (@ref rte_pci.h), +- [vdev] (@ref rte_bus_vdev.h), +- [vfio] (@ref rte_vfio.h) ++ [dev](@ref rte_dev.h), ++ [ethdev](@ref 
rte_ethdev.h), ++ [ethctrl](@ref rte_eth_ctrl.h), ++ [rte_flow](@ref rte_flow.h), ++ [rte_tm](@ref rte_tm.h), ++ [rte_mtr](@ref rte_mtr.h), ++ [bbdev](@ref rte_bbdev.h), ++ [cryptodev](@ref rte_cryptodev.h), ++ [security](@ref rte_security.h), ++ [compressdev](@ref rte_compressdev.h), ++ [compress](@ref rte_comp.h), ++ [regexdev](@ref rte_regexdev.h), ++ [eventdev](@ref rte_eventdev.h), ++ [event_eth_rx_adapter](@ref rte_event_eth_rx_adapter.h), ++ [event_eth_tx_adapter](@ref rte_event_eth_tx_adapter.h), ++ [event_timer_adapter](@ref rte_event_timer_adapter.h), ++ [event_crypto_adapter](@ref rte_event_crypto_adapter.h), ++ [rawdev](@ref rte_rawdev.h), ++ [metrics](@ref rte_metrics.h), ++ [bitrate](@ref rte_bitrate.h), ++ [latency](@ref rte_latencystats.h), ++ [devargs](@ref rte_devargs.h), ++ [PCI](@ref rte_pci.h), ++ [vdev](@ref rte_bus_vdev.h), ++ [vfio](@ref rte_vfio.h) + + - **device specific**: +- [softnic] (@ref rte_eth_softnic.h), +- [bond] (@ref rte_eth_bond.h), +- [vhost] (@ref rte_vhost.h), +- [vdpa] (@ref rte_vdpa.h), +- [KNI] (@ref rte_kni.h), ++ [softnic](@ref rte_eth_softnic.h), ++ [bond](@ref rte_eth_bond.h), ++ [vhost](@ref rte_vhost.h), ++ [vdpa](@ref rte_vdpa.h), ++ [KNI](@ref rte_kni.h), + [ark] (@ref rte_pmd_ark.h), +- [ixgbe] (@ref rte_pmd_ixgbe.h), +- [i40e] (@ref rte_pmd_i40e.h), +- [ice] (@ref rte_pmd_ice.h), +- [iavf] (@ref rte_pmd_iavf.h), +- [ioat] (@ref rte_ioat_rawdev.h), +- [bnxt] (@ref rte_pmd_bnxt.h), +- [dpaa] (@ref rte_pmd_dpaa.h), +- [dpaa2] (@ref rte_pmd_dpaa2.h), +- [mlx5] (@ref rte_pmd_mlx5.h), +- [dpaa2_mempool] (@ref rte_dpaa2_mempool.h), +- [dpaa2_cmdif] (@ref rte_pmd_dpaa2_cmdif.h), +- [dpaa2_qdma] (@ref rte_pmd_dpaa2_qdma.h), +- [crypto_scheduler] (@ref rte_cryptodev_scheduler.h), +- [dlb] (@ref rte_pmd_dlb.h), +- [dlb2] (@ref rte_pmd_dlb2.h) ++ [ixgbe](@ref rte_pmd_ixgbe.h), ++ [i40e](@ref rte_pmd_i40e.h), ++ [ice](@ref rte_pmd_ice.h), ++ [iavf](@ref rte_pmd_iavf.h), ++ [ioat](@ref rte_ioat_rawdev.h), ++ [bnxt](@ref rte_pmd_bnxt.h), ++ [dpaa](@ref rte_pmd_dpaa.h), ++ [dpaa2](@ref rte_pmd_dpaa2.h), ++ [mlx5](@ref rte_pmd_mlx5.h), ++ [dpaa2_mempool](@ref rte_dpaa2_mempool.h), ++ [dpaa2_cmdif](@ref rte_pmd_dpaa2_cmdif.h), ++ [dpaa2_qdma](@ref rte_pmd_dpaa2_qdma.h), ++ [crypto_scheduler](@ref rte_cryptodev_scheduler.h), ++ [dlb](@ref rte_pmd_dlb.h), ++ [dlb2](@ref rte_pmd_dlb2.h) + + - **memory**: +- [memseg] (@ref rte_memory.h), +- [memzone] (@ref rte_memzone.h), +- [mempool] (@ref rte_mempool.h), +- [malloc] (@ref rte_malloc.h), +- [memcpy] (@ref rte_memcpy.h) ++ [memseg](@ref rte_memory.h), ++ [memzone](@ref rte_memzone.h), ++ [mempool](@ref rte_mempool.h), ++ [malloc](@ref rte_malloc.h), ++ [memcpy](@ref rte_memcpy.h) + + - **timers**: +- [cycles] (@ref rte_cycles.h), +- [timer] (@ref rte_timer.h), +- [alarm] (@ref rte_alarm.h) ++ [cycles](@ref rte_cycles.h), ++ [timer](@ref rte_timer.h), ++ [alarm](@ref rte_alarm.h) + + - **locks**: +- [atomic] (@ref rte_atomic.h), +- [mcslock] (@ref rte_mcslock.h), +- [rwlock] (@ref rte_rwlock.h), +- [spinlock] (@ref rte_spinlock.h), +- [ticketlock] (@ref rte_ticketlock.h), +- [RCU] (@ref rte_rcu_qsbr.h) ++ [atomic](@ref rte_atomic.h), ++ [mcslock](@ref rte_mcslock.h), ++ [rwlock](@ref rte_rwlock.h), ++ [spinlock](@ref rte_spinlock.h), ++ [ticketlock](@ref rte_ticketlock.h), ++ [RCU](@ref rte_rcu_qsbr.h) + + - **CPU arch**: +- [branch prediction] (@ref rte_branch_prediction.h), +- [cache prefetch] (@ref rte_prefetch.h), +- [SIMD] (@ref rte_vect.h), +- [byte order] (@ref rte_byteorder.h), +- [CPU flags] (@ref 
rte_cpuflags.h), +- [CPU pause] (@ref rte_pause.h), +- [I/O access] (@ref rte_io.h) ++ [branch prediction](@ref rte_branch_prediction.h), ++ [cache prefetch](@ref rte_prefetch.h), ++ [SIMD](@ref rte_vect.h), ++ [byte order](@ref rte_byteorder.h), ++ [CPU flags](@ref rte_cpuflags.h), ++ [CPU pause](@ref rte_pause.h), ++ [I/O access](@ref rte_io.h) + + - **CPU multicore**: +- [interrupts] (@ref rte_interrupts.h), +- [launch] (@ref rte_launch.h), +- [lcore] (@ref rte_lcore.h), +- [per-lcore] (@ref rte_per_lcore.h), +- [service cores] (@ref rte_service.h), +- [keepalive] (@ref rte_keepalive.h), +- [power/freq] (@ref rte_power.h) ++ [interrupts](@ref rte_interrupts.h), ++ [launch](@ref rte_launch.h), ++ [lcore](@ref rte_lcore.h), ++ [per-lcore](@ref rte_per_lcore.h), ++ [service cores](@ref rte_service.h), ++ [keepalive](@ref rte_keepalive.h), ++ [power/freq](@ref rte_power.h) + + - **layers**: +- [ethernet] (@ref rte_ether.h), +- [ARP] (@ref rte_arp.h), +- [HIGIG] (@ref rte_higig.h), +- [ICMP] (@ref rte_icmp.h), +- [ESP] (@ref rte_esp.h), +- [IPsec] (@ref rte_ipsec.h), +- [IPsec group] (@ref rte_ipsec_group.h), +- [IPsec SA] (@ref rte_ipsec_sa.h), +- [IPsec SAD] (@ref rte_ipsec_sad.h), +- [IP] (@ref rte_ip.h), +- [frag/reass] (@ref rte_ip_frag.h), +- [SCTP] (@ref rte_sctp.h), +- [TCP] (@ref rte_tcp.h), +- [UDP] (@ref rte_udp.h), +- [GTP] (@ref rte_gtp.h), +- [GRO] (@ref rte_gro.h), +- [GSO] (@ref rte_gso.h), +- [GRE] (@ref rte_gre.h), +- [MPLS] (@ref rte_mpls.h), +- [VXLAN] (@ref rte_vxlan.h), +- [Geneve] (@ref rte_geneve.h), +- [eCPRI] (@ref rte_ecpri.h) ++ [ethernet](@ref rte_ether.h), ++ [ARP](@ref rte_arp.h), ++ [HIGIG](@ref rte_higig.h), ++ [ICMP](@ref rte_icmp.h), ++ [ESP](@ref rte_esp.h), ++ [IPsec](@ref rte_ipsec.h), ++ [IPsec group](@ref rte_ipsec_group.h), ++ [IPsec SA](@ref rte_ipsec_sa.h), ++ [IPsec SAD](@ref rte_ipsec_sad.h), ++ [IP](@ref rte_ip.h), ++ [frag/reass](@ref rte_ip_frag.h), ++ [SCTP](@ref rte_sctp.h), ++ [TCP](@ref rte_tcp.h), ++ [UDP](@ref rte_udp.h), ++ [GTP](@ref rte_gtp.h), ++ [GRO](@ref rte_gro.h), ++ [GSO](@ref rte_gso.h), ++ [GRE](@ref rte_gre.h), ++ [MPLS](@ref rte_mpls.h), ++ [VXLAN](@ref rte_vxlan.h), ++ [Geneve](@ref rte_geneve.h), ++ [eCPRI](@ref rte_ecpri.h) + + - **QoS**: +- [metering] (@ref rte_meter.h), +- [scheduler] (@ref rte_sched.h), +- [RED congestion] (@ref rte_red.h) ++ [metering](@ref rte_meter.h), ++ [scheduler](@ref rte_sched.h), ++ [RED congestion](@ref rte_red.h) + + - **routing**: +- [LPM IPv4 route] (@ref rte_lpm.h), +- [LPM IPv6 route] (@ref rte_lpm6.h), +- [RIB IPv4] (@ref rte_rib.h), +- [RIB IPv6] (@ref rte_rib6.h), +- [FIB IPv4] (@ref rte_fib.h), +- [FIB IPv6] (@ref rte_fib6.h) ++ [LPM IPv4 route](@ref rte_lpm.h), ++ [LPM IPv6 route](@ref rte_lpm6.h), ++ [RIB IPv4](@ref rte_rib.h), ++ [RIB IPv6](@ref rte_rib6.h), ++ [FIB IPv4](@ref rte_fib.h), ++ [FIB IPv6](@ref rte_fib6.h) + + - **hashes**: +- [hash] (@ref rte_hash.h), +- [jhash] (@ref rte_jhash.h), +- [thash] (@ref rte_thash.h), +- [FBK hash] (@ref rte_fbk_hash.h), +- [CRC hash] (@ref rte_hash_crc.h) ++ [hash](@ref rte_hash.h), ++ [jhash](@ref rte_jhash.h), ++ [thash](@ref rte_thash.h), ++ [FBK hash](@ref rte_fbk_hash.h), ++ [CRC hash](@ref rte_hash_crc.h) + + - **classification** +- [reorder] (@ref rte_reorder.h), +- [distributor] (@ref rte_distributor.h), +- [EFD] (@ref rte_efd.h), +- [ACL] (@ref rte_acl.h), +- [member] (@ref rte_member.h), +- [flow classify] (@ref rte_flow_classify.h), +- [BPF] (@ref rte_bpf.h) ++ [reorder](@ref rte_reorder.h), ++ [distributor](@ref 
rte_distributor.h), ++ [EFD](@ref rte_efd.h), ++ [ACL](@ref rte_acl.h), ++ [member](@ref rte_member.h), ++ [flow classify](@ref rte_flow_classify.h), ++ [BPF](@ref rte_bpf.h) + + - **containers**: +- [mbuf] (@ref rte_mbuf.h), +- [mbuf pool ops] (@ref rte_mbuf_pool_ops.h), +- [ring] (@ref rte_ring.h), +- [stack] (@ref rte_stack.h), +- [tailq] (@ref rte_tailq.h), +- [bitmap] (@ref rte_bitmap.h) ++ [mbuf](@ref rte_mbuf.h), ++ [mbuf pool ops](@ref rte_mbuf_pool_ops.h), ++ [ring](@ref rte_ring.h), ++ [stack](@ref rte_stack.h), ++ [tailq](@ref rte_tailq.h), ++ [bitmap](@ref rte_bitmap.h) + + - **packet framework**: +- * [port] (@ref rte_port.h): +- [ethdev] (@ref rte_port_ethdev.h), +- [ring] (@ref rte_port_ring.h), +- [frag] (@ref rte_port_frag.h), +- [reass] (@ref rte_port_ras.h), +- [sched] (@ref rte_port_sched.h), +- [kni] (@ref rte_port_kni.h), +- [src/sink] (@ref rte_port_source_sink.h) +- * [table] (@ref rte_table.h): +- [lpm IPv4] (@ref rte_table_lpm.h), +- [lpm IPv6] (@ref rte_table_lpm_ipv6.h), +- [ACL] (@ref rte_table_acl.h), +- [hash] (@ref rte_table_hash.h), +- [array] (@ref rte_table_array.h), +- [stub] (@ref rte_table_stub.h) +- * [pipeline] (@ref rte_pipeline.h) +- [port_in_action] (@ref rte_port_in_action.h) +- [table_action] (@ref rte_table_action.h) ++ * [port](@ref rte_port.h): ++ [ethdev](@ref rte_port_ethdev.h), ++ [ring](@ref rte_port_ring.h), ++ [frag](@ref rte_port_frag.h), ++ [reass](@ref rte_port_ras.h), ++ [sched](@ref rte_port_sched.h), ++ [kni](@ref rte_port_kni.h), ++ [src/sink](@ref rte_port_source_sink.h) ++ * [table](@ref rte_table.h): ++ [lpm IPv4](@ref rte_table_lpm.h), ++ [lpm IPv6](@ref rte_table_lpm_ipv6.h), ++ [ACL](@ref rte_table_acl.h), ++ [hash](@ref rte_table_hash.h), ++ [array](@ref rte_table_array.h), ++ [stub](@ref rte_table_stub.h) ++ * [pipeline](@ref rte_pipeline.h) ++ [port_in_action](@ref rte_port_in_action.h) ++ [table_action](@ref rte_table_action.h) + * SWX pipeline: +- [control] (@ref rte_swx_ctl.h), +- [extern] (@ref rte_swx_extern.h), +- [pipeline] (@ref rte_swx_pipeline.h) ++ [control](@ref rte_swx_ctl.h), ++ [extern](@ref rte_swx_extern.h), ++ [pipeline](@ref rte_swx_pipeline.h) + * SWX port: +- [port] (@ref rte_swx_port.h), +- [ethdev] (@ref rte_swx_port_ethdev.h), +- [src/sink] (@ref rte_swx_port_source_sink.h) ++ [port](@ref rte_swx_port.h), ++ [ethdev](@ref rte_swx_port_ethdev.h), ++ [src/sink](@ref rte_swx_port_source_sink.h) + * SWX table: +- [table] (@ref rte_swx_table.h), +- [table_em] (@ref rte_swx_table_em.h) +- * [graph] (@ref rte_graph.h): +- [graph_worker] (@ref rte_graph_worker.h) ++ [table](@ref rte_swx_table.h), ++ [table_em](@ref rte_swx_table_em.h) ++ * [graph](@ref rte_graph.h): ++ [graph_worker](@ref rte_graph_worker.h) + * graph_nodes: +- [eth_node] (@ref rte_node_eth_api.h), +- [ip4_node] (@ref rte_node_ip4_api.h) ++ [eth_node](@ref rte_node_eth_api.h), ++ [ip4_node](@ref rte_node_ip4_api.h) + + - **basic**: +- [bitops] (@ref rte_bitops.h), +- [approx fraction] (@ref rte_approx.h), +- [random] (@ref rte_random.h), +- [config file] (@ref rte_cfgfile.h), +- [key/value args] (@ref rte_kvargs.h), +- [string] (@ref rte_string_fns.h) ++ [bitops](@ref rte_bitops.h), ++ [approx fraction](@ref rte_approx.h), ++ [random](@ref rte_random.h), ++ [config file](@ref rte_cfgfile.h), ++ [key/value args](@ref rte_kvargs.h), ++ [string](@ref rte_string_fns.h) + + - **debug**: +- [jobstats] (@ref rte_jobstats.h), +- [telemetry] (@ref rte_telemetry.h), +- [pdump] (@ref rte_pdump.h), +- [hexdump] (@ref rte_hexdump.h), +- [debug] (@ref 
rte_debug.h), +- [log] (@ref rte_log.h), +- [errno] (@ref rte_errno.h), +- [trace] (@ref rte_trace.h), +- [trace_point] (@ref rte_trace_point.h) ++ [jobstats](@ref rte_jobstats.h), ++ [telemetry](@ref rte_telemetry.h), ++ [pdump](@ref rte_pdump.h), ++ [hexdump](@ref rte_hexdump.h), ++ [debug](@ref rte_debug.h), ++ [log](@ref rte_log.h), ++ [errno](@ref rte_errno.h), ++ [trace](@ref rte_trace.h), ++ [trace_point](@ref rte_trace_point.h) + + - **misc**: +- [EAL config] (@ref rte_eal.h), +- [common] (@ref rte_common.h), +- [experimental APIs] (@ref rte_compat.h), +- [ABI versioning] (@ref rte_function_versioning.h), +- [version] (@ref rte_version.h) ++ [EAL config](@ref rte_eal.h), ++ [common](@ref rte_common.h), ++ [experimental APIs](@ref rte_compat.h), ++ [ABI versioning](@ref rte_function_versioning.h), ++ [version](@ref rte_version.h) +diff --git a/dpdk/doc/api/doxy-api.conf.in b/dpdk/doc/api/doxy-api.conf.in +index 5c883b613b..a536bcb493 100644 +--- a/dpdk/doc/api/doxy-api.conf.in ++++ b/dpdk/doc/api/doxy-api.conf.in +@@ -80,7 +80,8 @@ INPUT += @API_EXAMPLES@ + FILE_PATTERNS = rte_*.h \ + cmdline.h + PREDEFINED = __DOXYGEN__ \ +- VFIO_PRESENT \ ++ RTE_HAS_CPUSET \ ++ VFIO_PRESENT \ + __attribute__(x)= + + OPTIMIZE_OUTPUT_FOR_C = YES +diff --git a/dpdk/doc/api/generate_examples.sh b/dpdk/doc/api/generate_examples.sh +index dae7ee0be0..48574563ca 100755 +--- a/dpdk/doc/api/generate_examples.sh ++++ b/dpdk/doc/api/generate_examples.sh +@@ -5,12 +5,16 @@ + EXAMPLES_DIR=$1 + API_EXAMPLES=$2 + ++FIND=find ++ + # generate a .d file including both C files and also build files, so we can + # detect both file changes and file additions/deletions +-echo "$API_EXAMPLES: $(find ${EXAMPLES_DIR} -type f \( -name '*.c' -o -name 'meson.build' \) -printf '%p ' )" > ${API_EXAMPLES}.d ++echo "$API_EXAMPLES: $($FIND ${EXAMPLES_DIR} -type f \( -name '*.c' -o -name 'meson.build' \) | tr '\n' ' ' )" > ${API_EXAMPLES}.d + + exec > "${API_EXAMPLES}" + printf '/**\n' + printf '@page examples DPDK Example Programs\n\n' +-find "${EXAMPLES_DIR}" -type f -name '*.c' -printf '@example examples/%P\n' | LC_ALL=C sort ++$FIND "${EXAMPLES_DIR}" -type f -name '*.c' | ++ sed "s|${EXAMPLES_DIR}|@example examples|" | ++ LC_ALL=C sort + printf '*/\n' +diff --git a/dpdk/doc/api/meson.build b/dpdk/doc/api/meson.build +index dfdefdc924..94e683455f 100644 +--- a/dpdk/doc/api/meson.build ++++ b/dpdk/doc/api/meson.build +@@ -24,7 +24,7 @@ htmldir = join_paths(get_option('datadir'), 'doc', 'dpdk') + # So use a configure option for now. 
+ example = custom_target('examples.dox', + output: 'examples.dox', +- command: [generate_examples, join_paths(meson.source_root(), 'examples'), '@OUTPUT@'], ++ command: [generate_examples, join_paths(dpdk_source_root, 'examples'), '@OUTPUT@'], + depfile: 'examples.dox.d', + install: get_option('enable_docs'), + install_dir: htmldir, +@@ -32,11 +32,11 @@ example = custom_target('examples.dox', + + cdata = configuration_data() + cdata.set('VERSION', meson.project_version()) +-cdata.set('API_EXAMPLES', join_paths(meson.build_root(), 'doc', 'api', 'examples.dox')) +-cdata.set('OUTPUT', join_paths(meson.build_root(), 'doc', 'api')) ++cdata.set('API_EXAMPLES', join_paths(dpdk_build_root, 'doc', 'api', 'examples.dox')) ++cdata.set('OUTPUT', join_paths(dpdk_build_root, 'doc', 'api')) + cdata.set('HTML_OUTPUT', 'html') +-cdata.set('TOPDIR', meson.source_root()) +-cdata.set('STRIP_FROM_PATH', meson.source_root()) ++cdata.set('TOPDIR', dpdk_source_root) ++cdata.set('STRIP_FROM_PATH', ' '.join([dpdk_source_root, join_paths(dpdk_build_root, 'doc', 'api')])) + cdata.set('WARN_AS_ERROR', 'NO') + if get_option('werror') + cdata.set('WARN_AS_ERROR', 'YES') +diff --git a/dpdk/doc/guides/bbdevs/turbo_sw.rst b/dpdk/doc/guides/bbdevs/turbo_sw.rst +index 43c5129fd7..1e23e37027 100644 +--- a/dpdk/doc/guides/bbdevs/turbo_sw.rst ++++ b/dpdk/doc/guides/bbdevs/turbo_sw.rst +@@ -149,7 +149,7 @@ Example: + + * For AVX512 machines with SDK libraries installed then both 4G and 5G can be enabled for full real time FEC capability. + For AVX2 machines it is possible to only enable the 4G libraries and the PMD capabilities will be limited to 4G FEC. +- If no library is present then the PMD driver will still build but its capabilities will be limited accordingly. ++ If no library is present then the PMD will still build but its capabilities will be limited accordingly. + + + To use the PMD in an application, user must: +diff --git a/dpdk/doc/guides/conf.py b/dpdk/doc/guides/conf.py +index aceeb62a4f..8379c53d7c 100644 +--- a/dpdk/doc/guides/conf.py ++++ b/dpdk/doc/guides/conf.py +@@ -3,10 +3,8 @@ + # Copyright(c) 2010-2015 Intel Corporation + + from docutils import nodes +-from distutils.version import LooseVersion ++from packaging.version import Version + from sphinx import __version__ as sphinx_version +-from sphinx.highlighting import PygmentsBridge +-from pygments.formatters.latex import LatexFormatter + from os import listdir + from os import environ + from os.path import basename +@@ -20,7 +18,6 @@ + import sphinx_rtd_theme + + html_theme = "sphinx_rtd_theme" +- html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + except: + print('Install the sphinx ReadTheDocs theme for improved html documentation ' + 'layout: https://sphinx-rtd-theme.readthedocs.io/', +@@ -31,8 +28,10 @@ + + project = 'Data Plane Development Kit' + html_logo = '../logo/DPDK_logo_vertical_rev_small.png' +-latex_logo = '../logo/DPDK_logo_horizontal_tag.png' +-html_add_permalinks = "" ++if Version(sphinx_version) >= Version('3.5'): ++ html_permalinks = False ++else: ++ html_add_permalinks = "" + html_show_copyright = False + highlight_language = 'none' + +@@ -47,46 +46,6 @@ + # Figures, tables and code-blocks automatically numbered if they have caption + numfig = True + +-latex_documents = [ +- ('index', +- 'doc.tex', +- '', +- '', +- 'manual') +-] +- +-# Latex directives to be included directly in the latex/pdf docs. 
+-custom_latex_preamble = r""" +-\usepackage{textalpha} +-\RecustomVerbatimEnvironment{Verbatim}{Verbatim}{xleftmargin=5mm} +-\usepackage{etoolbox} +-\robustify\( +-\robustify\) +-""" +- +-# Configuration for the latex/pdf docs. +-latex_elements = { +- 'papersize': 'a4paper', +- 'pointsize': '11pt', +- # remove blank pages +- 'classoptions': ',openany,oneside', +- 'babel': '\\usepackage[english]{babel}', +- # customize Latex formatting +- 'preamble': custom_latex_preamble +-} +- +- +-# Override the default Latex formatter in order to modify the +-# code/verbatim blocks. +-class CustomLatexFormatter(LatexFormatter): +- def __init__(self, **options): +- super(CustomLatexFormatter, self).__init__(**options) +- # Use the second smallest font size for code/verbatim blocks. +- self.verboptions = r'formatcom=\footnotesize' +- +-# Replace the default latex formatter. +-PygmentsBridge.latex_formatter = CustomLatexFormatter +- + # Configuration for man pages + man_pages = [("testpmd_app_ug/run_app", "testpmd", + "tests for dpdk pmds", "", 1), +@@ -426,7 +385,7 @@ def setup(app): + 'Features availability in bbdev drivers', + 'Feature') + +- if LooseVersion(sphinx_version) < LooseVersion('1.3.1'): ++ if Version(sphinx_version) < Version('1.3.1'): + print('Upgrade sphinx to version >= 1.3.1 for ' + 'improved Figure/Table number handling.', + file=stderr) +diff --git a/dpdk/doc/guides/contributing/coding_style.rst b/dpdk/doc/guides/contributing/coding_style.rst +index bb3f3efcbc..baa366c218 100644 +--- a/dpdk/doc/guides/contributing/coding_style.rst ++++ b/dpdk/doc/guides/contributing/coding_style.rst +@@ -55,7 +55,7 @@ License Header + ~~~~~~~~~~~~~~ + + Each file must begin with a special comment containing the +-`Software Package Data Exchange (SPDX) License Identfier `_. ++`Software Package Data Exchange (SPDX) License Identifier `_. + + Generally this is the BSD License, except for code granted special exceptions. + The SPDX licences identifier is sufficient, a file should not contain +@@ -619,7 +619,7 @@ Return Value + ~~~~~~~~~~~~ + + * Functions which create objects, or allocate memory, should return pointer types, and NULL on error. +- The error type should be indicated may setting the variable ``rte_errno`` appropriately. ++ The error type should be indicated by setting the variable ``rte_errno`` appropriately. + * Functions which work on bursts of packets, such as RX-like or TX-like functions, should return the number of packets handled. + * Other functions returning int should generally behave like system calls: + returning 0 on success and -1 on error, setting ``rte_errno`` to indicate the specific type of error. +diff --git a/dpdk/doc/guides/contributing/documentation.rst b/dpdk/doc/guides/contributing/documentation.rst +index a4e6be6aca..7c76b32dbf 100644 +--- a/dpdk/doc/guides/contributing/documentation.rst ++++ b/dpdk/doc/guides/contributing/documentation.rst +@@ -8,7 +8,7 @@ DPDK Documentation Guidelines + + This document outlines the guidelines for writing the DPDK Guides and API documentation in RST and Doxygen format. + +-It also explains the structure of the DPDK documentation and shows how to build the Html and PDF versions of the documents. ++It also explains the structure of the DPDK documentation and how to build it. + + + Structure of the Documentation +@@ -136,17 +136,11 @@ Building the Documentation + Dependencies + ~~~~~~~~~~~~ + +- + The following dependencies must be installed to build the documentation: + + * Doxygen. +- + * Sphinx (also called python-sphinx). 
+ +-* TexLive (at least TexLive-core and the extra Latex support). +- +-* Inkscape. +- + `Doxygen`_ generates documentation from commented source code. + It can be installed as follows: + +@@ -158,7 +152,7 @@ It can be installed as follows: + # Red Hat/Fedora. + sudo dnf -y install doxygen + +-`Sphinx`_ is a Python documentation tool for converting RST files to Html or to PDF (via LaTeX). ++`Sphinx`_ is a Python documentation tool for converting RST files to HTML. + For full support with figure and table captioning the latest version of Sphinx can be installed as follows: + + .. code-block:: console +@@ -177,43 +171,6 @@ For further information on getting started with Sphinx see the + To get full support for Figure and Table numbering it is best to install Sphinx 1.3.1 or later. + + +-`Inkscape`_ is a vector based graphics program which is used to create SVG images and also to convert SVG images to PDF images. +-It can be installed as follows: +- +-.. code-block:: console +- +- # Ubuntu/Debian. +- sudo apt-get -y install inkscape +- +- # Red Hat/Fedora. +- sudo dnf -y install inkscape +- +-`TexLive `_ is an installation package for Tex/LaTeX. +-It is used to generate the PDF versions of the documentation. +-The main required packages can be installed as follows: +- +-.. code-block:: console +- +- # Ubuntu/Debian. +- sudo apt-get -y install texlive-latex-extra texlive-lang-greek +- +- # Red Hat/Fedora, selective install. +- sudo dnf -y install texlive-collection-latexextra texlive-greek-fontenc +- +-`Latexmk `_ is a perl script +-for running LaTeX for resolving cross references, +-and it also runs auxiliary programs like bibtex, makeindex if necessary, and dvips. +-It has also a number of other useful capabilities (see man 1 latexmk). +- +-.. code-block:: console +- +- # Ubuntu/Debian. +- sudo apt-get -y install latexmk +- +- # Red Hat/Fedora. +- sudo dnf -y install latexmk +- +- + Build commands + ~~~~~~~~~~~~~~ + +@@ -225,16 +182,7 @@ To build the documentation:: + + See :doc:`../linux_gsg/build_dpdk` for more detail on compiling DPDK with meson. + +-The output is generated in the ``build`` directory:: +- +- build/doc +- |-- html +- | |-- api +- | +-- guides +- | +- +-- pdf +- +-- guides +- ++The output is generated in the directories ``build/doc/html/{api,guides}``. + + .. Note:: + +@@ -259,7 +207,8 @@ Here are some guidelines in relation to the style of the documentation: + RST Guidelines + -------------- + +-The RST (reStructuredText) format is a plain text markup format that can be converted to Html, PDF or other formats. ++The RST (reStructuredText) format is a plain text markup format ++that can be converted to HTML or other formats. + It is most closely associated with Python but it can be used to document any language. + It is used in DPDK to document everything apart from the API. + +@@ -282,9 +231,8 @@ Line Length + words. Multiple sentences which are not separated by a blank line are joined + automatically into paragraphs. + +-* Lines in literal blocks **must** be less than 80 characters since +- they are not wrapped by the document formatters and can exceed the page width +- in PDF documents. ++* Lines in literal blocks should be less than 80 characters ++ since they are not wrapped by the document formatters. + + Long literal command lines can be shown wrapped with backslashes. For + example:: +@@ -437,8 +385,8 @@ Code and Literal block sections + * The default encoding for a literal block using the simplified ``::`` + directive is ``none``. 
+ +-* Lines in literal blocks must be less than 80 characters since they can exceed the page width when converted to PDF documentation. +- For long literal lines that exceed that limit try to wrap the text at sensible locations. ++* Lines in literal blocks should be less than 80 characters. ++ For long literal lines, try to wrap the text at sensible locations. + For example a long command line could be documented like this and still work if copied directly from the docs:: + + .//app/dpdk-testpmd -l 0-2 -n3 --vdev=net_pcap0,iface=eth0 \ +@@ -503,7 +451,7 @@ Tables + ~~~~~~ + + * RST tables should be used sparingly. +- They are hard to format and to edit, they are often rendered incorrectly in PDF format, and the same information ++ They are hard to format and to edit, and the same information + can usually be shown just as clearly with a definition or bullet list. + + * Tables in the documentation should be formatted as follows: +@@ -533,8 +481,6 @@ Tables + + The QOS configuration is shown in :numref:`table_qos_pipes`. + +-* Tables should not include merged cells since they are not supported by the PDF renderer. +- + + .. _links: + +@@ -703,7 +649,7 @@ The following are some guidelines for use of Doxygen in the DPDK API documentati + /**< Virtual address of the first mempool object. */ + uintptr_t elt_va_end; + /**< Virtual address of the mempool object. */ +- phys_addr_t elt_pa[MEMPOOL_PG_NUM_DEFAULT]; ++ phys_addr_t elt_pa[1]; + /**< Array of physical page addresses for the mempool buffer. */ + + This doesn't have an effect on the rendered documentation but it is confusing for the developer reading the code. +@@ -722,7 +668,7 @@ The following are some guidelines for use of Doxygen in the DPDK API documentati + /** Virtual address of the mempool object. */ + uintptr_t elt_va_end; + /** Array of physical page addresses for the mempool buffer. */ +- phys_addr_t elt_pa[MEMPOOL_PG_NUM_DEFAULT]; ++ phys_addr_t elt_pa[1]; + + * Read the rendered section of the documentation that you have added for correctness, clarity and consistency + with the surrounding text. +diff --git a/dpdk/doc/guides/cryptodevs/caam_jr.rst b/dpdk/doc/guides/cryptodevs/caam_jr.rst +index 5ef33ae78e..d7b0f14234 100644 +--- a/dpdk/doc/guides/cryptodevs/caam_jr.rst ++++ b/dpdk/doc/guides/cryptodevs/caam_jr.rst +@@ -24,7 +24,7 @@ accelerators. This provides significant improvement to system level performance. + + SEC HW accelerator above 4.x+ version are also known as CAAM. + +-caam_jr PMD is one of DPAA drivers which uses uio interface to interact with ++caam_jr PMD is one of DPAA drivers which uses UIO interface to interact with + Linux kernel for configure and destroy the device instance (ring). + + +diff --git a/dpdk/doc/guides/cryptodevs/ccp.rst b/dpdk/doc/guides/cryptodevs/ccp.rst +index 36dae090f9..52e98b0859 100644 +--- a/dpdk/doc/guides/cryptodevs/ccp.rst ++++ b/dpdk/doc/guides/cryptodevs/ccp.rst +@@ -100,7 +100,7 @@ The following parameters (all optional) can be provided in the previous two call + + * ccp_auth_opt: Specify authentication operations to perform on CPU using openssl APIs. + +-To validate ccp pmd, l2fwd-crypto example can be used with following command: ++To validate ccp PMD, l2fwd-crypto example can be used with following command: + + .. 
code-block:: console + +diff --git a/dpdk/doc/guides/cryptodevs/openssl.rst b/dpdk/doc/guides/cryptodevs/openssl.rst +index 848a2e8eb8..03041ceda1 100644 +--- a/dpdk/doc/guides/cryptodevs/openssl.rst ++++ b/dpdk/doc/guides/cryptodevs/openssl.rst +@@ -69,7 +69,7 @@ use version 1.1.1g or newer. + Initialization + -------------- + +-User can use app/test application to check how to use this pmd and to verify ++User can use app/test application to check how to use this PMD and to verify + crypto processing. + + Test name is cryptodev_openssl_autotest. +diff --git a/dpdk/doc/guides/cryptodevs/overview.rst b/dpdk/doc/guides/cryptodevs/overview.rst +index e2a1e08ec1..f9a58fbd3d 100644 +--- a/dpdk/doc/guides/cryptodevs/overview.rst ++++ b/dpdk/doc/guides/cryptodevs/overview.rst +@@ -19,7 +19,7 @@ Supported Feature Flags + + - "OOP SGL In SGL Out" feature flag stands for + "Out-of-place Scatter-gather list Input, Scatter-gather list Output", +- which means pmd supports different scatter-gather styled input and output buffers ++ which means PMD supports different scatter-gather styled input and output buffers + (i.e. both can consists of multiple segments). + + - "OOP SGL In LB Out" feature flag stands for +diff --git a/dpdk/doc/guides/cryptodevs/qat.rst b/dpdk/doc/guides/cryptodevs/qat.rst +index cf16f03503..ea5c03b8fa 100644 +--- a/dpdk/doc/guides/cryptodevs/qat.rst ++++ b/dpdk/doc/guides/cryptodevs/qat.rst +@@ -562,7 +562,7 @@ Binding the available VFs to the vfio-pci driver + + Note: + +-* Please note that due to security issues, the usage of older DPDK igb-uio ++* Please note that due to security issues, the usage of older DPDK igb_uio + driver is not recommended. This document shows how to use the more secure + vfio-pci driver. + * If QAT fails to bind to vfio-pci on Linux kernel 5.9+, please see the +diff --git a/dpdk/doc/guides/cryptodevs/scheduler.rst b/dpdk/doc/guides/cryptodevs/scheduler.rst +index 835d999cfa..d08207f2e1 100644 +--- a/dpdk/doc/guides/cryptodevs/scheduler.rst ++++ b/dpdk/doc/guides/cryptodevs/scheduler.rst +@@ -118,7 +118,7 @@ operation: + than the designated threshold, otherwise it will be handled by the secondary + worker. + +- A typical usecase in this mode is with the QAT cryptodev as the primary and ++ A typical use case in this mode is with the QAT cryptodev as the primary and + a software cryptodev as the secondary worker. This may help applications to + process additional crypto workload than what the QAT cryptodev can handle on + its own, by making use of the available CPU cycles to deal with smaller +diff --git a/dpdk/doc/guides/cryptodevs/virtio.rst b/dpdk/doc/guides/cryptodevs/virtio.rst +index 83d8e32397..ce4d43519a 100644 +--- a/dpdk/doc/guides/cryptodevs/virtio.rst ++++ b/dpdk/doc/guides/cryptodevs/virtio.rst +@@ -63,7 +63,7 @@ QEMU can then be started using the following parameters: + -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 + [...] + +-Secondly bind the uio_generic driver for the virtio-crypto device. ++Secondly bind the uio_pci_generic driver for the virtio-crypto device. + For example, 0000:00:04.0 is the domain, bus, device and function + number of the virtio-crypto device: + +@@ -73,7 +73,7 @@ number of the virtio-crypto device: + echo -n 0000:00:04.0 > /sys/bus/pci/drivers/virtio-pci/unbind + echo "1af4 1054" > /sys/bus/pci/drivers/uio_pci_generic/new_id + +-Finally the front-end virtio crypto PMD driver can be installed. ++Finally the front-end virtio crypto PMD can be installed. 
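As a minimal alternative sketch to the manual sysfs steps above, the same binding can
usually be done with the ``dpdk-devbind.py`` tool shipped in the DPDK ``usertools``
directory (the PCI address 0000:00:04.0 is carried over from the example above):

.. code-block:: console

    # Load the kernel module, then rebind the virtio-crypto device to it.
    modprobe uio_pci_generic
    ./usertools/dpdk-devbind.py --bind=uio_pci_generic 0000:00:04.0

    # Verify the new binding.
    ./usertools/dpdk-devbind.py --status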
+ + Tests + ----- +diff --git a/dpdk/doc/guides/eventdevs/dlb2.rst b/dpdk/doc/guides/eventdevs/dlb2.rst +index 94d2c77ff4..834e2242a9 100644 +--- a/dpdk/doc/guides/eventdevs/dlb2.rst ++++ b/dpdk/doc/guides/eventdevs/dlb2.rst +@@ -178,20 +178,20 @@ A DLB2 eventdev contains one load-balanced and one directed credit pool. These + pools' sizes are controlled by the nb_events_limit field in struct + rte_event_dev_config. The load-balanced pool is sized to contain + nb_events_limit credits, and the directed pool is sized to contain +-nb_events_limit/4 credits. The directed pool size can be overridden with the +-num_dir_credits vdev argument, like so: ++nb_events_limit/2 credits. The directed pool size can be overridden with the ++num_dir_credits devargs argument, like so: + + .. code-block:: console + +- --vdev=dlb1_event,num_dir_credits= ++ --allow ea:00.0,num_dir_credits= + + This can be used if the default allocation is too low or too high for the +-specific application needs. The PMD also supports a vdev arg that limits the ++specific application needs. The PMD also supports a devarg that limits the + max_num_events reported by rte_event_dev_info_get(): + + .. code-block:: console + +- --vdev=dlb1_event,max_num_events= ++ --allow ea:00.0,max_num_events= + + By default, max_num_events is reported as the total available load-balanced + credits. If multiple DLB2-based applications are being used, it may be desirable +@@ -266,8 +266,8 @@ queue A. + Due to this, workers should stop retrying after a time, release the events it + is attempting to enqueue, and dequeue more events. It is important that the + worker release the events and don't simply set them aside to retry the enqueue +-again later, because the port has limited history list size (by default, twice +-the port's dequeue_depth). ++again later, because the port has limited history list size (by default, same ++as port's dequeue_depth). + + Priority + ~~~~~~~~ +@@ -314,27 +314,6 @@ The PMD does not support the following configuration sequences: + This sequence is not supported because the event device must be reconfigured + before its ports or queues can be. + +-Deferred Scheduling +-~~~~~~~~~~~~~~~~~~~ +- +-The DLB2 PMD's default behavior for managing a CQ is to "pop" the CQ once per +-dequeued event before returning from rte_event_dequeue_burst(). This frees the +-corresponding entries in the CQ, which enables the DLB2 to schedule more events +-to it. +- +-To support applications seeking finer-grained scheduling control -- for example +-deferring scheduling to get the best possible priority scheduling and +-load-balancing -- the PMD supports a deferred scheduling mode. In this mode, +-the CQ entry is not popped until the *subsequent* rte_event_dequeue_burst() +-call. This mode only applies to load-balanced event ports with dequeue depth of +-1. +- +-To enable deferred scheduling, use the defer_sched vdev argument like so: +- +- .. code-block:: console +- +- --vdev=dlb1_event,defer_sched=on +- + Atomic Inflights Allocation + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +@@ -351,18 +330,11 @@ scheduled. The likelihood of this case depends on the eventdev configuration, + traffic behavior, event processing latency, potential for a worker to be + interrupted or otherwise delayed, etc. 
+ +-By default, the PMD allocates 16 buffer entries for each load-balanced queue, +-which provides an even division across all 128 queues but potentially wastes ++By default, the PMD allocates 64 buffer entries for each load-balanced queue, ++which provides an even division across all 32 queues but potentially wastes + buffer space (e.g. if not all queues are used, or aren't used for atomic + scheduling). + +-The PMD provides a dev arg to override the default per-queue allocation. To +-increase a vdev's per-queue atomic-inflight allocation to (for example) 64: +- +- .. code-block:: console +- +- --vdev=dlb1_event,atm_inflights=64 +- + QID Depth Threshold + ~~~~~~~~~~~~~~~~~~~ + +@@ -379,14 +351,14 @@ Per queue threshold metrics are tracked in the DLB2 xstats, and are also + returned in the impl_opaque field of each received event. + + The per qid threshold can be specified as part of the device args, and +-can be applied to all queue, a range of queues, or a single queue, as ++can be applied to all queues, a range of queues, or a single queue, as + shown below. + + .. code-block:: console + +- --vdev=dlb2_event,qid_depth_thresh=all: +- --vdev=dlb2_event,qid_depth_thresh=qidA-qidB: +- --vdev=dlb2_event,qid_depth_thresh=qid: ++ --allow ea:00.0,qid_depth_thresh=all: ++ --allow ea:00.0,qid_depth_thresh=qidA-qidB: ++ --allow ea:00.0,qid_depth_thresh=qid: + + Class of service + ~~~~~~~~~~~~~~~~ +@@ -408,4 +380,4 @@ Class of service can be specified in the devargs, as follows + + .. code-block:: console + +- --vdev=dlb2_event,cos=<0..4> ++ --allow ea:00.0,cos=<0..4> +diff --git a/dpdk/doc/guides/eventdevs/opdl.rst b/dpdk/doc/guides/eventdevs/opdl.rst +index cbfd1f11b7..f220959249 100644 +--- a/dpdk/doc/guides/eventdevs/opdl.rst ++++ b/dpdk/doc/guides/eventdevs/opdl.rst +@@ -87,7 +87,7 @@ due to the static nature of the underlying queues. It is because of this + that the implementation can achieve such high throughput and low latency + + The following list is a comprehensive outline of the what is supported and +-the limitations / restrictions imposed by the opdl pmd ++the limitations / restrictions imposed by the opdl PMD + + - The order in which packets moved between queues is static and fixed \ + (dynamic scheduling is not supported). +diff --git a/dpdk/doc/guides/freebsd_gsg/build_sample_apps.rst b/dpdk/doc/guides/freebsd_gsg/build_sample_apps.rst +index 4fba671e4f..c87e982759 100644 +--- a/dpdk/doc/guides/freebsd_gsg/build_sample_apps.rst ++++ b/dpdk/doc/guides/freebsd_gsg/build_sample_apps.rst +@@ -88,7 +88,7 @@ Other options, specific to Linux and are not supported under FreeBSD are as foll + * ``--huge-dir``: + The directory where hugetlbfs is mounted. + +-* ``mbuf-pool-ops-name``: ++* ``--mbuf-pool-ops-name``: + Pool ops name for mbuf to use. + + * ``--file-prefix``: +diff --git a/dpdk/doc/guides/howto/img/virtio_user_for_container_networking.svg b/dpdk/doc/guides/howto/img/virtio_user_for_container_networking.svg +index de80806649..dc9b318e7e 100644 +--- a/dpdk/doc/guides/howto/img/virtio_user_for_container_networking.svg ++++ b/dpdk/doc/guides/howto/img/virtio_user_for_container_networking.svg +@@ -465,7 +465,7 @@ + v:mID="63" + id="shape63-63">Sheet.63Contanier/AppContainer/App ++ + .. 
_Enabling_Additional_Functionality: + + Enabling Additional Functionality +@@ -64,13 +66,62 @@ The application can then determine what action to take, if any, if the HPET is n + Running DPDK Applications Without Root Privileges + ------------------------------------------------- + +-In order to run DPDK as non-root, the following Linux filesystem objects' +-permissions should be adjusted to ensure that the Linux account being used to +-run the DPDK application has access to them: ++The following sections describe generic requirements and configuration ++for running DPDK applications as non-root. ++There may be additional requirements documented for some drivers. ++ ++Hugepages ++~~~~~~~~~ ++ ++Hugepages must be reserved as root before running the application as non-root, ++for example:: ++ ++ sudo dpdk-hugepages.py --reserve 1G ++ ++If multi-process is not required, running with ``--in-memory`` ++bypasses the need to access hugepage mount point and files within it. ++Otherwise, hugepage directory must be made accessible ++for writing to the unprivileged user. ++A good way for managing multiple applications using hugepages ++is to mount the filesystem with group permissions ++and add a supplementary group to each application or container. ++ ++One option is to mount manually:: ++ ++ mount -t hugetlbfs -o pagesize=1G,uid=`id -u`,gid=`id -g` nodev $HOME/huge-1G ++ ++In production environment, the OS can manage mount points ++(`systemd example `_). ++ ++The ``hugetlb`` filesystem has additional options to guarantee or limit ++the amount of memory that is possible to allocate using the mount point. ++Refer to the `documentation `_. ++ ++.. note:: ++ ++ Using ``vfio-pci`` kernel driver, if applicable, can eliminate the need ++ for physical addresses and therefore eliminate the permission requirements ++ described below. ++ ++If the driver requires using physical addresses (PA), ++the executable file must be granted additional capabilities: ++ ++* ``SYS_ADMIN`` to read ``/proc/self/pagemaps`` ++* ``IPC_LOCK`` to lock hugepages in memory + +-* All directories which serve as hugepage mount points, for example, ``/dev/hugepages`` ++.. code-block:: console ++ ++ setcap cap_ipc_lock,cap_sys_admin+ep ++ ++If physical addresses are not accessible, ++the following message will appear during EAL initialization:: ++ ++ EAL: rte_mem_virt2phy(): cannot open /proc/self/pagemap: Permission denied + +-* If the HPET is to be used, ``/dev/hpet`` ++It is harmless in case PA are not needed. ++ ++Resource Limits ++~~~~~~~~~~~~~~~ + + When running as non-root user, there may be some additional resource limits + that are imposed by the system. Specifically, the following resource limits may +@@ -85,8 +136,10 @@ need to be adjusted in order to ensure normal DPDK operation: + The above limits can usually be adjusted by editing + ``/etc/security/limits.conf`` file, and rebooting. + +-Additionally, depending on which kernel driver is in use, the relevant +-resources also should be accessible by the user running the DPDK application. ++Device Control ++~~~~~~~~~~~~~~ ++ ++If the HPET is to be used, ``/dev/hpet`` permissions must be adjusted. + + For ``vfio-pci`` kernel driver, the following Linux file system objects' + permissions should be adjusted: +@@ -96,38 +149,18 @@ permissions should be adjusted: + * The directories under ``/dev/vfio`` that correspond to IOMMU group numbers of + devices intended to be used by DPDK, for example, ``/dev/vfio/50`` + +-.. 
note:: +- +- The instructions below will allow running DPDK with ``igb_uio`` or +- ``uio_pci_generic`` drivers as non-root with older Linux kernel versions. +- However, since version 4.0, the kernel does not allow unprivileged processes +- to read the physical address information from the pagemaps file, making it +- impossible for those processes to be used by non-privileged users. In such +- cases, using the VFIO driver is recommended. +- +-For ``igb_uio`` or ``uio_pci_generic`` kernel drivers, the following Linux file +-system objects' permissions should be adjusted: +- +-* The userspace-io device files in ``/dev``, for example, ``/dev/uio0``, ``/dev/uio1``, and so on +- +-* The userspace-io sysfs config and resource files, for example for ``uio0``:: +- +- /sys/class/uio/uio0/device/config +- /sys/class/uio/uio0/device/resource* +- +- + Power Management and Power Saving Functionality + ----------------------------------------------- + +-Enhanced Intel SpeedStep® Technology must be enabled in the platform BIOS if the power management feature of DPDK is to be used. ++Enhanced Intel SpeedStep\ |reg| Technology must be enabled in the platform BIOS if the power management feature of DPDK is to be used. + Otherwise, the sys file folder ``/sys/devices/system/cpu/cpu0/cpufreq`` will not exist, and the CPU frequency- based power management cannot be used. + Consult the relevant BIOS documentation to determine how these settings can be accessed. + +-For example, on some Intel reference platform BIOS variants, the path to Enhanced Intel SpeedStep® Technology is:: ++For example, on some Intel reference platform BIOS variants, the path to Enhanced Intel SpeedStep\ |reg| Technology is:: + + Advanced + -> Processor Configuration +- -> Enhanced Intel SpeedStep® Tech ++ -> Enhanced Intel SpeedStep\ |reg| Tech + + In addition, C3 and C6 should be enabled as well for power management. The path of C3 and C6 on the same platform BIOS is:: + +diff --git a/dpdk/doc/guides/linux_gsg/linux_drivers.rst b/dpdk/doc/guides/linux_gsg/linux_drivers.rst +index 90635a45d9..67ab9009a9 100644 +--- a/dpdk/doc/guides/linux_gsg/linux_drivers.rst ++++ b/dpdk/doc/guides/linux_gsg/linux_drivers.rst +@@ -3,6 +3,8 @@ + Copyright 2017 Mellanox Technologies, Ltd + All rights reserved. + ++.. include:: ++ + .. _linux_gsg_linux_drivers: + + Linux Drivers +@@ -25,6 +27,16 @@ To make use of VFIO, the ``vfio-pci`` module must be loaded: + VFIO kernel is usually present by default in all distributions, + however please consult your distributions documentation to make sure that is the case. + ++For DMA mapping of either external memory or hugepages, VFIO interface is used. ++VFIO does not support partial unmap of once mapped memory. Hence DPDK's memory is ++mapped in hugepage granularity or system page granularity. Number of DMA ++mappings is limited by kernel with user locked memory limit of a process (rlimit) ++for system/hugepage memory. Another per-container overall limit applicable both ++for external memory and system memory was added in kernel 5.1 defined by ++VFIO module parameter ``dma_entry_limit`` with a default value of 64K. ++When application is out of DMA entries, these limits need to be adjusted to ++increase the allowed limit. ++ + Since Linux version 5.7, + the ``vfio-pci`` module supports the creation of virtual functions. + After the PF is bound to ``vfio-pci`` module, +@@ -83,7 +95,7 @@ The token will be used for all PF and VF ports within the application. 
+ + To make use of full VFIO functionality, + both kernel and BIOS must support and be configured +-to use IO virtualization (such as Intel® VT-d). ++to use IO virtualization (such as Intel\ |reg| VT-d). + + .. note:: + +@@ -156,6 +168,11 @@ It can be loaded as shown below: + sudo modprobe uio + sudo insmod igb_uio.ko + ++.. note:: ++ ++ For some devices which lack support for legacy interrupts, e.g. virtual function ++ (VF) devices, the ``igb_uio`` module may be needed in place of ``uio_pci_generic``. ++ + .. note:: + + If UEFI secure boot is enabled, +@@ -319,7 +336,7 @@ Please refer to earlier sections on how to configure kernel parameters + correctly for your system. + + If the kernel is configured correctly, one also has to make sure that +-the BIOS configuration has virtualization features (such as Intel® VT-d). ++the BIOS configuration has virtualization features (such as Intel\ |reg| VT-d). + There is no standard way to check if the platform is configured correctly, + so please check with your platform documentation to see if it has such features, + and how to enable them. +diff --git a/dpdk/doc/guides/linux_gsg/sys_reqs.rst b/dpdk/doc/guides/linux_gsg/sys_reqs.rst +index be714adf22..50a86d5a2e 100644 +--- a/dpdk/doc/guides/linux_gsg/sys_reqs.rst ++++ b/dpdk/doc/guides/linux_gsg/sys_reqs.rst +@@ -1,6 +1,8 @@ + .. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. + ++.. include:: ++ + System Requirements + =================== + +@@ -66,10 +68,10 @@ Compilation of the DPDK + + **Optional Tools:** + +-* Intel® C++ Compiler (icc). For installation, additional libraries may be required. ++* Intel\ |reg| C++ Compiler (icc). For installation, additional libraries may be required. + See the icc Installation Guide found in the Documentation directory under the compiler installation. + +-* IBM® Advance ToolChain for Powerlinux. This is a set of open source development tools and runtime libraries ++* IBM\ |reg| Advance ToolChain for Powerlinux. This is a set of open source development tools and runtime libraries + which allows users to take leading edge advantage of IBM's latest POWER hardware features on Linux. To install + it, see the IBM official installation document. + +diff --git a/dpdk/doc/guides/nics/af_packet.rst b/dpdk/doc/guides/nics/af_packet.rst +index efd6f1ca73..636adefea0 100644 +--- a/dpdk/doc/guides/nics/af_packet.rst ++++ b/dpdk/doc/guides/nics/af_packet.rst +@@ -5,11 +5,11 @@ AF_PACKET Poll Mode Driver + ========================== + + The AF_PACKET socket in Linux allows an application to receive and send raw +-packets. This Linux-specific PMD driver binds to an AF_PACKET socket and allows ++packets. This Linux-specific PMD binds to an AF_PACKET socket and allows + a DPDK application to send and receive raw packets through the Kernel. + + In order to improve Rx and Tx performance this implementation makes use of +-PACKET_MMAP, which provides a mmap'ed ring buffer, shared between user space ++PACKET_MMAP, which provides a mmapped ring buffer, shared between user space + and kernel, that's used to send and receive packets. This helps reducing system + calls and the copies needed between user space and Kernel. + +diff --git a/dpdk/doc/guides/nics/af_xdp.rst b/dpdk/doc/guides/nics/af_xdp.rst +index 5ed24374f8..7b2989ab84 100644 +--- a/dpdk/doc/guides/nics/af_xdp.rst ++++ b/dpdk/doc/guides/nics/af_xdp.rst +@@ -12,7 +12,7 @@ For the full details behind AF_XDP socket, you can refer to + `AF_XDP documentation in the Kernel + `_. 
+ +-This Linux-specific PMD driver creates the AF_XDP socket and binds it to a ++This Linux-specific PMD creates the AF_XDP socket and binds it to a + specific netdev queue, it allows a DPDK application to send and receive raw + packets through the socket which would bypass the kernel network stack. + Current implementation only supports single queue, multi-queues feature will +diff --git a/dpdk/doc/guides/nics/avp.rst b/dpdk/doc/guides/nics/avp.rst +index 1a194fc23c..a749f2a0f6 100644 +--- a/dpdk/doc/guides/nics/avp.rst ++++ b/dpdk/doc/guides/nics/avp.rst +@@ -35,7 +35,7 @@ to another with minimal packet loss. + Features and Limitations of the AVP PMD + --------------------------------------- + +-The AVP PMD driver provides the following functionality. ++The AVP PMD provides the following functionality. + + * Receive and transmit of both simple and chained mbuf packets, + +@@ -74,7 +74,7 @@ Launching a VM with an AVP type network attachment + The following example will launch a VM with three network attachments. The + first attachment will have a default vif-model of "virtio". The next two + network attachments will have a vif-model of "avp" and may be used with a DPDK +-application which is built to include the AVP PMD driver. ++application which is built to include the AVP PMD. + + .. code-block:: console + +diff --git a/dpdk/doc/guides/nics/bnx2x.rst b/dpdk/doc/guides/nics/bnx2x.rst +index 9ad4f9f410..788a6dac08 100644 +--- a/dpdk/doc/guides/nics/bnx2x.rst ++++ b/dpdk/doc/guides/nics/bnx2x.rst +@@ -105,7 +105,7 @@ Jumbo: Limitation + ----------------- + + Rx descriptor limit for number of segments per MTU is set to 1. +-PMD doesn't support Jumbo Rx scatter gather. Some applciations can ++PMD doesn't support Jumbo Rx scatter gather. Some applications can + adjust mbuf_size based on this param and max_pkt_len. + + For others, PMD detects the condition where Rx packet length cannot +diff --git a/dpdk/doc/guides/nics/dpaa.rst b/dpdk/doc/guides/nics/dpaa.rst +index 917482dbe2..7355ec3059 100644 +--- a/dpdk/doc/guides/nics/dpaa.rst ++++ b/dpdk/doc/guides/nics/dpaa.rst +@@ -297,7 +297,7 @@ FMC - FMAN Configuration Tool + + + The details can be found in FMC Doc at: +- `Frame Mnager Configuration Tool `_. ++ `Frame Manager Configuration Tool `_. + + FMLIB + ~~~~~ +@@ -307,7 +307,7 @@ FMLIB + + This is an alternate to the FMC based configuration. This library provides + direct ioctl based interfaces for FMAN configuration as used by the FMC tool +- as well. This helps in overcoming the main limitaiton of FMC - i.e. lack ++ as well. This helps in overcoming the main limitation of FMC - i.e. lack + of dynamic configuration. + + The location for the fmd driver as used by FMLIB and FMC is as follows: +@@ -319,7 +319,7 @@ VSP (Virtual Storage Profile) + The storage profiled are means to provide virtualized interface. A ranges of + storage profiles cab be associated to Ethernet ports. + They are selected during classification. Specify how the frame should be +- written to memory and which buffer pool to select for packet storange in ++ written to memory and which buffer pool to select for packet storage in + queues. Start and End margin of buffer can also be configured. 
+ + Limitations +diff --git a/dpdk/doc/guides/nics/e1000em.rst b/dpdk/doc/guides/nics/e1000em.rst +index b6a2534e36..63c0b1b337 100644 +--- a/dpdk/doc/guides/nics/e1000em.rst ++++ b/dpdk/doc/guides/nics/e1000em.rst +@@ -8,9 +8,9 @@ The DPDK EM poll mode driver supports the following emulated devices: + + * qemu-kvm emulated Intel® 82540EM Gigabit Ethernet Controller (qemu e1000 device) + +-* VMware* emulated Intel® 82545EM Gigabit Ethernet Controller ++* VMware emulated Intel® 82545EM Gigabit Ethernet Controller + +-* VMware emulated Intel® 8274L Gigabit Ethernet Controller. ++* VMware emulated Intel® 82574L Gigabit Ethernet Controller. + + Validated Hypervisors + --------------------- +diff --git a/dpdk/doc/guides/nics/ena.rst b/dpdk/doc/guides/nics/ena.rst +index 0f1f63f722..df720201f9 100644 +--- a/dpdk/doc/guides/nics/ena.rst ++++ b/dpdk/doc/guides/nics/ena.rst +@@ -234,7 +234,7 @@ Example output: + + [...] + EAL: PCI device 0000:00:06.0 on NUMA socket -1 +- EAL: Invalid NUMA socket, default to 0 ++ EAL: Device 0000:00:06.0 is not NUMA-aware, defaulting socket to 0 + EAL: probe driver: 1d0f:ec20 net_ena + + Interactive-mode selected +diff --git a/dpdk/doc/guides/nics/enic.rst b/dpdk/doc/guides/nics/enic.rst +index 5d1cc9f7fa..102522492a 100644 +--- a/dpdk/doc/guides/nics/enic.rst ++++ b/dpdk/doc/guides/nics/enic.rst +@@ -294,35 +294,31 @@ inner and outer packets can be IPv4 or IPv6. + + RSS hash calculation, therefore queue selection, is done on inner packets. + +-In order to enable overlay offload, the 'Enable VXLAN' box should be checked ++In order to enable overlay offload, enable VXLAN and/or Geneve on vNIC + via CIMC or UCSM followed by a reboot of the server. When PMD successfully +-enables overlay offload, it prints the following message on the console. ++enables overlay offload, it prints one of the following messages on the console. + + .. code-block:: console + +- Overlay offload is enabled ++ Overlay offload is enabled (VxLAN) ++ Overlay offload is enabled (Geneve) ++ Overlay offload is enabled (VxLAN, Geneve) + + By default, PMD enables overlay offload if hardware supports it. To disable + it, set ``devargs`` parameter ``disable-overlay=1``. For example:: + + -a 12:00.0,disable-overlay=1 + +-By default, the NIC uses 4789 as the VXLAN port. The user may change +-it through ``rte_eth_dev_udp_tunnel_port_{add,delete}``. However, as +-the current NIC has a single VXLAN port number, the user cannot +-configure multiple port numbers. +- +-Geneve headers with non-zero options are not supported by default. To +-use Geneve with options, update the VIC firmware to the latest version +-and then set ``devargs`` parameter ``geneve-opt=1``. When Geneve with +-options is enabled, flow API cannot be used as the features are +-currently mutually exclusive. When this feature is successfully +-enabled, PMD prints the following message. +- +-.. code-block:: console +- +- Geneve with options is enabled ++By default, the NIC uses 4789 and 6081 as the VXLAN and Geneve ports, ++respectively. The user may change them through ++``rte_eth_dev_udp_tunnel_port_{add,delete}``. However, as the current ++NIC has a single VXLAN port number and a single Geneve port number, ++the user cannot configure multiple port numbers for each tunnel type. + ++Geneve offload support has evolved over VIC models. On older models, ++Geneve offload and advanced filters are mutually exclusive. This is ++enforced by UCSM and CIMC, which only allow one of the two features ++to be selected at one time. 
Newer VIC models do not have this restriction. + + Ingress VLAN Rewrite + -------------------- +diff --git a/dpdk/doc/guides/nics/features/bnxt.ini b/dpdk/doc/guides/nics/features/bnxt.ini +index f8a7fd9a1f..fd1c262267 100644 +--- a/dpdk/doc/guides/nics/features/bnxt.ini ++++ b/dpdk/doc/guides/nics/features/bnxt.ini +@@ -10,6 +10,8 @@ Link status event = Y + Rx interrupt = Y + Fast mbuf free = Y + Queue start/stop = Y ++Runtime Rx queue setup = Y ++Runtime Tx queue setup = Y + Burst mode info = Y + MTU update = Y + Jumbo frame = Y +@@ -23,6 +25,7 @@ Multicast MAC filter = Y + RSS hash = Y + RSS key update = Y + RSS reta update = Y ++Inner RSS = Y + VMDq = Y + SR-IOV = Y + VLAN filter = Y +diff --git a/dpdk/doc/guides/nics/features/hns3.ini b/dpdk/doc/guides/nics/features/hns3.ini +index a1dc7eb9eb..00d9b1aef2 100644 +--- a/dpdk/doc/guides/nics/features/hns3.ini ++++ b/dpdk/doc/guides/nics/features/hns3.ini +@@ -10,6 +10,7 @@ Queue start/stop = Y + Runtime Rx queue setup = Y + Runtime Tx queue setup = Y + Burst mode info = Y ++Fast mbuf free = Y + MTU update = Y + Jumbo frame = Y + Scattered Rx = Y +diff --git a/dpdk/doc/guides/nics/features/ice_dcf.ini b/dpdk/doc/guides/nics/features/ice_dcf.ini +index e2b5659090..ac3c83c8e7 100644 +--- a/dpdk/doc/guides/nics/features/ice_dcf.ini ++++ b/dpdk/doc/guides/nics/features/ice_dcf.ini +@@ -3,6 +3,9 @@ + ; + ; Refer to default.ini for the full list of available PMD features. + ; ++; A feature with "P" indicates only be supported when non-vector path ++; is selected. ++; + [Features] + Queue start/stop = Y + Jumbo frame = Y +@@ -12,6 +15,8 @@ Flow API = Y + CRC offload = Y + L3 checksum offload = P + L4 checksum offload = P ++Inner L3 checksum = P ++Inner L4 checksum = P + Basic stats = Y + Linux UIO = Y + Linux VFIO = Y +diff --git a/dpdk/doc/guides/nics/fm10k.rst b/dpdk/doc/guides/nics/fm10k.rst +index 7b8ef0e782..9057fd8c9e 100644 +--- a/dpdk/doc/guides/nics/fm10k.rst ++++ b/dpdk/doc/guides/nics/fm10k.rst +@@ -114,9 +114,9 @@ Switch manager + ~~~~~~~~~~~~~~ + + The Intel FM10000 family of NICs integrate a hardware switch and multiple host +-interfaces. The FM10000 PMD driver only manages host interfaces. For the ++interfaces. The FM10000 PMD only manages host interfaces. For the + switch component another switch driver has to be loaded prior to the +-FM10000 PMD driver. The switch driver can be acquired from Intel support. ++FM10000 PMD. The switch driver can be acquired from Intel support. + Only Testpoint is validated with DPDK, the latest version that has been + validated with DPDK is 4.1.6. + +diff --git a/dpdk/doc/guides/nics/hns3.rst b/dpdk/doc/guides/nics/hns3.rst +index 84bd7a3c92..9a0196bbcf 100644 +--- a/dpdk/doc/guides/nics/hns3.rst ++++ b/dpdk/doc/guides/nics/hns3.rst +@@ -1,12 +1,12 @@ + .. SPDX-License-Identifier: BSD-3-Clause +- Copyright(c) 2018-2019 Hisilicon Limited. ++ Copyright(c) 2018-2019 HiSilicon Limited. + + HNS3 Poll Mode Driver + =============================== + + The hns3 PMD (**librte_net_hns3**) provides poll mode driver support +-for the inbuilt Hisilicon Network Subsystem(HNS) network engine +-found in the Hisilicon Kunpeng 920 SoC. ++for the inbuilt HiSilicon Network Subsystem(HNS) network engine ++found in the HiSilicon Kunpeng 920 SoC. 
+ + Features + -------- +diff --git a/dpdk/doc/guides/nics/i40e.rst b/dpdk/doc/guides/nics/i40e.rst +index 64f20e7dab..53818fcf4a 100644 +--- a/dpdk/doc/guides/nics/i40e.rst ++++ b/dpdk/doc/guides/nics/i40e.rst +@@ -88,7 +88,15 @@ For X710/XL710/XXV710, + +--------------+-----------------------+------------------+ + | DPDK version | Kernel driver version | Firmware version | + +==============+=======================+==================+ +- | 20.11 | 2.13.10 | 8.00 | ++ | 22.03 | 2.17.15 | 8.30 | ++ +--------------+-----------------------+------------------+ ++ | 21.11 | 2.17.4 | 8.30 | ++ +--------------+-----------------------+------------------+ ++ | 21.08 | 2.15.9 | 8.30 | ++ +--------------+-----------------------+------------------+ ++ | 21.05 | 2.15.9 | 8.30 | ++ +--------------+-----------------------+------------------+ ++ | 20.11 | 2.14.13 | 8.00 | + +--------------+-----------------------+------------------+ + | 20.08 | 2.12.6 | 7.30 | + +--------------+-----------------------+------------------+ +@@ -133,6 +141,14 @@ For X722, + +--------------+-----------------------+------------------+ + | DPDK version | Kernel driver version | Firmware version | + +==============+=======================+==================+ ++ | 22.03 | 2.17.15 | 5.50 | ++ +--------------+-----------------------+------------------+ ++ | 21.11 | 2.17.4 | 5.30 | ++ +--------------+-----------------------+------------------+ ++ | 21.08 | 2.15.9 | 5.30 | ++ +--------------+-----------------------+------------------+ ++ | 21.05 | 2.15.9 | 5.30 | ++ +--------------+-----------------------+------------------+ + | 20.11 | 2.13.10 | 5.00 | + +--------------+-----------------------+------------------+ + | 20.08 | 2.12.6 | 4.11 | +@@ -758,6 +774,13 @@ it will fail and return the info "Conflict with the first rule's input set", + which means the current rule's input set conflicts with the first rule's. + Remove the first rule if want to change the input set of the PCTYPE. + ++PF reset fail after QinQ set with FW >= 8.4 ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++If upgrade FW to version 8.4 and higher, after set MAC VLAN filter and configure outer VLAN on PF, kill ++DPDK process will cause the card crash. ++ ++ + Example of getting best performance with l3fwd example + ------------------------------------------------------ + +diff --git a/dpdk/doc/guides/nics/ice.rst b/dpdk/doc/guides/nics/ice.rst +index ccda26f82f..1cbaf5a399 100644 +--- a/dpdk/doc/guides/nics/ice.rst ++++ b/dpdk/doc/guides/nics/ice.rst +@@ -34,11 +34,21 @@ to avoid the compatibility issues with ice PMD. + Here is the suggested matching list which has been tested and verified. + The detailed information can refer to chapter Tested Platforms/Tested NICs in release notes. 
+ +- +-----------+---------------+-----------------+-----------+-----------+ +- | DPDK | Kernel Driver | OS Default DDP | COMMS DDP | Firmware | +- +===========+===============+=================+===========+===========+ +- | 20.11 | 1.3.0 | 1.3.20 | 1.3.24 | 2.3 | +- +-----------+---------------+-----------------+-----------+-----------+ ++ +-----------+---------------+-----------------+-----------+--------------+-----------+ ++ | DPDK | Kernel Driver | OS Default DDP | COMMS DDP | Wireless DDP | Firmware | ++ +===========+===============+=================+===========+==============+===========+ ++ | 20.11 | 1.3.2 | 1.3.20 | 1.3.24 | N/A | 2.3 | ++ +-----------+---------------+-----------------+-----------+--------------+-----------+ ++ | 21.02 | 1.4.11 | 1.3.24 | 1.3.28 | 1.3.4 | 2.4 | ++ +-----------+---------------+-----------------+-----------+--------------+-----------+ ++ | 21.05 | 1.6.5 | 1.3.26 | 1.3.30 | 1.3.6 | 3.0 | ++ +-----------+---------------+-----------------+-----------+--------------+-----------+ ++ | 21.08 | 1.7.16 | 1.3.27 | 1.3.31 | 1.3.7 | 3.1 | ++ +-----------+---------------+-----------------+-----------+--------------+-----------+ ++ | 21.11 | 1.7.16 | 1.3.27 | 1.3.31 | 1.3.7 | 3.1 | ++ +-----------+---------------+-----------------+-----------+--------------+-----------+ ++ | 22.03 | 1.8.3 | 1.3.28 | 1.3.35 | 1.3.8 | 3.2 | ++ +-----------+---------------+-----------------+-----------+--------------+-----------+ + + Pre-Installation Configuration + ------------------------------ +diff --git a/dpdk/doc/guides/nics/intel_vf.rst b/dpdk/doc/guides/nics/intel_vf.rst +index 529ff4a955..079ff7e4d7 100644 +--- a/dpdk/doc/guides/nics/intel_vf.rst ++++ b/dpdk/doc/guides/nics/intel_vf.rst +@@ -570,7 +570,7 @@ Fast Host-based Packet Processing + + Software Defined Network (SDN) trends are demanding fast host-based packet handling. + In a virtualization environment, +-the DPDK VF PMD driver performs the same throughput result as a non-VT native environment. ++the DPDK VF PMD performs the same throughput result as a non-VT native environment. + + With such host instance fast packet processing, lots of services such as filtering, QoS, + DPI can be offloaded on the host fast path. +diff --git a/dpdk/doc/guides/nics/ixgbe.rst b/dpdk/doc/guides/nics/ixgbe.rst +index 4f4d3b1c2c..388778a01d 100644 +--- a/dpdk/doc/guides/nics/ixgbe.rst ++++ b/dpdk/doc/guides/nics/ixgbe.rst +@@ -81,6 +81,23 @@ To guarantee the constraint, capabilities in dev_conf.rxmode.offloads will be ch + + fdir_conf->mode will also be checked. + ++Disable SDP3 TX_DISABLE for Fiber Links ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++The following ``devargs`` option can be enabled at runtime. It must ++be passed as part of EAL arguments. For example, ++ ++.. code-block:: console ++ ++ dpdk-testpmd -a fiber_sdp3_no_tx_disable=1 -- -i ++ ++- ``fiber_sdp3_no_tx_disable`` (default **0**) ++ ++ Not all IXGBE implementations with SFP cages use the SDP3 signal as ++ TX_DISABLE as a means to disable the laser on fiber SFP modules. ++ This option informs the driver that in this case, SDP3 is not to be ++ used as a check for link up by testing for laser on/off. 
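To make the attachment point of the devarg explicit: it is passed per device on the
``-a``/``--allow`` option, so assuming a fiber port at PCI address 0000:01:00.0
(the address here is only a placeholder), the invocation might look like:

.. code-block:: console

    dpdk-testpmd -a 0000:01:00.0,fiber_sdp3_no_tx_disable=1 -- -i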
++ + VF Runtime Options + ^^^^^^^^^^^^^^^^^^ + +diff --git a/dpdk/doc/guides/nics/kni.rst b/dpdk/doc/guides/nics/kni.rst +index 80ba459d49..7c724dd467 100644 +--- a/dpdk/doc/guides/nics/kni.rst ++++ b/dpdk/doc/guides/nics/kni.rst +@@ -33,7 +33,7 @@ Usage + + EAL ``--vdev`` argument can be used to create KNI device instance, like:: + +- testpmd --vdev=net_kni0 --vdev=net_kn1 -- -i ++ testpmd --vdev=net_kni0 --vdev=net_kni1 -- -i + + Above command will create ``kni0`` and ``kni1`` Linux network interfaces, + those interfaces can be controlled by standard Linux tools. +diff --git a/dpdk/doc/guides/nics/memif.rst b/dpdk/doc/guides/nics/memif.rst +index d783f2d4a4..aca843640b 100644 +--- a/dpdk/doc/guides/nics/memif.rst ++++ b/dpdk/doc/guides/nics/memif.rst +@@ -107,13 +107,13 @@ region n (no-zero-copy): + +-----------------------+-------------------------------------------------------------------------+ + | Rings | Buffers | + +-----------+-----------+-----------------+---+---------------------------------------------------+ +-| S2M rings | M2S rings | packet buffer 0 | . | pb ((1 << pmd->run.log2_ring_size)*(s2m + m2s))-1 | ++| C2S rings | S2C rings | packet buffer 0 | . | pb ((1 << pmd->run.log2_ring_size)*(c2s + s2c))-1 | + +-----------+-----------+-----------------+---+---------------------------------------------------+ + +-S2M OR M2S Rings: ++C2S OR S2C Rings: + + +--------+--------+-----------------------+ +-| ring 0 | ring 1 | ring num_s2m_rings - 1| ++| ring 0 | ring 1 | ring num_c2s_rings - 1| + +--------+--------+-----------------------+ + + ring 0: +@@ -123,8 +123,8 @@ ring 0: + +-------------+---------------------------------------+ + + Descriptors are assigned packet buffers in order of rings creation. If we have one ring +-in each direction and ring size is 1024, then first 1024 buffers will belong to S2M ring and +-last 1024 will belong to M2S ring. In case of zero-copy, buffers are dequeued and ++in each direction and ring size is 1024, then first 1024 buffers will belong to C2S ring and ++last 1024 will belong to S2C ring. In case of zero-copy, buffers are dequeued and + enqueued as needed. + + **Descriptor format** +@@ -193,7 +193,7 @@ region 0: + +-----------------------+ + | Rings | + +-----------+-----------+ +-| S2M rings | M2S rings | ++| C2S rings | S2C rings | + +-----------+-----------+ + + region n: +diff --git a/dpdk/doc/guides/nics/mlx4.rst b/dpdk/doc/guides/nics/mlx4.rst +index 354c2bb82b..71705feb9b 100644 +--- a/dpdk/doc/guides/nics/mlx4.rst ++++ b/dpdk/doc/guides/nics/mlx4.rst +@@ -14,7 +14,7 @@ the `Mellanox website `_. Help is also provided by + the `Mellanox community `_. + + There is also a `section dedicated to this poll mode driver +-`_. ++`_. + + + Implementation details +@@ -178,7 +178,7 @@ DPDK and must be installed separately: + + - mlx4_core: hardware driver managing Mellanox ConnectX-3 devices. + - mlx4_en: Ethernet device driver that provides kernel network interfaces. +- - mlx4_ib: InifiniBand device driver. ++ - mlx4_ib: InfiniBand device driver. + - ib_uverbs: user space driver for verbs (entry point for libibverbs). + + - **Firmware update** +@@ -219,7 +219,7 @@ Mellanox OFED as a fallback + - `Mellanox OFED`_ version: **4.4, 4.5, 4.6**. + - firmware version: **2.42.5000** and above. + +-.. _`Mellanox OFED`: http://www.mellanox.com/page/products_dyn?product_family=26&mtag=linux_sw_drivers ++.. _`Mellanox OFED`: https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/ + + .. 
note:: + +diff --git a/dpdk/doc/guides/nics/mlx5.rst b/dpdk/doc/guides/nics/mlx5.rst +index 24d5a69227..9322c5327a 100644 +--- a/dpdk/doc/guides/nics/mlx5.rst ++++ b/dpdk/doc/guides/nics/mlx5.rst +@@ -19,7 +19,7 @@ Information and documentation about these adapters can be found on the + `Mellanox community `__. + + There is also a `section dedicated to this poll mode driver +-`__. ++`_. + + + Design +@@ -325,6 +325,8 @@ Limitations + - Supports ``RTE_FLOW_ACTION_TYPE_SAMPLE`` action only within NIC Rx and E-Switch steering domain. + - The E-Switch Sample flow must have the eswitch_manager VPORT destination (PF or ECPF) and no additional actions. + - For ConnectX-5, the ``RTE_FLOW_ACTION_TYPE_SAMPLE`` is typically used as first action in the E-Switch egress flow if with header modify or encapsulation actions. ++ - For ConnectX-5 trusted device, the application metadata with SET_TAG index 0 ++ is not supported before ``RTE_FLOW_ACTION_TYPE_SAMPLE`` action. + + - IPv6 header item 'proto' field, indicating the next header protocol, should + not be set as extension header. +@@ -338,6 +340,22 @@ Limitations + - Hairpin between two ports could only manual binding and explicit Tx flow mode. For single port hairpin, all the combinations of auto/manual binding and explicit/implicit Tx flow mode could be supported. + - Hairpin in switchdev SR-IOV mode is not supported till now. + ++- Timestamps: ++ ++ - CQE timestamp field width is limited by hardware to 63 bits, MSB is zero. ++ - In the free-running mode the timestamp counter is reset on power on ++ and 63-bit value provides over 1800 years of uptime till overflow. ++ - In the real-time mode ++ (configurable with ``REAL_TIME_CLOCK_ENABLE`` firmware settings), ++ the timestamp presents the nanoseconds elapsed since 01-Jan-1970, ++ hardware timestamp overflow will happen on 19-Jan-2038 ++ (0x80000000 seconds since 01-Jan-1970). ++ - The send scheduling is based on timestamps ++ from the reference "Clock Queue" completions, ++ the scheduled send timestamps should not be specified with non-zero MSB. ++ ++- The NIC egress flow rules on representor port are not supported. ++ + Statistics + ---------- + +@@ -384,15 +402,6 @@ Environment variables + The register would be flushed to HW usually when the write-combining buffer + becomes full, but it depends on CPU design. + +- Except for vectorized Tx burst routines, a write memory barrier is enforced +- after updating the register so that the update can be immediately visible to +- HW. +- +- When vectorized Tx burst is called, the barrier is set only if the burst size +- is not aligned to MLX5_VPMD_TX_MAX_BURST. However, setting this environmental +- variable will bring better latency even though the maximum throughput can +- slightly decline. +- + Run-time configuration + ~~~~~~~~~~~~~~~~~~~~~~ + +@@ -591,6 +600,13 @@ Driver options + it is not recommended and may prevent NIC from sending packets over + some configurations. + ++ For ConnectX-4 and ConnectX-4 Lx NICs, automatically configured value ++ is insufficient for some traffic, because they require at least all L2 headers ++ to be inlined. For example, Q-in-Q adds 4 bytes to default 18 bytes ++ of Ethernet and VLAN, thus ``txq_inline_min`` must be set to 22. ++ MPLS would add 4 bytes per label. Final value must account for all possible ++ L2 encapsulation headers used in particular environment. 
++ + Please, note, this minimal data inlining disengages eMPW feature (Enhanced + Multi-Packet Write), because last one does not support partial packet inlining. + This is not very critical due to minimal data inlining is mostly required +@@ -830,7 +846,7 @@ Driver options + + For the MARK action the last 16 values in the full range are reserved for + internal PMD purposes (to emulate FLAG action). The valid range for the +- MARK action values is 0-0xFFEF for the 16-bit mode and 0-xFFFFEF ++ MARK action values is 0-0xFFEF for the 16-bit mode and 0-0xFFFFEF + for the 24-bit mode, the flows with the MARK action value outside + the specified range will be rejected. + +@@ -1028,6 +1044,10 @@ Below are some firmware configurations listed. + FLEX_PARSER_PROFILE_ENABLE=4 + PROG_PARSE_GRAPH=1 + ++- enable realtime timestamp format:: ++ ++ REAL_TIME_CLOCK_ENABLE=1 ++ + Prerequisites + ------------- + +@@ -1066,7 +1086,7 @@ DPDK and must be installed separately: + - mlx5_core: hardware driver managing Mellanox + ConnectX-4/ConnectX-5/ConnectX-6/BlueField devices and related Ethernet kernel + network devices. +- - mlx5_ib: InifiniBand device driver. ++ - mlx5_ib: InfiniBand device driver. + - ib_uverbs: user space driver for Verbs (entry point for libibverbs). + + - **Firmware update** +@@ -1131,9 +1151,9 @@ managers on most distributions, this PMD requires Ethernet extensions that + may not be supported at the moment (this is a work in progress). + + `Mellanox OFED +-`__ and ++`__ and + `Mellanox EN +-`__ ++`__ + include the necessary support and should be used in the meantime. For DPDK, + only libibverbs, libmlx5, mlnx-ofed-kernel packages and firmware updates are + required from that distribution. +@@ -1281,7 +1301,7 @@ the DPDK application. + + echo -n " /sys/bus/pci/drivers/mlx5_core/unbind + +-5. Enbale switchdev mode:: ++5. Enable switchdev mode:: + + echo switchdev > /sys/class/net//compat/devlink/mode + +diff --git a/dpdk/doc/guides/nics/netvsc.rst b/dpdk/doc/guides/nics/netvsc.rst +index 19f9940fe6..77efe1dc91 100644 +--- a/dpdk/doc/guides/nics/netvsc.rst ++++ b/dpdk/doc/guides/nics/netvsc.rst +@@ -14,7 +14,7 @@ checksum and segmentation offloads. + Features and Limitations of Hyper-V PMD + --------------------------------------- + +-In this release, the hyper PMD driver provides the basic functionality of packet reception and transmission. ++In this release, the hyper PMD provides the basic functionality of packet reception and transmission. + + * It supports merge-able buffers per packet when receiving packets and scattered buffer per packet + when transmitting packets. The packet size supported is from 64 to 65536. +@@ -62,7 +62,7 @@ store it in a shell variable: + + .. _`UUID`: https://en.wikipedia.org/wiki/Universally_unique_identifier + +-There are several possible ways to assign the uio device driver for a device. ++There are several possible ways to assign the UIO device driver for a device. + The easiest way (but only on 4.18 or later) + is to use the `driverctl Device Driver control utility`_ to override + the normal kernel device. +diff --git a/dpdk/doc/guides/nics/nfp.rst b/dpdk/doc/guides/nics/nfp.rst +index fef99973b6..30cdc69202 100644 +--- a/dpdk/doc/guides/nics/nfp.rst ++++ b/dpdk/doc/guides/nics/nfp.rst +@@ -14,7 +14,7 @@ This document explains how to use DPDK with the Netronome Poll Mode + Driver (PMD) supporting Netronome's Network Flow Processor 6xxx + (NFP-6xxx) and Netronome's Flow Processor 4xxx (NFP-4xxx). 
+ +-NFP is a SRIOV capable device and the PMD driver supports the physical ++NFP is a SRIOV capable device and the PMD supports the physical + function (PF) and the virtual functions (VFs). + + Dependencies +@@ -117,15 +117,15 @@ although once they are created, DPDK apps should be able to use them as normal + PCI ports. + + NFP ports belonging to same PF can be seen inside PMD initialization with a +-suffix added to the PCI ID: wwww:xx:yy.z_port_n. For example, a PF with PCI ID ++suffix added to the PCI ID: wwww:xx:yy.z_portn. For example, a PF with PCI ID + 0000:03:00.0 and four ports is seen by the PMD code as: + + .. code-block:: console + +- 0000:03:00.0_port_0 +- 0000:03:00.0_port_1 +- 0000:03:00.0_port_2 +- 0000:03:00.0_port_3 ++ 0000:03:00.0_port0 ++ 0000:03:00.0_port1 ++ 0000:03:00.0_port2 ++ 0000:03:00.0_port3 + + .. Note:: + +diff --git a/dpdk/doc/guides/nics/octeontx.rst b/dpdk/doc/guides/nics/octeontx.rst +index b1a868b054..c348dfce74 100644 +--- a/dpdk/doc/guides/nics/octeontx.rst ++++ b/dpdk/doc/guides/nics/octeontx.rst +@@ -108,7 +108,7 @@ for details. + Initialization + -------------- + +-The OCTEON TX ethdev pmd is exposed as a vdev device which consists of a set ++The OCTEON TX ethdev PMD is exposed as a vdev device which consists of a set + of PKI and PKO PCIe VF devices. On EAL initialization, + PKI/PKO PCIe VF devices will be probed and then the vdev device can be created + from the application code, or from the EAL command line based on +@@ -126,7 +126,7 @@ the number of interesting ports with ``nr_ports`` argument. + + Dependency + ~~~~~~~~~~ +-``eth_octeontx`` pmd is depend on ``event_octeontx`` eventdev device and ++``eth_octeontx`` PMD is depend on ``event_octeontx`` eventdev device and + ``octeontx_fpavf`` external mempool handler. + + Example: +diff --git a/dpdk/doc/guides/nics/octeontx2.rst b/dpdk/doc/guides/nics/octeontx2.rst +index a4f224424e..31d1cb5688 100644 +--- a/dpdk/doc/guides/nics/octeontx2.rst ++++ b/dpdk/doc/guides/nics/octeontx2.rst +@@ -153,7 +153,7 @@ Runtime Config Options + + -a 0002:02:00.0,max_sqb_count=64 + +- With the above configuration, each send queue's decscriptor buffer count is ++ With the above configuration, each send queue's descriptor buffer count is + limited to a maximum of 64 buffers. + + - ``Switch header enable`` (default ``none``) +@@ -242,7 +242,7 @@ configure the following features: + #. Hierarchical scheduling + #. Single rate - Two color, Two rate - Three color shaping + +-Both DWRR and Static Priority(SP) hierarchial scheduling is supported. ++Both DWRR and Static Priority(SP) hierarchical scheduling is supported. + + Every parent can have atmost 10 SP Children and unlimited DWRR children. + +@@ -256,7 +256,7 @@ Limitations + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + The OCTEON TX2 SoC family NIC has inbuilt HW assisted external mempool manager. +-``net_octeontx2`` pmd only works with ``mempool_octeontx2`` mempool handler ++``net_octeontx2`` PMD only works with ``mempool_octeontx2`` mempool handler + as it is performance wise most effective way for packet allocation and Tx buffer + recycling on OCTEON TX2 SoC platform. + +@@ -269,7 +269,7 @@ the host interface irrespective of the offload configuration. + Multicast MAC filtering + ~~~~~~~~~~~~~~~~~~~~~~~ + +-``net_octeontx2`` pmd supports multicast mac filtering feature only on physical ++``net_octeontx2`` PMD supports multicast mac filtering feature only on physical + function devices. 
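Applications usually program such a multicast filter through the generic ethdev API. The following minimal sketch assumes the port is an octeontx2 physical function and uses placeholder multicast MAC addresses.

.. code-block:: c

    #include <rte_ethdev.h>

    static int
    set_mc_filter(uint16_t port_id)
    {
        struct rte_ether_addr mc_addrs[] = {
            { .addr_bytes = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 } },
            { .addr_bytes = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0xfb } },
        };

        /* Installs (replaces) the list of multicast addresses to accept. */
        return rte_eth_dev_set_mc_addr_list(port_id, mc_addrs, 2);
    }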
+ + SDP interface support +@@ -278,7 +278,7 @@ OCTEON TX2 SDP interface support is limited to PF device, No VF support. + + Inline Protocol Processing + ~~~~~~~~~~~~~~~~~~~~~~~~~~ +-``net_octeontx2`` pmd doesn't support the following features for packets to be ++``net_octeontx2`` PMD doesn't support the following features for packets to be + inline protocol processed. + - TSO offload + - VLAN/QinQ offload +diff --git a/dpdk/doc/guides/nics/thunderx.rst b/dpdk/doc/guides/nics/thunderx.rst +index 12d43ce93e..f5d7d20576 100644 +--- a/dpdk/doc/guides/nics/thunderx.rst ++++ b/dpdk/doc/guides/nics/thunderx.rst +@@ -199,7 +199,7 @@ Each port consists of a primary VF and n secondary VF(s). Each VF provides 8 Tx/ + When a given port is configured to use more than 8 queues, it requires one (or more) secondary VF. + Each secondary VF adds 8 additional queues to the queue set. + +-During PMD driver initialization, the primary VF's are enumerated by checking the ++During PMD initialization, the primary VF's are enumerated by checking the + specific flag (see sqs message in DPDK boot log - sqs indicates secondary queue set). + They are at the beginning of VF list (the remain ones are secondary VF's). + +@@ -371,7 +371,7 @@ Module params + skip_data_bytes + ~~~~~~~~~~~~~~~ + This feature is used to create a hole between HEADROOM and actual data. Size of hole is specified +-in bytes as module param("skip_data_bytes") to pmd. ++in bytes as module param("skip_data_bytes") to PMD. + This scheme is useful when application would like to insert vlan header without disturbing HEADROOM. + + Example: +diff --git a/dpdk/doc/guides/nics/virtio.rst b/dpdk/doc/guides/nics/virtio.rst +index c03c2d0fed..f96df3fb62 100644 +--- a/dpdk/doc/guides/nics/virtio.rst ++++ b/dpdk/doc/guides/nics/virtio.rst +@@ -17,7 +17,7 @@ With this enhancement, virtio could achieve quite promising performance. + For basic qemu-KVM installation and other Intel EM poll mode driver in guest VM, + please refer to Chapter "Driver for VM Emulated Devices". + +-In this chapter, we will demonstrate usage of virtio PMD driver with two backends, ++In this chapter, we will demonstrate usage of virtio PMD with two backends, + standard qemu vhost back end and vhost kni back end. + + Virtio Implementation in DPDK +@@ -40,7 +40,7 @@ end if necessary. + Features and Limitations of virtio PMD + -------------------------------------- + +-In this release, the virtio PMD driver provides the basic functionality of packet reception and transmission. ++In this release, the virtio PMD provides the basic functionality of packet reception and transmission. + + * It supports merge-able buffers per packet when receiving packets and scattered buffer per packet + when transmitting packets. The packet size supported is from 64 to 1518. +@@ -71,7 +71,7 @@ In this release, the virtio PMD driver provides the basic functionality of packe + + * Virtio supports software vlan stripping and inserting. + +-* Virtio supports using port IO to get PCI resource when uio/igb_uio module is not available. ++* Virtio supports using port IO to get PCI resource when UIO module is not available. + + Prerequisites + ------------- +@@ -103,7 +103,8 @@ Host2VM communication example + + insmod rte_kni.ko + +- Other basic DPDK preparations like hugepage enabling, uio port binding are not listed here. ++ Other basic DPDK preparations like hugepage enabling, ++ UIO port binding are not listed here. + Please refer to the *DPDK Getting Started Guide* for detailed instructions. + + #. 
Launch the kni user application: +@@ -508,7 +509,7 @@ are shown in below table: + Split virtqueue in-order non-mergeable path virtio_recv_pkts_inorder virtio_xmit_pkts_inorder + Split virtqueue vectorized Rx path virtio_recv_pkts_vec virtio_xmit_pkts + Packed virtqueue mergeable path virtio_recv_mergeable_pkts_packed virtio_xmit_pkts_packed +- Packed virtqueue non-meregable path virtio_recv_pkts_packed virtio_xmit_pkts_packed ++ Packed virtqueue non-mergeable path virtio_recv_pkts_packed virtio_xmit_pkts_packed + Packed virtqueue in-order mergeable path virtio_recv_mergeable_pkts_packed virtio_xmit_pkts_packed + Packed virtqueue in-order non-mergeable path virtio_recv_pkts_packed virtio_xmit_pkts_packed + Packed virtqueue vectorized Rx path virtio_recv_pkts_packed_vec virtio_xmit_pkts_packed +diff --git a/dpdk/doc/guides/nics/vmxnet3.rst b/dpdk/doc/guides/nics/vmxnet3.rst +index ae146f0d55..190cf91a47 100644 +--- a/dpdk/doc/guides/nics/vmxnet3.rst ++++ b/dpdk/doc/guides/nics/vmxnet3.rst +@@ -119,7 +119,8 @@ This section describes an example setup for Phy-vSwitch-VM-Phy communication. + + .. note:: + +- Other instructions on preparing to use DPDK such as, hugepage enabling, uio port binding are not listed here. ++ Other instructions on preparing to use DPDK such as, ++ hugepage enabling, UIO port binding are not listed here. + Please refer to *DPDK Getting Started Guide and DPDK Sample Application's User Guide* for detailed instructions. + + The packet reception and transmission flow path is:: +diff --git a/dpdk/doc/guides/platform/dpaa.rst b/dpdk/doc/guides/platform/dpaa.rst +index 20a0e39329..389692907d 100644 +--- a/dpdk/doc/guides/platform/dpaa.rst ++++ b/dpdk/doc/guides/platform/dpaa.rst +@@ -78,7 +78,7 @@ compatible board: + based config (if /tmp/fmc.bin is present). DPAA FMD will be used only if no + previous fmc config is existing. + +- Note that fmlib based integratin rely on underlying fmd driver in kernel, ++ Note that fmlib based integration rely on underlying fmd driver in kernel, + which is available as part of NXP kernel or NXP SDK. + + The following dependencies are not part of DPDK and must be installed +diff --git a/dpdk/doc/guides/prog_guide/bbdev.rst b/dpdk/doc/guides/prog_guide/bbdev.rst +index 6b2bd54e1a..9619280ffc 100644 +--- a/dpdk/doc/guides/prog_guide/bbdev.rst ++++ b/dpdk/doc/guides/prog_guide/bbdev.rst +@@ -639,7 +639,7 @@ optionally the ``soft_output`` mbuf data pointers. + "soft output","soft LLR output buffer (optional)" + "op_flags","bitmask of all active operation capabilities" + "rv_index","redundancy version index [0..3]" +- "iter_max","maximum number of iterations to perofrm in decode all CBs" ++ "iter_max","maximum number of iterations to perform in decode all CBs" + "iter_min","minimum number of iterations to perform in decoding all CBs" + "iter_count","number of iterations to performed in decoding all CBs" + "ext_scale","scale factor on extrinsic info (5 bits)" +diff --git a/dpdk/doc/guides/prog_guide/bpf_lib.rst b/dpdk/doc/guides/prog_guide/bpf_lib.rst +index 1feb7734a3..1cf2d59429 100644 +--- a/dpdk/doc/guides/prog_guide/bpf_lib.rst ++++ b/dpdk/doc/guides/prog_guide/bpf_lib.rst +@@ -10,7 +10,7 @@ user-space dpdk application. + + It supports basic set of features from eBPF spec. + Please refer to the +-`eBPF spec ` ++`eBPF spec `_ + for more information. + Also it introduces basic framework to load/unload BPF-based filters + on eth devices (right now only via SW RX/TX callbacks). 
+@@ -48,9 +48,9 @@ For example, ``(BPF_IND | BPF_W | BPF_LD)`` means: + .. code-block:: c + + uint32_t tmp; +- R0 = rte_pktmbuf_read((const struct rte_mbuf *)R6, src_reg + imm32, +- sizeof(tmp), &tmp); +- if (R0 == NULL) return FAILED; ++ R0 = rte_pktmbuf_read((const struct rte_mbuf *)R6, src_reg + imm32, sizeof(tmp), &tmp); ++ if (R0 == NULL) ++ return FAILED; + R0 = ntohl(*(uint32_t *)R0); + + and ``R1-R5`` were scratched. +diff --git a/dpdk/doc/guides/prog_guide/compressdev.rst b/dpdk/doc/guides/prog_guide/compressdev.rst +index 231687d891..2a59c434c1 100644 +--- a/dpdk/doc/guides/prog_guide/compressdev.rst ++++ b/dpdk/doc/guides/prog_guide/compressdev.rst +@@ -2,7 +2,7 @@ + Copyright(c) 2017-2018 Cavium Networks. + + Compression Device Library +-=========================== ++========================== + + The compression framework provides a generic set of APIs to perform compression services + as well as to query and configure compression devices both physical(hardware) and virtual(software) +@@ -28,14 +28,14 @@ From the command line using the --vdev EAL option + + .. code-block:: console + +- --vdev ',socket_id=0' ++ --vdev ',socket_id=0' + + .. Note:: + +- * If DPDK application requires multiple software compression PMD devices then required +- number of ``--vdev`` with appropriate libraries are to be added. ++ * If a DPDK application requires multiple software compression PMD devices then the ++ required number of ``--vdev`` args with appropriate libraries are to be added. + +- * An Application with multiple compression device instances exposed by the same PMD must ++ * An application with multiple compression device instances exposed by the same PMD must + specify a unique name for each device. + + Example: ``--vdev 'pmd0' --vdev 'pmd1'`` +@@ -53,7 +53,7 @@ All virtual compression devices support the following initialization parameters: + Device Identification + ~~~~~~~~~~~~~~~~~~~~~ + +-Each device, whether virtual or physical is uniquely designated by two ++Each device, whether virtual or physical, is uniquely designated by two + identifiers: + + - A unique device index used to designate the compression device in all functions +@@ -76,7 +76,7 @@ The ``rte_compressdev_configure`` API is used to configure a compression device. + The ``rte_compressdev_config`` structure is used to pass the configuration + parameters. + +-See *DPDK API Reference* for details. ++See the `DPDK API Reference `_ for details. + + Configuration of Queue Pairs + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@@ -85,87 +85,88 @@ Each compression device queue pair is individually configured through the + ``rte_compressdev_queue_pair_setup`` API. + + The ``max_inflight_ops`` is used to pass maximum number of +-rte_comp_op that could be present in a queue at-a-time. +-PMD then can allocate resources accordingly on a specified socket. ++``rte_comp_op`` that could be present in a queue at a time. ++The PMD can then allocate resources accordingly on a specified socket. + +-See *DPDK API Reference* for details. ++See the `DPDK API Reference `_ for details. + +-Logical Cores, Memory and Queues Pair Relationships +-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++Logical Cores, Memory and Queue Pair Relationships ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-Library supports NUMA similarly as described in Cryptodev library section. ++The Compressdev library supports NUMA similarly as described in Cryptodev library section. 
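A minimal sketch of the device and queue pair configuration sequence described above, assuming one queue pair on socket 0 and illustrative sizing values.

.. code-block:: c

    #include <rte_compressdev.h>

    static int
    setup_compressdev(uint8_t cdev_id)
    {
        struct rte_compressdev_config conf = {
            .socket_id = 0,
            .nb_queue_pairs = 1,
            .max_nb_priv_xforms = 1,
            .max_nb_streams = 0,
        };

        if (rte_compressdev_configure(cdev_id, &conf) < 0)
            return -1;

        /* max_inflight_ops bounds how many rte_comp_op may sit in the queue. */
        if (rte_compressdev_queue_pair_setup(cdev_id, 0, 512, 0) < 0)
            return -1;

        return rte_compressdev_start(cdev_id);
    }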
+ +-A queue pair cannot be shared and should be exclusively used by a single processing +-context for enqueuing operations or dequeuing operations on the same compression device ++A queue pair cannot be shared, and should be exclusively used by a single processing ++context for enqueuing operations or dequeuing operations on the same compression device, + since sharing would require global locks and hinder performance. It is however possible + to use a different logical core to dequeue an operation on a queue pair from the logical +-core on which it was enqueued. This means that a compression burst enqueue/dequeue ++core on which it was enqueued. This means that for a compression burst, enqueue/dequeue + APIs are a logical place to transition from one logical core to another in a + data processing pipeline. + + Device Features and Capabilities +---------------------------------- ++-------------------------------- + + Compression devices define their functionality through two mechanisms, global device +-features and algorithm features. Global devices features identify device +-wide level features which are applicable to the whole device such as supported hardware ++features and algorithm features. Global device features identify device ++wide level features which are applicable to the whole device, such as supported hardware + acceleration and CPU features. List of compression device features can be seen in the + RTE_COMPDEV_FF_XXX macros. + +-The algorithm features lists individual algo feature which device supports per-algorithm, +-such as a stateful compression/decompression, checksums operation etc. List of algorithm +-features can be seen in the RTE_COMP_FF_XXX macros. ++The algorithm features are features which the device supports per-algorithm, ++such as a stateful compression/decompression, checksums operation etc. ++The list of algorithm features can be seen in the RTE_COMP_FF_XXX macros. + + Capabilities + ~~~~~~~~~~~~ + Each PMD has a list of capabilities, including algorithms listed in +-enum ``rte_comp_algorithm`` and its associated feature flag and +-sliding window range in log base 2 value. Sliding window tells +-the minimum and maximum size of lookup window that algorithm uses ++the enum ``rte_comp_algorithm``, its associated feature flag, and ++sliding window range in log base 2 value. The sliding window range ++defines the minimum and maximum size of a lookup window that an algorithm uses + to find duplicates. + +-See *DPDK API Reference* for details. ++See the `DPDK API Reference `_ for details. + + Each Compression poll mode driver defines its array of capabilities +-for each algorithm it supports. See PMD implementation for capability ++for each algorithm it supports. See the PMD implementation for capability + initialization. + + Capabilities Discovery + ~~~~~~~~~~~~~~~~~~~~~~ + +-PMD capability and features are discovered via ``rte_compressdev_info_get`` function. ++PMD capability and features are discovered via the ``rte_compressdev_info_get`` function. + + The ``rte_compressdev_info`` structure contains all the relevant information for the device. + +-See *DPDK API Reference* for details. ++See the `DPDK API Reference `_ for details. 
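A minimal sketch of capability discovery through ``rte_compressdev_info_get``, assuming the algorithm and feature flag names from ``rte_comp.h``; it checks whether a device advertises stateful DEFLATE decompression.

.. code-block:: c

    #include <rte_compressdev.h>
    #include <rte_comp.h>

    static int
    supports_stateful_deflate_decompression(uint8_t cdev_id)
    {
        struct rte_compressdev_info info;
        const struct rte_compressdev_capabilities *cap;

        rte_compressdev_info_get(cdev_id, &info);

        /* The capability array is terminated by RTE_COMP_ALGO_UNSPECIFIED. */
        for (cap = info.capabilities;
             cap != NULL && cap->algo != RTE_COMP_ALGO_UNSPECIFIED;
             cap++) {
            if (cap->algo == RTE_COMP_ALGO_DEFLATE &&
                (cap->comp_feature_flags & RTE_COMP_FF_STATEFUL_DECOMPRESSION))
                return 1;
        }

        return 0;
    }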
+ + Compression Operation +----------------------- ++--------------------- + + DPDK compression supports two types of compression methodologies: + +-- Stateless, data associated to a compression operation is compressed without any reference ++- Stateless - data associated with a compression operation is compressed without any reference + to another compression operation. + +-- Stateful, data in each compression operation is compressed with reference to previous compression ++- Stateful - data in each compression operation is compressed with reference to previous compression + operations in the same data stream i.e. history of data is maintained between the operations. + +-For more explanation, please refer RFC https://www.ietf.org/rfc/rfc1951.txt ++For more explanation, please refer to the RFC https://www.ietf.org/rfc/rfc1951.txt + + Operation Representation + ~~~~~~~~~~~~~~~~~~~~~~~~ + +-Compression operation is described via ``struct rte_comp_op``, which contains both input and ++A compression operation is described via ``struct rte_comp_op``, which contains both input and + output data. The operation structure includes the operation type (stateless or stateful), +-the operation status and the priv_xform/stream handle, source, destination and checksum buffer ++the operation status, the priv_xform/stream handle, source, destination and checksum buffer + pointers. It also contains the source mempool from which the operation is allocated. +-PMD updates consumed field with amount of data read from source buffer and produced +-field with amount of data of written into destination buffer along with status of +-operation. See section *Produced, Consumed And Operation Status* for more details. +- +-Compression operations mempool also has an ability to allocate private memory with the +-operation for application's purposes. Application software is responsible for specifying +-all the operation specific fields in the ``rte_comp_op`` structure which are then used ++The PMD updates the consumed field with the amount of data read from the source buffer, ++and the produced field with the amount of data written into the destination buffer, ++along with status of operation. ++See the section :ref:`compressdev_prod_cons_op_status`: for more details. ++ ++The compression operations mempool also has the ability to allocate private memory with the ++operation for the application's use. The application software is responsible for specifying ++all the operation specific fields in the ``rte_comp_op`` structure, which are then used + by the compression PMD to process the requested operation. + + +@@ -181,27 +182,27 @@ A ``rte_comp_op`` contains a field indicating the pool it originated from. + + ``rte_comp_op_alloc()`` and ``rte_comp_op_bulk_alloc()`` are used to allocate + compression operations from a given compression operation mempool. +-The operation gets reset before being returned to a user so that operation ++The operation gets reset before being returned to a user so that the operation + is always in a good known state before use by the application. + + ``rte_comp_op_free()`` is called by the application to return an operation to + its allocating pool. + +-See *DPDK API Reference* for details. ++See the `DPDK API Reference `_ for details. 
+ + Passing source data as mbuf-chain +-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + If input data is scattered across several different buffers, then +-Application can either parse through all such buffers and make one ++the application can either parse through all such buffers and make one + mbuf-chain and enqueue it for processing or, alternatively, it can +-make multiple sequential enqueue_burst() calls for each of them +-processing them statefully. See *Compression API Stateful Operation* ++make multiple sequential enqueue_burst() calls for each of them, ++processing them statefully. See :ref:`compressdev_stateful_op`: + for stateful processing of ops. + + Operation Status + ~~~~~~~~~~~~~~~~ +-Each operation carries a status information updated by PMD after it is processed. +-Following are currently supported: ++Each operation carries status information updated by the PMD after it is processed. ++The following are currently supported: + + - RTE_COMP_OP_STATUS_SUCCESS, + Operation is successfully completed +@@ -225,22 +226,25 @@ Following are currently supported: + - RTE_COMP_OP_STATUS_OUT_OF_SPACE_RECOVERABLE, + Output buffer ran out of space before operation completed, but this + is not an error case. Output data up to op.produced can be used and +- next op in the stream should continue on from op.consumed+1. ++ the next op in the stream should continue on from op.consumed+1. + + Operation status after enqueue / dequeue + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Some of the above values may arise in the op after an +-``rte_compressdev_enqueue_burst()``. If number ops enqueued < number ops requested then +-the app should check the op.status of nb_enqd+1. If status is RTE_COMP_OP_STATUS_NOT_PROCESSED, +-it likely indicates a full-queue case for a hardware device and a retry after dequeuing some ops is likely +-to be successful. If the op holds any other status, e.g. RTE_COMP_OP_STATUS_INVALID_ARGS, a retry with ++``rte_compressdev_enqueue_burst()``. If the number of ops enqueued < the number of ops requested ++then the app should check the op.status of nb_enqd+1. ++If the status is RTE_COMP_OP_STATUS_NOT_PROCESSED, it likely indicates a full-queue case for a ++hardware device, and a retry after dequeuing some ops is likely to be successful. ++If the op holds any other status, e.g. RTE_COMP_OP_STATUS_INVALID_ARGS, a retry with + the same op is unlikely to be successful. + + ++.. _compressdev_prod_cons_op_status: ++ + Produced, Consumed And Operation Status + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-- If status is RTE_COMP_OP_STATUS_SUCCESS, ++- If the status is RTE_COMP_OP_STATUS_SUCCESS, + consumed = amount of data read from input buffer, and + produced = amount of data written in destination buffer + - If status is RTE_COMP_OP_STATUS_ERROR, +@@ -253,37 +257,37 @@ Produced, Consumed And Operation Status + - If status is RTE_COMP_OP_STATUS_OUT_OF_SPACE_RECOVERABLE, + consumed = amount of data read, and + produced = amount of data successfully produced until +- out of space condition hit. PMD has ability to recover +- from here, so application can submit next op from +- consumed+1 and a destination buffer with available space. ++ out of space condition hit. The PMD has ability to recover ++ from here, so an application can submit the next op from ++ consumed+1, and a destination buffer with available space. 
+ + Transforms + ---------- + + Compression transforms (``rte_comp_xform``) are the mechanism + to specify the details of the compression operation such as algorithm, +-window size and checksum. ++window size, and checksum. + + Compression API Hash support + ---------------------------- + +-Compression API allows application to enable digest calculation ++The compression API allows an application to enable digest calculation + alongside compression and decompression of data. A PMD reflects its + support for hash algorithms via capability algo feature flags. +-If supported, PMD calculates digest always on plaintext i.e. ++If supported, the PMD always calculates the digest on plaintext i.e. + before compression and after decompression. + + Currently supported list of hash algos are SHA-1 and SHA2 family + SHA256. + +-See *DPDK API Reference* for details. ++See the `DPDK API Reference `_ for details. + +-If required, application should set valid hash algo in compress ++If required, the application should set the valid hash algo in compress + or decompress xforms during ``rte_compressdev_stream_create()`` +-or ``rte_compressdev_private_xform_create()`` and pass a valid ++or ``rte_compressdev_private_xform_create()``, and pass a valid + output buffer in ``rte_comp_op`` hash field struct to store the +-resulting digest. Buffer passed should be contiguous and large +-enough to store digest which is 20 bytes for SHA-1 and ++resulting digest. The buffer passed should be contiguous and large ++enough to store digest, which is 20 bytes for SHA-1 and + 32 bytes for SHA2-256. + + Compression API Stateless operation +@@ -295,20 +299,21 @@ An op is processed stateless if it has + (required only on compression side), + - All required input in source buffer + +-When all of the above conditions are met, PMD initiates stateless processing ++When all of the above conditions are met, the PMD initiates stateless processing + and releases acquired resources after processing of current operation is +-complete. Application can enqueue multiple stateless ops in a single burst ++complete. The application can enqueue multiple stateless ops in a single burst + and must attach priv_xform handle to such ops. + + priv_xform in Stateless operation + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-priv_xform is PMD internally managed private data that it maintains to do stateless processing. +-priv_xforms are initialized provided a generic xform structure by an application via making call +-to ``rte_compressdev_private_xform_create``, at an output PMD returns an opaque priv_xform reference. +-If PMD support SHAREABLE priv_xform indicated via algorithm feature flag, then application can +-attach same priv_xform with many stateless ops at-a-time. If not, then application needs to +-create as many priv_xforms as it expects to have stateless operations in-flight. ++A priv_xform is private data managed internally by the PMD to do stateless processing. ++A priv_xform is initialized by an application providing a generic xform structure ++to ``rte_compressdev_private_xform_create``, which returns an opaque priv_xform reference. ++If the PMD supports SHAREABLE priv_xform, indicated via algorithm feature flag, ++then the application can attach the same priv_xform with many stateless ops at a time. ++If not, then the application needs to create as many priv_xforms as it expects to have ++stateless operations in-flight. + + .. 
figure:: img/stateless-op.* + +@@ -320,8 +325,9 @@ create as many priv_xforms as it expects to have stateless operations in-flight. + Stateless Ops using Shareable priv_xform + + +-Application should call ``rte_compressdev_private_xform_create()`` and attach to stateless op before +-enqueuing them for processing and free via ``rte_compressdev_private_xform_free()`` during termination. ++The application should call ``rte_compressdev_private_xform_create()`` and attach it to a stateless ++op before enqueuing them for processing and free via ``rte_compressdev_private_xform_free()`` ++during termination. + + An example pseudocode to setup and process NUM_OPS stateless ops with each of length OP_LEN + using priv_xform would look like: +@@ -332,7 +338,7 @@ using priv_xform would look like: + * pseudocode for stateless compression + */ + +- uint8_t cdev_id = rte_compressdev_get_dev_id(); ++ uint8_t cdev_id = rte_compressdev_get_dev_id(); + + /* configure the device. */ + if (rte_compressdev_configure(cdev_id, &conf) < 0) +@@ -399,75 +405,80 @@ using priv_xform would look like: + + + Stateless and OUT_OF_SPACE +-~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-OUT_OF_SPACE is a condition when output buffer runs out of space and where PMD +-still has more data to produce. If PMD runs into such condition, then PMD returns +-RTE_COMP_OP_OUT_OF_SPACE_TERMINATED error. In such case, PMD resets itself and can set ++OUT_OF_SPACE is a condition when the output buffer runs out of space and where the PMD ++still has more data to produce. If the PMD runs into such condition, then the PMD returns ++RTE_COMP_OP_OUT_OF_SPACE_TERMINATED error. In such case, the PMD resets itself and can set + consumed=0 and produced=amount of output it could produce before hitting out_of_space. +-Application would need to resubmit the whole input with a larger output buffer, if it ++The application would need to resubmit the whole input with a larger output buffer, if it + wants the operation to be completed. + + Hash in Stateless + ~~~~~~~~~~~~~~~~~ +-If hash is enabled, digest buffer will contain valid data after op is successfully ++If hash is enabled, the digest buffer will contain valid data after an op is successfully + processed i.e. dequeued with status = RTE_COMP_OP_STATUS_SUCCESS. + + Checksum in Stateless + ~~~~~~~~~~~~~~~~~~~~~ +-If checksum is enabled, checksum will only be available after op is successfully ++If checksum is enabled, checksum will only be available after an op is successfully + processed i.e. dequeued with status = RTE_COMP_OP_STATUS_SUCCESS. + ++.. _compressdev_stateful_op: ++ + Compression API Stateful operation + ----------------------------------- + +-Compression API provide RTE_COMP_FF_STATEFUL_COMPRESSION and +-RTE_COMP_FF_STATEFUL_DECOMPRESSION feature flag for PMD to reflect ++The compression API provides RTE_COMP_FF_STATEFUL_COMPRESSION and ++RTE_COMP_FF_STATEFUL_DECOMPRESSION feature flag for the PMD to reflect + its support for Stateful operations. + +-A Stateful operation in DPDK compression means application invokes enqueue +-burst() multiple times to process related chunk of data because +-application broke data into several ops. ++A Stateful operation in DPDK compression means the application invokes enqueue ++burst() multiple times to process a related chunk of data because the ++application broke the data into several ops. 
+ +-In such case ++In such cases + - ops are setup with op_type RTE_COMP_OP_STATEFUL, +-- all ops except last set to flush value = RTE_COMP_FLUSH_NONE/SYNC +-and last set to flush value RTE_COMP_FLUSH_FULL/FINAL. ++- all ops except the last are set with flush value = RTE_COMP_FLUSH_NONE/SYNC ++and the last is set with flush value RTE_COMP_FLUSH_FULL/FINAL. + +-In case of either one or all of the above conditions, PMD initiates +-stateful processing and releases acquired resources after processing ++In case of either one or all of the above conditions, the PMD initiates ++stateful processing and releases acquired resources after processing the + operation with flush value = RTE_COMP_FLUSH_FULL/FINAL is complete. +-Unlike stateless, application can enqueue only one stateful op from +-a particular stream at a time and must attach stream handle ++Unlike stateless, the application can enqueue only one stateful op from ++a particular stream at a time and must attach a stream handle + to each op. + + Stream in Stateful operation + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-`stream` in DPDK compression is a logical entity which identifies related set of ops, say, a one large +-file broken into multiple chunks then file is represented by a stream and each chunk of that file is +-represented by compression op `rte_comp_op`. Whenever application wants a stateful processing of such +-data, then it must get a stream handle via making call to ``rte_compressdev_stream_create()`` +-with xform, at an output the target PMD will return an opaque stream handle to application which +-it must attach to all of the ops carrying data of that stream. In stateful processing, every op +-requires previous op data for compression/decompression. A PMD allocates and set up resources such +-as history, states, etc. within a stream, which are maintained during the processing of the related ops. ++A stream in DPDK compression is a logical entity which identifies a related set of ops. ++For example, one large file broken into multiple chunks, then the file is represented by a stream, ++and each chunk of that file is represented by a compression op ``rte_comp_op``. ++Whenever an application wants stateful processing of such data, then it must get a stream handle ++via making call to ``rte_compressdev_stream_create()`` with an xform, which will return an opaque ++stream handle to attach to all of the ops carrying data of that stream. ++In stateful processing, every op requires previous op data for compression/decompression. ++A PMD allocates and sets up resources such as history, states, etc. within a stream, ++which are maintained during the processing of related ops. + +-Unlike priv_xforms, stream is always a NON_SHAREABLE entity. One stream handle must be attached to only +-one set of related ops and cannot be reused until all of them are processed with status Success or failure. ++Unlike priv_xforms, a stream is always a NON_SHAREABLE entity. One stream handle must be attached ++to only one set of related ops and cannot be reused until all of them are processed with a ++success/failure status. + + .. figure:: img/stateful-op.* + + Stateful Ops + + +-Application should call ``rte_compressdev_stream_create()`` and attach to op before ++An application should call ``rte_compressdev_stream_create()`` and attach it to the op before + enqueuing them for processing and free via ``rte_compressdev_stream_free()`` during +-termination. All ops that are to be processed statefully should carry *same* stream. ++termination. 
All ops that are to be processed statefully should carry the *same* stream. + +-See *DPDK API Reference* document for details. ++See the `DPDK API Reference `_ for details. + +-An example pseudocode to set up and process a stream having NUM_CHUNKS with each chunk size of CHUNK_LEN would look like: ++An example pseudocode to set up and process a stream having NUM_CHUNKS, ++with each chunk size of CHUNK_LEN, would look like: + + .. code-block:: c + +@@ -475,7 +486,7 @@ An example pseudocode to set up and process a stream having NUM_CHUNKS with each + * pseudocode for stateful compression + */ + +- uint8_t cdev_id = rte_compressdev_get_dev_id(); ++ uint8_t cdev_id = rte_compressdev_get_dev_id(); + + /* configure the device. */ + if (rte_compressdev_configure(cdev_id, &conf) < 0) +@@ -549,64 +560,65 @@ An example pseudocode to set up and process a stream having NUM_CHUNKS with each + + + Stateful and OUT_OF_SPACE +-~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++~~~~~~~~~~~~~~~~~~~~~~~~~ + +-If PMD supports stateful operation, then OUT_OF_SPACE status is not an actual +-error for the PMD. In such case, PMD returns with status ++If a PMD supports stateful operation, then an OUT_OF_SPACE status is not an actual ++error for the PMD. In such a case, the PMD returns with status + RTE_COMP_OP_STATUS_OUT_OF_SPACE_RECOVERABLE with consumed = number of input bytes +-read and produced = length of complete output buffer. +-Application should enqueue next op with source starting at consumed+1 and an ++read, and produced = length of complete output buffer. ++The application should enqueue the next op with source starting at consumed+1, and an + output buffer with available space. + + Hash in Stateful + ~~~~~~~~~~~~~~~~ +-If enabled, digest buffer will contain valid digest after last op in stream ++If enabled, the digest buffer will contain valid digest after the last op in a stream + (having flush = RTE_COMP_FLUSH_FINAL) is successfully processed i.e. dequeued + with status = RTE_COMP_OP_STATUS_SUCCESS. + + Checksum in Stateful + ~~~~~~~~~~~~~~~~~~~~ +-If enabled, checksum will only be available after last op in stream ++If enabled, the checksum will only be available after the last op in a stream + (having flush = RTE_COMP_FLUSH_FINAL) is successfully processed i.e. dequeued + with status = RTE_COMP_OP_STATUS_SUCCESS. + + Burst in compression API +-------------------------- ++------------------------ + + Scheduling of compression operations on DPDK's application data path is + performed using a burst oriented asynchronous API set. A queue pair on a compression +-device accepts a burst of compression operations using enqueue burst API. On physical +-devices the enqueue burst API will place the operations to be processed ++device accepts a burst of compression operations using the enqueue burst API. ++On physical devices the enqueue burst API will place the operations to be processed + on the device's hardware input queue, for virtual devices the processing of the + operations is usually completed during the enqueue call to the compression + device. The dequeue burst API will retrieve any processed operations available + from the queue pair on the compression device, from physical devices this is usually +-directly from the devices processed queue, and for virtual device's from a ++directly from the devices processed queue, and for virtual device's from an + ``rte_ring`` where processed operations are placed after being processed on the + enqueue call. 
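A minimal sketch of the enqueue/dequeue burst flow on queue pair 0, assuming the ops have already been prepared and that no more than ``BURST_SIZE`` ops are passed in.

.. code-block:: c

    #include <rte_compressdev.h>
    #include <rte_comp.h>

    #define BURST_SIZE 32

    static void
    process_burst(uint8_t cdev_id, struct rte_comp_op **ops, uint16_t nb_ops)
    {
        struct rte_comp_op *deq_ops[BURST_SIZE];
        uint16_t nb_enq, nb_deq = 0;

        /* nb_ops is assumed to be <= BURST_SIZE. */
        nb_enq = rte_compressdev_enqueue_burst(cdev_id, 0, ops, nb_ops);

        /* Ops the device did not accept (nb_enq < nb_ops) would normally be
         * retried; here we only wait for what was actually enqueued. */
        while (nb_deq < nb_enq)
            nb_deq += rte_compressdev_dequeue_burst(cdev_id, 0,
                                                    &deq_ops[nb_deq],
                                                    BURST_SIZE - nb_deq);
    }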
+ +-A burst in DPDK compression can be a combination of stateless and stateful operations with a condition +-that for stateful ops only one op at-a-time should be enqueued from a particular stream i.e. no-two ops +-should belong to same stream in a single burst. However a burst may contain multiple stateful ops as long +-as each op is attached to a different stream i.e. a burst can look like: ++A burst in DPDK compression can be a combination of stateless and stateful operations with a ++condition that for stateful ops only one op at a time should be enqueued from a particular stream ++i.e. two ops should never belong to the same stream in a single burst. ++However, a burst may contain multiple stateful ops, as long as each op is attached to a different ++stream, i.e. a burst can look like: + + +---------------+--------------+--------------+-----------------+--------------+--------------+ + | enqueue_burst | op1.no_flush | op2.no_flush | op3.flush_final | op4.no_flush | op5.no_flush | + +---------------+--------------+--------------+-----------------+--------------+--------------+ + +-Where, op1 .. op5 all belong to different independent data units. op1, op2, op4, op5 must be stateful +-as stateless ops can only use flush full or final and op3 can be of type stateless or stateful. +-Every op with type set to RTE_COMP_OP_STATELESS must be attached to priv_xform and +-Every op with type set to RTE_COMP_OP_STATEFUL *must* be attached to stream. ++Where, op1 .. op5 all belong to different independent data units. op1, op2, op4, op5 must be ++stateful as stateless ops can only use flush full or final and op3 can be of type stateless or ++stateful. Every op with type set to RTE_COMP_OP_STATELESS must be attached to priv_xform and ++every op with type set to RTE_COMP_OP_STATEFUL *must* be attached to stream. + + Since each operation in a burst is independent and thus can be completed +-out-of-order, applications which need ordering, should setup per-op user data +-area with reordering information so that it can determine enqueue order at ++out of order, applications which need ordering should setup a per-op user data ++area, with reordering information so that it can determine enqueue order at + dequeue. + +-Also if multiple threads calls enqueue_burst() on same queue pair then it’s +-application onus to use proper locking mechanism to ensure exclusive enqueuing +-of operations. ++Also, if multiple threads calls enqueue_burst() on the same queue pair then it's ++the application's responsibility to use a proper locking mechanism to ensure ++exclusive enqueuing of operations. + + Enqueue / Dequeue Burst APIs + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@@ -629,9 +641,10 @@ Sample code + ----------- + + There are unit test applications that show how to use the compressdev library inside +-app/test/test_compressdev.c ++``app/test/test_compressdev.c`` + + Compression Device API + ~~~~~~~~~~~~~~~~~~~~~~ + +-The compressdev Library API is described in the *DPDK API Reference* document. ++The compressdev Library API is described in the ++`DPDK API Reference `_. +diff --git a/dpdk/doc/guides/prog_guide/cryptodev_lib.rst b/dpdk/doc/guides/prog_guide/cryptodev_lib.rst +index 473b014a10..2faa7f11bd 100644 +--- a/dpdk/doc/guides/prog_guide/cryptodev_lib.rst ++++ b/dpdk/doc/guides/prog_guide/cryptodev_lib.rst +@@ -707,7 +707,7 @@ feature is useful when the user wants to abandon partially enqueued operations + for a failed enqueue burst operation and try enqueuing in a whole later. 
+ + Similar as enqueue, there are two dequeue functions: +-``rte_cryptodev_raw_dequeue`` for dequeing single operation, and ++``rte_cryptodev_raw_dequeue`` for dequeuing single operation, and + ``rte_cryptodev_raw_dequeue_burst`` for dequeuing a burst of operations (e.g. + all operations in a ``struct rte_crypto_sym_vec`` descriptor). The + ``rte_cryptodev_raw_dequeue_burst`` function allows the user to provide callback +diff --git a/dpdk/doc/guides/prog_guide/env_abstraction_layer.rst b/dpdk/doc/guides/prog_guide/env_abstraction_layer.rst +index 1f30e13b8b..c6accce701 100644 +--- a/dpdk/doc/guides/prog_guide/env_abstraction_layer.rst ++++ b/dpdk/doc/guides/prog_guide/env_abstraction_layer.rst +@@ -204,8 +204,8 @@ variables: + * ``RTE_MAX_MEMSEG_LISTS`` controls how many segment lists can DPDK have + * ``RTE_MAX_MEM_MB_PER_LIST`` controls how much megabytes of memory each + segment list can address +-* ``RTE_MAX_MEMSEG_PER_LIST`` controls how many segments each segment can +- have ++* ``RTE_MAX_MEMSEG_PER_LIST`` controls how many segments each segment list ++ can have + * ``RTE_MAX_MEMSEG_PER_TYPE`` controls how many segments each memory type + can have (where "type" is defined as "page size + NUMA node" combination) + * ``RTE_MAX_MEM_MB_PER_TYPE`` controls how much megabytes of memory each +@@ -433,7 +433,7 @@ and decides on a preferred IOVA mode. + + - if all buses report RTE_IOVA_PA, then the preferred IOVA mode is RTE_IOVA_PA, + - if all buses report RTE_IOVA_VA, then the preferred IOVA mode is RTE_IOVA_VA, +-- if all buses report RTE_IOVA_DC, no bus expressed a preferrence, then the ++- if all buses report RTE_IOVA_DC, no bus expressed a preference, then the + preferred mode is RTE_IOVA_DC, + - if the buses disagree (at least one wants RTE_IOVA_PA and at least one wants + RTE_IOVA_VA), then the preferred IOVA mode is RTE_IOVA_DC (see below with the +@@ -465,7 +465,7 @@ devices would fail anyway. + - By default, the mempool, first asks for IOVA-contiguous memory using + ``RTE_MEMZONE_IOVA_CONTIG``. This is slow in RTE_IOVA_PA mode and it may + affect the application boot time. +- - It is easy to enable large amount of IOVA-contiguous memory use-cases ++ - It is easy to enable large amount of IOVA-contiguous memory use cases + with IOVA in VA mode. + + It is expected that all PCI drivers work in both RTE_IOVA_PA and +@@ -658,7 +658,7 @@ Known Issues + + rte_ring + + rte_ring supports multi-producer enqueue and multi-consumer dequeue. +- However, it is non-preemptive, this has a knock on effect of making rte_mempool non-preemptable. ++ However, it is non-preemptive, this has a knock on effect of making rte_mempool non-preemptible. + + .. note:: + +diff --git a/dpdk/doc/guides/prog_guide/eventdev.rst b/dpdk/doc/guides/prog_guide/eventdev.rst +index ccde086f63..347203f404 100644 +--- a/dpdk/doc/guides/prog_guide/eventdev.rst ++++ b/dpdk/doc/guides/prog_guide/eventdev.rst +@@ -120,7 +120,7 @@ Ports + ~~~~~ + + Ports are the points of contact between worker cores and the eventdev. The +-general use-case will see one CPU core using one port to enqueue and dequeue ++general use case will see one CPU core using one port to enqueue and dequeue + events from an eventdev. Ports are linked to queues in order to retrieve events + from those queues (more details in `Linking Queues and Ports`_ below). 
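A minimal sketch of the one-core/one-port worker loop described above, assuming the event device and port are already configured and linked to the relevant queues.

.. code-block:: c

    #include <rte_eventdev.h>

    static void
    worker_loop(uint8_t dev_id, uint8_t port_id)
    {
        struct rte_event ev;

        for (;;) {
            /* Poll for a single event from any queue linked to this port. */
            if (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0) == 0)
                continue;

            /* ... process ev.mbuf here ... */

            /* Send the event on to its next stage through the same port. */
            ev.op = RTE_EVENT_OP_FORWARD;
            rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
        }
    }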
+ +diff --git a/dpdk/doc/guides/prog_guide/flow_classify_lib.rst b/dpdk/doc/guides/prog_guide/flow_classify_lib.rst +index f0ed5a1a04..7dae0bc8c6 100644 +--- a/dpdk/doc/guides/prog_guide/flow_classify_lib.rst ++++ b/dpdk/doc/guides/prog_guide/flow_classify_lib.rst +@@ -366,7 +366,7 @@ Packet Matching + ~~~~~~~~~~~~~~~ + + The ``rte_flow_classifier_query`` API is used to find packets which match a +-given flow Flow rule in the table. ++given flow rule in the table. + This API calls the flow_classify_run internal function which calls the + ``table.ops.f_lookup`` API to see if any packets in a burst match any + of the Flow rules in the table. +diff --git a/dpdk/doc/guides/prog_guide/graph_lib.rst b/dpdk/doc/guides/prog_guide/graph_lib.rst +index fcff9c4286..1cfdc86433 100644 +--- a/dpdk/doc/guides/prog_guide/graph_lib.rst ++++ b/dpdk/doc/guides/prog_guide/graph_lib.rst +@@ -220,7 +220,7 @@ the user needs to update the context of the node hence access to + ``struct rte_node *`` memory. + + ``rte_graph_foreach_node()``, ``rte_graph_node_get()``, +-``rte_graph_node_get_by_name()`` APIs can be used to to get the ++``rte_graph_node_get_by_name()`` APIs can be used to get the + ``struct rte_node*``. ``rte_graph_foreach_node()`` iterator function works on + ``struct rte_graph *`` fast-path graph object while others works on graph ID or name. + +diff --git a/dpdk/doc/guides/prog_guide/img/flow_tru_droppper.png b/dpdk/doc/guides/prog_guide/img/flow_tru_dropper.png +similarity index 100% +rename from dpdk/doc/guides/prog_guide/img/flow_tru_droppper.png +rename to dpdk/doc/guides/prog_guide/img/flow_tru_dropper.png +diff --git a/dpdk/doc/guides/prog_guide/img/turbo_tb_decode.svg b/dpdk/doc/guides/prog_guide/img/turbo_tb_decode.svg +index a259f45866..95779c3642 100644 +--- a/dpdk/doc/guides/prog_guide/img/turbo_tb_decode.svg ++++ b/dpdk/doc/guides/prog_guide/img/turbo_tb_decode.svg +@@ -460,7 +460,7 @@ + height="14.642858" + x="39.285713" + y="287.16254" />offse offset offse offset /kernel/linux/kni/rte_kni.ko enable_bifurcated=on ++ ++Enabling bifurcated device support releases ``rtnl`` lock before calling ++callback and locks it back after callback. Also enables asynchronous request to ++support callbacks that requires rtnl lock to work (interface down). ++ + KNI Creation and Deletion + ------------------------- + +diff --git a/dpdk/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst b/dpdk/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst +index 30c56cd375..55ec06a46e 100644 +--- a/dpdk/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst ++++ b/dpdk/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst +@@ -29,7 +29,7 @@ bonded device and its slave devices. + + The Link Bonding PMD Library is enabled by default in the build + configuration, the library can be disabled using the meson option +- "-Ddisable_drivers=net/bond". ++ "-Ddisable_drivers=net/bonding". + + + Link Bonding Modes Overview +diff --git a/dpdk/doc/guides/prog_guide/lpm6_lib.rst b/dpdk/doc/guides/prog_guide/lpm6_lib.rst +index d1aea91ca9..8425d14805 100644 +--- a/dpdk/doc/guides/prog_guide/lpm6_lib.rst ++++ b/dpdk/doc/guides/prog_guide/lpm6_lib.rst +@@ -64,9 +64,9 @@ that are most commonly used in IPv6. 
+ + The main data structure is built using the following elements: + +-* A table with 224 entries ++* A table with 2^24 entries + +-* A number of tables, configurable by the user through the API, with 28 entries ++* A number of tables, configurable by the user through the API, with 2^8 entries + + The first table, called tbl24, is indexed using the first 24 bits of the IP address be looked up, + while the rest of the tables, called tbl8s, +diff --git a/dpdk/doc/guides/prog_guide/multi_proc_support.rst b/dpdk/doc/guides/prog_guide/multi_proc_support.rst +index 6b0ac30c5b..815e8bdc43 100644 +--- a/dpdk/doc/guides/prog_guide/multi_proc_support.rst ++++ b/dpdk/doc/guides/prog_guide/multi_proc_support.rst +@@ -325,7 +325,7 @@ supported. However, since sending messages (not requests) does not involve an + IPC thread, sending messages while processing another message or request is + supported. + +-Since the memory sybsystem uses IPC internally, memory allocations and IPC must ++Since the memory subsystem uses IPC internally, memory allocations and IPC must + not be mixed: it is not safe to use IPC inside a memory-related callback, nor is + it safe to allocate/free memory inside IPC callbacks. Attempting to do so may + lead to a deadlock. +diff --git a/dpdk/doc/guides/prog_guide/qos_framework.rst b/dpdk/doc/guides/prog_guide/qos_framework.rst +index 4e4ea33ccb..4f6de8d1da 100644 +--- a/dpdk/doc/guides/prog_guide/qos_framework.rst ++++ b/dpdk/doc/guides/prog_guide/qos_framework.rst +@@ -737,7 +737,7 @@ Strict priority scheduling of traffic classes within the same pipe is implemente + which selects the queues in ascending order. + Therefore, queue 0 (associated with TC 0, highest priority TC) is handled before + queue 1 (TC 1, lower priority than TC 0), +-which is handled before queue 2 (TC 2, lower priority than TC 1) and it conitnues until queues of all TCs except the ++which is handled before queue 2 (TC 2, lower priority than TC 1) and it continues until queues of all TCs except the + lowest priority TC are handled. At last, queues 12..15 (best effort TC, lowest priority TC) are handled. + + Upper Limit Enforcement +@@ -1191,12 +1191,12 @@ In the case of severe congestion, the dropper resorts to tail drop. + This occurs when a packet queue has reached maximum capacity and cannot store any more packets. + In this situation, all arriving packets are dropped. + +-The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`. ++The flow through the dropper is illustrated in :numref:`figure_flow_tru_dropper`. + The RED/WRED algorithm is exercised first and tail drop second. + +-.. _figure_flow_tru_droppper: ++.. _figure_flow_tru_dropper: + +-.. figure:: img/flow_tru_droppper.* ++.. figure:: img/flow_tru_dropper.* + + Flow Through the Dropper + +diff --git a/dpdk/doc/guides/prog_guide/regexdev.rst b/dpdk/doc/guides/prog_guide/regexdev.rst +index 3d8b591b56..5ca7e0c769 100644 +--- a/dpdk/doc/guides/prog_guide/regexdev.rst ++++ b/dpdk/doc/guides/prog_guide/regexdev.rst +@@ -124,7 +124,7 @@ The configuration mode is depended on the PMD capabilities. + + Online rule configuration is done using the following API functions: + ``rte_regexdev_rule_db_update`` which add / remove rules from the rules +-precomplied list, and ``rte_regexdev_rule_db_compile_activate`` ++precompiled list, and ``rte_regexdev_rule_db_compile_activate`` + which compile the rules and loads them to the RegEx HW. 
+ + Offline rule configuration can be done by adding a pointer to the compiled +diff --git a/dpdk/doc/guides/prog_guide/rte_flow.rst b/dpdk/doc/guides/prog_guide/rte_flow.rst +index 86b3444803..10e6d06853 100644 +--- a/dpdk/doc/guides/prog_guide/rte_flow.rst ++++ b/dpdk/doc/guides/prog_guide/rte_flow.rst +@@ -65,12 +65,12 @@ Flow rules can also be grouped, the flow rule priority is specific to the + group they belong to. All flow rules in a given group are thus processed within + the context of that group. Groups are not linked by default, so the logical + hierarchy of groups must be explicitly defined by flow rules themselves in each +-group using the JUMP action to define the next group to redirect too. Only flow +-rules defined in the default group 0 are guarantee to be matched against, this ++group using the JUMP action to define the next group to redirect to. Only flow ++rules defined in the default group 0 are guaranteed to be matched against. This + makes group 0 the origin of any group hierarchy defined by an application. + + Support for multiple actions per rule may be implemented internally on top +-of non-default hardware priorities, as a result both features may not be ++of non-default hardware priorities. As a result, both features may not be + simultaneously available to applications. + + Considering that allowed pattern/actions combinations cannot be known in +@@ -1337,7 +1337,7 @@ Matches a network service header (RFC 8300). + - ``ttl``: maximum SFF hopes (6 bits). + - ``length``: total length in 4 bytes words (6 bits). + - ``reserved1``: reserved1 bits (4 bits). +-- ``mdtype``: ndicates format of NSH header (4 bits). ++- ``mdtype``: indicates format of NSH header (4 bits). + - ``next_proto``: indicates protocol type of encap data (8 bits). + - ``spi``: service path identifier (3 bytes). + - ``sindex``: service index (1 byte). +diff --git a/dpdk/doc/guides/prog_guide/vhost_lib.rst b/dpdk/doc/guides/prog_guide/vhost_lib.rst +index ba4c62aeb8..8970db8e5c 100644 +--- a/dpdk/doc/guides/prog_guide/vhost_lib.rst ++++ b/dpdk/doc/guides/prog_guide/vhost_lib.rst +@@ -118,6 +118,18 @@ The following is an overview of some key Vhost API functions: + + It is disabled by default. + ++ - ``RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS`` ++ ++ Since v16.04, the vhost library forwards checksum and gso requests for ++ packets received from a virtio driver by filling Tx offload metadata in ++ the mbuf. This behavior is inconsistent with other drivers but it is left ++ untouched for existing applications that might rely on it. ++ ++ This flag disables the legacy behavior and instead ask vhost to simply ++ populate Rx offload metadata in the mbuf. ++ ++ It is disabled by default. ++ + * ``rte_vhost_driver_set_features(path, features)`` + + This function sets the feature bits the vhost-user driver supports. The +@@ -287,7 +299,7 @@ vhost-user implementation has two options: + + * The vhost supported features must be exactly the same before and + after the restart. For example, if TSO is disabled and then enabled, +- nothing will work and issues undefined might happen. ++ nothing will work and undefined issues might happen. + + No matter which mode is used, once a connection is established, DPDK + vhost-user will start receiving and processing vhost messages from QEMU. +@@ -318,12 +330,12 @@ Guest memory requirement + + * Memory pre-allocation + +- For non-async data path, guest memory pre-allocation is not a +- must. This can help save of memory. 
If users really want the guest memory +- to be pre-allocated (e.g., for performance reason), we can add option +- ``-mem-prealloc`` when starting QEMU. Or, we can lock all memory at vhost +- side which will force memory to be allocated when mmap at vhost side; +- option --mlockall in ovs-dpdk is an example in hand. ++ For non-async data path guest memory pre-allocation is not a ++ must but can help save memory. To do this we can add option ++ ``-mem-prealloc`` when starting QEMU, or we can lock all memory at vhost ++ side which will force memory to be allocated when it calls mmap ++ (option --mlockall in ovs-dpdk is an example in hand). ++ + + For async data path, we force the VM memory to be pre-allocated at vhost + lib when mapping the guest memory; and also we need to lock the memory to +@@ -331,8 +343,8 @@ Guest memory requirement + + * Memory sharing + +- Make sure ``share=on`` QEMU option is given. vhost-user will not work with +- a QEMU version without shared memory mapping. ++ Make sure ``share=on`` QEMU option is given. The vhost-user will not work with ++ a QEMU instance without shared memory mapping. + + Vhost supported vSwitch reference + --------------------------------- +diff --git a/dpdk/doc/guides/prog_guide/writing_efficient_code.rst b/dpdk/doc/guides/prog_guide/writing_efficient_code.rst +index 7baeaae431..e6c26efdd3 100644 +--- a/dpdk/doc/guides/prog_guide/writing_efficient_code.rst ++++ b/dpdk/doc/guides/prog_guide/writing_efficient_code.rst +@@ -119,8 +119,8 @@ The code algorithm that dequeues messages may be something similar to the follow + my_process_bulk(obj_table, count); + } + +-PMD Driver +----------- ++PMD ++--- + + The DPDK Poll Mode Driver (PMD) is also able to work in bulk/burst mode, + allowing the factorization of some code for each call in the send or receive function. +@@ -143,20 +143,21 @@ In order to achieve higher throughput, + the DPDK attempts to aggregate the cost of processing each packet individually by processing packets in bursts. + + Using the testpmd application as an example, +-the burst size can be set on the command line to a value of 16 (also the default value). +-This allows the application to request 16 packets at a time from the PMD. ++the burst size can be set on the command line to a value of 32 (also the default value). ++This allows the application to request 32 packets at a time from the PMD. + The testpmd application then immediately attempts to transmit all the packets that were received, +-in this case, all 16 packets. ++in this case, all 32 packets. + + The packets are not transmitted until the tail pointer is updated on the corresponding TX queue of the network port. + This behavior is desirable when tuning for high throughput because +-the cost of tail pointer updates to both the RX and TX queues can be spread across 16 packets, ++the cost of tail pointer updates to both the RX and TX queues can be spread ++across 32 packets, + effectively hiding the relatively slow MMIO cost of writing to the PCIe* device. + However, this is not very desirable when tuning for low latency because +-the first packet that was received must also wait for another 15 packets to be received. +-It cannot be transmitted until the other 15 packets have also been processed because ++the first packet that was received must also wait for another 31 packets to be received. 
++It cannot be transmitted until the other 31 packets have also been processed because + the NIC will not know to transmit the packets until the TX tail pointer has been updated, +-which is not done until all 16 packets have been processed for transmission. ++which is not done until all 32 packets have been processed for transmission. + + To consistently achieve low latency, even under heavy system load, + the application developer should avoid processing packets in bunches. +diff --git a/dpdk/doc/guides/rawdevs/ioat.rst b/dpdk/doc/guides/rawdevs/ioat.rst +index 250cfc48a6..59ba20740f 100644 +--- a/dpdk/doc/guides/rawdevs/ioat.rst ++++ b/dpdk/doc/guides/rawdevs/ioat.rst +@@ -65,7 +65,7 @@ To assign an engine to a group:: + $ accel-config config-engine dsa0/engine0.1 --group-id=1 + + To assign work queues to groups for passing descriptors to the engines a similar accel-config command can be used. +-However, the work queues also need to be configured depending on the use-case. ++However, the work queues also need to be configured depending on the use case. + Some configuration options include: + + * mode (Dedicated/Shared): Indicates whether a WQ may accept jobs from multiple queues simultaneously. +diff --git a/dpdk/doc/guides/rawdevs/ntb.rst b/dpdk/doc/guides/rawdevs/ntb.rst +index 2c5fa7690c..2bb115d13f 100644 +--- a/dpdk/doc/guides/rawdevs/ntb.rst ++++ b/dpdk/doc/guides/rawdevs/ntb.rst +@@ -17,7 +17,7 @@ some information by using scratchpad registers. + BIOS setting on Intel Xeon + -------------------------- + +-Intel Non-transparent Bridge needs special BIOS setting. The referencce for ++Intel Non-transparent Bridge needs special BIOS setting. The reference for + Skylake is https://www.intel.com/content/dam/support/us/en/documents/server-products/Intel_Xeon_Processor_Scalable_Family_BIOS_User_Guide.pdf + + - Set the needed PCIe port as NTB to NTB mode on both hosts. +diff --git a/dpdk/doc/guides/regexdevs/features_overview.rst b/dpdk/doc/guides/regexdevs/features_overview.rst +index f90b394801..3e7ab409bf 100644 +--- a/dpdk/doc/guides/regexdevs/features_overview.rst ++++ b/dpdk/doc/guides/regexdevs/features_overview.rst +@@ -16,13 +16,13 @@ PCRE atomic grouping + Support PCRE atomic grouping. + + PCRE back reference +- Support PCRE back regerence. ++ Support PCRE back reference. + + PCRE back tracking ctrl + Support PCRE back tracking ctrl. + + PCRE call outs +- Support PCRE call outes. ++ Support PCRE call routes. + + PCRE forward reference + Support Forward reference. +diff --git a/dpdk/doc/guides/regexdevs/mlx5.rst b/dpdk/doc/guides/regexdevs/mlx5.rst +index faaa6ac11d..fb1693e085 100644 +--- a/dpdk/doc/guides/regexdevs/mlx5.rst ++++ b/dpdk/doc/guides/regexdevs/mlx5.rst +@@ -7,7 +7,7 @@ MLX5 RegEx driver + ================= + + The MLX5 RegEx (Regular Expression) driver library +-(**librte_regex_mlx5**) provides support for **Mellanox BlueField 2** ++(**librte_regex_mlx5**) provides support for **Mellanox BlueField-2** + families of 25/50/100/200 Gb/s adapters. + + Design +@@ -38,13 +38,13 @@ For example: ``class=net:regex`` will probe both the net PMD and the RegEx PMD. + Supported NICs + -------------- + +-* Mellanox\ |reg| BlueField 2 SmartNIC ++* Mellanox\ |reg| BlueField-2 SmartNIC + + Prerequisites + ------------- + +-- BlueField 2 running Mellanox supported kernel. +-- Enable the RegEx capabilities using system call from the BlueField 2. ++- BlueField-2 running Mellanox supported kernel. ++- Enable the RegEx capabilities using system call from the BlueField-2. 
+ - Official support is not yet released. + + Run-time configuration +diff --git a/dpdk/doc/guides/rel_notes/deprecation.rst b/dpdk/doc/guides/rel_notes/deprecation.rst +index 2f498a0be4..cf11196748 100644 +--- a/dpdk/doc/guides/rel_notes/deprecation.rst ++++ b/dpdk/doc/guides/rel_notes/deprecation.rst +@@ -27,16 +27,21 @@ Deprecation Notices + + * rte_atomicNN_xxx: These APIs do not take memory order parameter. This does + not allow for writing optimized code for all the CPU architectures supported +- in DPDK. DPDK will adopt C11 atomic operations semantics and provide wrappers +- using C11 atomic built-ins. These wrappers must be used for patches that +- need to be merged in 20.08 onwards. This change will not introduce any +- performance degradation. ++ in DPDK. DPDK has adopted the atomic operations from ++ https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html. These ++ operations must be used for patches that need to be merged in 20.08 onwards. ++ This change will not introduce any performance degradation. + + * rte_smp_*mb: These APIs provide full barrier functionality. However, many +- use cases do not require full barriers. To support such use cases, DPDK will +- adopt C11 barrier semantics and provide wrappers using C11 atomic built-ins. +- These wrappers must be used for patches that need to be merged in 20.08 +- onwards. This change will not introduce any performance degradation. ++ use cases do not require full barriers. To support such use cases, DPDK has ++ adopted atomic operations from ++ https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html. These ++ operations and a new wrapper ``rte_atomic_thread_fence`` instead of ++ ``__atomic_thread_fence`` must be used for patches that need to be merged in ++ 20.08 onwards. This change will not introduce any performance degradation. ++ ++* mempool: The mempool API macros ``MEMPOOL_PG_*`` are deprecated and ++ will be removed in DPDK 22.11. + + * lib: will fix extending some enum/define breaking the ABI. There are multiple + samples in DPDK that enum/define terminated with a ``.*MAX.*`` value which is +@@ -127,12 +132,6 @@ Deprecation Notices + from the release: ``0x16c8, 0x16c9, 0x16ca, 0x16ce, 0x16cf, 0x16df,`` + ``0x16d0, 0x16d1, 0x16d2, 0x16d4, 0x16d5, 0x16e7, 0x16e8, 0x16e9``. + +-* sched: To allow more traffic classes, flexible mapping of pipe queues to +- traffic classes, and subport level configuration of pipes and queues +- changes will be made to macros, data structures and API functions defined +- in "rte_sched.h". These changes are aligned to improvements suggested in the +- RFC https://mails.dpdk.org/archives/dev/2018-November/120035.html. +- + * metrics: The function ``rte_metrics_init`` will have a non-void return + in order to notify errors instead of calling ``rte_exit``. + +diff --git a/dpdk/doc/guides/rel_notes/known_issues.rst b/dpdk/doc/guides/rel_notes/known_issues.rst +index ee3ed1e658..a248aa86ab 100644 +--- a/dpdk/doc/guides/rel_notes/known_issues.rst ++++ b/dpdk/doc/guides/rel_notes/known_issues.rst +@@ -250,7 +250,7 @@ PMD does not work with --no-huge EAL command line parameter + + **Description**: + Currently, the DPDK does not store any information about memory allocated by ``malloc()` (for example, NUMA node, +- physical address), hence PMD drivers do not work when the ``--no-huge`` command line parameter is supplied to EAL. ++ physical address), hence PMDs do not work when the ``--no-huge`` command line parameter is supplied to EAL. 
+ + **Implication**: + Sending and receiving data with PMD will not work. +@@ -419,7 +419,7 @@ Binding PCI devices to igb_uio fails on Linux kernel 3.9 when more than one devi + ------------------------------------------------------------------------------------------ + + **Description**: +- A known bug in the uio driver included in Linux kernel version 3.9 prevents more than one PCI device to be ++ A known bug in the UIO driver included in Linux kernel version 3.9 prevents more than one PCI device to be + bound to the igb_uio driver. + + **Implication**: +@@ -614,7 +614,7 @@ I40e VF may not receive packets in the promiscuous mode + Poll Mode Driver (PMD). + + +-uio pci generic module bind failed in X710/XL710/XXV710 ++uio_pci_generic module bind failed in X710/XL710/XXV710 + ------------------------------------------------------- + + **Description**: +@@ -671,7 +671,7 @@ virtio tx_burst() function cannot do TSO on shared packets + Poll Mode Driver (PMD). + + +-igb uio legacy mode can not be used in X710/XL710/XXV710 ++igb_uio legacy mode can not be used in X710/XL710/XXV710 + -------------------------------------------------------- + + **Description**: +@@ -752,7 +752,7 @@ Netvsc driver and application restart + handshake sequence with the host. + + **Resolution/Workaround**: +- Either reboot the guest or remove and reinsert the hv_uio_generic module. ++ Either reboot the guest or remove and reinsert the uio_hv_generic module. + + **Affected Environment/Platform**: + Linux Hyper-V. +@@ -816,7 +816,7 @@ Kernel crash when hot-unplug igb_uio device while DPDK application is running + + **Reason**: + When device is hot-unplugged, igb_uio driver will be removed which will destroy UIO resources. +- Later trying to access any uio resource will cause kernel crash. ++ Later trying to access any UIO resource will cause kernel crash. + + **Resolution/Workaround**: + If using DPDK for PCI HW hot-unplug, prefer to bind device with VFIO instead of IGB_UIO. +@@ -885,14 +885,15 @@ Unsuitable IOVA mode may be picked as the default + **Driver/Module**: + ALL. + +-Vhost multi-queue reconnection failed with QEMU version >= 4.2.0 +----------------------------------------------------------------- ++Vhost multi-queue reconnection failed with QEMU version 4.2.0 to 5.1.0 ++---------------------------------------------------------------------- + + **Description** + It's a QEMU regression bug (bad commit: c6beefd674ff). QEMU only saves + acked features for one vhost-net when vhost quits. When vhost reconnects + to virtio-net/virtio-pmd in multi-queue situations, the features been +- set multiple times are not consistent. ++ set multiple times are not consistent. QEMU-5.2.0 fixes this issue in commit ++ f66337bdbfda ("vhost-user: save features of multiqueues if chardev is closed"). + + **Implication** + Vhost cannot reconnect back to virtio-net/virtio-pmd normally. +diff --git a/dpdk/doc/guides/rel_notes/release_16_04.rst b/dpdk/doc/guides/rel_notes/release_16_04.rst +index e9f1e6ff6c..7e7fcd0ac4 100644 +--- a/dpdk/doc/guides/rel_notes/release_16_04.rst ++++ b/dpdk/doc/guides/rel_notes/release_16_04.rst +@@ -57,7 +57,7 @@ New Features + + * **Enabled Virtio 1.0 support.** + +- Enabled Virtio 1.0 support for Virtio pmd driver. ++ Enabled Virtio 1.0 support for Virtio PMD. 
+ + * **Supported Virtio for ARM.** + +diff --git a/dpdk/doc/guides/rel_notes/release_16_07.rst b/dpdk/doc/guides/rel_notes/release_16_07.rst +index af89cf60a2..d0db3b44d9 100644 +--- a/dpdk/doc/guides/rel_notes/release_16_07.rst ++++ b/dpdk/doc/guides/rel_notes/release_16_07.rst +@@ -231,7 +231,7 @@ EAL + + * **igb_uio: Fixed possible mmap failure for Linux >= 4.5.** + +- The mmaping of the iomem range of the PCI device fails for kernels that ++ The mmapping of the iomem range of the PCI device fails for kernels that + enabled the ``CONFIG_IO_STRICT_DEVMEM`` option. The error seen by the + user is as similar to the following:: + +diff --git a/dpdk/doc/guides/rel_notes/release_16_11.rst b/dpdk/doc/guides/rel_notes/release_16_11.rst +index 92e0ec694e..3cec9143cf 100644 +--- a/dpdk/doc/guides/rel_notes/release_16_11.rst ++++ b/dpdk/doc/guides/rel_notes/release_16_11.rst +@@ -77,7 +77,7 @@ New Features + the current version, even 64 bytes packets take two slots with Virtio PMD on guest + side. + +- The main impact is better performance for 0% packet loss use-cases, as it ++ The main impact is better performance for 0% packet loss use cases, as it + behaves as if the virtqueue size was enlarged, so more packets can be buffered + in the case of system perturbations. On the downside, small performance degradations + were measured when running micro-benchmarks. +diff --git a/dpdk/doc/guides/rel_notes/release_17_08.rst b/dpdk/doc/guides/rel_notes/release_17_08.rst +index dc6224097a..ee4288a6a6 100644 +--- a/dpdk/doc/guides/rel_notes/release_17_08.rst ++++ b/dpdk/doc/guides/rel_notes/release_17_08.rst +@@ -290,7 +290,7 @@ API Changes + * The ``rte_cryptodev_configure()`` function does not create the session + mempool for the device anymore. + * The ``rte_cryptodev_queue_pair_attach_sym_session()`` and +- ``rte_cryptodev_queue_pair_dettach_sym_session()`` functions require ++ ``rte_cryptodev_queue_pair_detach_sym_session()`` functions require + the new parameter ``device id``. + * Parameters of ``rte_cryptodev_sym_session_create()`` were modified to + accept ``mempool``, instead of ``device id`` and ``rte_crypto_sym_xform``. +diff --git a/dpdk/doc/guides/rel_notes/release_18_02.rst b/dpdk/doc/guides/rel_notes/release_18_02.rst +index 3523ea7fdc..4a1b8a92db 100644 +--- a/dpdk/doc/guides/rel_notes/release_18_02.rst ++++ b/dpdk/doc/guides/rel_notes/release_18_02.rst +@@ -142,9 +142,9 @@ New Features + * ``VIRTIO_NET_F_GUEST_UFO``, ``VIRTIO_NET_F_HOST_UFO`` + * ``VIRTIO_NET_F_GSO`` + +- Also added ``VIRTIO_NET_F_GUEST_ANNOUNCE`` feature support in virtio pmd. ++ Also added ``VIRTIO_NET_F_GUEST_ANNOUNCE`` feature support in virtio PMD. + In a scenario where the vhost backend doesn't have the ability to generate +- RARP packets, the VM running virtio pmd can still be live migrated if ++ RARP packets, the VM running virtio PMD can still be live migrated if + ``VIRTIO_NET_F_GUEST_ANNOUNCE`` feature is negotiated. + + * **Updated the AESNI-MB PMD.** +diff --git a/dpdk/doc/guides/rel_notes/release_19_05.rst b/dpdk/doc/guides/rel_notes/release_19_05.rst +index b4c6972e35..52829bdb08 100644 +--- a/dpdk/doc/guides/rel_notes/release_19_05.rst ++++ b/dpdk/doc/guides/rel_notes/release_19_05.rst +@@ -93,13 +93,13 @@ New Features + Updated the KNI kernel module to set the ``max_mtu`` according to the given + initial MTU size. Without it, the maximum MTU was 1500. 
+ +- Updated the KNI PMD driver to set the ``mbuf_size`` and MTU based on ++ Updated the KNI PMD to set the ``mbuf_size`` and MTU based on + the given mb-pool. This provide the ability to pass jumbo frames + if the mb-pool contains a suitable buffer size. + + * **Added the AF_XDP PMD.** + +- Added a Linux-specific PMD driver for AF_XDP. This PMD can create an AF_XDP socket ++ Added a Linux-specific PMD for AF_XDP. This PMD can create an AF_XDP socket + and bind it to a specific netdev queue. It allows a DPDK application to send + and receive raw packets through the socket which would bypass the kernel + network stack to achieve high performance packet processing. +@@ -311,7 +311,7 @@ ABI Changes + + The ``rte_eth_dev_info`` structure has had two extra fields + added: ``min_mtu`` and ``max_mtu``. Each of these are of type ``uint16_t``. +- The values of these fields can be set specifically by the PMD drivers as ++ The values of these fields can be set specifically by the PMDs as + supported values can vary from device to device. + + * cryptodev: in 18.08 a new structure ``rte_crypto_asym_op`` was introduced and +diff --git a/dpdk/doc/guides/rel_notes/release_19_08.rst b/dpdk/doc/guides/rel_notes/release_19_08.rst +index cbb27e8dc3..d2baa828b1 100644 +--- a/dpdk/doc/guides/rel_notes/release_19_08.rst ++++ b/dpdk/doc/guides/rel_notes/release_19_08.rst +@@ -151,7 +151,7 @@ New Features + * Added multi-queue support to allow one af_xdp vdev with multiple netdev + queues. + * Enabled "need_wakeup" feature which can provide efficient support for the +- usecase where the application and driver executing on the same core. ++ use case where the application and driver executing on the same core. + + * **Enabled infinite Rx in the PCAP PMD.** + +diff --git a/dpdk/doc/guides/rel_notes/release_19_11.rst b/dpdk/doc/guides/rel_notes/release_19_11.rst +index 0261d28431..e493628614 100644 +--- a/dpdk/doc/guides/rel_notes/release_19_11.rst ++++ b/dpdk/doc/guides/rel_notes/release_19_11.rst +@@ -236,7 +236,7 @@ New Features + + * **Added Marvell OCTEON TX2 crypto PMD.** + +- Added a new PMD driver for hardware crypto offload block on ``OCTEON TX2`` ++ Added a new PMD for hardware crypto offload block on ``OCTEON TX2`` + SoC. + + See :doc:`../cryptodevs/octeontx2` for more details +diff --git a/dpdk/doc/guides/rel_notes/release_20_05.rst b/dpdk/doc/guides/rel_notes/release_20_05.rst +index 985c845de4..b59576a575 100644 +--- a/dpdk/doc/guides/rel_notes/release_20_05.rst ++++ b/dpdk/doc/guides/rel_notes/release_20_05.rst +@@ -121,6 +121,13 @@ New Features + * Added flow counters to extended stats. + * Added PCI function stats to extended stats. + ++* **Updated Cisco enic driver.** ++ ++ Updated Cisco enic driver GENEVE tunneling support: ++ ++ * Added support to control GENEVE tunneling via UCSM/CIMC and removed devarg. ++ * Added GENEVE port number configuration. ++ + * **Updated Hisilicon hns3 driver.** + + Updated Hisilicon hns3 driver with new features and improvements, including: +diff --git a/dpdk/doc/guides/rel_notes/release_20_11.rst b/dpdk/doc/guides/rel_notes/release_20_11.rst +index e6a7f121c8..725dd6f6da 100644 +--- a/dpdk/doc/guides/rel_notes/release_20_11.rst ++++ b/dpdk/doc/guides/rel_notes/release_20_11.rst +@@ -238,7 +238,7 @@ New Features + + * **Added Wangxun txgbe PMD.** + +- Added a new PMD driver for Wangxun 10 Gigabit Ethernet NICs. ++ Added a new PMD for Wangxun 10 Gigabit Ethernet NICs. + + See the :doc:`../nics/txgbe` for more details. 
+ +@@ -334,7 +334,7 @@ New Features + + * **Added Marvell OCTEON TX2 regex PMD.** + +- Added a new PMD driver for the hardware regex offload block for OCTEON TX2 SoC. ++ Added a new PMD for the hardware regex offload block for OCTEON TX2 SoC. + + See the :doc:`../regexdevs/octeontx2` for more details. + +@@ -1553,3 +1553,2000 @@ Tested Platforms + https://bugzilla.kernel.org/show_bug.cgi?id=207075 + * vm2vm virtio-net connectivity between two vms randomly fails due + to lost connection after vhost reconnect. ++ ++20.11.2 Release Notes ++--------------------- ++ ++20.11.2 Fixes ++~~~~~~~~~~~~~ ++ ++* acl: fix build with GCC 11 ++* app/bbdev: check memory allocation ++* app/bbdev: fix HARQ error messages ++* app/crypto-perf: check memory allocation ++* app/eventdev: fix lcore parsing skipping last core ++* app/eventdev: fix overflow in lcore list parsing ++* app/eventdev: fix timeout accuracy ++* app: fix exit messages ++* app/flow-perf: fix encap/decap actions ++* app/regex: fix usage text ++* app/testpmd: check MAC address query ++* app/testpmd: fix bitmap of link speeds when force speed ++* app/testpmd: fix build with musl ++* app/testpmd: fix DCB forwarding configuration ++* app/testpmd: fix DCB re-configuration ++* app/testpmd: fix division by zero on socket memory dump ++* app/testpmd: fix forward lcores number for DCB ++* app/testpmd: fix max queue number for Tx offloads ++* app/testpmd: fix NVGRE encap configuration ++* app/testpmd: fix segment number check ++* app/testpmd: fix tunnel offload flows cleanup ++* app/testpmd: fix Tx/Rx descriptor query error log ++* app/testpmd: fix usage text ++* app/testpmd: remove unnecessary UDP tunnel check ++* app/testpmd: verify DCB config during forward config ++* bpf: fix JSLT validation ++* build: detect execinfo library on Linux ++* build: exclude meson files from examples installation ++* build: fix drivers selection without Python ++* build: remove redundant _GNU_SOURCE definitions ++* buildtools: fix all drivers disabled on Windows ++* buildtools: fix build with busybox ++* bus/dpaa: fix 64-bit arch detection ++* bus/dpaa: fix build with musl ++* bus/dpaa: fix statistics reading ++* bus/fslmc: fix random portal hangs with qbman 5.0 ++* bus/fslmc: remove unused debug macro ++* bus/pci: fix Windows kernel driver categories ++* bus/pci: skip probing some Windows NDIS devices ++* bus/pci: support I/O port operations with musl ++* ci: catch coredumps ++* ci: enable v21 ABI checks ++* ci: fix package installation in GitHub Actions ++* ci: hook to GitHub Actions ++* ci: ignore APT update failure in GitHub Actions ++* common/dpaax/caamflib: fix build with musl ++* common/dpaax: fix possible null pointer access ++* common/iavf: fix duplicated offload bit ++* common/mlx5: add DevX commands for queue counters ++* common/mlx5: add DevX command to query WQ ++* common/mlx5: add timestamp format support to DevX ++* common/mlx5: fix DevX read output buffer size ++* common/mlx5/linux: add glue function to query WQ ++* common/qat: increase IM buffer size for GEN3 ++* common/sfc_efx/base: add missing MCDI response length checks ++* common/sfc_efx/base: fix dereferencing null pointer ++* common/sfc_efx/base: fix indication of MAE encap support ++* common/sfc_efx/base: limit reported MCDI response length ++* common/sfc_efx: remove GENEVE from supported tunnels ++* compress/qat: enable compression on GEN3 ++* config/ppc: reduce number of cores and NUMA nodes ++* crypto/dpaa2_sec: fix close and uninit functions ++* crypto/dpaa_sec: affine the thread portal 
affinity ++* crypto/octeontx: fix session-less mode ++* crypto/qat: fix null authentication request ++* crypto/qat: fix offset for out-of-place scatter-gather ++* crypto/zuc: fix build with GCC 11 ++* devtools: fix orphan symbols check with busybox ++* doc: fix build with Sphinx 4 ++* doc: fix formatting in testpmd guide ++* doc: fix HiSilicon copyright syntax ++* doc: fix matching versions in ice guide ++* doc: fix multiport syntax in nfp guide ++* doc: fix names of UIO drivers ++* doc: fix runtime options in DLB2 guide ++* doc: fix sphinx rtd theme import in GHA ++* doc: remove PDF requirements ++* doc: update recommended versions for i40e ++* drivers: fix log level after loading ++* drivers/net: fix FW version query ++* eal: add C++ include guard for reciprocal header ++* eal/arm64: fix platform register bit ++* eal: fix build with musl ++* eal: fix comment of OS-specific header files ++* eal: fix evaluation of log level option ++* eal: fix hang in control thread creation ++* eal: fix leak in shared lib mode detection ++* eal: fix memory mapping on 32-bit target ++* eal: fix race in control thread creation ++* eal: fix service core list parsing ++* eal/windows: add missing SPDX license tag ++* eal/windows: fix default thread priority ++* eal/windows: fix return codes of pthread shim layer ++* ethdev: add missing buses in device iterator ++* ethdev: update flow item GTP QFI definition ++* ethdev: validate input in EEPROM info ++* ethdev: validate input in module EEPROM dump ++* ethdev: validate input in register info ++* eventdev: fix case to initiate crypto adapter service ++* eventdev: fix memory leakage on thread creation failure ++* eventdev: remove redundant thread name setting ++* event/dlb2: remove references to deferred scheduling ++* event/dlb: fix header includes for musl ++* event/dpaa2: remove unused macros ++* event/octeontx2: configure crypto adapter xaq pool ++* event/octeontx2: fix crypto adapter queue pair operations ++* event/octeontx2: fix device reconfigure for single slot ++* event/octeontx2: fix XAQ pool reconfigure ++* examples: add eal cleanup to examples ++* examples/bbdev: fix header include for musl ++* examples/ethtool: remove unused parsing ++* examples: fix pkg-config override ++* examples/flow_classify: fix NUMA check of port and core ++* examples/l2fwd-cat: fix NUMA check of port and core ++* examples/l2fwd-crypto: fix packet length while decryption ++* examples/l2fwd-crypto: skip masked devices ++* examples/l3fwd: fix LPM IPv6 subnets ++* examples/l3fwd-power: fix empty poll thresholds ++* examples/packet_ordering: fix port configuration ++* examples/ptpclient: remove wrong comment ++* examples/rxtx_callbacks: fix port ID format specifier ++* examples/skeleton: fix NUMA check of port and core ++* examples/timer: fix time interval ++* examples/vhost: check memory table query ++* examples/vhost_crypto: remove unused short option ++* fbarray: fix log message on truncation error ++* ipc: check malloc sync reply result ++* ipc: use monotonic clock ++* ip_frag: fix fragmenting IPv4 packet with header option ++* kni: fix kernel deadlock with bifurcated device ++* kni: refactor user request processing ++* kni: support async user request ++* license: fix typos ++* log/linux: make default output stderr ++* mbuf: check shared memory before dumping dynamic space ++* mem: fix freeing segments in --huge-unlink mode ++* net/af_xdp: fix error handling during Rx queue setup ++* net/ark: fix leak on thread termination ++* net/ark: refactor Rx buffer recovery ++* net/ark: 
update packet director initial state ++* net/bnx2x: fix build with GCC 11 ++* net/bnx2x: fix build with GCC 11 ++* net/bnxt: check kvargs parsing ++* net/bnxt: check PCI config read ++* net/bnxt: drop unused attribute ++* net/bnxt: fix configuring LRO ++* net/bnxt: fix device readiness check ++* net/bnxt: fix double free in port start failure ++* net/bnxt: fix dynamic VNIC count ++* net/bnxt: fix firmware fatal error handling ++* net/bnxt: fix FW readiness check during recovery ++* net/bnxt: fix handling of null flow mask ++* net/bnxt: fix health check alarm cancellation ++* net/bnxt: fix HWRM and FW incompatibility handling ++* net/bnxt: fix link state operations ++* net/bnxt: fix memory allocation for command response ++* net/bnxt: fix mismatched type comparison in MAC restore ++* net/bnxt: fix mismatched type comparison in Rx ++* net/bnxt: fix PCI write check ++* net/bnxt: fix PTP support for Thor ++* net/bnxt: fix queues per VNIC ++* net/bnxt: fix resource cleanup ++* net/bnxt: fix ring count calculation for Thor ++* net/bnxt: fix RSS context cleanup ++* net/bnxt: fix Rx and Tx timestamps ++* net/bnxt: fix Rx buffer posting ++* net/bnxt: fix Rx descriptor status ++* net/bnxt: fix Rx queue count ++* net/bnxt: fix Rx timestamp when FIFO pending bit is set ++* net/bnxt: fix single PF per port check ++* net/bnxt: fix timesync when PTP is not supported ++* net/bnxt: fix Tx length hint threshold ++* net/bnxt: fix Tx timestamp init ++* net/bnxt: fix VF info allocation ++* net/bnxt: fix VNIC configuration ++* net/bnxt: fix xstats get ++* net/bnxt: mute some failure logs ++* net/bnxt: prevent device access in error state ++* net/bnxt: refactor multi-queue Rx configuration ++* net/bnxt: remove unnecessary forward declarations ++* net/bnxt: remove unused function parameters ++* net/bnxt: remove unused macro ++* net/bnxt: use prefix on global function ++* net/bonding: fix adding itself as its slave ++* net/bonding: fix LACP system address check ++* net/bonding: fix leak on remove ++* net/bonding: fix socket ID check ++* net/cxgbe: remove use of uint type ++* net/dpaa2: fix getting link status ++* net/dpaa: fix getting link status ++* net/e1000/base: fix timeout for shadow RAM write ++* net/e1000: fix flow error message object ++* net/e1000: fix max Rx packet size ++* net/e1000: fix Rx error counter for bad length ++* net/e1000: remove MTU setting limitation ++* net/ena/base: destroy multiple wait events ++* net/ena/base: fix type conversions by explicit casting ++* net/ena/base: improve style and comments ++* net/ena: fix crash with unsupported device argument ++* net/ena: fix parsing of large LLQ header device argument ++* net/ena: fix releasing Tx ring mbufs ++* net/ena: indicate Rx RSS hash presence ++* net/ena: remove endian swap functions ++* net/ena: report default ring size ++* net/ena: switch memcpy to optimized version ++* net/enic: enable GENEVE offload via VNIC configuration ++* net/enic: fix flow initialization error handling ++* net/failsafe: fix RSS hash offload reporting ++* net/failsafe: report minimum and maximum MTU ++* net: fix comment in IPv6 header ++* net/hinic: fix crash in secondary process ++* net/hns3: clear hash map on flow director clear ++* net/hns3: delete redundant blank line ++* net/hns3: fail setting FEC if one bit mode is not supported ++* net/hns3: fix concurrent interrupt handling ++* net/hns3: fix configure FEC when concurrent with reset ++* net/hns3: fix copyright date ++* net/hns3: fix DCB configuration ++* net/hns3: fix DCB mode check ++* net/hns3: fix DCB 
reconfiguration ++* net/hns3: fix device capabilities for copper media type ++* net/hns3: fix flow control exception ++* net/hns3: fix flow control mode ++* net/hns3: fix flow counter value ++* net/hns3: fix flow director lock ++* net/hns3: fix FLR miss detection ++* net/hns3: fix handling link update ++* net/hns3: fix HW buffer size on MTU update ++* net/hns3: fix link speed when port is down ++* net/hns3: fix link speed when VF device is down ++* net/hns3: fix link status when port is stopped ++* net/hns3: fix link update when failed to get link info ++* net/hns3: fix log on flow director clear ++* net/hns3: fix long task queue pairs reset time ++* net/hns3: fix mailbox error message ++* net/hns3: fix mailbox message ID in log ++* net/hns3: fix mbuf leakage ++* net/hns3: fix missing outer L4 UDP flag for VXLAN ++* net/hns3: fix MTU config complexity ++* net/hns3: fix ordering in secondary process initialization ++* net/hns3: fix possible mismatched response of mailbox ++* net/hns3: fix processing link status message on PF ++* net/hns3: fix processing Tx offload flags ++* net/hns3: fix querying flow director counter for out param ++* net/hns3: fix queue state when concurrent with reset ++* net/hns3: fix reporting undefined speed ++* net/hns3: fix requested FC mode rollback ++* net/hns3: fix rollback after setting PVID failure ++* net/hns3: fix Rx/Tx queue numbers check ++* net/hns3: fix secondary process request start/stop Rx/Tx ++* net/hns3: fix setting default MAC address in bonding of VF ++* net/hns3: fix some packet types ++* net/hns3: fix time delta calculation ++* net/hns3: fix timing in mailbox ++* net/hns3: fix timing in resetting queues ++* net/hns3: fix TM QCN error event report by MSI-X ++* net/hns3: fix Tx checksum for UDP packets with special port ++* net/hns3: fix typos on comments ++* net/hns3: fix use of command status enumeration ++* net/hns3: fix vector Rx burst limitation ++* net/hns3: fix verification of NEON support ++* net/hns3: fix VF alive notification after config restore ++* net/hns3: fix VF handling LSC event in secondary process ++* net/hns3: fix VF mailbox head field ++* net/hns3: fix VMDq mode check ++* net/hns3: increase readability in logs ++* net/hns3: log time delta in decimal format ++* net/hns3: remove meaningless packet buffer rollback ++* net/hns3: remove read when enabling TM QCN error event ++* net/hns3: remove redundant mailbox response ++* net/hns3: remove unused macro ++* net/hns3: remove unused macros ++* net/hns3: remove unused macros ++* net/hns3: remove unused mailbox macro and struct ++* net/hns3: remove unused parameter markers ++* net/hns3: remove unused VMDq code ++* net/hns3: remove VLAN/QinQ ptypes from support list ++* net/hns3: return error on PCI config write failure ++* net/hns3: support get device version when dump register ++* net/hns3: update HiSilicon copyright syntax ++* net/i40e: announce request queue capability in PF ++* net/i40e: fix flow director config after flow validate ++* net/i40e: fix flow director for common pctypes ++* net/i40e: fix input set field mask ++* net/i40e: fix IPv4 fragment offload ++* net/i40e: fix lack of MAC type when set MAC address ++* net/i40e: fix negative VEB index ++* net/i40e: fix parsing packet type for NEON ++* net/i40e: fix primary MAC type when starting port ++* net/i40e: fix VF RSS configuration ++* net/i40e: remove redundant VSI check in Tx queue setup ++* net/i40evf: fix packet loss for X722 ++* net/iavf: fix crash in AVX512 ++* net/iavf: fix lack of MAC type when set MAC address ++* 
net/iavf: fix packet length parsing in AVX512 ++* net/iavf: fix primary MAC type when starting port ++* net/iavf: fix TSO max segment size ++* net/iavf: fix VF to PF command failure handling ++* net/iavf: fix wrong Tx context descriptor ++* net/ice/base: cleanup filter list on error ++* net/ice/base: fix build with GCC 11 ++* net/ice/base: fix memory allocation for MAC addresses ++* net/ice/base: fix memory allocation wrapper ++* net/ice/base: fix payload indicator on ptype ++* net/ice/base: fix uninitialized struct ++* net/ice: check some functions return ++* net/ice: fix crash in AVX512 ++* net/ice: fix disabling promiscuous mode ++* net/ice: fix fast mbuf freeing ++* net/ice: fix illegal access when removing MAC filter ++* net/ice: fix leak on thread termination ++* net/ice: fix RSS for L2 packet ++* net/ice: fix RSS hash update ++* net/ice: fix VLAN filter with PF ++* net/ice: fix VSI array out of bounds access ++* net/igc: fix Rx error counter for bad length ++* net/igc: fix Rx packet size ++* net/igc: fix Rx RSS hash offload capability ++* net/igc: fix speed configuration ++* net/igc: remove MTU setting limitation ++* net/igc: remove use of uint type ++* net/ionic: fix completion type in lif init ++* net/ixgbe: fix RSS RETA being reset after port start ++* net/ixgbe: fix Rx errors statistics for UDP checksum ++* net/kni: check init result ++* net/kni: warn on stop failure ++* net/memif: fix Tx bps statistics for zero-copy ++* net/mlx4: fix buffer leakage on device close ++* net/mlx4: fix leak when configured repeatedly ++* net/mlx4: fix RSS action with null hash key ++* net/mlx4: fix secondary process initialization ordering ++* net/mlx5: fix counter offset detection ++* net/mlx5: fix drop action for Direct Rules/Verbs ++* net/mlx5: fix external buffer pool registration for Rx queue ++* net/mlx5: fix flow actions index in cache ++* net/mlx5: fix flow age event triggering ++* net/mlx5: fix hashed list size for tunnel flow groups ++* net/mlx5: fix leak when configured repeatedly ++* net/mlx5: fix loopback for Direct Verbs queue ++* net/mlx5: fix metadata item validation for ingress flows ++* net/mlx5: fix missing shared RSS hash types ++* net/mlx5: fix probing device in legacy bonding mode ++* net/mlx5: fix receiving queue timestamp format ++* net/mlx5: fix redundant flow after RSS expansion ++* net/mlx5: fix resource release for mirror flow ++* net/mlx5: fix RSS flow item expansion for GRE key ++* net/mlx5: fix RSS flow item expansion for NVGRE ++* net/mlx5: fix Rx metadata leftovers ++* net/mlx5: fix Rx segmented packets on mbuf starvation ++* net/mlx5: fix secondary process initialization ordering ++* net/mlx5: fix shared inner RSS ++* net/mlx5: fix tunnel offload private items location ++* net/mlx5: fix UAR allocation diagnostics messages ++* net/mlx5: fix using flow tunnel before null check ++* net/mlx5/linux: fix firmware version ++* net/mlx5: remove drop queue function prototypes ++* net/mlx5: support RSS expansion for IPv6 GRE ++* net/mlx5: support timestamp format ++* net/nfp: fix reporting of RSS capabilities ++* net/octeontx2: fix VLAN filter ++* net/pcap: fix file descriptor leak on close ++* net/pcap: fix format string ++* net/qede: accept bigger RSS table ++* net/qede: reduce log verbosity ++* net/sfc: fix buffer size for flow parse ++* net/sfc: fix error path inconsistency ++* net/sfc: fix mark support in EF100 native Rx datapath ++* net/sfc: fix outer rule rollback on error ++* net/tap: check ioctl on restore ++* net/tap: fix build with GCC 11 ++* net/tap: fix interrupt 
vector array size ++* net/txgbe: fix QinQ strip ++* net/txgbe: fix Rx missed packet counter ++* net/txgbe: remove unused functions ++* net/txgbe: update packet type ++* net/vhost: restore pseudo TSO support ++* net/virtio: fix getline memory leakage ++* net/virtio: fix interrupt unregistering for listening socket ++* net/virtio: fix vectorized Rx queue rearm ++* pipeline: fix endianness conversions ++* pipeline: fix instruction translation ++* power: do not skip saving original P-state governor ++* power: fix sanity checks for guest channel read ++* power: remove duplicated symbols from map file ++* power: save original ACPI governor always ++* raw/ifpga: fix device name format ++* raw/ioat: fix script for configuring small number of queues ++* raw/ntb: check memory allocations ++* raw/ntb: check SPAD user index ++* raw/octeontx2_dma: assign PCI device in DPI VF ++* raw/skeleton: add missing check after setting attribute ++* regex/mlx5: support timestamp format ++* regex/octeontx2: remove unused include directory ++* sched: fix traffic class oversubscription parameter ++* service: clean references to removed symbol ++* stack: allow lock-free only on relevant architectures ++* table: fix actions with different data size ++* telemetry: fix race on callbacks list ++* test/bpf: fix error message ++* test: check flow classifier creation ++* test: check thread creation ++* test/cmdline: fix inputs array ++* test/cmdline: silence clang 12 warning ++* test/crypto: copy offset data to OOP destination buffer ++* test/crypto: fix auth-cipher compare length in OOP ++* test/crypto: fix build with GCC 11 ++* test/crypto: fix return value of a skipped test ++* test/distributor: fix burst flush on worker quit ++* test/distributor: fix worker notification in burst mode ++* test/event: fix timeout accuracy ++* test: fix autotest handling of skipped tests ++* test: fix build with GCC 11 ++* test: fix division by zero ++* test: fix TCP header initialization ++* test/kni: check init result ++* test/kni: fix a comment ++* test/mem: fix page size for external memory ++* test/mempool: fix object initializer ++* test/power: add delay before checking CPU frequency ++* test/power: add turbo mode to frequency check ++* test/power: fix CPU frequency check ++* test/power: fix low frequency test when turbo enabled ++* test/power: fix turbo test ++* test/power: round CPU frequency to check ++* test: proceed if timer subsystem already initialized ++* test/table: fix build with GCC 11 ++* test/timer: check memzone allocation ++* test/trace: fix race on collected perf data ++* vdpa/ifc: check PCI config read ++* vdpa/mlx5: fix device unplug ++* vdpa/mlx5: fix virtq cleaning ++* vdpa/mlx5: support timestamp format ++* version: 20.11.2-rc1 ++* version: 20.11.2-rc2 ++* vfio: do not merge contiguous areas ++* vfio: fix API description ++* vfio: fix DMA mapping granularity for IOVA as VA ++* vfio: fix duplicated user mem map ++* vhost: fix batch dequeue potential buffer overflow ++* vhost: fix initialization of async temporary header ++* vhost: fix initialization of temporary header ++* vhost: fix offload flags in Rx path ++* vhost: fix packed ring potential buffer overflow ++* vhost: fix queue initialization ++* vhost: fix redundant vring status change notification ++* vhost: fix split ring potential buffer overflow ++ ++20.11.2 Validation ++~~~~~~~~~~~~~~~~~~ ++ ++* Intel(R) Testing ++ ++ * Basic Intel(R) NIC testing ++ ++ * PF(i40e, ixgbe, ice) ++ * VF(i40e, ixgbe, ice) ++ * Compile testing ++ * Intel NIC single core/NIC 
performance ++ ++ * Basic cryptodev and virtio testing ++ ++ * Virtio function and performance ++ * Cryptodev function and performance ++ ++* Nvidia(R) Testing ++ ++ * Basic functionality with testpmd ++ ++ * Tx/Rx ++ * xstats ++ * Timestamps ++ * Link status ++ * RTE flow and flow_director ++ * RSS ++ * VLAN stripping and insertion ++ * Checksum/TSO ++ * ptype ++ * link_status_interrupt example application ++ * l3fwd-power example application ++ * Multi-process example applications ++ * Hardware LRO ++ ++ * Build tests ++ ++ * Ubuntu 20.04.2 with MLNX_OFED_LINUX-5.3-1.0.0.1. ++ * Ubuntu 20.04.2 with rdma-core master (a66e2a5). ++ * Ubuntu 20.04.2 with rdma-core v28.0. ++ * Ubuntu 18.04.5 with rdma-core v17.1. ++ * Ubuntu 18.04.5 with rdma-core master (a66e2a5) (i386). ++ * Ubuntu 16.04.7 with rdma-core v22.7. ++ * Fedora 34 with rdma-core v35.0. ++ * Fedora 35 (Rawhide) with rdma-core v35.0 (only with gcc). ++ * CentOS 7 7.9.2009 with rdma-core master (a66e2a5). ++ * CentOS 7 7.9.2009 with MLNX_OFED_LINUX-5.3-1.0.0.1. ++ * CentOS 8 8.3.2011 with rdma-core master (7f2d460). ++ * OpenSUSE Leap 15.3 with rdma-core v31.0. ++ ++ * ConnectX-4 Lx ++ ++ * OS: Ubuntu 20.04 LTS ++ * Driver: MLNX_OFED_LINUX-5.3-1.0.0.1 ++ * Firmware: 14.30.1004 ++ ++ * ConnectX-5 ++ ++ * OS: Ubuntu 20.04 LTS ++ * Driver: MLNX_OFED_LINUX-5.3-1.0.0.1 ++ * Firmware: 16.30.1004 ++ ++* Broadcom(R) Testing ++ ++ * Functionality ++ ++ * Tx/Rx ++ * Link status ++ * RSS ++ * TSO ++ * VLAN filtering ++ * MAC filtering ++ * statistics ++ * Checksum offload ++ * MTU ++ * Promiscuous mode ++ * Multicast ++ ++ * Platform ++ ++ * BCM57414 NetXtreme-E 10Gb/25Gb Ethernet Controller, Firmware: 219.0.88.0 ++ * BCM57508 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb/200Gb Ethernet, Firmware : 220.0.0.100 ++ ++20.11.2 Known Issues ++~~~~~~~~~~~~~~~~~~~~ ++ ++* Build ++ ++ * Clang build is failing in Fedora 35. ++ https://bugs.dpdk.org/show_bug.cgi?id=745 ++ ++* I40E/IXGBE ++ ++ * Flow director does not work. ++ Fixed in 21.08. ++ ++* ICE ++ ++ * Packet can't be distributed to the same queue after reconfiguration. ++ Fixed in 21.08. ++ * The hash value remains unchanged when the SCTP port value changed. ++ Fixed in 21.08 new feature. 
++ ++20.11.3 Release Notes ++--------------------- ++ ++20.11.3 Fixes ++~~~~~~~~~~~~~ ++ ++* app/crypto-perf: fix out-of-place mempool allocation ++* app/test: fix IPv6 header initialization ++* app/testpmd: change port link speed without stopping all ++* app/testpmd: fix help string for port reset ++* app/testpmd: fix IPv4 checksum ++* app/testpmd: fix MAC address after port reset ++* app/testpmd: fix offloads for newly attached port ++* app/testpmd: fix Tx checksum calculation for tunnel ++* app/testpmd: fix type of FEC mode parsing output ++* bitmap: fix buffer overrun in bitmap init ++* build: support drivers symlink on Windows ++* bus: clarify log for non-NUMA-aware devices ++* bus/dpaa: fix freeing in FMAN interface destructor ++* bus/pci: fix IOVA as VA support for PowerNV ++* bus/pci: fix leak for unbound devices ++* common/mlx5: fix compatibility with OFED port query API ++* common/mlx5: fix memory region leak ++* common/mlx5: fix Netlink port name padding in probing ++* common/mlx5: fix Netlink receive message buffer size ++* common/mlx5: use new port query API if available ++* crypto/aesni_gcm: fix performance on some AVX512 CPUs ++* cryptodev: fix freeing after device release ++* crypto/mvsam: fix AES-GCM session parameters ++* crypto/mvsam: fix capabilities ++* crypto/mvsam: fix options parsing ++* crypto/mvsam: fix session data reset ++* crypto/octeontx2: fix IPsec session member overlap ++* crypto/octeontx2: fix lookaside IPsec IV pointer ++* crypto/octeontx: fix freeing after device release ++* crypto/qat: disable asymmetric crypto on GEN3 ++* crypto/qat: fix Arm build with special memcpy ++* devtools: fix file listing in maintainers check ++* distributor: fix 128-bit write alignment ++* doc: add limitation for ConnectX-4 with L2 in mlx5 guide ++* doc: fix build on Windows with Meson 0.58 ++* doc: fix default burst size in testpmd ++* doc: fix spelling ++* doc: fix typo in SPDX tag ++* doc: remove old deprecation notice for sched ++* doc: update atomic operation deprecation ++* drivers/net: fix memzone allocations for DMA memory ++* eal/windows: check callback parameter of alarm functions ++* eal/windows: cleanup virt2phys handle ++* ethdev: fix doc of flow action ++* eventdev: fix event port setup in Tx adapter ++* examples/l2fwd: fix [no-]mac-updating options ++* flow_classify: fix leaking rules on delete ++* graph: fix memory leak in stats ++* graph: fix null dereference in stats ++* ipc: stop mp control thread on cleanup ++* kni: fix crash on userspace VA for segmented packets ++* kni: fix mbuf allocation for kernel side use ++* malloc: fix size annotation for NUMA-aware realloc ++* mempool/octeontx2: fix shift calculation ++* net/bnxt: check access to possible null pointer ++* net/bnxt: cleanup code ++* net/bnxt: clear cached statistics ++* net/bnxt: detect bad opaque in Rx completion ++* net/bnxt: fix aarch32 build ++* net/bnxt: fix auto-negociation on Whitney+ ++* net/bnxt: fix check for PTP support in FW ++* net/bnxt: fix error handling in VNIC prepare ++* net/bnxt: fix error messages in VNIC prepare ++* net/bnxt: fix missing barriers in completion handling ++* net/bnxt: fix nested lock during bonding ++* net/bnxt: fix null dereference in interrupt handler ++* net/bnxt: fix ring allocation and free ++* net/bnxt: fix ring and context memory allocation ++* net/bnxt: fix Rx burst size constraint ++* net/bnxt: fix Rx interrupt setting ++* net/bnxt: fix scalar Tx completion handling ++* net/bnxt: fix Tx descriptor status implementation ++* net/bnxt: fix typo in log 
message ++* net/bnxt: improve probing log message ++* net/bnxt: invoke device removal event on recovery failure ++* net/bnxt: remove unnecessary code ++* net/bnxt: remove unnecessary comment ++* net/bnxt: remove workaround for default VNIC ++* net/bnxt: set flow error after tunnel redirection free ++* net/bnxt: set flow error when free filter not available ++* net/bnxt: use common function to free VNIC resource ++* net/bnxt: workaround spurious zero stats in Thor ++* net/bonding: check flow setting ++* net/bonding: fix error message on flow verify ++* net/dpaa: fix headroom in VSP case ++* net/ena: enable multi-segment in Tx offload flags ++* net/ena: trigger reset on Tx prepare failure ++* net/hinic/base: fix LRO ++* net/hinic: fix MTU consistency with firmware ++* net/hinic: increase protection of the VLAN ++* net/hns3: fix Arm SVE build with GCC 8.3 ++* net/hns3: fix delay for waiting to stop Rx/Tx ++* net/hns3: fix fake queue rollback ++* net/hns3: fix filter parsing comment ++* net/hns3: fix flow rule list in multi-process ++* net/hns3: fix maximum queues on configuration failure ++* net/hns3: fix residual MAC address entry ++* net/hns3: fix timing of clearing interrupt source ++* net/hns3: fix Tx prepare after stop ++* net/hns3: fix VLAN strip log ++* net/hns3: increase VF reset retry maximum ++* net/i40e: fix descriptor scan on Arm ++* net/i40e: fix flow director input set conflict ++* net/i40e: fix multi-process shared data ++* net/i40e: fix raw packet flow director ++* net/i40e: fix use after free in FDIR release ++* net/iavf: fix handling of unsupported promiscuous ++* net/iavf: fix RSS key access out of bound ++* net/iavf: fix scalar Rx ++* net/iavf: fix Tx threshold check ++* net/ice: fix data path in secondary process ++* net/ice: fix data path selection in secondary process ++* net/ice: fix default RSS key generation ++* net/ice: fix memzone leak when firmware is missing ++* net/ice: fix overflow in maximum packet length config ++* net/ixgbe: fix flow entry access after freeing ++* net/memif: fix abstract socket address length ++* net/mlx5: add Tx scheduling check on queue creation ++* net/mlx5: export PMD-specific API file ++* net/mlx5: fix default queue number in RSS flow rule ++* net/mlx5: fix flow engine type in function name ++* net/mlx5: fix imissed statistics ++* net/mlx5: fix indirect action modify rollback ++* net/mlx5: fix IPIP multi-tunnel validation ++* net/mlx5: fix match MPLS over GRE with key ++* net/mlx5: fix missing RSS expandable items ++* net/mlx5: fix missing RSS expansion of IPv6 frag ++* net/mlx5: fix MPLS RSS expansion ++* net/mlx5: fix multi-segment inline for the first segments ++* net/mlx5: fix overflow in mempool argument ++* net/mlx5: fix pattern expansion in RSS flow rules ++* net/mlx5: fix queue leaking in hairpin auto bind check ++* net/mlx5: fix representor interrupt handler ++* net/mlx5: fix RoCE LAG bond device probing ++* net/mlx5: fix RSS expansion for GTP ++* net/mlx5: fix RSS flow rule with L4 mismatch ++* net/mlx5: fix RSS pattern expansion ++* net/mlx5: fix r/w lock usage in DMA unmap ++* net/mlx5: fix Rx/Tx queue checks ++* net/mlx5: fix switchdev mode recognition ++* net/mlx5: fix threshold for mbuf replenishment in MPRQ ++* net/mlx5: fix timestamp initialization on empty clock queue ++* net/mlx5: fix TSO multi-segment inline length ++* net/mlx5: fix typo in vectorized Rx comments ++* net/mlx5: reject inner ethernet matching in GTP ++* net/mlx5: remove redundant operations in NEON Rx ++* net/mlx5: remove unsupported flow item MPLS 
over IP ++* net/mlx5: workaround drop action with old kernel ++* net/mvpp2: fix configured state dependency ++* net/mvpp2: fix port speed overflow ++* net/octeontx2: fix default MCAM allocation size ++* net/octeontx2: fix flow creation limit on CN98xx ++* net/octeontx2: fix TM node statistics query ++* net/octeontx2: use runtime LSO format indices ++* net/octeontx/base: fix debug build with clang ++* net/pfe: remove unnecessary null check ++* net/sfc: check ID overflow in action port ID ++* net/sfc: fix aarch32 build ++* net/sfc: fix MAC stats lock in xstats query by ID ++* net/sfc: fix MAC stats update for stopped device ++* net/sfc: fix outer L4 checksum Rx ++* net/sfc: fix outer match in MAE backend ++* net/sfc: fix reading adapter state without locking ++* net/sfc: fix xstats query by ID according to ethdev ++* net/sfc: fix xstats query by unsorted list of IDs ++* net/softnic: fix connection memory leak ++* net/softnic: fix memory leak as profile is freed ++* net/softnic: fix memory leak in arguments parsing ++* net/softnic: fix null dereference in arguments parsing ++* net/tap: fix Rx checksum flags on IP options packets ++* net/tap: fix Rx checksum flags on TCP packets ++* net/virtio: fix aarch32 build ++* net/virtio: fix default duplex mode ++* net/virtio: fix interrupt handle leak ++* net/virtio: fix refill order in packed ring datapath ++* net/virtio: fix Rx scatter offload ++* net/virtio: report maximum MTU in device info ++* raw/ioat: fix config script queue size calculation ++* regex/mlx5: fix redundancy in device removal ++* regex/mlx5: fix size of setup constants ++* rib: fix max depth IPv6 lookup ++* sched: fix profile allocation failure handling ++* sched: rework configuration failure handling ++* table: fix bucket empty check ++* test/crypto: fix autotest function parameters ++* test/crypto: fix mbuf reset after null check ++* test/crypto: fix mempool size for session-less ++* test/crypto: fix typo in AES case ++* test/crypto: fix typo in ESN case ++* test/mbuf: fix virtual address conversion ++* test/power: fix CPU frequency check for intel_pstate ++* test/power: fix CPU frequency when turbo enabled ++* tests/cmdline: fix memory leaks ++* tests/eal: fix memory leak ++* vdpa/mlx5: fix overflow in queue attribute ++* vdpa/mlx5: fix TSO offload without checksum ++* version: 20.11.3-rc1 ++* vfio: add stdbool include ++* vhost: check header for legacy dequeue offload ++* vhost/crypto: check request pointer before dereference ++* vhost: fix crash on reconnect ++* vhost: fix lock on device readiness notification ++* vhost: fix missing guest pages table NUMA realloc ++* vhost: fix missing memory table NUMA realloc ++* vhost: fix NUMA reallocation with multi-queue ++ ++20.11.3 Validation ++~~~~~~~~~~~~~~~~~~ ++ ++* Intel(R) Testing ++ ++ * Basic Intel(R) NIC(ixgbe, i40e and ice) testing ++ * PF (i40e) ++ * PF (ixgbe) ++ * PF (ice) ++ * VF (i40e) ++ * VF (ixgbe) ++ * VF (ice) ++ * Compile Testing ++ * Intel NIC single core/NIC performance ++ * Power and IPsec ++ ++ * Basic cryptodev and virtio testing ++ ++ * vhost/virtio basic loopback, PVP and performance test ++ * cryptodev Function/Performance ++ ++ ++* Nvidia(R) Testing ++ ++ * Basic functionality with testpmd ++ ++ * Tx/Rx ++ * xstats ++ * Timestamps ++ * Link status ++ * RTE flow and flow_director ++ * RSS ++ * VLAN stripping and insertion ++ * Checksum/TSO ++ * ptype ++ * link_status_interrupt example application ++ * l3fwd-power example application ++ * Multi-process example applications ++ * Hardware LRO tests ++ ++ * 
Build tests ++ ++ * Ubuntu 20.04.2 with MLNX_OFED_LINUX-5.4-1.0.3.0. ++ * Ubuntu 20.04.2 with rdma-core master (64d1ae5). ++ * Ubuntu 20.04.2 with rdma-core v28.0. ++ * Ubuntu 18.04.5 with rdma-core v17.1. ++ * Ubuntu 18.04.5 with rdma-core master (5b0f5b2) (i386). ++ * Ubuntu 16.04.7 with rdma-core v22.7. ++ * Fedora 34 with rdma-core v36.0. ++ * Fedora 36 (Rawhide) with rdma-core v36.0 (only with gcc). ++ * CentOS 7 7.9.2009 with rdma-core master (64d1ae5). ++ * CentOS 7 7.9.2009 with MLNX_OFED_LINUX-5.4-1.0.3.0. ++ * CentOS 8 8.3.2011 with rdma-core master (64d1ae5). ++ * OpenSUSE Leap 15.3 with rdma-core v31.0. ++ ++ * ConnectX-5 ++ ++ * Ubuntu 20.04 ++ * Driver MLNX_OFED_LINUX-5.4-1.0.3.0 ++ * Kernel: 5.14.0-rc6 / Driver: rdma-core v36.0 ++ * fw 16.31.1014 ++ ++ * ConnectX-4 Lx ++ ++ * Ubuntu 20.04 ++ * Driver MLNX_OFED_LINUX-5.4-1.0.3.0 ++ * Kernel: 5.14.0-rc6 / Driver: rdma-core v36.0 ++ * fw 14.31.1014 ++ ++ ++* Red Hat(R) Testing ++ ++ * Platform ++ ++ * RHEL 8 ++ * Kernel 4.18 ++ * Qemu 6.0 ++ * X540-AT2 NIC(ixgbe, 10G) ++ ++ * Functionality ++ ++ * Guest with device assignment(PF) throughput testing(1G hugepage size) ++ * Guest with device assignment(PF) throughput testing(2M hugepage size) ++ * Guest with device assignment(VF) throughput testing ++ * PVP (host dpdk testpmd as vswitch) 1Q: throughput testing ++ * PVP vhost-user 2Q throughput testing ++ * PVP vhost-user 1Q cross numa node throughput testing ++ * Guest with vhost-user 2 queues throughput testing ++ * vhost-user reconnect with dpdk-client, qemu-server: qemu reconnect ++ * vhost-user reconnect with dpdk-client, qemu-server: ovs reconnect ++ * PVP 1Q live migration testing ++ * PVP 1Q cross numa node live migration testing ++ * Guest with ovs+dpdk+vhost-user 1Q live migration testing ++ * Guest with ovs+dpdk+vhost-user 1Q live migration testing (2M) ++ * Guest with ovs+dpdk+vhost-user 2Q live migration testing ++ * Host PF + DPDK testing ++ * Host VF + DPDK testing ++ ++* Canonical(R) Testing ++ ++ * Build tests of DPDK & OVS 2.15.0 on Ubuntu 21.04 (meson based) ++ * Functional and performance tests based on OVS-DPDK on x86_64 ++ * Autopkgtests for DPDK and OpenvSwitch ++ ++20.11.3 Known Issues ++~~~~~~~~~~~~~~~~~~~~ ++ ++* ICE ++ ++ * creating 512 acl rules after creating a full mask switch rule fails. ++ ++* vhost/virtio ++ ++ * udp-fragmentation-offload cannot be setup on Ubuntu 19.10 VMs. ++ https://bugzilla.kernel.org/show_bug.cgi?id=207075 ++ * vm2vm virtio-net connectivity between two vms randomly fails due ++ to lost connection after vhost reconnect. ++ ++* unit tests ++ ++ * unit_tests_power/power_cpufreq fails. ++ https://bugs.dpdk.org/show_bug.cgi?id=790 ++ ++* IAVF ++ ++ * cvl_advanced_iavf_rss: after changing the SCTP port value, the hash value ++ remains unchanged. 
++ ++20.11.4 Release Notes ++--------------------- ++ ++ ++20.11.4 Fixes ++~~~~~~~~~~~~~ ++ ++* app/crypto-perf: fix AAD template copy overrun ++* app/eventdev: fix terminal colour after control-c exit ++* app/flow-perf: fix parsing of invalid option ++* app/testpmd: add tunnel types ++* app/testpmd: fix access to DSCP table entries ++* app/testpmd: fix check without outer checksum ++* app/testpmd: fix DCB in VT configuration ++* app/testpmd: fix dump of Tx offload flags ++* app/testpmd: fix hexadecimal parser with odd length ++* app/testpmd: fix hex string parser in flow commands ++* app/testpmd: fix packet burst spreading stats ++* app/testpmd: fix RSS key length ++* app/testpmd: fix RSS type display ++* app/testpmd: fix tunnel offload validation ++* app/testpmd: fix txonly forwarding ++* app/testpmd: fix Tx retry in flowgen engine ++* app/testpmd: remove double dependency on bitrate lib ++* app/testpmd: remove unused header file ++* app/testpmd: retain all original dev conf when config DCB ++* app/testpmd: update forward engine beginning ++* baseband/acc100: fix 4GUL outbound size ++* bitrate: fix calculation to match API description ++* bitrate: fix registration to match API description ++* bpf: allow self-xor operation ++* build: disable Windows warnings for insecure funtions ++* build: propagate Windows system dependencies to pkg-config ++* bus/fslmc: remove unused device count ++* bus/pci: fix unknown NUMA node value on Windows ++* bus/pci: fix unknown NUMA node value on Windows ++* bus/vmbus: fix leak on device scan ++* bus/vmbus: fix ring buffer mapping in secondary process ++* common/cpt: fix KASUMI input length ++* common/dpaax/caamflib: fix IV for short MAC-I in SNOW3G ++* common/dpaax: fix physical address conversion ++* common/iavf: fix ARQ resource leak ++* common/mlx5: create wrapped MR ++* common/mlx5: fix build for zero-length headroom array ++* common/mlx5: fix flex parser DevX creation routine ++* common/mlx5: fix physical port name recognition ++* common/mlx5: fix UAR allocation diagnostics messages ++* common/mlx5: fix user mode register access attribute ++* common/mlx5: glue MR registration with IOVA ++* common/mlx5: remove unreachable branch in UAR allocation ++* common/qat: fix queue pairs number ++* common/qat: revert fix queut pairs number ++* common/sfc_efx: fix debug compilation control ++* config/ppc: ignore GCC 11 psabi warnings ++* config/x86: skip GNU binutils bug check for LLVM ++* cryptodev: fix multi-segment raw vector processing ++* crypto/ipsec_mb: fix cipher key setting ++* crypto/octeontx2: fix lookaside IPsec IPv6 ++* crypto/octeontx2: fix unaligned access to device memory ++* crypto/openssl: fix CCM processing 0 length source ++* crypto/qat: fix status in RSA decryption ++* crypto/qat: fix uncleared cookies after operation ++* devtools: fix letter case check in commit title ++* doc: capitalise PMD ++* doc: describe timestamp limitations for mlx5 ++* doc: fix a typo in EAL guide ++* doc: fix bonding driver name ++* doc: fix default mempool option in guides ++* doc: fix Doxygen examples build on FreeBSD ++* doc: fix emulated device names in e1000 guide ++* doc: fix memif driver acronyms ++* doc: fix numbers power of 2 in LPM6 guide ++* doc: fix typo in coding style ++* doc: remove repeated repeated words ++* doc: strip build artefacts for examples file list ++* doc: update NIC feature matrix for bnxt ++* drivers/crypto: fix IPsec TTL decrement option ++* drivers/net: fix typo in vector Rx comment ++* drivers/net: fix vector Rx comments ++* 
drivers/net: remove queue xstats auto-fill flag ++* eal/common: exclude code unsupported on Windows ++* eal: fix device iterator when no bus is selected ++* eal: fix memory leak when saving arguments ++* eal/freebsd: fix IOVA mode selection ++* eal/freebsd: ignore in-memory option ++* eal/freebsd: lock memory device to prevent conflicts ++* eal/linux: fix uevent message parsing ++* eal/linux: remove unused variable for socket memory ++* eal/ppc: ignore GCC 10 stringop-overflow warnings ++* eal: remove Windows-specific list of common files ++* eal: reset lcore task callback and argument ++* eal/windows: do not install virt2phys header ++* eal/windows: export version function ++* eal/windows: fix CPU cores counting ++* eal/windows: fix IOVA mode detection and handling ++* eal/x86: avoid cast-align warning in memcpy functions ++* eal/x86: fix some CPU extended features definitions ++* ethdev: fix crash on owner delete ++* ethdev: fix PCI device release in secondary process ++* ethdev: fix typo in Rx queue setup API comment ++* ethdev: fix typos ++* ethdev: fix xstats by ID API documentation ++* ethdev: forbid closing started device ++* eventdev/eth_rx: fix WRR buffer overrun ++* eventdev/eth_tx: fix queue delete logic ++* event/dlb2: fix delayed pop test in selftest ++* event/sw: remove unused inflight events count ++* examples/fips_validation: fix device start ++* examples/fips_validation: fix resetting pointer ++* examples/fips_validation: remove unused allocation ++* examples/ipsec-secgw: fix parsing of flow queue ++* examples/ipsec-secgw: move global array from header ++* examples/l3fwd-power: fix early shutdown ++* examples/multi_process: fix Rx packets distribution ++* examples/ntb: fix build dependency ++* examples/performance-thread: fix build with clang 12.0.1 ++* examples/performance-thread: remove unused hits count ++* examples/ptpclient: fix delay request message ++* examples/service_cores: fix lcore count check ++* fix PMD wording ++* fix spelling in comments and doxygen ++* hash: fix Doxygen comment of Toeplitz file ++* interrupt: fix request notifier interrupt processing ++* kni: check error code of allmulticast mode switch ++* kni: fix build for SLES15-SP3 ++* kni: restrict bifurcated device support ++* kvargs: fix comments style ++* lpm6: fix buffer overflow ++* malloc: fix allocation with unknown socket ID ++* mbuf: avoid cast-align warning in data offset macro ++* mbuf: enforce no option for dynamic fields and flags ++* mbuf: fix dump of dynamic fields and flags ++* mbuf: fix reset on mbuf free ++* mbuf: fix typo in comment ++* mem: fix dynamic hugepage mapping in container ++* mempool: deprecate unused physical page defines ++* net/af_packet: fix ignoring full ring on Tx ++* net/af_xdp: disable secondary process support ++* net/af_xdp: fix zero-copy Tx queue drain ++* net: avoid cast-align warning in VLAN insert function ++* net/axgbe: fix unreleased lock in I2C transfer ++* net/bnxt: check FW capability for VLAN offloads ++* net/bnxt: fix autoneg on PAM4 links ++* net/bnxt: fix crash after port stop/start ++* net/bnxt: fix double allocation of ring groups ++* net/bnxt: fix firmware version query ++* net/bnxt: fix function driver register/unregister ++* net/bnxt: fix mbuf VLAN in scalar Rx ++* net/bnxt: fix memzone free for Tx and Rx rings ++* net/bnxt: fix ring group free ++* net/bnxt: fix Rx next consumer index in mbuf alloc fail ++* net/bnxt: fix tunnel port accounting ++* net/bnxt: fix Tx queue startup state ++* net/bnxt: fix VLAN indication in Rx mbuf ++* net/bnxt: 
remove some unused variables ++* net/bnxt: update ring group after ring stop start ++* net/bonding: fix dedicated queue mode in vector burst ++* net/bonding: fix memory leak on closing device ++* net/bonding: fix RSS key length ++* net/e1000: fix memzone leak on queue re-configure ++* net/ena: advertise scattered Rx capability ++* net/ena: fix offload capabilities verification ++* net/ena: fix per-queue offload capabilities ++* net/enic: avoid error message when no advanced filtering ++* net/enic: fix filter mode detection ++* net/failsafe: fix secondary process probe ++* net: fix aliasing in checksum computation ++* net: fix checksum API documentation ++* net: fix checksum offload for outer IPv4 ++* net/hinic/base: remove some unused variables ++* net/hns3: fix input parameters of MAC functions ++* net/hns3: fix interrupt vector freeing ++* net/hns3: fix mailbox communication with HW ++* net/hns3: fix multi-process action register and unregister ++* net/hns3: fix queue flow action validation ++* net/hns3: fix residual MAC after setting default MAC ++* net/hns3: fix secondary process reference count ++* net/hns3: fix taskqueue pair reset command ++* net/hns3: optimize Tx performance by mbuf fast free ++* net/hns3: simplify queue DMA address arithmetic ++* net/hns3: unregister MP action on close for secondary ++* net/i40e/base: fix AOC media type ++* net/i40e/base: fix function name in comments ++* net/i40e/base: fix PF reset ++* net/i40e/base: fix PHY identifiers for 2.5G and 5G adapters ++* net/i40e/base: fix potentially uninitialized variables ++* net/i40e/base: fix resource leakage ++* net/i40e/base: fix update link data for X722 ++* net/i40e/base: fix using checksum before check ++* net/i40e: fix 32-bit build ++* net/i40e: fix buffer size alignment ++* net/i40e: fix device startup resource release ++* net/i40e: fix forward outer IPv6 VXLAN ++* net/i40e: fix i40evf device initialization ++* net/i40e: fix mbuf leak ++* net/i40e: fix memzone leak on queue re-configure ++* net/i40e: fix risk in descriptor read in NEON Rx ++* net/i40e: fix risk in descriptor read in scalar Rx ++* net/i40e: fix Rx packet statistics ++* net/i40e: support 25G AOC/ACC cables ++* net/i40evf: extend the polling times of vf reset ++* net/iavf: fix high CPU usage on frequent command ++* net/iavf: fix mbuf leak ++* net/iavf: fix mbuf leak ++* net/iavf: fix multi-process shared data ++* net/iavf: fix overflow in maximum packet length config ++* net/iavf: fix pointer of meta data ++* net/iavf: fix Rx queue buffer size alignment ++* net/iavf: fix Rx queue IRQ resource leak ++* net/iavf: fix shared data in multi-process ++* net/ice/base: calculate logical PF ID ++* net/ice/base: fix PF ID for DCF ++* net/ice/base: fix typo in comment ++* net/ice: fix deadlock on flow query ++* net/ice: fix deadlock on flow redirect ++* net/ice: fix double free ACL flow entry ++* net/ice: fix flow redirect ++* net/ice: fix function pointer in multi-process ++* net/ice: fix generic build on FreeBSD ++* net/ice: fix max entry number for ACL normal priority ++* net/ice: fix memzone leak after device init failure ++* net/ice: fix memzone leak on queue re-configure ++* net/ice: fix performance with writeback policy ++* net/ice: fix queue config in DCF ++* net/ice: fix RXDID default value in DCF ++* net/ice: retry getting VF VSI map after failure ++* net/ice: save rule on switch filter creation ++* net/ixgbe: fix hash handle leak ++* net/ixgbe: fix MAC resource leak ++* net/ixgbe: fix mbuf leak ++* net/ixgbe: fix memzone leak on queue 
re-configure ++* net/ixgbe: fix port initialization if MTU config fails ++* net/ixgbe: fix queue release ++* net/ixgbe: fix queue resource leak ++* net/ixgbe: fix Rx multicast statistics after reset ++* net/liquidio: remove unused counter ++* net/memif: allow stopping and closing device ++* net/memif: fix chained mbuf determination ++* net/mlx4: fix empty Ethernet spec with VLAN ++* net/mlx5: add Ethernet header to GENEVE RSS expansion ++* net/mlx5: close tools socket with last device ++* net/mlx5: do not close stdin on error ++* net/mlx5: fix Altivec Rx ++* net/mlx5: fix devargs validation for multi-class probing ++* net/mlx5: fix eCPRI matching ++* net/mlx5: fix flow mark with sampling and metering ++* net/mlx5: fix flow shared age action reference counting ++* net/mlx5: fix flow tables double release ++* net/mlx5: fix GENEVE and VXLAN-GPE flow item matching ++* net/mlx5: fix GENEVE protocol type translation ++* net/mlx5: fix GRE flow item matching ++* net/mlx5: fix GRE protocol type translation ++* net/mlx5: fix mbuf replenishment check for zipped CQE ++* net/mlx5: fix memory leak on context allocation failure ++* net/mlx5: fix metadata and meter split shared tag ++* net/mlx5: fix MPLS tunnel outer layer overwrite ++* net/mlx5: fix multi-segment packet wraparound ++* net/mlx5: fix mutex unlock in Tx packet pacing cleanup ++* net/mlx5: fix partial inline of fine grain packets ++* net/mlx5: fix RETA update without stopping device ++* net/mlx5: fix RSS expansion for explicit graph node ++* net/mlx5: fix RSS expansion for inner tunnel VLAN ++* net/mlx5: fix RSS expansion for L2/L3 VXLAN ++* net/mlx5: fix RSS expansion scheme for GRE header ++* net/mlx5: fix RSS expansion traversal over next nodes ++* net/mlx5: fix RSS expansion with EtherType ++* net/mlx5: fix RSS RETA update ++* net/mlx5: fix Rx queue memory allocation return value ++* net/mlx5: fix Rx queue resource cleanup ++* net/mlx5: fix shared RSS destruction ++* net/mlx5: fix software parsing support query ++* net/mlx5: fix tag ID conflict with sample action ++* net/mlx5: fix tunneling support query ++* net/mlx5: fix tunnel offload validation ++* net/mlx5: fix Tx scheduling check ++* net/mlx5: fix VXLAN-GPE next protocol translation ++* net/mlx5: remove duplicated reference of Tx doorbell ++* net/mlx5: support more tunnel types ++* net/mlx5: workaround MR creation for flow counter ++* net/nfp: cancel delayed LSC work in port close logic ++* net/nfp: fix minimum descriptor sizes ++* net/nfp: remove unused message length ++* net/octeontx2: fix MTU when PTP is enabled ++* net/octeontx: fix access to indirect buffers ++* net/octeontx: remove unused packet length ++* net/pcap: fix resource leakage on port probe ++* net/qede/base: remove unused message size ++* net/sfc: free MAE lock once switch domain is assigned ++* net/sfc: set FDIR bit for flow mark in EF100 Rx ++* net/sfc: update comment about representor support ++* net/softnic: fix useless address check ++* net/txgbe: fix packet statistics ++* net/txgbe: fix reading SFP module SFF-8472 data ++* net/txgbe: fix to get interrupt status ++* net/virtio: avoid unneeded link interrupt configuration ++* net/virtio: do not use PMD log type ++* net/virtio: fix avail descriptor ID ++* net/virtio: fix check scatter on all Rx queues ++* net/virtio: fix device configure without jumbo Rx offload ++* net/virtio: fix indirect descriptor reconnection ++* net/virtio: fix link update in speed feature ++* net/virtio: fix mbuf count on Rx queue setup ++* net/virtio: fix repeated freeing of virtqueue ++* 
net/virtio: fix split queue vectorized Rx ++* net/virtio: fix Tx checksum for tunnel packets ++* net/virtio: fix Tx cleanup functions to have same signature ++* net/virtio: fix Tx completed mbuf leak on device stop ++* net/virtio-user: fix Rx interrupts with multi-queue ++* net/vmxnet3: fix build with clang 13 ++* pipeline: fix instruction label check ++* power: fix build with clang 13 ++* raw/ifpga/base: fix linking with librt ++* raw/octeontx2_ep: remove unused variable ++* remove repeated 'the' in the code ++* rib: fix IPv6 depth mask ++* ring: fix Doxygen comment of internal function ++* sched: get 64-bit greatest common divisor ++* stack: fix reload head when pop fails ++* table: fix missing headers on ARM64 ++* telemetry: fix JSON output buffer length ++* test/atomic: fix 128-bit atomic test with many cores ++* test/bpf: fix undefined behavior with clang ++* test/cmdline: fix memory leak ++* test/compress: fix buffer overflow ++* test/compress-perf: remove unused variable ++* test/crypto: fix data lengths ++* test/crypto: fix max length for raw data path ++* test/crypto: fix missing return checks ++* test/crypto: remove unnecessary stats retrieval ++* test/crypto: skip plain text compare for null cipher ++* test/distributor: remove unused counter ++* test/event_crypto: fix event crypto metadata write ++* test/event: fix timer adapter creation test ++* test: fix ring PMD initialisation ++* test/func_reentrancy: free memzones after test ++* test/hash: fix buffer overflow with jhash ++* test/latency: fix loop boundary ++* test/mbuf: fix access to freed memory ++* test/mem: fix memory autotests on FreeBSD ++* test/red: fix typo in test description ++* test/service: fix race in attr check ++* test/service: fix some comment ++* usertools: fix handling EOF for telemetry input pipe ++* usertools/pmdinfo: fix plugin auto scan ++* vdpa/mlx5: fix large VM memory region registration ++* vdpa/mlx5: fix mkey creation check ++* vdpa/mlx5: retry VAR allocation during vDPA restart ++* vdpa/mlx5: workaround dirty bitmap MR creation ++* vdpa/mlx5: workaround FW first completion in start ++* vdpa/mlx5: workaround guest MR registrations ++* version: 20.11.4-rc1 ++* vfio: fix FreeBSD clear group stub ++* vfio: fix FreeBSD documentation ++* vfio: set errno on unsupported OS ++* vhost: add sanity check on inflight last index ++* vhost: clean IOTLB cache on vring stop ++* vhost: fix crash on port deletion ++* vhost: log socket path on adding connection ++ ++20.11.4 Validation ++~~~~~~~~~~~~~~~~~~ ++ ++* Intel(R) Testing ++ ++ * Basic Intel(R) NIC testing ++ ++ * PF(i40e, ixgbe, ice) ++ * VF(i40e, ixgbe, ice) ++ * Compile testing ++ * Intel NIC single core/NIC performance ++ * IPsec ++ ++ * Basic cryptodev and virtio testing ++ ++ * Virtio function and performance ++ * Cryptodev function and performance ++ ++* RedHat Testing ++ ++ # Functionality ++ ++ * Guest(PF, VF) ++ * Host(PF, PF) ++ * Vswitch (throughput, live migration) ++ * Vhost-user(server, client) ++ * OVS-DPDK live migration ++ ++ # Platform ++ ++ * RHEL8, kernel 4.18, qemu 6.1 ++ * X540-AT2 NIC(ixgbe, 10G) ++ ++* Nvidia(R) Testing ++ ++ * Basic functionality with testpmd ++ ++ * Tx/Rx ++ * xstats ++ * Timestamps ++ * Link status ++ * RTE flow and flow_director ++ * RSS ++ * VLAN filtering, stripping and insertion ++ * Checksum/TSO ++ * ptype ++ * link_status_interrupt ++ * l3fwd-power example application ++ * Multi-process example applications ++ * Hardware LRO ++ ++ * Build tests ++ ++ * Ubuntu 20.04.3 with MLNX_OFED_LINUX-5.5-1.0.3.2. 
++ * Ubuntu 20.04.3 with rdma-core master (c52b43e). ++ * Ubuntu 20.04.3 with rdma-core v28.0. ++ * Ubuntu 18.04.6 with rdma-core v17.1. ++ * Ubuntu 18.04.6 with rdma-core master (c52b43e) (i386). ++ * Ubuntu 16.04.7 with rdma-core v22.7. ++ * Fedora 35 with rdma-core v38.0. ++ * Fedora 36 (Rawhide) with rdma-core v38.0. ++ * CentOS 7 7.9.2009 with rdma-core master (940f53f). ++ * CentOS 7 7.9.2009 with MLNX_OFED_LINUX-5.5-1.0.3.2. ++ * CentOS 8 8.4.2105 with rdma-core master (940f53f). ++ * OpenSUSE Leap 15.3 with rdma-core v31.0. ++ * Windows Server 2019 with Clang 11.0.0 ++ ++ * Test platform ++ ++ * ConnectX-4 Lx / OS: Ubuntu 20.04 LTS / Driver: MLNX_OFED_LINUX-5.5-1.0.3.2 / Firmware: 14.32.1010 ++ * ConnectX-4 Lx / OS: Ubuntu 20.04 LTS / Kernel: 5.16.0-rc5 / Driver: rdma-core v38.0 / Firmware: 14.32.1010 ++ * ConnectX-5 / OS: Ubuntu 20.04 LTS / Driver: MLNX_OFED_LINUX-5.5-1.0.3.2 / Firmware: 16.32.1010 ++ * ConnectX-5 / OS: Ubuntu 20.04 LTS / Kernel: 5.16.0-rc5 / Driver: v38.0 / Firmware: 16.32.1010 ++ * ConnectX-6 Dx / OS: Ubuntu 20.04 LTS / Driver: MLNX_OFED_LINUX-5.5-1.0.3.2 / Firmware: 22.32.1010 ++ ++20.11.4 Known Issues ++~~~~~~~~~~~~~~~~~~~~ ++ ++# mlx5 ++ ++ * MLX5 PMD 2% single core forwarding performance degradation. ++ https://bugs.dpdk.org/show_bug.cgi?id=916 ++ ++20.11.5 Release Notes ++--------------------- ++ ++ ++20.11.5 Fixes ++~~~~~~~~~~~~~ ++ ++* acl: add missing C++ guards ++* app/compress-perf: fix cycle count operations allocation ++* app/compress-perf: fix number of queue pairs to setup ++* app/compress-perf: fix socket ID type during init ++* app/compress-perf: optimize operations pool allocation ++* app/fib: fix division by zero ++* app/pdump: abort on multi-core capture limit ++* app/testpmd: check starting port is not in bonding ++* app/testpmd: fix bonding mode set ++* app/testpmd: fix build without drivers ++* app/testpmd: fix dereference before null check ++* app/testpmd: fix external buffer allocation ++* app/testpmd: fix GENEVE parsing in checksum mode ++* app/testpmd: fix show RSS RETA on Windows ++* app/testpmd: fix stack overflow for EEPROM display ++* app/testpmd: fix Tx scheduling interval ++* baseband/acc100: avoid out-of-bounds access ++* bpf: fix build with some libpcap version on FreeBSD ++* build: fix warning about using -Wextra flag ++* build: fix warnings when running external commands ++* build: remove deprecated Meson functions ++* bus/dpaa: fix C++ include guard ++* bus/ifpga: remove useless check while browsing devices ++* common/mlx5: add minimum WQE size for striding RQ ++* common/mlx5: add Netlink event helpers ++* common/mlx5: fix error handling in multi-class probe ++* common/mlx5: fix queue pair ack timeout configuration ++* common/octeontx2: update mailbox version to 0xb ++* compressdev: add missing C++ guards ++* compressdev: fix missing space in log macro ++* compressdev: fix socket ID type ++* compress/octeontx: fix null pointer dereference ++* config: add arch define for Arm ++* config/ppc: fix build with GCC >= 10 ++* cryptodev: add backward-compatible enum ++* cryptodev: fix clang C++ include ++* cryptodev: fix RSA key type name ++* crypto/dpaax_sec: fix auth/cipher xform chain checks ++* crypto/ipsec_mb: fix ZUC authentication verify ++* crypto/ipsec_mb: fix ZUC operation overwrite ++* crypto/virtio: fix out-of-bounds access ++* devtools: fix comment detection in forbidden token check ++* distributor: fix potential overflow ++* dma/idxd: configure maximum batch size to high value ++* dma/idxd: fix paths to driver sysfs 
directory ++* doc: correct name of BlueField-2 in mlx5 guide ++* doc: fix dlb2 guide ++* doc: fix FIPS guide ++* doc: fix KNI PMD name typo ++* doc: fix missing note on UIO module in Linux guide ++* doc: fix typos and punctuation in flow API guide ++* doc: remove dependency on findutils on FreeBSD ++* doc: remove obsolete vector Tx explanations from mlx5 guide ++* doc: replace broken links in mlx guides ++* doc: replace characters for (R) symbol in Linux guide ++* doc: replace deprecated distutils version parsing ++* doc: update matching versions in ice guide ++* dpaa2: fix build with RTE_LIBRTE_IEEE1588 ++* eal: add missing C++ guards ++* eal: fix C++ include ++* eal/freebsd: add missing C++ include guards ++* eal/linux: fix illegal memory access in uevent handler ++* eal/linux: log hugepage create errors with filename ++* eal/windows: fix error code for not supported API ++* eal/windows: remove useless C++ include guard ++* efd: fix uninitialized structure ++* ethdev: add internal function to device struct from name ++* ethdev: add missing C++ guards ++* ethdev: fix cast for C++ compatibility ++* ethdev: fix doxygen comments for device info struct ++* eventdev: add missing C++ guards ++* eventdev/eth_tx: fix queue add error code ++* eventdev: fix C++ include ++* examples/distributor: reduce Tx queue number to 1 ++* examples/flow_classify: fix failure message ++* examples/ipsec-secgw: fix default flow rule creation ++* examples/ipsec-secgw: fix eventdev start sequence ++* examples/kni: add missing trailing newline in log ++* examples/l2fwd-crypto: fix port mask overflow ++* examples/l3fwd: fix buffer overflow in Tx ++* examples/l3fwd: fix Rx burst size for event mode ++* examples/l3fwd: make Rx and Tx queue size configurable ++* examples/l3fwd: share queue size variables ++* examples/qos_sched: fix compile failure ++* examples/qos_sched: fix core mask overflow ++* examples/vhost: fix launch with physical port ++* fix spelling in comments and strings ++* graph: fix C++ include ++* ipc: end multiprocess thread during cleanup ++* ipsec: fix C++ include ++* kni: add missing C++ guards ++* kni: fix freeing order in device release ++* kni: fix ioctl signature ++* kni: update kernel API to set random MAC address ++* maintainers: update for stable branches ++* mem: check allocation in dynamic hugepage init ++* metrics: add missing C++ guards ++* net/af_xdp: add missing trailing newline in logs ++* net/af_xdp: ensure socket is deleted on Rx queue setup error ++* net/af_xdp: fix build with -Wunused-function ++* net/axgbe: use PCI root complex device to distinguish device ++* net/bnxt: add null check for mark table ++* net/bnxt: cap maximum number of unicast MAC addresses ++* net/bnxt: check VF representor pointer before access ++* net/bnxt: fix check for autoneg enablement ++* net/bnxt: fix handling of VF configuration change ++* net/bnxt: fix memzone allocation per VNIC ++* net/bnxt: fix multicast address set ++* net/bnxt: fix multicast MAC restore during reset recovery ++* net/bnxt: fix null dereference in session cleanup ++* net/bnxt: fix PAM4 mask setting ++* net/bnxt: fix queue stop operation ++* net/bnxt: fix restoring VLAN filtering after recovery ++* net/bnxt: fix ring calculation for representors ++* net/bnxt: fix VF resource allocation strategy ++* net/bnxt: fix xstats names query overrun ++* net/bnxt: fix xstats query ++* net/bnxt: get maximum supported multicast filters count ++* net/bnxt: handle ring cleanup in case of error ++* net/bnxt: restore RSS configuration after reset recovery 
++* net/bonding: fix mode type mismatch ++* net/bonding: fix offloading configuration ++* net/bonding: fix promiscuous and allmulticast state ++* net/bonding: fix reference count on mbufs ++* net/bonding: fix RSS with early configure ++* net/cxgbe: fix dangling pointer by mailbox access rework ++* net/cxgbe: remove useless address check ++* net/cxgbe: remove useless C++ include guard ++* net/dpaa2: fix timestamping for IEEE1588 ++* net/dpaa2: fix unregistering interrupt handler ++* net/dpaa2: remove useless C++ include guard ++* net/ena: check memory BAR before initializing LLQ ++* net/ena: fix checksum flag for L4 ++* net/ena: fix meta descriptor DF flag setup ++* net/ena: fix reset reason being overwritten ++* net/ena: remove unused enumeration ++* net/ena: remove unused offload variables ++* net/ena: skip timer if reset is triggered ++* net/enic: fix dereference before null check ++* net/hns3: delete duplicated RSS type ++* net/hns3: fix insecure way to query MAC statistics ++* net/hns3: fix max packet size rollback in PF ++* net/hns3: fix operating queue when TCAM table is invalid ++* net/hns3: fix RSS key with null ++* net/hns3: fix RSS TC mode entry ++* net/hns3: fix using enum as boolean ++* net/hns3: fix VF RSS TC mode entry ++* net/hns3: increase time waiting for PF reset completion ++* net/hns3: remove duplicate macro definition ++* net/i40e: enable maximum frame size at port level ++* net/i40e: fix unintentional integer overflow ++* net/iavf: count continuous DD bits for Arm ++* net/iavf: count continuous DD bits for Arm in flex Rx ++* net/iavf: fix function pointer in multi-process ++* net/iavf: fix potential out-of-bounds access ++* net/ice/base: add profile validation on switch filter ++* net/ice: fix build with 16-byte Rx descriptor ++* net/ice: fix link up when starting device ++* net/ice: fix overwriting of LSE bit by DCF ++* net/ice: fix Tx checksum offload ++* net/ice: fix Tx checksum offload capability ++* net/ice: fix Tx offload path choice ++* net/ice: track DCF state of PF ++* net/ixgbe: add vector Rx parameter check ++* net/ixgbe: check filter init failure ++* net/ixgbe: fix FSP check for X550EM devices ++* net/ixgbe: reset security context pointer on close ++* net/kni: fix config initialization ++* net/memif: remove pointer deference before null check ++* net/memif: remove unnecessary Rx interrupt stub ++* net/mlx5: fix assertion on flags set in packet mbuf ++* net/mlx5: fix committed bucket size ++* net/mlx5: fix GRE item translation in Verbs ++* net/mlx5: fix GRE protocol type translation for Verbs ++* net/mlx5: fix ineffective metadata argument adjustment ++* net/mlx5: fix inet IPIP protocol type ++* net/mlx5: fix initial link status detection ++* net/mlx5: fix inline length for multi-segment TSO ++* net/mlx5: fix link status change detection ++* net/mlx5: fix mark enabling for Rx ++* net/mlx5: fix matcher priority with ICMP or ICMPv6 ++* net/mlx5: fix maximum packet headers size for TSO ++* net/mlx5: fix memory socket selection in ASO management ++* net/mlx5: fix modify port action validation ++* net/mlx5: fix MPLS/GRE Verbs spec ordering ++* net/mlx5: fix MPRQ stride devargs adjustment ++* net/mlx5: fix next protocol RSS expansion ++* net/mlx5: fix NIC egress flow mismatch in switchdev mode ++* net/mlx5: fix port matching in sample flow rule ++* net/mlx5: fix RSS expansion with explicit next protocol ++* net/mlx5: fix sample flow action on trusted device ++* net/mlx5: fix shared RSS destroy ++* net/mlx5: fix sibling device config check ++* net/mlx5: improve 
stride parameter names ++* net/mlx5: reject jump to root table ++* net/mlx5: relax headroom assertion ++* net/mlx5: remove unused reference counter ++* net/mlx5: workaround ASO memory region creation ++* net/nfb: fix array indexes in deinit functions ++* net/nfb: fix multicast/promiscuous mode switching ++* net/nfp: free HW rings memzone on queue release ++* net/nfp: remove duplicated check when setting MAC address ++* net/nfp: remove useless range checks ++* net/octeontx2:: fix base rule merge ++* net/octeontx2: fix flow MCAM priority management ++* net/qede: fix redundant condition in debug code ++* net/qede: fix Rx bulk mbuf allocation ++* net/sfc: demand Tx fast free offload on EF10 simple datapath ++* net/sfc: do not push fast free offload to default TxQ config ++* net/sfc: validate queue span when parsing flow action RSS ++* net/tap: fix to populate FDs in secondary process ++* net/txgbe: fix debug logs ++* net/txgbe: fix queue statistics mapping ++* net/virtio: fix Tx queue 0 overriden by queue 128 ++* net/virtio-user: check FD flags getting failure ++* net/virtio-user: fix resource leak on probing failure ++* pmdinfogen: fix compilation with Clang 3.4.2 on CentOS 7 ++* raw/ifpga/base: fix port feature ID ++* raw/ifpga/base: fix SPI transaction ++* raw/ifpga: fix build with optimization ++* raw/ifpga: fix interrupt handle allocation ++* raw/ifpga: fix monitor thread ++* raw/ifpga: fix thread closing ++* raw/ifpga: fix variable initialization in probing ++* raw/ntb: clear all valid doorbell bits on init ++* regexdev: fix section attribute of symbols ++* regex/mlx5: fix memory allocation check ++* Revert "regexdev: fix section attribute of symbols" ++* ring: fix error code when creating ring ++* ring: fix overflow in memory size calculation ++* ring: optimize corner case for enqueue/dequeue ++* stack: fix stubs header export ++* table: fix C++ include ++* telemetry: add missing C++ guards ++* test/efd: fix sockets mask size ++* test/mbuf: fix mbuf data content check ++* test/mem: fix error check ++* vdpa/ifc: fix log info mismatch ++* vdpa/mlx5: workaround queue stop with traffic ++* version: 20.11.5-rc1 ++* vfio: cleanup the multiprocess sync handle ++* vhost: add missing C++ guards ++* vhost: fix C++ include ++* vhost: fix FD leak with inflight messages ++* vhost: fix field naming in guest page struct ++* vhost: fix guest to host physical address mapping ++* vhost: fix queue number check when setting inflight FD ++* vhost: fix unsafe vring addresses modifications ++ ++20.11.5 Validation ++~~~~~~~~~~~~~~~~~~ ++ ++* Red Hat(R) Testing ++ ++ * Platform ++ ++ * RHEL 8 ++ * Kernel 4.18 ++ * Qemu 6.2 ++ * X540-AT2 NIC(ixgbe, 10G) ++ ++ * Functionality ++ ++ * Guest with device assignment(PF) throughput testing(1G hugepage size) ++ * Guest with device assignment(PF) throughput testing(2M hugepage size) ++ * Guest with device assignment(VF) throughput testing ++ * PVP (host dpdk testpmd as vswitch) 1Q: throughput testing ++ * PVP vhost-user 2Q throughput testing ++ * PVP vhost-user 1Q cross numa node throughput testing ++ * Guest with vhost-user 2 queues throughput testing ++ * vhost-user reconnect with dpdk-client, qemu-server: qemu reconnect ++ * vhost-user reconnect with dpdk-client, qemu-server: ovs reconnect ++ * PVP 1Q live migration testing ++ * PVP 1Q cross numa node live migration testing ++ * Guest with ovs+dpdk+vhost-user 1Q live migration testing ++ * Guest with ovs+dpdk+vhost-user 1Q live migration testing (2M) ++ * Guest with ovs+dpdk+vhost-user 2Q live migration testing ++ * 
Guest with ovs+dpdk+vhost-user 4Q live migration testing ++ * Host PF + DPDK testing ++ * Host VF + DPDK testing ++ ++ ++* Intel(R) Testing ++ ++ * Basic Intel(R) NIC(ixgbe, i40e and ice) testing ++ * PF (i40e) ++ * PF (ixgbe) ++ * PF (ice) ++ * VF (i40e) ++ * VF (ixgbe) ++ * VF (ice) ++ * Compile Testing ++ * Intel NIC single core/NIC performance ++ * Power and IPsec ++ ++ * Basic cryptodev and virtio testing ++ ++ * vhost/virtio basic loopback, PVP and performance test ++ * cryptodev Function/Performance ++ ++ ++* Nvidia(R) Testing ++ ++ * Basic functionality with testpmd ++ ++ * Tx/Rx ++ * xstats ++ * Timestamps ++ * Link status ++ * RTE flow and flow_director ++ * RSS ++ * VLAN stripping and insertion ++ * Checksum/TSO ++ * ptype ++ * link_status_interrupt example application ++ * l3fwd-power example application ++ * Multi-process example applications ++ * Hardware LRO tests ++ ++ * Build tests ++ ++ * Ubuntu 20.04.2 with MLNX_OFED_LINUX-5.4-1.0.3.0. ++ * Ubuntu 20.04.2 with rdma-core master (64d1ae5). ++ * Ubuntu 20.04.2 with rdma-core v28.0. ++ * Ubuntu 18.04.5 with rdma-core v17.1. ++ * Ubuntu 18.04.5 with rdma-core master (5b0f5b2) (i386). ++ * Ubuntu 16.04.7 with rdma-core v22.7. ++ * Fedora 34 with rdma-core v36.0. ++ * Fedora 36 (Rawhide) with rdma-core v36.0 (only with gcc). ++ * CentOS 7 7.9.2009 with rdma-core master (64d1ae5). ++ * CentOS 7 7.9.2009 with MLNX_OFED_LINUX-5.4-1.0.3.0. ++ * CentOS 8 8.3.2011 with rdma-core master (64d1ae5). ++ * OpenSUSE Leap 15.3 with rdma-core v31.0. ++ ++ * ConnectX-6 Dx ++ ++ * Ubuntu 20.04 ++ * Driver MLNX_OFED_LINUX-5.5-1.0.3.2 ++ * fw 22.32.2004 ++ ++ * ConnectX-5 ++ ++ * Ubuntu 20.04 ++ * Driver MLNX_OFED_LINUX-5.5-1.0.3.2 ++ * Kernel: 5.17.0 / Driver: rdma-core v39.0 ++ * fw 16.32.1010 ++ ++ * ConnectX-4 Lx ++ ++ * Ubuntu 20.04 ++ * Driver MLNX_OFED_LINUX-5.5-1.0.3.2 ++ * Kernel: 5.17.0 / Driver: rdma-core v39.0 ++ * fw 14.32.1010 ++ ++ ++* Canonical(R) Testing ++ ++ * Build tests of DPDK & OVS 2.15.0 on Ubuntu 21.10 (meson based) ++ * Functional and performance tests based on OVS-DPDK on x86_64 ++ * Autopkgtests for DPDK and OpenvSwitch ++ ++20.11.5 Known Issues ++~~~~~~~~~~~~~~~~~~~~ ++ ++* vhost/virtio ++ ++ * build failure with gcc 12: https://bugs.dpdk.org/show_bug.cgi?id=925 ++ ++20.11.6 Release Notes ++--------------------- ++ ++ ++20.11.6 Fixes ++~~~~~~~~~~~~~ ++ ++* acl: fix rules with 8-byte field size ++* app/flow-perf: fix build with GCC 12 ++* app/procinfo: show all non-owned ports ++* app/testpmd: check statistics query before printing ++* app/testpmd: do not poll stopped queues ++* app/testpmd: fix bonding slave devices not released ++* app/testpmd: fix metering and policing command for RFC4115 ++* app/testpmd: fix multicast address pool leak ++* app/testpmd: fix packet segment allocation ++* app/testpmd: fix port status of bonding slave device ++* app/testpmd: fix supported RSS offload display ++* app/testpmd: perform SW IP checksum for GRO/GSO packets ++* app/testpmd: remove useless pointer checks ++* app/testpmd: replace hardcoded min mbuf number with macro ++* app/testpmd: revert MAC update in checksum forwarding ++* baseband/acc100: add protection for some negative scenario ++* baseband/acc100: remove prefix of internal file ++* baseband/acc100: remove RTE prefix for internal macros ++* baseband/acc100: update companion PF configure function ++* bus/fslmc: fix VFIO setup ++* ci: enable C++ check for Arm and PPC ++* common/cpt: fix build with GCC 12 ++* common/dpaax: fix short MAC-I IV calculation for ZUC ++* config: 
fix C++ cross compiler for Arm and PPC ++* crypto/dpaa2_sec: fix buffer pool ID check ++* crypto/dpaa2_sec: fix crypto operation pointer ++* crypto/dpaa2_sec: fix fle buffer leak ++* crypto/dpaa2_sec: fix operation status for simple FD ++* crypto/dpaa_sec: fix digest size ++* crypto/dpaa_sec: fix secondary process probing ++* crypto/ipsec_mb: fix GMAC parameters setting ++* crypto/ipsec_mb: fix length and offset settings ++* crypto/qat: fix DOCSIS crash ++* crypto/scheduler: fix queue pair in scheduler failover ++* devtools: fix null test for NUMA systems ++* doc: add missing auth algo for IPsec example ++* doc: add more instructions for running as non-root ++* doc: fix API index Markdown syntax ++* doc: fix formatting and link in BPF library guide ++* doc: fix grammar and formatting in compressdev guide ++* doc: fix grammar and parameters in l2fwd-crypto guide ++* doc: fix readability in vhost guide ++* doc: fix vhost multi-queue reconnection ++* doc: update matching versions in i40e guide ++* doc: update matching versions in ice guide ++* drivers/crypto: fix warnings for OpenSSL version ++* eal: fix C++ include for device event and DMA ++* eal/freebsd: fix use of newer cpuset macros ++* eal/windows: add missing C++ include guards ++* eal/windows: fix data race when creating threads ++* eal/x86: drop export of internal alignment macro ++* eal/x86: fix unaligned access for small memcpy ++* ethdev: clarify null location case in xstats get ++* ethdev: fix memory leak in xstats telemetry ++* ethdev: fix port close in secondary process ++* ethdev: fix port state when stop ++* ethdev: fix possible null pointer access ++* ethdev: fix RSS update when RSS is disabled ++* ethdev: prohibit polling stopped queue ++* eventdev/eth_tx: fix adapter creation ++* eventdev/eth_tx: fix queue delete ++* examples/bond: fix invalid use of trylock ++* examples/distributor: fix distributor on Rx core ++* examples/dma: fix Tx drop statistics ++* examples/fips_validation: handle empty payload ++* examples/ipsec-secgw: fix promiscuous mode option ++* examples/ipsec-secgw: fix uninitialized memory access ++* examples/l2fwd-crypto: fix stats refresh rate ++* examples/l3fwd: fix scalar LPM ++* examples/link_status_interrupt: fix stats refresh rate ++* examples/vhost: fix crash when no VMDq ++* gro: fix identifying fragmented packets ++* kni: fix build ++* kni: fix build with Linux 5.18 ++* kni: use dedicated function to set MAC address ++* kni: use dedicated function to set random MAC address ++* malloc: fix allocation of almost hugepage size ++* mbuf: dump outer VLAN ++* mem: skip attaching external memory in secondary process ++* net/axgbe: fix xstats get return if xstats is null ++* net/bnxt: allow Tx only or Rx only ++* net/bnxt: avoid unnecessary endianness conversion ++* net/bnxt: fix compatibility with some old firmwares ++* net/bnxt: fix device capability reporting ++* net/bnxt: fix freeing VNIC filters ++* net/bnxt: fix link status when port is stopped ++* net/bnxt: fix reordering in NEON Rx ++* net/bnxt: fix ring group on Rx restart ++* net/bnxt: fix Rx configuration ++* net/bnxt: fix setting forced speed ++* net/bnxt: fix speed autonegotiation ++* net/bnxt: fix switch domain allocation ++* net/bnxt: fix tunnel stateless offloads ++* net/bnxt: force PHY update on certain configurations ++* net/bnxt: recheck FW readiness if in reset process ++* net/bnxt: remove unused macro ++* net/bonding: fix mbuf fast free usage ++* net/bonding: fix RSS inconsistency between ports ++* net/bonding: fix RSS key config with 
extended key length ++* net/bonding: fix slave stop and remove on port close ++* net/bonding: fix stopping non-active slaves ++* net/cxgbe: fix port ID in Rx mbuf ++* net/cxgbe: fix Tx queue stuck with mbuf chain coalescing ++* net/dpaa: fix event queue detach ++* net/hns3: fix an unreasonable memset ++* net/hns3: fix descriptors check with SVE ++* net/hns3: fix return value for unsupported tuple ++* net/hns3: fix rollback on RSS hash update ++* net/hns3: fix RSS disable ++* net/hns3: fix xstats get return if xstats is null ++* net/hns3: remove duplicate definition ++* net/hns3: remove redundant RSS tuple field ++* net/hns3: remove unnecessary RSS switch ++* net/hns3: support backplane media type ++* net/i40e: fix max frame size config at port level ++* net/i40e: populate error in flow director parser ++* net/iavf: fix data path selection ++* net/iavf: fix HW ring scan method selection ++* net/iavf: fix mbuf release in multi-process ++* net/iavf: fix queue start exception handling ++* net/iavf: fix Rx queue interrupt setting ++* net/iavf: increase reset complete wait count ++* net/ice/base: fix build with GCC 12 ++* net/ice/base: fix getting sched node from ID type ++* net/ice: fix build with GCC 12 ++* net/ice: fix MTU info for DCF ++* net/ice: fix outer L4 checksum in scalar Rx ++* net/igc: support multi-process ++* net/ipn3ke: fix xstats get return if xstats is null ++* net/ixgbe: add option for link up check on pin SDP3 ++* net/memif: fix overwriting of head segment ++* net/mlx5: destroy indirect actions on port stop ++* net/mlx5: fix build with clang 14 ++* net/mlx5: fix GTP handling in header modify action ++* net/mlx5: fix LRO validation in Rx setup ++* net/mlx5: fix MPRQ pool registration ++* net/mlx5: fix RSS expansion for patterns with ICMP item ++* net/mlx5: fix RSS hash types adjustment ++* net/mlx5: fix Rx queue recovery mechanism ++* net/mlx5: fix Rx/Tx stats concurrency ++* net/mlx5: fix stack buffer overflow in drop action ++* net/mlx5: fix Tx recovery ++* net/mlx5: fix Tx when inlining is impossible ++* net/mlx5: handle MPRQ incompatibility with external buffers ++* net/mlx5/linux: fix missed Rx packet stats ++* net/mvpp2: fix xstats get return if xstats is null ++* net/netvsc: fix calculation of checksums based on mbuf flag ++* net/netvsc: fix vmbus device reference in multi-process ++* net/nfp: fix disabling VLAN stripping ++* net/nfp: remove unneeded header inclusion ++* net/octeontx: fix port close ++* net/qede: fix build with GCC 12 ++* net/qede: fix build with GCC 13 ++* net/txgbe: fix max number of queues for SR-IOV ++* net/txgbe: fix register polling ++* net/vhost: fix access to freed memory ++* net/vhost: fix deadlock on vring state change ++* net/vhost: fix TSO feature default disablement ++* net/virtio: restore some optimisations with AVX512 ++* net/virtio-user: fix socket non-blocking mode ++* raw/ifpga: remove virtual devices on close ++* raw/ifpga: unregister interrupt on close ++* raw/ioat: fix build when ioat dmadev enabled ++* rib: fix references for IPv6 implementation ++* rib: fix traversal with /32 route ++* service: fix lingering active status ++* test: avoid hang if queues are full and Tx fails ++* test/bonding: fix RSS test when disable RSS ++* test: check memory allocation for CRC ++* test/crypto: fix authentication IV for ZUC SGL ++* test/crypto: fix cipher offset for ZUC ++* test/crypto: fix null check for ZUC authentication ++* test/crypto: fix SNOW3G vector IV format ++* test/crypto: fix ZUC vector IV format ++* test/hash: fix out of bound 
access ++* test/hash: report non HTM numbers for single thread ++* test/ipsec: fix build with GCC 12 ++* test/ipsec: fix performance test ++* test/ring: remove excessive inlining ++* test/table: fix buffer overflow on lpm entry ++* trace: fix init with long file prefix ++* vdpa/ifc: fix build with GCC 12 ++* vdpa/mlx5: fix dead loop when process interrupted ++* vdpa/mlx5: fix interrupt trash that leads to crash ++* vdpa/mlx5: fix maximum number of virtqs ++* vdpa/mlx5: workaround var offset within page ++* version: 20.11.6-rc1 ++* vhost: add some trailing newline in log messages ++* vhost/crypto: fix build with GCC 12 ++* vhost/crypto: fix descriptor processing ++* vhost: discard too small descriptor chains ++* vhost: fix async access ++* vhost: fix deadlock when message handling failed ++* vhost: fix header spanned across more than two descriptors ++* vhost: fix missing enqueue pseudo-header calculation ++* vhost: fix missing virtqueue lock protection ++* vhost: prevent async register ++ ++20.11.6 Validation ++~~~~~~~~~~~~~~~~~~ ++ ++* Intel(R) Testing ++ ++ * Basic Intel(R) NIC testing ++ ++ * PF(i40e, ixgbe, ice) ++ * VF(i40e, ixgbe, ice) ++ * Compile testing ++ * Intel NIC single core/NIC performance ++ * IPsec ++ ++ * Basic cryptodev and virtio testing ++ ++ * Virtio function and performance ++ * Cryptodev function and performance ++ ++* RedHat Testing ++ ++ # Functionality ++ ++ * Guest(PF, VF) ++ * Host ++ * Vswitch (throughput, live migration) ++ * Vhost-user(server, client) ++ * OVS-DPDK live migration ++ ++ # Platform ++ ++ * RHEL8, kernel 4.18, qemu 6.2 ++ * X540-AT2 NIC(ixgbe, 10G) ++ ++* Nvidia(R) Testing ++ ++ * Basic functionality with testpmd ++ ++ * Tx/Rx ++ * xstats ++ * Timestamps ++ * Link status ++ * RTE flow and flow_director ++ * RSS ++ * VLAN filtering, stripping and insertion ++ * Checksum/TSO ++ * ptype ++ * link_status_interrupt ++ * l3fwd-power example application ++ * Multi-process example applications ++ * Hardware LRO ++ ++ * Build tests ++ ++ * Ubuntu 20.04.4 with MLNX_OFED_LINUX-5.7-1.0.2.0. ++ * Ubuntu 20.04.4 with rdma-core master (23a0021). ++ * Ubuntu 20.04.4 with rdma-core v28.0. ++ * Ubuntu 18.04.6 with rdma-core v17.1. ++ * Ubuntu 18.04.6 with rdma-core master (23a0021) (i386). ++ * Ubuntu 16.04.7 with rdma-core v22.7. ++ * Fedora 35 with rdma-core v39.0. ++ * Fedora 37 (Rawhide) with rdma-core v39.0 (with clang only). ++ * CentOS 7 7.9.2009 with rdma-core master (23a0021). ++ * CentOS 7 7.9.2009 with MLNX_OFED_LINUX-5.7-1.0.2.0. ++ * CentOS 8 8.4.2105 with rdma-core master (23a0021). ++ * OpenSUSE Leap 15.4 with rdma-core v38.1. ++ * Windows Server 2019 with Clang 11.0.0. ++ ++ * Test platform ++ ++ * NIC: ConnectX-4 Lx / OS: Ubuntu 20.04 LTS / Driver: MLNX_OFED_LINUX-5.7-1.0.2.0 / Firmware: 14.32.1010 ++ * NIC: ConnectX-5 / OS: Ubuntu 20.04 LTS / Driver: MLNX_OFED_LINUX-5.7-1.0.2.0 / Firmware: 16.34.1002 ++ * NIC: ConnectX-6 Dx / OS: Ubuntu 20.04 LTS / Driver: MLNX_OFED_LINUX-5.7-1.0.2.0 / Firmware: 22.34.1002 ++ * DPU: BlueField-2 / DOCA SW version: 1.4.0 ++ ++20.11.6 Known Issues ++~~~~~~~~~~~~~~~~~~~~ ++ ++ +diff --git a/dpdk/doc/guides/rel_notes/release_2_1.rst b/dpdk/doc/guides/rel_notes/release_2_1.rst +index beadc51ba4..919ed77559 100644 +--- a/dpdk/doc/guides/rel_notes/release_2_1.rst ++++ b/dpdk/doc/guides/rel_notes/release_2_1.rst +@@ -243,7 +243,7 @@ New Features + * **Added fm10k jumbo frame support.** + + Added support for jumbo frame less than 15K in both VF and PF functions in the +- fm10k pmd. ++ fm10k PMD. 
+ + + * **Added fm10k mac vlan filtering support.** +@@ -673,7 +673,7 @@ Resolved Issues + value 0. + + +- Fixes: 40b966a211ab ("ivshmem: library changes for mmaping using ivshmem") ++ Fixes: 40b966a211ab ("ivshmem: library changes for mmapping using ivshmem") + + + * **ixgbe/base: Fix SFP probing.** +diff --git a/dpdk/doc/guides/rel_notes/release_2_2.rst b/dpdk/doc/guides/rel_notes/release_2_2.rst +index cea5c8746d..029b758e90 100644 +--- a/dpdk/doc/guides/rel_notes/release_2_2.rst ++++ b/dpdk/doc/guides/rel_notes/release_2_2.rst +@@ -10,8 +10,8 @@ New Features + * **Introduce ARMv7 and ARMv8 architectures.** + + * It is now possible to build DPDK for the ARMv7 and ARMv8 platforms. +- * ARMv7 can be tested with virtual PMD drivers. +- * ARMv8 can be tested with virtual and physical PMD drivers. ++ * ARMv7 can be tested with virtual PMDs. ++ * ARMv8 can be tested with virtual and physical PMDs. + + * **Enabled freeing of ring.** + +@@ -322,7 +322,7 @@ Drivers + + Several customers have reported a link flap issue on 82579. The symptoms + are random and intermittent link losses when 82579 is connected to specific +- switches. the Issue was root caused as an inter-operability problem between ++ switches. the Issue was root caused as an interoperability problem between + the NIC and at least some Broadcom PHYs in the Energy Efficient Ethernet + wake mechanism. + +diff --git a/dpdk/doc/guides/sample_app_ug/bbdev_app.rst b/dpdk/doc/guides/sample_app_ug/bbdev_app.rst +index 45e69e36e2..7f02f0ed90 100644 +--- a/dpdk/doc/guides/sample_app_ug/bbdev_app.rst ++++ b/dpdk/doc/guides/sample_app_ug/bbdev_app.rst +@@ -31,7 +31,7 @@ Limitations + Compiling the Application + ------------------------- + +-DPDK needs to be built with ``baseband_turbo_sw`` PMD driver enabled along ++DPDK needs to be built with ``baseband_turbo_sw`` PMD enabled along + with ``FLEXRAN SDK`` Libraries. Refer to *SW Turbo Poll Mode Driver* + documentation for more details on this. + +diff --git a/dpdk/doc/guides/sample_app_ug/fips_validation.rst b/dpdk/doc/guides/sample_app_ug/fips_validation.rst +index ca37fc0b38..39baea3346 100644 +--- a/dpdk/doc/guides/sample_app_ug/fips_validation.rst ++++ b/dpdk/doc/guides/sample_app_ug/fips_validation.rst +@@ -77,11 +77,12 @@ Compiling the Application + .. code-block:: console + + dos2unix AES/req/* +- dos2unix AES_GCM/req/* ++ dos2unix GCM/req/* + dos2unix CCM/req/* + dos2unix CMAC/req/* + dos2unix HMAC/req/* + dos2unix TDES/req/* ++ dos2unix SHA/req/* + + Running the Application + ----------------------- +@@ -113,7 +114,7 @@ where, + * mbuf-dataroom: By default the application creates mbuf pool with maximum + possible data room (65535 bytes). If the user wants to test scatter-gather + list feature of the PMD he or she may set this value to reduce the dataroom +- size so that the input data may be dividied into multiple chained mbufs. ++ size so that the input data may be divided into multiple chained mbufs. + + + To run the application in linux environment to test one AES FIPS test data +diff --git a/dpdk/doc/guides/sample_app_ug/hello_world.rst b/dpdk/doc/guides/sample_app_ug/hello_world.rst +index 7cb9279e99..6ec93e0054 100644 +--- a/dpdk/doc/guides/sample_app_ug/hello_world.rst ++++ b/dpdk/doc/guides/sample_app_ug/hello_world.rst +@@ -1,4 +1,4 @@ +-o.. SPDX-License-Identifier: BSD-3-Clause ++.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2010-2014 Intel Corporation. 
+ + Hello World Sample Application +diff --git a/dpdk/doc/guides/sample_app_ug/ipsec_secgw.rst b/dpdk/doc/guides/sample_app_ug/ipsec_secgw.rst +index 176e292d3f..c276976a39 100644 +--- a/dpdk/doc/guides/sample_app_ug/ipsec_secgw.rst ++++ b/dpdk/doc/guides/sample_app_ug/ipsec_secgw.rst +@@ -93,7 +93,7 @@ Additionally the event mode introduces two submodes of processing packets: + protocol use case, the worker thread resembles l2fwd worker thread as the IPsec + processing is done entirely in HW. This mode can be used to benchmark the raw + performance of the HW. The driver submode is selected with --single-sa option +- (used also by poll mode). When --single-sa option is used in conjution with event ++ (used also by poll mode). When --single-sa option is used in conjunction with event + mode then index passed to --single-sa is ignored. + + * App submode: This submode has all the features currently implemented with the +@@ -106,7 +106,8 @@ Constraints + + * No IPv6 options headers. + * No AH mode. +-* Supported algorithms: AES-CBC, AES-CTR, AES-GCM, 3DES-CBC, HMAC-SHA1 and NULL. ++* Supported algorithms: AES-CBC, AES-CTR, AES-GCM, 3DES-CBC, HMAC-SHA1, ++ HMAC-SHA256 and NULL. + * Each SA must be handle by a unique lcore (*1 RX queue per port*). + + Compiling the Application +@@ -421,7 +422,7 @@ where each options means: + + * *protect *: the specified traffic is protected by SA rule + with id SA_idx +- * *bypass*: the specified traffic traffic is bypassed ++ * *bypass*: the specified traffic is bypassed + * *discard*: the specified traffic is discarded + + ```` +@@ -564,6 +565,7 @@ where each options means: + + * *null*: NULL algorithm + * *sha1-hmac*: HMAC SHA1 algorithm ++ * *sha256-hmac*: HMAC SHA256 algorithm + + ```` + +diff --git a/dpdk/doc/guides/sample_app_ug/keep_alive.rst b/dpdk/doc/guides/sample_app_ug/keep_alive.rst +index c9392c9951..2ecfe3cfce 100644 +--- a/dpdk/doc/guides/sample_app_ug/keep_alive.rst ++++ b/dpdk/doc/guides/sample_app_ug/keep_alive.rst +@@ -78,7 +78,7 @@ options. + Explanation + ----------- + +-The following sections provide some explanation of the The ++The following sections provide some explanation of the + Keep-Alive/'Liveliness' conceptual scheme. As mentioned in the + overview section, the initialization and run-time paths are very + similar to those of the :doc:`l2_forward_real_virtual`. +diff --git a/dpdk/doc/guides/sample_app_ug/l2_forward_crypto.rst b/dpdk/doc/guides/sample_app_ug/l2_forward_crypto.rst +index e2c0f9f1ec..b24f16a479 100644 +--- a/dpdk/doc/guides/sample_app_ug/l2_forward_crypto.rst ++++ b/dpdk/doc/guides/sample_app_ug/l2_forward_crypto.rst +@@ -15,7 +15,7 @@ Overview + The L2 Forwarding with Crypto sample application performs a crypto operation (cipher/hash) + specified by the user from command line (or using the default values), + with a crypto device capable of doing that operation, +-for each packet that is received on a RX_PORT and performs L2 forwarding. ++for each packet that is received on an RX_PORT and performs L2 forwarding. + The destination port is the adjacent port from the enabled portmask, that is, + if the first four ports are enabled (portmask 0xf), + ports 0 and 1 forward into each other, and ports 2 and 3 forward into each other. +@@ -53,35 +53,35 @@ The application requires a number of command line options: + + where, + +-* p PORTMASK: A hexadecimal bitmask of the ports to configure (default is all the ports) ++* p PORTMASK: A hexadecimal bitmask of the ports to configure. (Default is all the ports.) 
+ +-* q NQ: A number of queues (=ports) per lcore (default is 1) ++* q NQ: A number of queues (=ports) per lcore. (Default is 1.) + +-* s: manage all ports from single core ++* s: manage all ports from a single core. + +-* T PERIOD: statistics will be refreshed each PERIOD seconds ++* T PERIOD: statistics will be refreshed each PERIOD seconds. + +- (0 to disable, 10 default, 86400 maximum) ++ (0 to disable, 10 default, 86400 maximum.) + +-* cdev_type: select preferred crypto device type: HW, SW or anything (ANY) ++* cdev_type: select preferred crypto device type: HW, SW or anything (ANY). + +- (default is ANY) ++ (Default is ANY.) + + * chain: select the operation chaining to perform: Cipher->Hash (CIPHER_HASH), + + Hash->Cipher (HASH_CIPHER), Cipher (CIPHER_ONLY), Hash (HASH_ONLY) + +- or AEAD (AEAD) ++ or AEAD (AEAD). + +- (default is Cipher->Hash) ++ (Default is Cipher->Hash.) + +-* cipher_algo: select the ciphering algorithm (default is aes-cbc) ++* cipher_algo: select the ciphering algorithm. (Default is aes-cbc.) + +-* cipher_op: select the ciphering operation to perform: ENCRYPT or DECRYPT ++* cipher_op: select the ciphering operation to perform: ENCRYPT or DECRYPT. + +- (default is ENCRYPT) ++ (Default is ENCRYPT.) + +-* cipher_key: set the ciphering key to be used. Bytes has to be separated with ":" ++* cipher_key: set the ciphering key to be used. Bytes have to be separated with ":". + + * cipher_key_random_size: set the size of the ciphering key, + +@@ -89,19 +89,19 @@ where, + + Note that if --cipher_key is used, this will be ignored. + +-* cipher_iv: set the cipher IV to be used. Bytes has to be separated with ":" ++* cipher_iv: set the cipher IV to be used. Bytes have to be separated with ":". + + * cipher_iv_random_size: set the size of the cipher IV, which will be generated randomly. + + Note that if --cipher_iv is used, this will be ignored. + +-* auth_algo: select the authentication algorithm (default is sha1-hmac) ++* auth_algo: select the authentication algorithm. (Default is sha1-hmac.) + +-* auth_op: select the authentication operation to perform: GENERATE or VERIFY ++* auth_op: select the authentication operation to perform: GENERATE or VERIFY. + +- (default is GENERATE) ++ (Default is GENERATE.) + +-* auth_key: set the authentication key to be used. Bytes has to be separated with ":" ++* auth_key: set the authentication key to be used. Bytes have to be separated with ":". + + * auth_key_random_size: set the size of the authentication key, + +@@ -109,19 +109,19 @@ where, + + Note that if --auth_key is used, this will be ignored. + +-* auth_iv: set the auth IV to be used. Bytes has to be separated with ":" ++* auth_iv: set the auth IV to be used. Bytes have to be separated with ":". + + * auth_iv_random_size: set the size of the auth IV, which will be generated randomly. + + Note that if --auth_iv is used, this will be ignored. + +-* aead_algo: select the AEAD algorithm (default is aes-gcm) ++* aead_algo: select the AEAD algorithm. (Default is aes-gcm.) + +-* aead_op: select the AEAD operation to perform: ENCRYPT or DECRYPT ++* aead_op: select the AEAD operation to perform: ENCRYPT or DECRYPT. + +- (default is ENCRYPT) ++ (Default is ENCRYPT.) + +-* aead_key: set the AEAD key to be used. Bytes has to be separated with ":" ++* aead_key: set the AEAD key to be used. Bytes have to be separated with ":". + + * aead_key_random_size: set the size of the AEAD key, + +@@ -129,13 +129,13 @@ where, + + Note that if --aead_key is used, this will be ignored. 
+ +-* aead_iv: set the AEAD IV to be used. Bytes has to be separated with ":" ++* aead_iv: set the AEAD IV to be used. Bytes have to be separated with ":". + + * aead_iv_random_size: set the size of the AEAD IV, which will be generated randomly. + + Note that if --aead_iv is used, this will be ignored. + +-* aad: set the AAD to be used. Bytes has to be separated with ":" ++* aad: set the AAD to be used. Bytes have to be separated with ":". + + * aad_random_size: set the size of the AAD, which will be generated randomly. + +@@ -148,9 +148,9 @@ where, + * cryptodev_mask: A hexadecimal bitmask of the cryptodevs to be used by the + application. + +- (default is all cryptodevs). ++ (Default is all cryptodevs.) + +-* [no-]mac-updating: Enable or disable MAC addresses updating (enabled by default). ++* [no-]mac-updating: Enable or disable MAC addresses updating. (Enabled by default.) + + + The application requires that crypto devices capable of performing +@@ -162,7 +162,7 @@ To run the application in linux environment with 2 lcores, 2 ports and 2 crypto + + .. code-block:: console + +- $ .//examples/dpdk-l2fwd-crypto -l 0-1 -n 4 --vdev "crypto_aesni_mb0" \ ++ $ .//examples/dpdk-l2fwd-crypto -l 0-1 --vdev "crypto_aesni_mb0" \ + --vdev "crypto_aesni_mb1" -- -p 0x3 --chain CIPHER_HASH \ + --cipher_op ENCRYPT --cipher_algo aes-cbc \ + --cipher_key 00:01:02:03:04:05:06:07:08:09:0a:0b:0c:0d:0e:0f \ +@@ -176,7 +176,7 @@ and the Environment Abstraction Layer (EAL) options. + + * The ``l2fwd-crypto`` sample application requires IPv4 packets for crypto operation. + +- * If multiple Ethernet ports is passed, then equal number of crypto devices are to be passed. ++ * If multiple Ethernet ports are passed, then equal number of crypto devices are to be passed. + + * All crypto devices shall use the same session. + +@@ -184,7 +184,7 @@ Explanation + ----------- + + The L2 forward with Crypto application demonstrates the performance of a crypto operation +-on a packet received on a RX PORT before forwarding it to a TX PORT. ++on a packet received on an RX PORT before forwarding it to a TX PORT. + + The following figure illustrates a sample flow of a packet in the application, + from reception until transmission. +@@ -193,7 +193,7 @@ from reception until transmission. + + .. figure:: img/l2_fwd_encrypt_flow.* + +- Encryption flow Through the L2 Forwarding with Crypto Application ++ Encryption flow through the L2 Forwarding with Crypto Application + + + The following sections provide some explanation of the application. +@@ -203,8 +203,8 @@ Crypto operation specification + + All the packets received in all the ports get transformed by the crypto device/s + (ciphering and/or authentication). +-The crypto operation to be performed on the packet is parsed from the command line +-(go to "Running the Application" section for all the options). ++The crypto operation to be performed on the packet is parsed from the command line. ++(Go to "Running the Application" section for all the options.) + + If no parameter is passed, the default crypto operation is: + +@@ -241,7 +241,7 @@ when running the application. + + The initialize_cryptodevs() function performs the device initialization. + It iterates through the list of the available crypto devices and +-check which ones are capable of performing the operation. ++checks which ones are capable of performing the operation. 
+ Each device has a set of capabilities associated with it, + which are stored in the device info structure, so the function checks if the operation + is within the structure of each device. +@@ -368,7 +368,7 @@ This session is created and is later attached to the crypto operation: + Crypto operation creation + ~~~~~~~~~~~~~~~~~~~~~~~~~ + +-Given N packets received from a RX PORT, N crypto operations are allocated ++Given N packets received from an RX PORT, N crypto operations are allocated + and filled: + + .. code-block:: c +diff --git a/dpdk/doc/guides/sample_app_ug/l3_forward_access_ctrl.rst b/dpdk/doc/guides/sample_app_ug/l3_forward_access_ctrl.rst +index 2cf6e4556f..b252efd7a7 100644 +--- a/dpdk/doc/guides/sample_app_ug/l3_forward_access_ctrl.rst ++++ b/dpdk/doc/guides/sample_app_ug/l3_forward_access_ctrl.rst +@@ -220,7 +220,7 @@ Once the application starts, it transitions through three phases: + + * **Final Phase** - Perform the following tasks: + +- Calls the EAL, PMD driver and ACL library to free resource, then quits. ++ Calls the EAL, PMD and ACL library to free resource, then quits. + + Compiling the Application + ------------------------- +diff --git a/dpdk/doc/guides/sample_app_ug/performance_thread.rst b/dpdk/doc/guides/sample_app_ug/performance_thread.rst +index 4c6a1dbe5c..9b09838f64 100644 +--- a/dpdk/doc/guides/sample_app_ug/performance_thread.rst ++++ b/dpdk/doc/guides/sample_app_ug/performance_thread.rst +@@ -1176,7 +1176,7 @@ Tracing of events can be individually masked, and the mask may be programmed + at run time. An unmasked event results in a callback that provides information + about the event. The default callback simply prints trace information. The + default mask is 0 (all events off) the mask can be modified by calling the +-function ``lthread_diagniostic_set_mask()``. ++function ``lthread_diagnostic_set_mask()``. + + It is possible register a user callback function to implement more + sophisticated diagnostic functions. +diff --git a/dpdk/doc/guides/sample_app_ug/vhost.rst b/dpdk/doc/guides/sample_app_ug/vhost.rst +index 15aaff2493..fa2cf4e3a6 100644 +--- a/dpdk/doc/guides/sample_app_ug/vhost.rst ++++ b/dpdk/doc/guides/sample_app_ug/vhost.rst +@@ -72,7 +72,7 @@ Run testpmd inside guest + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Make sure you have DPDK built inside the guest. Also make sure the +-corresponding virtio-net PCI device is bond to a uio driver, which ++corresponding virtio-net PCI device is bond to a UIO driver, which + could be done by: + + .. code-block:: console +@@ -206,11 +206,11 @@ Common Issues + * Option "builtin-net-driver" is incompatible with QEMU + + QEMU vhost net device start will fail if protocol feature is not negotiated. +- DPDK virtio-user pmd can be the replacement of QEMU. ++ DPDK virtio-user PMD can be the replacement of QEMU. + + * Device start fails when enabling "builtin-net-driver" without memory + pre-allocation + + The builtin example doesn't support dynamic memory allocation. When vhost + backend enables "builtin-net-driver", "--socket-mem" option should be +- added at virtio-user pmd side as a startup item. ++ added at virtio-user PMD side as a startup item. 
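For readers cross-checking the vhost guide hunks just above: the guest-side step of binding the virtio-net PCI device to a UIO driver is normally done with DPDK's devbind helper. A minimal sketch, assuming the stock usertools/dpdk-devbind.py script, the uio_pci_generic module, and a placeholder PCI address (all of these are illustrative, not part of the patch):

    # load a generic UIO module and bind the guest virtio-net device to it
    $ sudo modprobe uio_pci_generic
    $ ./usertools/dpdk-devbind.py --status
    $ sudo ./usertools/dpdk-devbind.py --bind=uio_pci_generic 0000:00:04.0

Once the device is bound, testpmd can be started inside the guest as the guide describes.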
+diff --git a/dpdk/doc/guides/sample_app_ug/vm_power_management.rst b/dpdk/doc/guides/sample_app_ug/vm_power_management.rst +index 35afdac63f..9ce87956c9 100644 +--- a/dpdk/doc/guides/sample_app_ug/vm_power_management.rst ++++ b/dpdk/doc/guides/sample_app_ug/vm_power_management.rst +@@ -236,7 +236,7 @@ Compiling and Running the Host Application + Compiling the Host Application + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-For information on compiling the DPDK and sample applications, see ++For information on compiling the DPDK and sample applications, + see :doc:`compiling`. + + The application is located in the ``vm_power_manager`` subdirectory. +@@ -681,7 +681,7 @@ The following is an example JSON string for a power management request. + "resource_id": 10 + }} + +-To query the available frequences of an lcore, use the query_cpu_freq command. ++To query the available frequencies of an lcore, use the query_cpu_freq command. + Where {core_num} is the lcore to query. + Before using this command, please enable responses via the set_query command on the host. + +diff --git a/dpdk/doc/guides/testpmd_app_ug/run_app.rst b/dpdk/doc/guides/testpmd_app_ug/run_app.rst +index ca67105b70..d76eb1032e 100644 +--- a/dpdk/doc/guides/testpmd_app_ug/run_app.rst ++++ b/dpdk/doc/guides/testpmd_app_ug/run_app.rst +@@ -112,7 +112,7 @@ The command line options are: + Set the data size of the mbufs used to N bytes, where N < 65536. + The default value is 2048. If multiple mbuf-size values are specified the + extra memory pools will be created for allocating mbufs to receive packets +- with buffer splittling features. ++ with buffer splitting features. + + * ``--total-num-mbufs=N`` + +@@ -524,8 +524,10 @@ The command line options are: + + * ``--hairpin-mode=0xXX`` + +- Set the hairpin port mode with bitmask, only valid when hairpin queues number is set. +- bit 4 - explicit Tx flow rule +- bit 1 - two hairpin ports paired +- bit 0 - two hairpin ports loop ++ Set the hairpin port mode with bitmask, only valid when hairpin queues number is set:: ++ ++ bit 4 - explicit Tx flow rule ++ bit 1 - two hairpin ports paired ++ bit 0 - two hairpin ports loop ++ + The default value is 0. Hairpin will use single port mode and implicit Tx flow mode. +diff --git a/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst +index 6a00245fc8..3487e1718c 100644 +--- a/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst ++++ b/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst +@@ -1732,7 +1732,7 @@ List all items from the ptype mapping table:: + + Where: + +-* ``valid_only``: A flag indicates if only list valid items(=1) or all itemss(=0). ++* ``valid_only``: A flag indicates if only list valid items(=1) or all items(=0). + + Replace a specific or a group of software defined ptype with a new one:: + +@@ -3465,7 +3465,7 @@ Tunnel offload + Indicate tunnel offload rule type + + - ``tunnel_set {tunnel_id}``: mark rule as tunnel offload decap_set type. +-- ``tunnel_match {tunnel_id}``: mark rule as tunel offload match type. ++- ``tunnel_match {tunnel_id}``: mark rule as tunnel offload match type. + + Matching pattern + ^^^^^^^^^^^^^^^^ +@@ -3729,7 +3729,8 @@ This section lists supported pattern items and their attributes, if any. + - ``gtp_psc``: match GTP PDU extension header with type 0x85. + + - ``pdu_type {unsigned}``: PDU type. +- - ``qfi {unsigned}``: QoS flow identifier. ++ ++ - ``qfi {unsigned}``: PPP, RQI and QoS flow identifier. + + - ``pppoes``, ``pppoed``: match PPPoE header. 
+ +@@ -4766,7 +4767,7 @@ Sample Raw encapsulation rule + + Raw encapsulation configuration can be set by the following commands + +-Eecapsulating VxLAN:: ++Encapsulating VxLAN:: + + testpmd> set raw_encap 4 eth src is 10:11:22:33:44:55 / vlan tci is 1 + inner_type is 0x0800 / ipv4 / udp dst is 4789 / vxlan vni +diff --git a/dpdk/doc/guides/tools/hugepages.rst b/dpdk/doc/guides/tools/hugepages.rst +index 6d3f410b20..dd24c803de 100644 +--- a/dpdk/doc/guides/tools/hugepages.rst ++++ b/dpdk/doc/guides/tools/hugepages.rst +@@ -62,7 +62,7 @@ Options + + .. warning:: + +- While any user can run the ``dpdk-hugpages.py`` script to view the ++ While any user can run the ``dpdk-hugepages.py`` script to view the + status of huge pages, modifying the setup requires root privileges. + + +@@ -71,8 +71,8 @@ Examples + + To display current huge page settings:: + +- dpdk-hugpages.py -s ++ dpdk-hugepages.py -s + + To a complete setup of with 2 Gigabyte of 1G huge pages:: + +- dpdk-hugpages.py -p 1G --setup 2G ++ dpdk-hugepages.py -p 1G --setup 2G +diff --git a/dpdk/doc/guides/tools/testeventdev.rst b/dpdk/doc/guides/tools/testeventdev.rst +index ad1788a3d8..4317f3170a 100644 +--- a/dpdk/doc/guides/tools/testeventdev.rst ++++ b/dpdk/doc/guides/tools/testeventdev.rst +@@ -207,7 +207,7 @@ to the ordered queue. The worker receives the events from ordered queue and + forwards to atomic queue. Since the events from an ordered queue can be + processed in parallel on the different workers, the ingress order of events + might have changed on the downstream atomic queue enqueue. On enqueue to the +-atomic queue, the eventdev PMD driver reorders the event to the original ++atomic queue, the eventdev PMD reorders the event to the original + ingress order(i.e producer ingress order). + + When the event is dequeued from the atomic queue by the worker, this test +diff --git a/dpdk/doc/guides/vdpadevs/ifc.rst b/dpdk/doc/guides/vdpadevs/ifc.rst +index f968489a75..706dbf740e 100644 +--- a/dpdk/doc/guides/vdpadevs/ifc.rst ++++ b/dpdk/doc/guides/vdpadevs/ifc.rst +@@ -17,9 +17,9 @@ IFCVF vDPA Implementation + IFCVF's vendor ID and device ID are same as that of virtio net pci device, + with its specific subsystem vendor ID and device ID. To let the device be + probed by IFCVF driver, adding "vdpa=1" parameter helps to specify that this +-device is to be used in vDPA mode, rather than polling mode, virtio pmd will ++device is to be used in vDPA mode, rather than polling mode, virtio PMD will + skip when it detects this message. If no this parameter specified, device +-will not be used as a vDPA device, and it will be driven by virtio pmd. ++will not be used as a vDPA device, and it will be driven by virtio PMD. + + Different VF devices serve different virtio frontends which are in different + VMs, so each VF needs to have its own DMA address translation service. 
During +diff --git a/dpdk/doc/meson.build b/dpdk/doc/meson.build +index c5410d85d6..d6cf85a900 100644 +--- a/dpdk/doc/meson.build ++++ b/dpdk/doc/meson.build +@@ -11,5 +11,5 @@ if doc_targets.length() == 0 + else + message = 'Building docs:' + endif +-run_target('doc', command: ['echo', message, doc_target_names], ++run_target('doc', command: [echo, message, doc_target_names], + depends: doc_targets) +diff --git a/dpdk/drivers/baseband/acc100/acc100_pf_enum.h b/dpdk/drivers/baseband/acc100/acc100_pf_enum.h +index a1ee416d26..2fba667627 100644 +--- a/dpdk/drivers/baseband/acc100/acc100_pf_enum.h ++++ b/dpdk/drivers/baseband/acc100/acc100_pf_enum.h +@@ -238,6 +238,24 @@ enum { + HWPfPermonBTotalLatLowBusMon = 0x00BAC504, + HWPfPermonBTotalLatUpperBusMon = 0x00BAC508, + HWPfPermonBTotalReqCntBusMon = 0x00BAC50C, ++ HwPfFabI2MArbCntrlReg = 0x00BB0000, ++ HWPfFabricMode = 0x00BB1000, ++ HwPfFabI2MGrp0DebugReg = 0x00BBF000, ++ HwPfFabI2MGrp1DebugReg = 0x00BBF004, ++ HwPfFabI2MGrp2DebugReg = 0x00BBF008, ++ HwPfFabI2MGrp3DebugReg = 0x00BBF00C, ++ HwPfFabI2MBuf0DebugReg = 0x00BBF010, ++ HwPfFabI2MBuf1DebugReg = 0x00BBF014, ++ HwPfFabI2MBuf2DebugReg = 0x00BBF018, ++ HwPfFabI2MBuf3DebugReg = 0x00BBF01C, ++ HwPfFabM2IBuf0Grp0DebugReg = 0x00BBF020, ++ HwPfFabM2IBuf1Grp0DebugReg = 0x00BBF024, ++ HwPfFabM2IBuf0Grp1DebugReg = 0x00BBF028, ++ HwPfFabM2IBuf1Grp1DebugReg = 0x00BBF02C, ++ HwPfFabM2IBuf0Grp2DebugReg = 0x00BBF030, ++ HwPfFabM2IBuf1Grp2DebugReg = 0x00BBF034, ++ HwPfFabM2IBuf0Grp3DebugReg = 0x00BBF038, ++ HwPfFabM2IBuf1Grp3DebugReg = 0x00BBF03C, + HWPfFecUl5gCntrlReg = 0x00BC0000, + HWPfFecUl5gI2MThreshReg = 0x00BC0004, + HWPfFecUl5gVersionReg = 0x00BC0100, +diff --git a/dpdk/drivers/baseband/acc100/rte_acc100_pmd.h b/dpdk/drivers/baseband/acc100/acc100_pmd.h +similarity index 96% +rename from dpdk/drivers/baseband/acc100/rte_acc100_pmd.h +rename to dpdk/drivers/baseband/acc100/acc100_pmd.h +index 03ed0b3e1a..b3956e9928 100644 +--- a/dpdk/drivers/baseband/acc100/rte_acc100_pmd.h ++++ b/dpdk/drivers/baseband/acc100/acc100_pmd.h +@@ -27,14 +27,9 @@ + #define ACC100VF_DRIVER_NAME intel_acc100_vf + + /* ACC100 PCI vendor & device IDs */ +-#define RTE_ACC100_VENDOR_ID (0x8086) +-#define RTE_ACC100_PF_DEVICE_ID (0x0d5c) +-#define RTE_ACC100_VF_DEVICE_ID (0x0d5d) +- +-/* Define as 1 to use only a single FEC engine */ +-#ifndef RTE_ACC100_SINGLE_FEC +-#define RTE_ACC100_SINGLE_FEC 0 +-#endif ++#define ACC100_VENDOR_ID (0x8086) ++#define ACC100_PF_DEVICE_ID (0x0d5c) ++#define ACC100_VF_DEVICE_ID (0x0d5d) + + /* Values used in filling in descriptors */ + #define ACC100_DMA_DESC_TYPE 2 +@@ -113,6 +108,7 @@ + #define ACC100_SW_RING_MEM_ALLOC_ATTEMPTS 5 + #define ACC100_MAX_QUEUE_DEPTH 1024 + #define ACC100_DMA_MAX_NUM_POINTERS 14 ++#define ACC100_DMA_MAX_NUM_POINTERS_IN 7 + #define ACC100_DMA_DESC_PADDING 8 + #define ACC100_FCW_PADDING 12 + #define ACC100_DESC_FCW_OFFSET 192 +@@ -152,6 +148,12 @@ + #define ACC100_CFG_QMGR_HI_P 0x0F0F + #define ACC100_CFG_PCI_AXI 0xC003 + #define ACC100_CFG_PCI_BRIDGE 0x40006033 ++#define ACC100_QUAD_NUMS 4 ++#define ACC100_LANES_PER_QUAD 4 ++#define ACC100_PCIE_LANE_OFFSET 0x200 ++#define ACC100_PCIE_QUAD_OFFSET 0x2000 ++#define ACC100_PCS_EQ 0x6007 ++#define ACC100_ADAPT 0x8400 + #define ACC100_ENGINE_OFFSET 0x1000 + #define ACC100_RESET_HI 0x20100 + #define ACC100_RESET_LO 0x20000 +@@ -159,6 +161,15 @@ + #define ACC100_ENGINES_MAX 9 + #define ACC100_LONG_WAIT 1000 + #define ACC100_GPEX_AXIMAP_NUM 17 ++#define ACC100_CLOCK_GATING_EN 0x30000 ++#define ACC100_FABRIC_MODE 0xB 
++/* DDR Size per VF - 512MB by default ++ * Can be increased up to 4 GB with single PF/VF ++ */ ++#define ACC100_HARQ_DDR (512 * 1) ++#define ACC100_PRQ_DDR_VER 0x10092020 ++#define ACC100_MS_IN_US (1000) ++#define ACC100_DDR_TRAINING_MAX (5000) + + /* ACC100 DMA Descriptor triplet */ + struct acc100_dma_triplet { +diff --git a/dpdk/drivers/baseband/acc100/rte_acc100_pmd.c b/dpdk/drivers/baseband/acc100/rte_acc100_pmd.c +index 5e663a62dd..453343cfb6 100644 +--- a/dpdk/drivers/baseband/acc100/rte_acc100_pmd.c ++++ b/dpdk/drivers/baseband/acc100/rte_acc100_pmd.c +@@ -21,7 +21,7 @@ + + #include + #include +-#include "rte_acc100_pmd.h" ++#include "acc100_pmd.h" + + #ifdef RTE_LIBRTE_BBDEV_DEBUG + RTE_LOG_REGISTER(acc100_logtype, pmd.bb.acc100, DEBUG); +@@ -141,8 +141,8 @@ aqDepth(int qg_idx, struct rte_acc100_conf *acc100_conf) + int acc_enum = accFromQgid(qg_idx, acc100_conf); + qtopFromAcc(&q_top, acc_enum, acc100_conf); + if (unlikely(q_top == NULL)) +- return 0; +- return q_top->aq_depth_log2; ++ return 1; ++ return RTE_MAX(1, q_top->aq_depth_log2); + } + + /* Return the AQ depth for a Queue Group Index */ +@@ -1131,7 +1131,7 @@ static const struct rte_bbdev_ops acc100_bbdev_ops = { + /* ACC100 PCI PF address map */ + static struct rte_pci_id pci_id_acc100_pf_map[] = { + { +- RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID, RTE_ACC100_PF_DEVICE_ID) ++ RTE_PCI_DEVICE(ACC100_VENDOR_ID, ACC100_PF_DEVICE_ID) + }, + {.device_id = 0}, + }; +@@ -1139,7 +1139,7 @@ static struct rte_pci_id pci_id_acc100_pf_map[] = { + /* ACC100 PCI VF address map */ + static struct rte_pci_id pci_id_acc100_vf_map[] = { + { +- RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID, RTE_ACC100_VF_DEVICE_ID) ++ RTE_PCI_DEVICE(ACC100_VENDOR_ID, ACC100_VF_DEVICE_ID) + }, + {.device_id = 0}, + }; +@@ -1234,6 +1234,8 @@ get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index) + return (bg == 1 ? ACC100_K0_3_1 : ACC100_K0_3_2) * z_c; + } + /* LBRM case - includes a division by N */ ++ if (unlikely(z_c == 0)) ++ return 0; + if (rv_index == 1) + return (((bg == 1 ? 
ACC100_K0_1_1 : ACC100_K0_1_2) * n_cb) + / n) * z_c; +@@ -1458,8 +1460,7 @@ acc100_dma_fill_blk_type_in(struct acc100_dma_req_desc *desc, + next_triplet++; + + while (cb_len > 0) { +- if (next_triplet < ACC100_DMA_MAX_NUM_POINTERS && +- m->next != NULL) { ++ if (next_triplet < ACC100_DMA_MAX_NUM_POINTERS_IN && m->next != NULL) { + + m = m->next; + *seg_total_left = rte_pktmbuf_data_len(m); +@@ -1744,7 +1745,8 @@ acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op, + + next_triplet = acc100_dma_fill_blk_type_out( + desc, h_output, *h_out_offset, +- k >> 3, next_triplet, ACC100_DMA_BLKID_OUT_HARD); ++ (k - crc24_overlap) >> 3, next_triplet, ++ ACC100_DMA_BLKID_OUT_HARD); + if (unlikely(next_triplet < 0)) { + rte_bbdev_log(ERR, + "Mismatch between data to process and mbuf data length in bbdev_op: %p", +@@ -1758,6 +1760,10 @@ acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op, + + /* Soft output */ + if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) { ++ if (op->turbo_dec.soft_output.data == 0) { ++ rte_bbdev_log(ERR, "Soft output is not defined"); ++ return -1; ++ } + if (check_bit(op->turbo_dec.op_flags, + RTE_BBDEV_TURBO_EQUALIZER)) + *s_out_length = e; +@@ -4406,7 +4412,7 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + { + rte_bbdev_log(INFO, "rte_acc100_configure"); + uint32_t value, address, status; +- int qg_idx, template_idx, vf_idx, acc, i; ++ int qg_idx, template_idx, vf_idx, acc, i, j; + struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name); + + /* Compile time checks */ +@@ -4426,6 +4432,9 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + /* Store configuration */ + rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf)); + ++ value = acc100_reg_read(d, HwPfPcieGpexBridgeControl); ++ bool firstCfg = (value != ACC100_CFG_PCI_BRIDGE); ++ + /* PCIe Bridge configuration */ + acc100_reg_write(d, HwPfPcieGpexBridgeControl, ACC100_CFG_PCI_BRIDGE); + for (i = 1; i < ACC100_GPEX_AXIMAP_NUM; i++) +@@ -4446,20 +4455,9 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + value = 1; + acc100_reg_write(d, address, value); + +- /* DDR Configuration */ +- address = HWPfDdrBcTim6; +- value = acc100_reg_read(d, address); +- value &= 0xFFFFFFFB; /* Bit 2 */ +-#ifdef ACC100_DDR_ECC_ENABLE +- value |= 0x4; +-#endif +- acc100_reg_write(d, address, value); +- address = HWPfDdrPhyDqsCountNum; +-#ifdef ACC100_DDR_ECC_ENABLE +- value = 9; +-#else +- value = 8; +-#endif ++ /* Enable granular dynamic clock gating */ ++ address = HWPfHiClkGateHystReg; ++ value = ACC100_CLOCK_GATING_EN; + acc100_reg_write(d, address, value); + + /* Set default descriptor signature */ +@@ -4477,6 +4475,17 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + address = HWPfDmaAxcacheReg; + acc100_reg_write(d, address, value); + ++ /* Adjust PCIe Lane adaptation */ ++ for (i = 0; i < ACC100_QUAD_NUMS; i++) ++ for (j = 0; j < ACC100_LANES_PER_QUAD; j++) ++ acc100_reg_write(d, HwPfPcieLnAdaptctrl + i * ACC100_PCIE_QUAD_OFFSET ++ + j * ACC100_PCIE_LANE_OFFSET, ACC100_ADAPT); ++ ++ /* Enable PCIe live adaptation */ ++ for (i = 0; i < ACC100_QUAD_NUMS; i++) ++ acc100_reg_write(d, HwPfPciePcsEqControl + ++ i * ACC100_PCIE_QUAD_OFFSET, ACC100_PCS_EQ); ++ + /* Default DMA Configuration (Qmgr Enabled) */ + address = HWPfDmaConfig0Reg; + value = 0; +@@ -4495,6 +4504,11 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + value = HWPfQmgrEgressQueuesTemplate; + acc100_reg_write(d, 
address, value); + ++ /* Default Fabric Mode */ ++ address = HWPfFabricMode; ++ value = ACC100_FABRIC_MODE; ++ acc100_reg_write(d, address, value); ++ + /* ===== Qmgr Configuration ===== */ + /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2 for UL */ + int totalQgs = conf->q_ul_4g.num_qgroups + +@@ -4513,22 +4527,17 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + } + + /* Template Priority in incremental order */ +- for (template_idx = 0; template_idx < ACC100_NUM_TMPL; +- template_idx++) { +- address = HWPfQmgrGrpTmplateReg0Indx + +- ACC100_BYTES_IN_WORD * (template_idx % 8); ++ for (template_idx = 0; template_idx < ACC100_NUM_TMPL; template_idx++) { ++ address = HWPfQmgrGrpTmplateReg0Indx + ACC100_BYTES_IN_WORD * template_idx; + value = ACC100_TMPL_PRI_0; + acc100_reg_write(d, address, value); +- address = HWPfQmgrGrpTmplateReg1Indx + +- ACC100_BYTES_IN_WORD * (template_idx % 8); ++ address = HWPfQmgrGrpTmplateReg1Indx + ACC100_BYTES_IN_WORD * template_idx; + value = ACC100_TMPL_PRI_1; + acc100_reg_write(d, address, value); +- address = HWPfQmgrGrpTmplateReg2indx + +- ACC100_BYTES_IN_WORD * (template_idx % 8); ++ address = HWPfQmgrGrpTmplateReg2indx + ACC100_BYTES_IN_WORD * template_idx; + value = ACC100_TMPL_PRI_2; + acc100_reg_write(d, address, value); +- address = HWPfQmgrGrpTmplateReg3Indx + +- ACC100_BYTES_IN_WORD * (template_idx % 8); ++ address = HWPfQmgrGrpTmplateReg3Indx + ACC100_BYTES_IN_WORD * template_idx; + value = ACC100_TMPL_PRI_3; + acc100_reg_write(d, address, value); + } +@@ -4579,9 +4588,6 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + numEngines++; + } else + acc100_reg_write(d, address, 0); +-#if RTE_ACC100_SINGLE_FEC == 1 +- value = 0; +-#endif + } + printf("Number of 5GUL engines %d\n", numEngines); + /* 4GDL */ +@@ -4596,9 +4602,6 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + address = HWPfQmgrGrpTmplateReg4Indx + + ACC100_BYTES_IN_WORD * template_idx; + acc100_reg_write(d, address, value); +-#if RTE_ACC100_SINGLE_FEC == 1 +- value = 0; +-#endif + } + /* 5GDL */ + numQqsAcc += numQgs; +@@ -4612,13 +4615,10 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + address = HWPfQmgrGrpTmplateReg4Indx + + ACC100_BYTES_IN_WORD * template_idx; + acc100_reg_write(d, address, value); +-#if RTE_ACC100_SINGLE_FEC == 1 +- value = 0; +-#endif + } + + /* Queue Group Function mapping */ +- int qman_func_id[5] = {0, 2, 1, 3, 4}; ++ int qman_func_id[8] = {0, 2, 1, 3, 4, 0, 0, 0}; + address = HWPfQmgrGrpFunction0; + value = 0; + for (qg_idx = 0; qg_idx < 8; qg_idx++) { +@@ -4649,7 +4649,7 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + } + } + +- /* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */ ++ /* This pointer to ARAM (128kB) is shifted by 2 (4B per register) */ + uint32_t aram_address = 0; + for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) { + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) { +@@ -4674,6 +4674,11 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + + /* ==== HI Configuration ==== */ + ++ /* No Info Ring/MSI by default */ ++ acc100_reg_write(d, HWPfHiInfoRingIntWrEnRegPf, 0); ++ acc100_reg_write(d, HWPfHiInfoRingVf2pfLoWrEnReg, 0); ++ acc100_reg_write(d, HWPfHiCfgMsiIntWrEnRegPf, 0xFFFFFFFF); ++ acc100_reg_write(d, HWPfHiCfgMsiVf2pfLoWrEnReg, 0xFFFFFFFF); + /* Prevent Block on Transmit Error */ + address = HWPfHiBlockTransmitOnErrorEn; + value = 0; +@@ 
-4686,10 +4691,6 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + address = HWPfHiPfMode; + value = (conf->pf_mode_en) ? ACC100_PF_VAL : 0; + acc100_reg_write(d, address, value); +- /* Enable Error Detection in HW */ +- address = HWPfDmaErrorDetectionEn; +- value = 0x3D7; +- acc100_reg_write(d, address, value); + + /* QoS overflow init */ + value = 1; +@@ -4699,7 +4700,7 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + acc100_reg_write(d, address, value); + + /* HARQ DDR Configuration */ +- unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now */ ++ unsigned int ddrSizeInMb = ACC100_HARQ_DDR; + for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) { + address = HWPfDmaVfDdrBaseRw + vf_idx + * 0x10; +@@ -4713,6 +4714,88 @@ rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf) + if (numEngines < (ACC100_SIG_UL_5G_LAST + 1)) + poweron_cleanup(bbdev, d, conf); + ++ uint32_t version = 0; ++ for (i = 0; i < 4; i++) ++ version += acc100_reg_read(d, ++ HWPfDdrPhyIdtmFwVersion + 4 * i) << (8 * i); ++ if (version != ACC100_PRQ_DDR_VER) { ++ printf("* Note: Not on DDR PRQ version %8x != %08x\n", ++ version, ACC100_PRQ_DDR_VER); ++ } else if (firstCfg) { ++ /* ---- DDR configuration at boot up --- */ ++ /* Read Clear Ddr training status */ ++ acc100_reg_read(d, HWPfChaDdrStDoneStatus); ++ /* Reset PHY/IDTM/UMMC */ ++ acc100_reg_write(d, HWPfChaDdrWbRstCfg, 3); ++ acc100_reg_write(d, HWPfChaDdrApbRstCfg, 2); ++ acc100_reg_write(d, HWPfChaDdrPhyRstCfg, 2); ++ acc100_reg_write(d, HWPfChaDdrCpuRstCfg, 3); ++ acc100_reg_write(d, HWPfChaDdrSifRstCfg, 2); ++ usleep(ACC100_MS_IN_US); ++ /* Reset WB and APB resets */ ++ acc100_reg_write(d, HWPfChaDdrWbRstCfg, 2); ++ acc100_reg_write(d, HWPfChaDdrApbRstCfg, 3); ++ /* Configure PHY-IDTM */ ++ acc100_reg_write(d, HWPfDdrPhyIdletimeout, 0x3e8); ++ /* IDTM timing registers */ ++ acc100_reg_write(d, HWPfDdrPhyRdLatency, 0x13); ++ acc100_reg_write(d, HWPfDdrPhyRdLatencyDbi, 0x15); ++ acc100_reg_write(d, HWPfDdrPhyWrLatency, 0x10011); ++ /* Configure SDRAM MRS registers */ ++ acc100_reg_write(d, HWPfDdrPhyMr01Dimm, 0x3030b70); ++ acc100_reg_write(d, HWPfDdrPhyMr01DimmDbi, 0x3030b50); ++ acc100_reg_write(d, HWPfDdrPhyMr23Dimm, 0x30); ++ acc100_reg_write(d, HWPfDdrPhyMr67Dimm, 0xc00); ++ acc100_reg_write(d, HWPfDdrPhyMr45Dimm, 0x4000000); ++ /* Configure active lanes */ ++ acc100_reg_write(d, HWPfDdrPhyDqsCountMax, 0x9); ++ acc100_reg_write(d, HWPfDdrPhyDqsCountNum, 0x9); ++ /* Configure WR/RD leveling timing registers */ ++ acc100_reg_write(d, HWPfDdrPhyWrlvlWwRdlvlRr, 0x101212); ++ /* Configure what trainings to execute */ ++ acc100_reg_write(d, HWPfDdrPhyTrngType, 0x2d3c); ++ /* Releasing PHY reset */ ++ acc100_reg_write(d, HWPfChaDdrPhyRstCfg, 3); ++ /* Configure Memory Controller registers */ ++ acc100_reg_write(d, HWPfDdrMemInitPhyTrng0, 0x3); ++ acc100_reg_write(d, HWPfDdrBcDram, 0x3c232003); ++ acc100_reg_write(d, HWPfDdrBcAddrMap, 0x31); ++ /* Configure UMMC BC timing registers */ ++ acc100_reg_write(d, HWPfDdrBcRef, 0xa22); ++ acc100_reg_write(d, HWPfDdrBcTim0, 0x4050501); ++ acc100_reg_write(d, HWPfDdrBcTim1, 0xf0b0476); ++ acc100_reg_write(d, HWPfDdrBcTim2, 0x103); ++ acc100_reg_write(d, HWPfDdrBcTim3, 0x144050a1); ++ acc100_reg_write(d, HWPfDdrBcTim4, 0x23300); ++ acc100_reg_write(d, HWPfDdrBcTim5, 0x4230276); ++ acc100_reg_write(d, HWPfDdrBcTim6, 0x857914); ++ acc100_reg_write(d, HWPfDdrBcTim7, 0x79100232); ++ acc100_reg_write(d, HWPfDdrBcTim8, 0x100007ce); ++ 
acc100_reg_write(d, HWPfDdrBcTim9, 0x50020); ++ acc100_reg_write(d, HWPfDdrBcTim10, 0x40ee); ++ /* Configure UMMC DFI timing registers */ ++ acc100_reg_write(d, HWPfDdrDfiInit, 0x5000); ++ acc100_reg_write(d, HWPfDdrDfiTim0, 0x15030006); ++ acc100_reg_write(d, HWPfDdrDfiTim1, 0x11305); ++ acc100_reg_write(d, HWPfDdrDfiPhyUpdEn, 0x1); ++ acc100_reg_write(d, HWPfDdrUmmcIntEn, 0x1f); ++ /* Release IDTM CPU out of reset */ ++ acc100_reg_write(d, HWPfChaDdrCpuRstCfg, 0x2); ++ /* Wait PHY-IDTM to finish static training */ ++ for (i = 0; i < ACC100_DDR_TRAINING_MAX; i++) { ++ usleep(ACC100_MS_IN_US); ++ value = acc100_reg_read(d, ++ HWPfChaDdrStDoneStatus); ++ if (value & 1) ++ break; ++ } ++ printf("DDR Training completed in %d ms", i); ++ /* Enable Memory Controller */ ++ acc100_reg_write(d, HWPfDdrUmmcCtrl, 0x401); ++ /* Release AXI interface reset */ ++ acc100_reg_write(d, HWPfChaDdrSifRstCfg, 3); ++ } ++ + rte_bbdev_log_debug("PF Tip configuration complete for %s", dev_name); + return 0; + } +diff --git a/dpdk/drivers/baseband/fpga_lte_fec/fpga_lte_fec.c b/dpdk/drivers/baseband/fpga_lte_fec/fpga_lte_fec.c +index a0fb11cb47..fe9c941683 100644 +--- a/dpdk/drivers/baseband/fpga_lte_fec/fpga_lte_fec.c ++++ b/dpdk/drivers/baseband/fpga_lte_fec/fpga_lte_fec.c +@@ -2094,7 +2094,7 @@ dequeue_enc_one_op_cb(struct fpga_queue *q, struct rte_bbdev_enc_op **op, + rte_bbdev_log_debug("DMA response desc %p", desc); + + *op = desc->enc_req.op_addr; +- /* Check the decriptor error field, return 1 on error */ ++ /* Check the descriptor error field, return 1 on error */ + desc_error = check_desc_error(desc->enc_req.error); + (*op)->status = desc_error << RTE_BBDEV_DATA_ERROR; + +@@ -2136,7 +2136,7 @@ dequeue_enc_one_op_tb(struct fpga_queue *q, struct rte_bbdev_enc_op **op, + for (cb_idx = 0; cb_idx < cbs_in_op; ++cb_idx) { + desc = q->ring_addr + ((q->head_free_desc + desc_offset + + cb_idx) & q->sw_ring_wrap_mask); +- /* Check the decriptor error field, return 1 on error */ ++ /* Check the descriptor error field, return 1 on error */ + desc_error = check_desc_error(desc->enc_req.error); + status |= desc_error << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log_debug("DMA response desc %p", desc); +@@ -2174,7 +2174,7 @@ dequeue_dec_one_op_cb(struct fpga_queue *q, struct rte_bbdev_dec_op **op, + (*op)->turbo_dec.iter_count = (desc->dec_req.iter + 2) >> 1; + /* crc_pass = 0 when decoder fails */ + (*op)->status = !(desc->dec_req.crc_pass) << RTE_BBDEV_CRC_ERROR; +- /* Check the decriptor error field, return 1 on error */ ++ /* Check the descriptor error field, return 1 on error */ + desc_error = check_desc_error(desc->enc_req.error); + (*op)->status |= desc_error << RTE_BBDEV_DATA_ERROR; + return 1; +@@ -2218,7 +2218,7 @@ dequeue_dec_one_op_tb(struct fpga_queue *q, struct rte_bbdev_dec_op **op, + iter_count = RTE_MAX(iter_count, (uint8_t) desc->dec_req.iter); + /* crc_pass = 0 when decoder fails, one fails all */ + status |= !(desc->dec_req.crc_pass) << RTE_BBDEV_CRC_ERROR; +- /* Check the decriptor error field, return 1 on error */ ++ /* Check the descriptor error field, return 1 on error */ + desc_error = check_desc_error(desc->enc_req.error); + status |= desc_error << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log_debug("DMA response desc %p", desc); +diff --git a/dpdk/drivers/baseband/null/bbdev_null.c b/dpdk/drivers/baseband/null/bbdev_null.c +index 6cf3988b88..62773d0024 100644 +--- a/dpdk/drivers/baseband/null/bbdev_null.c ++++ b/dpdk/drivers/baseband/null/bbdev_null.c +@@ -31,7 +31,7 @@ struct bbdev_null_params { + uint16_t 
queues_num; /*< Null BBDEV queues number */ + }; + +-/* Accecptable params for null BBDEV devices */ ++/* Acceptable params for null BBDEV devices */ + #define BBDEV_NULL_MAX_NB_QUEUES_ARG "max_nb_queues" + #define BBDEV_NULL_SOCKET_ID_ARG "socket_id" + +diff --git a/dpdk/drivers/baseband/turbo_sw/bbdev_turbo_software.c b/dpdk/drivers/baseband/turbo_sw/bbdev_turbo_software.c +index aa7f122382..21c2b922cd 100644 +--- a/dpdk/drivers/baseband/turbo_sw/bbdev_turbo_software.c ++++ b/dpdk/drivers/baseband/turbo_sw/bbdev_turbo_software.c +@@ -61,7 +61,7 @@ struct turbo_sw_params { + uint16_t queues_num; /*< Turbo SW device queues number */ + }; + +-/* Accecptable params for Turbo SW devices */ ++/* Acceptable params for Turbo SW devices */ + #define TURBO_SW_MAX_NB_QUEUES_ARG "max_nb_queues" + #define TURBO_SW_SOCKET_ID_ARG "socket_id" + +diff --git a/dpdk/drivers/bus/dpaa/base/fman/fman.c b/dpdk/drivers/bus/dpaa/base/fman/fman.c +index 39102bc1f3..997c94f087 100644 +--- a/dpdk/drivers/bus/dpaa/base/fman/fman.c ++++ b/dpdk/drivers/bus/dpaa/base/fman/fman.c +@@ -50,7 +50,7 @@ if_destructor(struct __fman_if *__if) + free(bp); + } + cleanup: +- free(__if); ++ rte_free(__if); + } + + static int +diff --git a/dpdk/drivers/bus/dpaa/base/fman/fman_hw.c b/dpdk/drivers/bus/dpaa/base/fman/fman_hw.c +index 4ab49f7853..af9bac76c2 100644 +--- a/dpdk/drivers/bus/dpaa/base/fman/fman_hw.c ++++ b/dpdk/drivers/bus/dpaa/base/fman/fman_hw.c +@@ -1,6 +1,6 @@ + /* SPDX-License-Identifier: BSD-3-Clause + * +- * Copyright 2017 NXP ++ * Copyright 2017,2020 NXP + * + */ + +@@ -219,20 +219,20 @@ fman_if_stats_get(struct fman_if *p, struct rte_eth_stats *stats) + struct memac_regs *regs = m->ccsr_map; + + /* read recved packet count */ +- stats->ipackets = ((u64)in_be32(®s->rfrm_u)) << 32 | +- in_be32(®s->rfrm_l); +- stats->ibytes = ((u64)in_be32(®s->roct_u)) << 32 | +- in_be32(®s->roct_l); +- stats->ierrors = ((u64)in_be32(®s->rerr_u)) << 32 | +- in_be32(®s->rerr_l); ++ stats->ipackets = (u64)in_be32(®s->rfrm_l) | ++ ((u64)in_be32(®s->rfrm_u)) << 32; ++ stats->ibytes = (u64)in_be32(®s->roct_l) | ++ ((u64)in_be32(®s->roct_u)) << 32; ++ stats->ierrors = (u64)in_be32(®s->rerr_l) | ++ ((u64)in_be32(®s->rerr_u)) << 32; + + /* read xmited packet count */ +- stats->opackets = ((u64)in_be32(®s->tfrm_u)) << 32 | +- in_be32(®s->tfrm_l); +- stats->obytes = ((u64)in_be32(®s->toct_u)) << 32 | +- in_be32(®s->toct_l); +- stats->oerrors = ((u64)in_be32(®s->terr_u)) << 32 | +- in_be32(®s->terr_l); ++ stats->opackets = (u64)in_be32(®s->tfrm_l) | ++ ((u64)in_be32(®s->tfrm_u)) << 32; ++ stats->obytes = (u64)in_be32(®s->toct_l) | ++ ((u64)in_be32(®s->toct_u)) << 32; ++ stats->oerrors = (u64)in_be32(®s->terr_l) | ++ ((u64)in_be32(®s->terr_u)) << 32; + } + + void +@@ -244,10 +244,9 @@ fman_if_stats_get_all(struct fman_if *p, uint64_t *value, int n) + uint64_t base_offset = offsetof(struct memac_regs, reoct_l); + + for (i = 0; i < n; i++) +- value[i] = ((u64)in_be32((char *)regs +- + base_offset + 8 * i + 4)) << 32 | +- ((u64)in_be32((char *)regs +- + base_offset + 8 * i)); ++ value[i] = (((u64)in_be32((char *)regs + base_offset + 8 * i) | ++ (u64)in_be32((char *)regs + base_offset + ++ 8 * i + 4)) << 32); + } + + void +diff --git a/dpdk/drivers/bus/dpaa/base/fman/netcfg_layer.c b/dpdk/drivers/bus/dpaa/base/fman/netcfg_layer.c +index b7009f2299..120deb0bb6 100644 +--- a/dpdk/drivers/bus/dpaa/base/fman/netcfg_layer.c ++++ b/dpdk/drivers/bus/dpaa/base/fman/netcfg_layer.c +@@ -8,7 +8,7 @@ + #include + #include + #include +-#include ++#include + 
#include + #include + #include +@@ -90,7 +90,7 @@ netcfg_acquire(void) + */ + skfd = socket(AF_PACKET, SOCK_RAW, 0); + if (unlikely(skfd < 0)) { +- error(0, errno, "%s(): open(SOCK_RAW)", __func__); ++ err(0, "%s(): open(SOCK_RAW)", __func__); + return NULL; + } + +diff --git a/dpdk/drivers/bus/dpaa/base/qbman/bman_driver.c b/dpdk/drivers/bus/dpaa/base/qbman/bman_driver.c +index 750b756b93..ee35e03da1 100644 +--- a/dpdk/drivers/bus/dpaa/base/qbman/bman_driver.c ++++ b/dpdk/drivers/bus/dpaa/base/qbman/bman_driver.c +@@ -11,6 +11,7 @@ + #include + #include "bman_priv.h" + #include ++#include + + /* + * Global variables of the max portal/pool number this bman version supported +@@ -40,7 +41,8 @@ static int fsl_bman_portal_init(uint32_t idx, int is_shared) + ret = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), + &cpuset); + if (ret) { +- error(0, ret, "pthread_getaffinity_np()"); ++ errno = ret; ++ err(0, "pthread_getaffinity_np()"); + return ret; + } + pcfg.cpu = -1; +@@ -60,7 +62,8 @@ static int fsl_bman_portal_init(uint32_t idx, int is_shared) + map.index = idx; + ret = process_portal_map(&map); + if (ret) { +- error(0, ret, "process_portal_map()"); ++ errno = ret; ++ err(0, "process_portal_map()"); + return ret; + } + /* Make the portal's cache-[enabled|inhibited] regions */ +@@ -104,8 +107,10 @@ static int fsl_bman_portal_finish(void) + cfg = bman_destroy_affine_portal(); + DPAA_BUG_ON(cfg != &pcfg); + ret = process_portal_unmap(&map.addr); +- if (ret) +- error(0, ret, "process_portal_unmap()"); ++ if (ret) { ++ errno = ret; ++ err(0, "process_portal_unmap()"); ++ } + return ret; + } + +diff --git a/dpdk/drivers/bus/dpaa/base/qbman/qman_driver.c b/dpdk/drivers/bus/dpaa/base/qbman/qman_driver.c +index 6d9aaff164..dfbafe581a 100644 +--- a/dpdk/drivers/bus/dpaa/base/qbman/qman_driver.c ++++ b/dpdk/drivers/bus/dpaa/base/qbman/qman_driver.c +@@ -9,6 +9,8 @@ + #include + #include "qman_priv.h" + #include ++#include ++ + #include + + /* Global variable containing revision id (even on non-control plane systems +@@ -50,7 +52,8 @@ static int fsl_qman_portal_init(uint32_t index, int is_shared) + map.index = index; + ret = process_portal_map(&map); + if (ret) { +- error(0, ret, "process_portal_map()"); ++ errno = ret; ++ err(0, "process_portal_map()"); + return ret; + } + qpcfg.channel = map.channel; +@@ -96,8 +99,10 @@ static int fsl_qman_portal_finish(void) + cfg = qman_destroy_affine_portal(NULL); + DPAA_BUG_ON(cfg != &qpcfg); + ret = process_portal_unmap(&map.addr); +- if (ret) +- error(0, ret, "process_portal_unmap()"); ++ if (ret) { ++ errno = ret; ++ err(0, "process_portal_unmap()"); ++ } + return ret; + } + +@@ -146,7 +151,8 @@ struct qman_portal *fsl_qman_fq_portal_create(int *fd) + + q_pcfg = kzalloc((sizeof(struct qm_portal_config)), 0); + if (!q_pcfg) { +- error(0, -1, "q_pcfg kzalloc failed"); ++ /* kzalloc sets errno */ ++ err(0, "q_pcfg kzalloc failed"); + return NULL; + } + +@@ -155,7 +161,8 @@ struct qman_portal *fsl_qman_fq_portal_create(int *fd) + q_map.index = QBMAN_ANY_PORTAL_IDX; + ret = process_portal_map(&q_map); + if (ret) { +- error(0, ret, "process_portal_map()"); ++ errno = ret; ++ err(0, "process_portal_map()"); + kfree(q_pcfg); + return NULL; + } +diff --git a/dpdk/drivers/bus/dpaa/dpaa_bus.c b/dpdk/drivers/bus/dpaa/dpaa_bus.c +index 3098e23093..c0102d62b6 100644 +--- a/dpdk/drivers/bus/dpaa/dpaa_bus.c ++++ b/dpdk/drivers/bus/dpaa/dpaa_bus.c +@@ -71,7 +71,7 @@ compare_dpaa_devices(struct rte_dpaa_device *dev1, + { + int comp = 0; + +- /* Segragating ETH from 
SEC devices */ ++ /* Segregating ETH from SEC devices */ + if (dev1->device_type > dev2->device_type) + comp = 1; + else if (dev1->device_type < dev2->device_type) +diff --git a/dpdk/drivers/bus/dpaa/include/fsl_fman.h b/dpdk/drivers/bus/dpaa/include/fsl_fman.h +index a3cf77f0e3..b5408337c8 100644 +--- a/dpdk/drivers/bus/dpaa/include/fsl_fman.h ++++ b/dpdk/drivers/bus/dpaa/include/fsl_fman.h +@@ -9,10 +9,6 @@ + + #include + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /* Status field in FD is updated on Rx side by FMAN with following information. + * Refer to field description in FM BG. + */ +diff --git a/dpdk/drivers/bus/dpaa/include/fsl_qman.h b/dpdk/drivers/bus/dpaa/include/fsl_qman.h +index 10212f0fd5..9b63e559bc 100644 +--- a/dpdk/drivers/bus/dpaa/include/fsl_qman.h ++++ b/dpdk/drivers/bus/dpaa/include/fsl_qman.h +@@ -16,7 +16,7 @@ extern "C" { + #include + + /* FQ lookups (turn this on for 64bit user-space) */ +-#if (__WORDSIZE == 64) ++#ifdef RTE_ARCH_64 + #define CONFIG_FSL_QMAN_FQ_LOOKUP + /* if FQ lookups are supported, this controls the number of initialised, + * s/w-consumed FQs that can be supported at any one time. +@@ -1353,7 +1353,7 @@ __rte_internal + int qman_irqsource_add(u32 bits); + + /** +- * qman_fq_portal_irqsource_add - samilar to qman_irqsource_add, but it ++ * qman_fq_portal_irqsource_add - similar to qman_irqsource_add, but it + * takes portal (fq specific) as input rather than using the thread affined + * portal. + */ +@@ -1416,7 +1416,7 @@ __rte_internal + struct qm_dqrr_entry *qman_dequeue(struct qman_fq *fq); + + /** +- * qman_dqrr_consume - Consume the DQRR entriy after volatile dequeue ++ * qman_dqrr_consume - Consume the DQRR entry after volatile dequeue + * @fq: Frame Queue on which the volatile dequeue command is issued + * @dq: DQRR entry to consume. This is the one which is provided by the + * 'qbman_dequeue' command. +@@ -2017,7 +2017,7 @@ int qman_create_cgr_to_dcp(struct qman_cgr *cgr, u32 flags, u16 dcp_portal, + * @cgr: the 'cgr' object to deregister + * + * "Unplugs" this CGR object from the portal affine to the cpu on which this API +- * is executed. This must be excuted on the same affine portal on which it was ++ * is executed. This must be executed on the same affine portal on which it was + * created. + */ + __rte_internal +diff --git a/dpdk/drivers/bus/dpaa/include/fsl_usd.h b/dpdk/drivers/bus/dpaa/include/fsl_usd.h +index dcf35e4adb..97279421ad 100644 +--- a/dpdk/drivers/bus/dpaa/include/fsl_usd.h ++++ b/dpdk/drivers/bus/dpaa/include/fsl_usd.h +@@ -40,7 +40,7 @@ struct dpaa_raw_portal { + /* Specifies the stash request queue this portal should use */ + uint8_t sdest; + +- /* Specifes a specific portal index to map or QBMAN_ANY_PORTAL_IDX ++ /* Specifies a specific portal index to map or QBMAN_ANY_PORTAL_IDX + * for don't care. The portal index will be populated by the + * driver when the ioctl() successfully completes. 
+ */ +diff --git a/dpdk/drivers/bus/dpaa/include/netcfg.h b/dpdk/drivers/bus/dpaa/include/netcfg.h +index d7d1befd24..bb18a34e3d 100644 +--- a/dpdk/drivers/bus/dpaa/include/netcfg.h ++++ b/dpdk/drivers/bus/dpaa/include/netcfg.h +@@ -9,7 +9,6 @@ + #define __NETCFG_H + + #include +-#include + + /* Configuration information related to a specific ethernet port */ + struct fm_eth_port_cfg { +diff --git a/dpdk/drivers/bus/dpaa/include/process.h b/dpdk/drivers/bus/dpaa/include/process.h +index be52e6f72d..70b18c2ef1 100644 +--- a/dpdk/drivers/bus/dpaa/include/process.h ++++ b/dpdk/drivers/bus/dpaa/include/process.h +@@ -49,7 +49,7 @@ struct dpaa_portal_map { + struct dpaa_ioctl_portal_map { + /* Input parameter, is a qman or bman portal required. */ + enum dpaa_portal_type type; +- /* Specifes a specific portal index to map or 0xffffffff ++ /* Specifies a specific portal index to map or 0xffffffff + * for don't care. + */ + uint32_t index; +diff --git a/dpdk/drivers/bus/dpaa/rte_dpaa_bus.h b/dpdk/drivers/bus/dpaa/rte_dpaa_bus.h +index 48d5cf4625..467baa09b4 100644 +--- a/dpdk/drivers/bus/dpaa/rte_dpaa_bus.h ++++ b/dpdk/drivers/bus/dpaa/rte_dpaa_bus.h +@@ -17,6 +17,10 @@ + #include + #include + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /* This sequence number field is used to store event entry index for + * driver specific usage. For parallel mode queues, invalid + * index will be set and for atomic mode queues, valid value +diff --git a/dpdk/drivers/bus/fslmc/fslmc_bus.c b/dpdk/drivers/bus/fslmc/fslmc_bus.c +index 58435589b2..c96aac7341 100644 +--- a/dpdk/drivers/bus/fslmc/fslmc_bus.c ++++ b/dpdk/drivers/bus/fslmc/fslmc_bus.c +@@ -303,7 +303,6 @@ static int + rte_fslmc_scan(void) + { + int ret; +- int device_count = 0; + char fslmc_dirpath[PATH_MAX]; + DIR *dir; + struct dirent *entry; +@@ -337,7 +336,6 @@ rte_fslmc_scan(void) + /* Error in parsing directory - exit gracefully */ + goto scan_fail_cleanup; + } +- device_count += 1; + } + + closedir(dir); +@@ -529,7 +527,7 @@ rte_fslmc_driver_unregister(struct rte_dpaa2_driver *driver) + + fslmc_bus = driver->fslmc_bus; + +- /* Cleanup the PA->VA Translation table; From whereever this function ++ /* Cleanup the PA->VA Translation table; From wherever this function + * is called from. + */ + if (rte_eal_iova_mode() == RTE_IOVA_PA) +diff --git a/dpdk/drivers/bus/fslmc/fslmc_logs.h b/dpdk/drivers/bus/fslmc/fslmc_logs.h +index dd74cb7dcf..a1e14dd84e 100644 +--- a/dpdk/drivers/bus/fslmc/fslmc_logs.h ++++ b/dpdk/drivers/bus/fslmc/fslmc_logs.h +@@ -18,8 +18,6 @@ extern int dpaa2_logtype_bus; + rte_log(RTE_LOG_DEBUG, dpaa2_logtype_bus, "fslmc: %s(): " fmt "\n", \ + __func__, ##args) + +-#define BUS_INIT_FUNC_TRACE() DPAA2_BUS_DEBUG(" >>") +- + #define DPAA2_BUS_INFO(fmt, args...) \ + DPAA2_BUS_LOG(INFO, fmt, ## args) + #define DPAA2_BUS_ERR(fmt, args...) 
\ +diff --git a/dpdk/drivers/bus/fslmc/fslmc_vfio.c b/dpdk/drivers/bus/fslmc/fslmc_vfio.c +index b52f36c33e..2ab1368202 100644 +--- a/dpdk/drivers/bus/fslmc/fslmc_vfio.c ++++ b/dpdk/drivers/bus/fslmc/fslmc_vfio.c +@@ -968,6 +968,7 @@ fslmc_vfio_setup_group(void) + { + int groupid; + int ret; ++ int vfio_container_fd; + struct vfio_group_status status = { .argsz = sizeof(status) }; + + /* if already done once */ +@@ -986,8 +987,15 @@ fslmc_vfio_setup_group(void) + return 0; + } + ++ ret = rte_vfio_container_create(); ++ if (ret < 0) { ++ DPAA2_BUS_ERR("Failed to open VFIO container"); ++ return ret; ++ } ++ vfio_container_fd = ret; ++ + /* Get the actual group fd */ +- ret = rte_vfio_get_group_fd(groupid); ++ ret = rte_vfio_container_group_bind(vfio_container_fd, groupid); + if (ret < 0) + return ret; + vfio_group.fd = ret; +diff --git a/dpdk/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c b/dpdk/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c +index 1a9dd18b99..983c84419c 100644 +--- a/dpdk/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c ++++ b/dpdk/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c +@@ -178,7 +178,7 @@ static int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev, int cpu_id) + dpio_epoll_fd = epoll_create(1); + ret = rte_dpaa2_intr_enable(&dpio_dev->intr_handle, 0); + if (ret) { +- DPAA2_BUS_ERR("Interrupt registeration failed"); ++ DPAA2_BUS_ERR("Interrupt registration failed"); + return -1; + } + +diff --git a/dpdk/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/dpdk/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h +index ac24f01451..910f3ef6ce 100644 +--- a/dpdk/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h ++++ b/dpdk/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h +@@ -156,7 +156,7 @@ struct dpaa2_queue { + struct rte_cryptodev_data *crypto_data; + }; + uint32_t fqid; /*!< Unique ID of this queue */ +- uint16_t flow_id; /*!< To be used by DPAA2 frmework */ ++ uint16_t flow_id; /*!< To be used by DPAA2 framework */ + uint8_t tc_index; /*!< traffic class identifier */ + uint8_t cgid; /*! < Congestion Group id for this queue */ + uint64_t rx_pkts; +diff --git a/dpdk/drivers/bus/fslmc/qbman/include/compat.h b/dpdk/drivers/bus/fslmc/qbman/include/compat.h +index 1ddd69e127..a4471a80af 100644 +--- a/dpdk/drivers/bus/fslmc/qbman/include/compat.h ++++ b/dpdk/drivers/bus/fslmc/qbman/include/compat.h +@@ -8,9 +8,6 @@ + #ifndef HEADER_COMPAT_H + #define HEADER_COMPAT_H + +-#ifndef _GNU_SOURCE +-#define _GNU_SOURCE +-#endif + #include + #include + #include +diff --git a/dpdk/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h b/dpdk/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h +index eb68c9cab5..5375ea386d 100644 +--- a/dpdk/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h ++++ b/dpdk/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h +@@ -510,7 +510,7 @@ int qbman_result_has_new_result(struct qbman_swp *s, + struct qbman_result *dq); + + /** +- * qbman_check_command_complete() - Check if the previous issued dq commnd ++ * qbman_check_command_complete() - Check if the previous issued dq command + * is completed and results are available in memory. + * @s: the software portal object. + * @dq: the dequeue result read from the memory. +@@ -687,7 +687,7 @@ uint16_t qbman_result_DQ_seqnum(const struct qbman_result *dq); + + /** + * qbman_result_DQ_odpid() - Get the seqnum field in dequeue response +- * odpid is valid only if ODPVAILD flag is TRUE. ++ * odpid is valid only if ODPVALID flag is TRUE. + * @dq: the dequeue result. + * + * Return odpid. 
+@@ -743,7 +743,7 @@ const struct qbman_fd *qbman_result_DQ_fd(const struct qbman_result *dq); + * qbman_result_SCN_state() - Get the state field in State-change notification + * @scn: the state change notification. + * +- * Return the state in the notifiation. ++ * Return the state in the notification. + */ + __rte_internal + uint8_t qbman_result_SCN_state(const struct qbman_result *scn); +@@ -825,7 +825,7 @@ uint64_t qbman_result_bpscn_ctx(const struct qbman_result *scn); + + /* Parsing CGCU */ + /** +- * qbman_result_cgcu_cgid() - Check CGCU resouce id, i.e. cgid ++ * qbman_result_cgcu_cgid() - Check CGCU resource id, i.e. cgid + * @scn: the state change notification. + * + * Return the CGCU resource id. +@@ -903,14 +903,14 @@ void qbman_eq_desc_clear(struct qbman_eq_desc *d); + __rte_internal + void qbman_eq_desc_set_no_orp(struct qbman_eq_desc *d, int respond_success); + /** +- * qbman_eq_desc_set_orp() - Set order-resotration in the enqueue descriptor ++ * qbman_eq_desc_set_orp() - Set order-restoration in the enqueue descriptor + * @d: the enqueue descriptor. + * @response_success: 1 = enqueue with response always; 0 = enqueue with + * rejections returned on a FQ. + * @opr_id: the order point record id. + * @seqnum: the order restoration sequence number. +- * @incomplete: indiates whether this is the last fragments using the same +- * sequeue number. ++ * @incomplete: indicates whether this is the last fragments using the same ++ * sequence number. + */ + __rte_internal + void qbman_eq_desc_set_orp(struct qbman_eq_desc *d, int respond_success, +@@ -1052,10 +1052,10 @@ __rte_internal + uint8_t qbman_result_eqresp_rspid(struct qbman_result *eqresp); + + /** +- * qbman_result_eqresp_rc() - determines if enqueue command is sucessful. ++ * qbman_result_eqresp_rc() - determines if enqueue command is successful. + * @eqresp: enqueue response. + * +- * Return 0 when command is sucessful. ++ * Return 0 when command is successful. + */ + __rte_internal + uint8_t qbman_result_eqresp_rc(struct qbman_result *eqresp); +@@ -1250,7 +1250,7 @@ int qbman_swp_fq_force(struct qbman_swp *s, uint32_t fqid); + /** + * These functions change the FQ flow-control stuff between XON/XOFF. (The + * default is XON.) This setting doesn't affect enqueues to the FQ, just +- * dequeues. XOFF FQs will remain in the tenatively-scheduled state, even when ++ * dequeues. XOFF FQs will remain in the tentatively-scheduled state, even when + * non-empty, meaning they won't be selected for scheduled dequeuing. 
If a FQ is + * changed to XOFF after it had already become truly-scheduled to a channel, and + * a pull dequeue of that channel occurs that selects that FQ for dequeuing, +diff --git a/dpdk/drivers/bus/fslmc/qbman/qbman_portal.c b/dpdk/drivers/bus/fslmc/qbman/qbman_portal.c +index 77c9d508c4..aedcad9258 100644 +--- a/dpdk/drivers/bus/fslmc/qbman/qbman_portal.c ++++ b/dpdk/drivers/bus/fslmc/qbman/qbman_portal.c +@@ -339,17 +339,9 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d) + eqcr_pi = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI); + p->eqcr.pi = eqcr_pi & p->eqcr.pi_ci_mask; + p->eqcr.pi_vb = eqcr_pi & QB_VALID_BIT; +- if ((p->desc.qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 +- && (d->cena_access_mode == qman_cena_fastest_access)) +- p->eqcr.ci = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI) +- & p->eqcr.pi_ci_mask; +- else +- p->eqcr.ci = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_CI) +- & p->eqcr.pi_ci_mask; +- p->eqcr.available = p->eqcr.pi_ring_size - +- qm_cyc_diff(p->eqcr.pi_ring_size, +- p->eqcr.ci & (p->eqcr.pi_ci_mask<<1), +- p->eqcr.pi & (p->eqcr.pi_ci_mask<<1)); ++ p->eqcr.ci = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_CI) ++ & p->eqcr.pi_ci_mask; ++ p->eqcr.available = p->eqcr.pi_ring_size; + + portal_idx_map[p->desc.idx] = p; + return p; +diff --git a/dpdk/drivers/bus/ifpga/ifpga_bus.c b/dpdk/drivers/bus/ifpga/ifpga_bus.c +index bb8b3dcfb9..56e58e04a1 100644 +--- a/dpdk/drivers/bus/ifpga/ifpga_bus.c ++++ b/dpdk/drivers/bus/ifpga/ifpga_bus.c +@@ -64,8 +64,7 @@ ifpga_find_afu_dev(const struct rte_rawdev *rdev, + struct rte_afu_device *afu_dev = NULL; + + TAILQ_FOREACH(afu_dev, &ifpga_afu_dev_list, next) { +- if (afu_dev && +- afu_dev->rawdev == rdev && ++ if (afu_dev->rawdev == rdev && + !ifpga_afu_id_cmp(&afu_dev->id, afu_id)) + return afu_dev; + } +@@ -78,8 +77,7 @@ rte_ifpga_find_afu_by_name(const char *name) + struct rte_afu_device *afu_dev = NULL; + + TAILQ_FOREACH(afu_dev, &ifpga_afu_dev_list, next) { +- if (afu_dev && +- !strcmp(afu_dev->device.name, name)) ++ if (!strcmp(afu_dev->device.name, name)) + return afu_dev; + } + return NULL; +diff --git a/dpdk/drivers/bus/pci/linux/pci.c b/dpdk/drivers/bus/pci/linux/pci.c +index 2e1808b902..e8d1faa4c9 100644 +--- a/dpdk/drivers/bus/pci/linux/pci.c ++++ b/dpdk/drivers/bus/pci/linux/pci.c +@@ -331,7 +331,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) + else + dev->kdrv = RTE_PCI_KDRV_UNKNOWN; + } else { +- dev->kdrv = RTE_PCI_KDRV_NONE; ++ free(dev); + return 0; + } + /* device is valid, add in list (sorted) */ +@@ -569,7 +569,7 @@ pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) + + /* Check for a PowerNV platform */ + while (getline(&line, &len, fp) != -1) { +- if (strstr(line, "platform") != NULL) ++ if (strstr(line, "platform") == NULL) + continue; + + if (strstr(line, "PowerNV") != NULL) { +diff --git a/dpdk/drivers/bus/pci/linux/pci_uio.c b/dpdk/drivers/bus/pci/linux/pci_uio.c +index f3305a2f28..624b2e2ecf 100644 +--- a/dpdk/drivers/bus/pci/linux/pci_uio.c ++++ b/dpdk/drivers/bus/pci/linux/pci_uio.c +@@ -535,21 +535,33 @@ pci_uio_ioport_write(struct rte_pci_ioport *p, + if (len >= 4) { + size = 4; + #if defined(RTE_ARCH_X86) ++#ifdef __GLIBC__ + outl_p(*(const uint32_t *)s, reg); ++#else ++ outl(*(const uint32_t *)s, reg); ++#endif + #else + *(volatile uint32_t *)reg = *(const uint32_t *)s; + #endif + } else if (len >= 2) { + size = 2; + #if defined(RTE_ARCH_X86) ++#ifdef __GLIBC__ + outw_p(*(const uint16_t *)s, reg); ++#else ++ 
outw(*(const uint16_t *)s, reg); ++#endif + #else + *(volatile uint16_t *)reg = *(const uint16_t *)s; + #endif + } else { + size = 1; + #if defined(RTE_ARCH_X86) ++#ifdef __GLIBC__ + outb_p(*s, reg); ++#else ++ outb(*s, reg); ++#endif + #else + *(volatile uint8_t *)reg = *s; + #endif +diff --git a/dpdk/drivers/bus/pci/linux/pci_vfio.c b/dpdk/drivers/bus/pci/linux/pci_vfio.c +index e3f7b6abeb..a4ce51be7b 100644 +--- a/dpdk/drivers/bus/pci/linux/pci_vfio.c ++++ b/dpdk/drivers/bus/pci/linux/pci_vfio.c +@@ -786,7 +786,7 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev) + continue; + } + +- /* skip non-mmapable BARs */ ++ /* skip non-mmappable BARs */ + if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) { + free(reg); + continue; +diff --git a/dpdk/drivers/bus/pci/pci_common.c b/dpdk/drivers/bus/pci/pci_common.c +index 9b8d769287..fa887de11b 100644 +--- a/dpdk/drivers/bus/pci/pci_common.c ++++ b/dpdk/drivers/bus/pci/pci_common.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -190,7 +191,9 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, + } + + if (dev->device.numa_node < 0) { +- RTE_LOG(WARNING, EAL, " Invalid NUMA socket, default to 0\n"); ++ if (rte_socket_count() > 1) ++ RTE_LOG(INFO, EAL, "Device %s is not NUMA-aware, defaulting socket to 0\n", ++ dev->name); + dev->device.numa_node = 0; + } + +diff --git a/dpdk/drivers/bus/pci/rte_bus_pci.h b/dpdk/drivers/bus/pci/rte_bus_pci.h +index fdda046515..876abddefb 100644 +--- a/dpdk/drivers/bus/pci/rte_bus_pci.h ++++ b/dpdk/drivers/bus/pci/rte_bus_pci.h +@@ -52,12 +52,13 @@ TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver); + struct rte_devargs; + + enum rte_pci_kernel_driver { +- RTE_PCI_KDRV_UNKNOWN = 0, +- RTE_PCI_KDRV_IGB_UIO, +- RTE_PCI_KDRV_VFIO, +- RTE_PCI_KDRV_UIO_GENERIC, +- RTE_PCI_KDRV_NIC_UIO, +- RTE_PCI_KDRV_NONE, ++ RTE_PCI_KDRV_UNKNOWN = 0, /* may be misc UIO or bifurcated driver */ ++ RTE_PCI_KDRV_IGB_UIO, /* igb_uio for Linux */ ++ RTE_PCI_KDRV_VFIO, /* VFIO for Linux */ ++ RTE_PCI_KDRV_UIO_GENERIC, /* uio_pci_generic for Linux */ ++ RTE_PCI_KDRV_NIC_UIO, /* nic_uio for FreeBSD */ ++ RTE_PCI_KDRV_NONE, /* no attached driver */ ++ RTE_PCI_KDRV_NET_UIO, /* NetUIO for Windows */ + }; + + /** +diff --git a/dpdk/drivers/bus/pci/windows/pci.c b/dpdk/drivers/bus/pci/windows/pci.c +index f662584528..4944c21f67 100644 +--- a/dpdk/drivers/bus/pci/windows/pci.c ++++ b/dpdk/drivers/bus/pci/windows/pci.c +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + + #include "private.h" + #include "pci_netuio.h" +@@ -23,20 +24,22 @@ DEFINE_DEVPROPKEY(DEVPKEY_Device_Numa_Node, 0x540b947e, 0x8b40, 0x45bc, + * the registry hive for PCI devices. + */ + +-/* The functions below are not implemented on Windows, ++/* Some of the functions below are not implemented on Windows, + * but need to be defined for compilation purposes + */ + + /* Map pci device */ + int +-rte_pci_map_device(struct rte_pci_device *dev __rte_unused) ++rte_pci_map_device(struct rte_pci_device *dev) + { +- /* This function is not implemented on Windows. +- * We really should short-circuit the call to these functions by +- * clearing the RTE_PCI_DRV_NEED_MAPPING flag +- * in the rte_pci_driver flags. ++ /* Only return success for devices bound to netuio. ++ * Devices that are bound to netuio are mapped at ++ * the bus probing stage. 
+ */ +- return 0; ++ if (dev->kdrv == RTE_PCI_KDRV_NET_UIO) ++ return 0; ++ else ++ return -1; + } + + /* Unmap pci device */ +@@ -202,14 +205,14 @@ get_device_resource_info(HDEVINFO dev_info, + int ret; + + switch (dev->kdrv) { +- case RTE_PCI_KDRV_NONE: +- /* mem_resource - Unneeded for RTE_PCI_KDRV_NONE */ ++ case RTE_PCI_KDRV_UNKNOWN: ++ /* bifurcated driver case - mem_resource is unneeded */ + dev->mem_resource[0].phys_addr = 0; + dev->mem_resource[0].len = 0; + dev->mem_resource[0].addr = NULL; + break; +- case RTE_PCI_KDRV_NIC_UIO: +- /* get device info from netuio kernel driver */ ++ case RTE_PCI_KDRV_NET_UIO: ++ /* get device info from NetUIO kernel driver */ + ret = get_netuio_device_info(dev_info, dev_info_data, dev); + if (ret != 0) { + RTE_LOG(DEBUG, EAL, +@@ -231,6 +234,7 @@ get_device_resource_info(HDEVINFO dev_info, + } + + /* Get NUMA node using DEVPKEY_Device_Numa_Node */ ++ dev->device.numa_node = SOCKET_ID_ANY; + res = SetupDiGetDevicePropertyW(dev_info, dev_info_data, + &DEVPKEY_Device_Numa_Node, &property_type, + (BYTE *)&numa_node, sizeof(numa_node), NULL, 0); +@@ -300,9 +304,9 @@ set_kernel_driver_type(PSP_DEVINFO_DATA device_info_data, + { + /* set kernel driver type based on device class */ + if (IsEqualGUID(&(device_info_data->ClassGuid), &GUID_DEVCLASS_NETUIO)) +- dev->kdrv = RTE_PCI_KDRV_NIC_UIO; ++ dev->kdrv = RTE_PCI_KDRV_NET_UIO; + else +- dev->kdrv = RTE_PCI_KDRV_NONE; ++ dev->kdrv = RTE_PCI_KDRV_UNKNOWN; + } + + static int +diff --git a/dpdk/drivers/bus/vdev/rte_bus_vdev.h b/dpdk/drivers/bus/vdev/rte_bus_vdev.h +index d14eeb41b0..38b51e2f74 100644 +--- a/dpdk/drivers/bus/vdev/rte_bus_vdev.h ++++ b/dpdk/drivers/bus/vdev/rte_bus_vdev.h +@@ -192,7 +192,7 @@ rte_vdev_remove_custom_scan(rte_vdev_scan_callback callback, void *user_arg); + int rte_vdev_init(const char *name, const char *args); + + /** +- * Uninitalize a driver specified by name. ++ * Uninitialize a driver specified by name. + * + * @param name + * The pointer to a driver name to be uninitialized. 
+diff --git a/dpdk/drivers/bus/vmbus/linux/vmbus_bus.c b/dpdk/drivers/bus/vmbus/linux/vmbus_bus.c +index 3c924eee14..68f6cc5742 100644 +--- a/dpdk/drivers/bus/vmbus/linux/vmbus_bus.c ++++ b/dpdk/drivers/bus/vmbus/linux/vmbus_bus.c +@@ -236,13 +236,14 @@ vmbus_scan_one(const char *name) + char filename[PATH_MAX]; + char dirname[PATH_MAX]; + unsigned long tmp; ++ char *dev_name; + + dev = calloc(1, sizeof(*dev)); + if (dev == NULL) + return -1; + + dev->device.bus = &rte_vmbus_bus.bus; +- dev->device.name = strdup(name); ++ dev->device.name = dev_name = strdup(name); + if (!dev->device.name) + goto error; + +@@ -261,6 +262,7 @@ vmbus_scan_one(const char *name) + + /* skip non-network devices */ + if (rte_uuid_compare(dev->class_id, vmbus_nic_uuid) != 0) { ++ free(dev_name); + free(dev); + return 0; + } +@@ -312,6 +314,7 @@ vmbus_scan_one(const char *name) + } else { /* already registered */ + VMBUS_LOG(NOTICE, + "%s already registered", name); ++ free(dev_name); + free(dev); + } + return 0; +@@ -322,6 +325,7 @@ vmbus_scan_one(const char *name) + error: + VMBUS_LOG(DEBUG, "failed"); + ++ free(dev_name); + free(dev); + return -1; + } +diff --git a/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c b/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c +index 5dc0c47de6..fd64be93b0 100644 +--- a/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c ++++ b/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c +@@ -11,6 +11,7 @@ + #include + #include + ++#include + #include + #include + #include +@@ -203,6 +204,37 @@ static int vmbus_uio_map_subchan(const struct rte_vmbus_device *dev, + struct stat sb; + void *mapaddr; + int fd; ++ struct mapped_vmbus_resource *uio_res; ++ int channel_idx; ++ ++ uio_res = vmbus_uio_find_resource(dev); ++ if (!uio_res) { ++ VMBUS_LOG(ERR, "can not find resources for mapping subchan"); ++ return -ENOMEM; ++ } ++ ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { ++ if (uio_res->nb_subchannels >= UIO_MAX_SUBCHANNEL) { ++ VMBUS_LOG(ERR, ++ "exceeding max subchannels UIO_MAX_SUBCHANNEL(%d)", ++ UIO_MAX_SUBCHANNEL); ++ VMBUS_LOG(ERR, "Change UIO_MAX_SUBCHANNEL and recompile"); ++ return -ENOMEM; ++ } ++ } else { ++ for (channel_idx = 0; channel_idx < uio_res->nb_subchannels; ++ channel_idx++) ++ if (uio_res->subchannel_maps[channel_idx].relid == ++ chan->relid) ++ break; ++ if (channel_idx == uio_res->nb_subchannels) { ++ VMBUS_LOG(ERR, ++ "couldn't find sub channel %d from shared mapping in primary", ++ chan->relid); ++ return -ENOMEM; ++ } ++ vmbus_map_addr = uio_res->subchannel_maps[channel_idx].addr; ++ } + + snprintf(ring_path, sizeof(ring_path), + "%s/%s/channels/%u/ring", +@@ -239,58 +271,33 @@ static int vmbus_uio_map_subchan(const struct rte_vmbus_device *dev, + if (mapaddr == MAP_FAILED) + return -EIO; + ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { ++ ++ /* Add this mapping to uio_res for use by secondary */ ++ uio_res->subchannel_maps[uio_res->nb_subchannels].relid = ++ chan->relid; ++ uio_res->subchannel_maps[uio_res->nb_subchannels].addr = ++ mapaddr; ++ uio_res->subchannel_maps[uio_res->nb_subchannels].size = ++ file_size; ++ uio_res->nb_subchannels++; ++ ++ vmbus_map_addr = RTE_PTR_ADD(mapaddr, file_size); ++ } else { ++ if (mapaddr != vmbus_map_addr) { ++ VMBUS_LOG(ERR, "failed to map channel %d to addr %p", ++ chan->relid, mapaddr); ++ vmbus_unmap_resource(mapaddr, file_size); ++ return -EIO; ++ } ++ } ++ + *ring_size = file_size / 2; + *ring_buf = mapaddr; + +- vmbus_map_addr = RTE_PTR_ADD(mapaddr, file_size); + return 0; + } + +-int +-vmbus_uio_map_secondary_subchan(const struct 
rte_vmbus_device *dev, +- const struct vmbus_channel *chan) +-{ +- const struct vmbus_br *br = &chan->txbr; +- char ring_path[PATH_MAX]; +- void *mapaddr, *ring_buf; +- uint32_t ring_size; +- int fd; +- +- snprintf(ring_path, sizeof(ring_path), +- "%s/%s/channels/%u/ring", +- SYSFS_VMBUS_DEVICES, dev->device.name, +- chan->relid); +- +- ring_buf = br->vbr; +- ring_size = br->dsize + sizeof(struct vmbus_bufring); +- VMBUS_LOG(INFO, "secondary ring_buf %p size %u", +- ring_buf, ring_size); +- +- fd = open(ring_path, O_RDWR); +- if (fd < 0) { +- VMBUS_LOG(ERR, "Cannot open %s: %s", +- ring_path, strerror(errno)); +- return -errno; +- } +- +- mapaddr = vmbus_map_resource(ring_buf, fd, 0, 2 * ring_size, 0); +- close(fd); +- +- if (mapaddr == ring_buf) +- return 0; +- +- if (mapaddr == MAP_FAILED) +- VMBUS_LOG(ERR, +- "mmap subchan %u in secondary failed", chan->relid); +- else { +- VMBUS_LOG(ERR, +- "mmap subchan %u in secondary address mismatch", +- chan->relid); +- vmbus_unmap_resource(mapaddr, 2 * ring_size); +- } +- return -1; +-} +- + int vmbus_uio_map_rings(struct vmbus_channel *chan) + { + const struct rte_vmbus_device *dev = chan->device; +diff --git a/dpdk/drivers/bus/vmbus/private.h b/dpdk/drivers/bus/vmbus/private.h +index f19b14e4a6..5b8b01b808 100644 +--- a/dpdk/drivers/bus/vmbus/private.h ++++ b/dpdk/drivers/bus/vmbus/private.h +@@ -36,6 +36,13 @@ struct vmbus_map { + uint64_t size; /* length */ + }; + ++#define UIO_MAX_SUBCHANNEL 128 ++struct subchannel_map { ++ uint16_t relid; ++ void *addr; ++ uint64_t size; ++}; ++ + /* + * For multi-process we need to reproduce all vmbus mappings in secondary + * processes, so save them in a tailq. +@@ -44,10 +51,14 @@ struct mapped_vmbus_resource { + TAILQ_ENTRY(mapped_vmbus_resource) next; + + rte_uuid_t id; ++ + int nb_maps; +- struct vmbus_channel *primary; + struct vmbus_map maps[VMBUS_MAX_RESOURCE]; ++ + char path[PATH_MAX]; ++ ++ int nb_subchannels; ++ struct subchannel_map subchannel_maps[UIO_MAX_SUBCHANNEL]; + }; + + TAILQ_HEAD(mapped_vmbus_res_list, mapped_vmbus_resource); +@@ -66,6 +77,8 @@ struct vmbus_channel { + uint16_t relid; + uint16_t subchannel_id; + uint8_t monitor_id; ++ ++ struct vmbus_mon_page *monitor_page; + }; + + #define VMBUS_MAX_CHANNELS 64 +@@ -108,8 +121,6 @@ bool vmbus_uio_subchannels_supported(const struct rte_vmbus_device *dev, + int vmbus_uio_get_subchan(struct vmbus_channel *primary, + struct vmbus_channel **subchan); + int vmbus_uio_map_rings(struct vmbus_channel *chan); +-int vmbus_uio_map_secondary_subchan(const struct rte_vmbus_device *dev, +- const struct vmbus_channel *chan); + + void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen); + +diff --git a/dpdk/drivers/bus/vmbus/rte_bus_vmbus.h b/dpdk/drivers/bus/vmbus/rte_bus_vmbus.h +index 4cf73ce815..81b17817f1 100644 +--- a/dpdk/drivers/bus/vmbus/rte_bus_vmbus.h ++++ b/dpdk/drivers/bus/vmbus/rte_bus_vmbus.h +@@ -292,7 +292,7 @@ struct iova_list { + * @param data + * Pointer to the buffer additional data to send + * @param dlen +- * Maximum size of what the the buffer will hold ++ * Maximum size of what the buffer will hold + * @param xact + * Identifier of the request + * @param flags +diff --git a/dpdk/drivers/bus/vmbus/vmbus_channel.c b/dpdk/drivers/bus/vmbus/vmbus_channel.c +index f67f1c438a..9bd01679c3 100644 +--- a/dpdk/drivers/bus/vmbus/vmbus_channel.c ++++ b/dpdk/drivers/bus/vmbus/vmbus_channel.c +@@ -27,7 +27,7 @@ vmbus_sync_set_bit(volatile uint32_t *addr, uint32_t mask) + } + + static inline void +-vmbus_set_monitor(const 
struct rte_vmbus_device *dev, uint32_t monitor_id) ++vmbus_set_monitor(const struct vmbus_channel *channel, uint32_t monitor_id) + { + uint32_t *monitor_addr, monitor_mask; + unsigned int trigger_index; +@@ -35,15 +35,14 @@ vmbus_set_monitor(const struct rte_vmbus_device *dev, uint32_t monitor_id) + trigger_index = monitor_id / HV_MON_TRIG_LEN; + monitor_mask = 1u << (monitor_id % HV_MON_TRIG_LEN); + +- monitor_addr = &dev->monitor_page->trigs[trigger_index].pending; ++ monitor_addr = &channel->monitor_page->trigs[trigger_index].pending; + vmbus_sync_set_bit(monitor_addr, monitor_mask); + } + + static void +-vmbus_set_event(const struct rte_vmbus_device *dev, +- const struct vmbus_channel *chan) ++vmbus_set_event(const struct vmbus_channel *chan) + { +- vmbus_set_monitor(dev, chan->monitor_id); ++ vmbus_set_monitor(chan, chan->monitor_id); + } + + /* +@@ -81,7 +80,6 @@ rte_vmbus_set_latency(const struct rte_vmbus_device *dev, + void + rte_vmbus_chan_signal_tx(const struct vmbus_channel *chan) + { +- const struct rte_vmbus_device *dev = chan->device; + const struct vmbus_br *tbr = &chan->txbr; + + /* Make sure all updates are done before signaling host */ +@@ -91,7 +89,7 @@ rte_vmbus_chan_signal_tx(const struct vmbus_channel *chan) + if (tbr->vbr->imask) + return; + +- vmbus_set_event(dev, chan); ++ vmbus_set_event(chan); + } + + +@@ -218,7 +216,7 @@ void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read) + if (write_sz <= pending_sz) + return; + +- vmbus_set_event(chan->device, chan); ++ vmbus_set_event(chan); + } + + int rte_vmbus_chan_recv(struct vmbus_channel *chan, void *data, uint32_t *len, +@@ -325,6 +323,7 @@ int vmbus_chan_create(const struct rte_vmbus_device *device, + chan->subchannel_id = subid; + chan->relid = relid; + chan->monitor_id = monitor_id; ++ chan->monitor_page = device->monitor_page; + *new_chan = chan; + + err = vmbus_uio_map_rings(chan); +@@ -351,10 +350,8 @@ int rte_vmbus_chan_open(struct rte_vmbus_device *device, + + err = vmbus_chan_create(device, device->relid, 0, + device->monitor_id, new_chan); +- if (!err) { ++ if (!err) + device->primary = *new_chan; +- uio_res->primary = *new_chan; +- } + + return err; + } +diff --git a/dpdk/drivers/bus/vmbus/vmbus_common.c b/dpdk/drivers/bus/vmbus/vmbus_common.c +index 39b3308577..da047f2a54 100644 +--- a/dpdk/drivers/bus/vmbus/vmbus_common.c ++++ b/dpdk/drivers/bus/vmbus/vmbus_common.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -112,7 +113,9 @@ vmbus_probe_one_driver(struct rte_vmbus_driver *dr, + dev->driver = dr; + + if (dev->device.numa_node < 0) { +- VMBUS_LOG(WARNING, " Invalid NUMA socket, default to 0"); ++ if (rte_socket_count() > 1) ++ VMBUS_LOG(INFO, "Device %s is not NUMA-aware, defaulting socket to 0", ++ guid); + dev->device.numa_node = 0; + } + +@@ -131,7 +134,7 @@ vmbus_probe_one_driver(struct rte_vmbus_driver *dr, + + /* + * If device class GUID matches, call the probe function of +- * registere drivers for the vmbus device. ++ * register drivers for the vmbus device. + * Return -1 if initialization failed, + * and 1 if no driver found for this device. 
+ */ +diff --git a/dpdk/drivers/bus/vmbus/vmbus_common_uio.c b/dpdk/drivers/bus/vmbus/vmbus_common_uio.c +index a689bf11b3..158e05c889 100644 +--- a/dpdk/drivers/bus/vmbus/vmbus_common_uio.c ++++ b/dpdk/drivers/bus/vmbus/vmbus_common_uio.c +@@ -69,8 +69,10 @@ vmbus_uio_map_secondary(struct rte_vmbus_device *dev) + fd, offset, + uio_res->maps[i].size, 0); + +- if (mapaddr == uio_res->maps[i].addr) ++ if (mapaddr == uio_res->maps[i].addr) { ++ dev->resource[i].addr = mapaddr; + continue; /* successful map */ ++ } + + if (mapaddr == MAP_FAILED) + VMBUS_LOG(ERR, +@@ -88,19 +90,39 @@ vmbus_uio_map_secondary(struct rte_vmbus_device *dev) + /* fd is not needed in secondary process, close it */ + close(fd); + +- dev->primary = uio_res->primary; +- if (!dev->primary) { +- VMBUS_LOG(ERR, "missing primary channel"); +- return -1; ++ /* Create and map primary channel */ ++ if (vmbus_chan_create(dev, dev->relid, 0, ++ dev->monitor_id, &dev->primary)) { ++ VMBUS_LOG(ERR, "cannot create primary channel"); ++ goto failed_primary; + } + +- STAILQ_FOREACH(chan, &dev->primary->subchannel_list, next) { +- if (vmbus_uio_map_secondary_subchan(dev, chan) != 0) { +- VMBUS_LOG(ERR, "cannot map secondary subchan"); +- return -1; ++ /* Create and map sub channels */ ++ for (i = 0; i < uio_res->nb_subchannels; i++) { ++ if (rte_vmbus_subchan_open(dev->primary, &chan)) { ++ VMBUS_LOG(ERR, ++ "failed to create subchannel at index %d", i); ++ goto failed_secondary; + } + } ++ + return 0; ++ ++failed_secondary: ++ while (!STAILQ_EMPTY(&dev->primary->subchannel_list)) { ++ chan = STAILQ_FIRST(&dev->primary->subchannel_list); ++ vmbus_unmap_resource(chan->txbr.vbr, chan->txbr.dsize * 2); ++ rte_vmbus_chan_close(chan); ++ } ++ rte_vmbus_chan_close(dev->primary); ++ ++failed_primary: ++ for (i = 0; i != uio_res->nb_maps; i++) { ++ vmbus_unmap_resource( ++ uio_res->maps[i].addr, uio_res->maps[i].size); ++ } ++ ++ return -1; + } + + static int +@@ -188,6 +210,11 @@ vmbus_uio_unmap(struct mapped_vmbus_resource *uio_res) + if (uio_res == NULL) + return; + ++ for (i = 0; i < uio_res->nb_subchannels; i++) { ++ vmbus_unmap_resource(uio_res->subchannel_maps[i].addr, ++ uio_res->subchannel_maps[i].size); ++ } ++ + for (i = 0; i != uio_res->nb_maps; i++) { + vmbus_unmap_resource(uio_res->maps[i].addr, + (size_t)uio_res->maps[i].size); +@@ -211,8 +238,11 @@ vmbus_uio_unmap_resource(struct rte_vmbus_device *dev) + return; + + /* secondary processes - just free maps */ +- if (rte_eal_process_type() != RTE_PROC_PRIMARY) +- return vmbus_uio_unmap(uio_res); ++ if (rte_eal_process_type() != RTE_PROC_PRIMARY) { ++ vmbus_uio_unmap(uio_res); ++ rte_free(dev->primary); ++ return; ++ } + + TAILQ_REMOVE(uio_res_list, uio_res, next); + +diff --git a/dpdk/drivers/common/cpt/cpt_hw_types.h b/dpdk/drivers/common/cpt/cpt_hw_types.h +index a1f969eb14..522844c351 100644 +--- a/dpdk/drivers/common/cpt/cpt_hw_types.h ++++ b/dpdk/drivers/common/cpt/cpt_hw_types.h +@@ -466,7 +466,7 @@ typedef union { + uint64_t dbell_cnt : 20; + /** [ 19: 0](R/W/H) Number of instruction queue 64-bit words + * to add to the CPT instruction doorbell count. Readback value +- * is the the current number of pending doorbell requests. ++ * is the current number of pending doorbell requests. + * + * If counter overflows CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. 
+ * +diff --git a/dpdk/drivers/common/cpt/cpt_mcode_defines.h b/dpdk/drivers/common/cpt/cpt_mcode_defines.h +index 56a745f419..188f9f7511 100644 +--- a/dpdk/drivers/common/cpt/cpt_mcode_defines.h ++++ b/dpdk/drivers/common/cpt/cpt_mcode_defines.h +@@ -366,7 +366,7 @@ typedef struct buf_ptr { + /* IOV Pointer */ + typedef struct{ + int buf_cnt; +- buf_ptr_t bufs[0]; ++ buf_ptr_t bufs[]; + } iov_ptr_t; + + typedef struct fc_params { +diff --git a/dpdk/drivers/common/cpt/cpt_ucode.h b/dpdk/drivers/common/cpt/cpt_ucode.h +index 0536620710..b7e89de964 100644 +--- a/dpdk/drivers/common/cpt/cpt_ucode.h ++++ b/dpdk/drivers/common/cpt/cpt_ucode.h +@@ -246,7 +246,7 @@ cpt_fc_ciph_set_key(struct cpt_ctx *cpt_ctx, cipher_type_t type, + if (cpt_ctx->fc_type == FC_GEN) { + /* + * We need to always say IV is from DPTR as user can +- * sometimes iverride IV per operation. ++ * sometimes override IV per operation. + */ + fctx->enc.iv_source = CPT_FROM_DPTR; + +@@ -394,27 +394,26 @@ fill_sg_comp_from_iov(sg_comp_t *list, + int32_t j; + uint32_t extra_len = extra_buf ? extra_buf->size : 0; + uint32_t size = *psize; +- buf_ptr_t *bufs; + +- bufs = from->bufs; + for (j = 0; (j < from->buf_cnt) && size; j++) { ++ phys_addr_t dma_addr = from->bufs[j].dma_addr; ++ uint32_t buf_sz = from->bufs[j].size; ++ sg_comp_t *to = &list[i >> 2]; + phys_addr_t e_dma_addr; + uint32_t e_len; +- sg_comp_t *to = &list[i >> 2]; + + if (unlikely(from_offset)) { +- if (from_offset >= bufs[j].size) { +- from_offset -= bufs[j].size; ++ if (from_offset >= buf_sz) { ++ from_offset -= buf_sz; + continue; + } +- e_dma_addr = bufs[j].dma_addr + from_offset; +- e_len = (size > (bufs[j].size - from_offset)) ? +- (bufs[j].size - from_offset) : size; ++ e_dma_addr = dma_addr + from_offset; ++ e_len = (size > (buf_sz - from_offset)) ? ++ (buf_sz - from_offset) : size; + from_offset = 0; + } else { +- e_dma_addr = bufs[j].dma_addr; +- e_len = (size > bufs[j].size) ? +- bufs[j].size : size; ++ e_dma_addr = dma_addr; ++ e_len = (size > buf_sz) ? buf_sz : size; + } + + to->u.s.len[i % 4] = rte_cpu_to_be_16(e_len); +@@ -2248,7 +2247,7 @@ cpt_kasumi_dec_prep(uint64_t d_offs, + /* consider iv len */ + encr_offset += iv_len; + +- inputlen = iv_len + (RTE_ALIGN(encr_data_len, 8) / 8); ++ inputlen = encr_offset + (RTE_ALIGN(encr_data_len, 8) / 8); + outputlen = inputlen; + + /* save space for offset ctrl & iv */ +@@ -2948,7 +2947,7 @@ prepare_iov_from_pkt_inplace(struct rte_mbuf *pkt, + tailroom = rte_pktmbuf_tailroom(pkt); + if (likely((headroom >= 24) && + (tailroom >= 8))) { +- /* In 83XX this is prerequivisit for Direct mode */ ++ /* In 83XX this is prerequisite for Direct mode */ + *flags |= SINGLE_BUF_HEADTAILROOM; + } + param->bufs[0].vaddr = seg_data; +diff --git a/dpdk/drivers/common/cpt/cpt_ucode_asym.h b/dpdk/drivers/common/cpt/cpt_ucode_asym.h +index 50c6f58d3a..3b5f94c8de 100644 +--- a/dpdk/drivers/common/cpt/cpt_ucode_asym.h ++++ b/dpdk/drivers/common/cpt/cpt_ucode_asym.h +@@ -740,7 +740,7 @@ cpt_ecdsa_verify_prep(struct rte_crypto_ecdsa_op_param *ecdsa, + * Set dlen = sum(sizeof(fpm address), ROUNDUP8(message len), + * ROUNDUP8(sign len(r and s), public key len(x and y coordinates), + * prime len, order len)). +- * Please note sign, public key and order can not excede prime length ++ * Please note sign, public key and order can not exceed prime length + * i.e. 
6 * p_align + */ + dlen = sizeof(fpm_table_iova) + m_align + (6 * p_align); +diff --git a/dpdk/drivers/common/dpaax/caamflib/compat.h b/dpdk/drivers/common/dpaax/caamflib/compat.h +index 36ee4b5335..c1a693498d 100644 +--- a/dpdk/drivers/common/dpaax/caamflib/compat.h ++++ b/dpdk/drivers/common/dpaax/caamflib/compat.h +@@ -11,7 +11,7 @@ + #include + #include + +-#ifdef __GLIBC__ ++#ifdef RTE_EXEC_ENV_LINUX + #include + #include + #include +@@ -24,7 +24,7 @@ + #error "Undefined endianness" + #endif + +-#else ++#else /* !RTE_EXEC_ENV_LINUX */ + #error Environment not supported! + #endif + +@@ -40,7 +40,7 @@ + #define __maybe_unused __rte_unused + #endif + +-#if defined(__GLIBC__) && !defined(pr_debug) ++#if !defined(pr_debug) + #if !defined(SUPPRESS_PRINTS) && defined(RTA_DEBUG) + #define pr_debug(fmt, ...) \ + RTE_LOG(DEBUG, PMD, "%s(): " fmt "\n", __func__, ##__VA_ARGS__) +@@ -49,7 +49,7 @@ + #endif + #endif /* pr_debug */ + +-#if defined(__GLIBC__) && !defined(pr_err) ++#if !defined(pr_err) + #if !defined(SUPPRESS_PRINTS) + #define pr_err(fmt, ...) \ + RTE_LOG(ERR, PMD, "%s(): " fmt "\n", __func__, ##__VA_ARGS__) +@@ -58,7 +58,7 @@ + #endif + #endif /* pr_err */ + +-#if defined(__GLIBC__) && !defined(pr_warn) ++#if !defined(pr_warn) + #if !defined(SUPPRESS_PRINTS) + #define pr_warn(fmt, ...) \ + RTE_LOG(WARNING, PMD, "%s(): " fmt "\n", __func__, ##__VA_ARGS__) +@@ -101,7 +101,7 @@ + #endif + + /* Use Linux naming convention */ +-#ifdef __GLIBC__ ++#if defined(RTE_EXEC_ENV_LINUX) || defined(__GLIBC__) + #define swab16(x) rte_bswap16(x) + #define swab32(x) rte_bswap32(x) + #define swab64(x) rte_bswap64(x) +diff --git a/dpdk/drivers/common/dpaax/caamflib/desc/algo.h b/dpdk/drivers/common/dpaax/caamflib/desc/algo.h +index 41cac5abd0..30a34d784f 100644 +--- a/dpdk/drivers/common/dpaax/caamflib/desc/algo.h ++++ b/dpdk/drivers/common/dpaax/caamflib/desc/algo.h +@@ -67,7 +67,7 @@ cnstr_shdsc_zuce(uint32_t *descbuf, bool ps, bool swap, + * @authlen: size of digest + * + * The IV prepended before hmac payload must be 8 bytes consisting +- * of COUNT||BEAERER||DIR. The COUNT is of 32-bits, bearer is of 5 bits and ++ * of COUNT||BEARER||DIR. The COUNT is of 32-bits, bearer is of 5 bits and + * direction is of 1 bit - totalling to 38 bits. + * + * Return: size of descriptor written in words or negative number on error +diff --git a/dpdk/drivers/common/dpaax/caamflib/desc/pdcp.h b/dpdk/drivers/common/dpaax/caamflib/desc/pdcp.h +index f084cf1de0..6dd1122e15 100644 +--- a/dpdk/drivers/common/dpaax/caamflib/desc/pdcp.h ++++ b/dpdk/drivers/common/dpaax/caamflib/desc/pdcp.h +@@ -1,6 +1,6 @@ + /* SPDX-License-Identifier: BSD-3-Clause or GPL-2.0+ + * Copyright 2008-2013 Freescale Semiconductor, Inc. +- * Copyright 2019-2020 NXP ++ * Copyright 2019-2021 NXP + */ + + #ifndef __DESC_PDCP_H__ +@@ -3710,9 +3710,10 @@ cnstr_shdsc_pdcp_short_mac(uint32_t *descbuf, + break; + + case PDCP_AUTH_TYPE_SNOW: ++ /* IV calculation based on 3GPP specs. 36331, section:5.3.7.4 */ + iv[0] = 0xFFFFFFFF; +- iv[1] = swap ? swab32(0x04000000) : 0x04000000; +- iv[2] = swap ? swab32(0xF8000000) : 0xF8000000; ++ iv[1] = swab32(0x04000000); ++ iv[2] = swab32(0xF8000000); + + KEY(p, KEY2, authdata->key_enc_flags, authdata->key, + authdata->keylen, INLINE_KEY(authdata)); +@@ -3789,7 +3790,7 @@ cnstr_shdsc_pdcp_short_mac(uint32_t *descbuf, + return -ENOTSUP; + } + iv[0] = 0xFFFFFFFF; +- iv[1] = swap ? 
swab32(0xFC000000) : 0xFC000000; ++ iv[1] = swab32(0xFC000000); + iv[2] = 0x00000000; /* unused */ + + KEY(p, KEY2, authdata->key_enc_flags, authdata->key, +diff --git a/dpdk/drivers/common/dpaax/caamflib/desc/sdap.h b/dpdk/drivers/common/dpaax/caamflib/desc/sdap.h +index 6523db1733..b179ea4815 100644 +--- a/dpdk/drivers/common/dpaax/caamflib/desc/sdap.h ++++ b/dpdk/drivers/common/dpaax/caamflib/desc/sdap.h +@@ -416,10 +416,10 @@ pdcp_sdap_insert_snoop_op(struct program *p, bool swap __maybe_unused, + + /* Set the variable size of data the register will write */ + if (dir == OP_TYPE_ENCAP_PROTOCOL) { +- /* We will add the interity data so add its length */ ++ /* We will add the integrity data so add its length */ + MATHI(p, SEQINSZ, ADD, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2); + } else { +- /* We will check the interity data so remove its length */ ++ /* We will check the integrity data so remove its length */ + MATHI(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2); + /* Do not take the ICV in the out-snooping configuration */ + MATHI(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQINSZ, 4, IMMED2); +@@ -721,7 +721,7 @@ static inline int pdcp_sdap_insert_no_snoop_op( + CLRW_CLR_C1MODE, + CLRW, 0, 4, IMMED); + +- /* Load the key for authentcation */ ++ /* Load the key for authentication */ + KEY(p, KEY1, authdata->key_enc_flags, authdata->key, + authdata->keylen, INLINE_KEY(authdata)); + +diff --git a/dpdk/drivers/common/dpaax/compat.h b/dpdk/drivers/common/dpaax/compat.h +index 1a5f36e99e..7166f8cceb 100644 +--- a/dpdk/drivers/common/dpaax/compat.h ++++ b/dpdk/drivers/common/dpaax/compat.h +@@ -10,10 +10,6 @@ + #define __COMPAT_H + + #include +- +-#ifndef _GNU_SOURCE +-#define _GNU_SOURCE +-#endif + #include + #include + #include +@@ -34,7 +30,6 @@ + #include + #include + #include +-#include + #include + #include + #include +diff --git a/dpdk/drivers/common/dpaax/dpaax_iova_table.c b/dpdk/drivers/common/dpaax/dpaax_iova_table.c +index 91bee65e7b..ddc65b5ec2 100644 +--- a/dpdk/drivers/common/dpaax/dpaax_iova_table.c ++++ b/dpdk/drivers/common/dpaax/dpaax_iova_table.c +@@ -261,7 +261,7 @@ dpaax_iova_table_depopulate(void) + rte_free(dpaax_iova_table_p->entries); + dpaax_iova_table_p = NULL; + +- DPAAX_DEBUG("IOVA Table cleanedup"); ++ DPAAX_DEBUG("IOVA Table cleaned"); + } + + int +@@ -366,8 +366,10 @@ dpaax_iova_table_dump(void) + } + + DPAAX_DEBUG(" === Start of PA->VA Translation Table ==="); +- if (dpaax_iova_table_p == NULL) ++ if (dpaax_iova_table_p == NULL) { + DPAAX_DEBUG("\tNULL"); ++ return; ++ } + + entry = dpaax_iova_table_p->entries; + for (i = 0; i < dpaax_iova_table_p->count; i++) { +diff --git a/dpdk/drivers/common/dpaax/dpaax_iova_table.h b/dpdk/drivers/common/dpaax/dpaax_iova_table.h +index 230fba8ba0..b1f2300c52 100644 +--- a/dpdk/drivers/common/dpaax/dpaax_iova_table.h ++++ b/dpdk/drivers/common/dpaax/dpaax_iova_table.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright 2018 NXP ++ * Copyright 2018-2021 NXP + */ + + #ifndef _DPAAX_IOVA_TABLE_H_ +@@ -101,6 +101,12 @@ dpaax_iova_table_get_va(phys_addr_t paddr) { + + /* paddr > entry->start && paddr <= entry->(start+len) */ + index = (paddr_align - entry[i].start)/DPAAX_MEM_SPLIT; ++ /* paddr is within range, but no vaddr entry ever written ++ * at index ++ */ ++ if ((void *)(uintptr_t)entry[i].pages[index] == NULL) ++ return NULL; ++ + vaddr = (void *)((uintptr_t)entry[i].pages[index] + offset); + break; + } while (1); +diff --git a/dpdk/drivers/common/dpaax/meson.build 
b/dpdk/drivers/common/dpaax/meson.build +index 4535482701..b7f177a62e 100644 +--- a/dpdk/drivers/common/dpaax/meson.build ++++ b/dpdk/drivers/common/dpaax/meson.build +@@ -10,7 +10,6 @@ sources = files('dpaax_iova_table.c', 'dpaa_of.c', 'caamflib.c') + + includes += include_directories('caamflib') + +-cflags += ['-D_GNU_SOURCE'] + if cc.has_argument('-Wno-cast-qual') + cflags += '-Wno-cast-qual' + endif +diff --git a/dpdk/drivers/common/iavf/iavf_adminq.c b/dpdk/drivers/common/iavf/iavf_adminq.c +index 8bae51a46a..8d03de0553 100644 +--- a/dpdk/drivers/common/iavf/iavf_adminq.c ++++ b/dpdk/drivers/common/iavf/iavf_adminq.c +@@ -417,7 +417,7 @@ enum iavf_status iavf_init_arq(struct iavf_hw *hw) + /* initialize base registers */ + ret_code = iavf_config_arq_regs(hw); + if (ret_code != IAVF_SUCCESS) +- goto init_adminq_free_rings; ++ goto init_config_regs; + + /* success! */ + hw->aq.arq.count = hw->aq.num_arq_entries; +@@ -425,6 +425,10 @@ enum iavf_status iavf_init_arq(struct iavf_hw *hw) + + init_adminq_free_rings: + iavf_free_adminq_arq(hw); ++ return ret_code; ++ ++init_config_regs: ++ iavf_free_arq_bufs(hw); + + init_adminq_exit: + return ret_code; +diff --git a/dpdk/drivers/common/iavf/iavf_impl.c b/dpdk/drivers/common/iavf/iavf_impl.c +index fc0da31753..f80878b9fd 100644 +--- a/dpdk/drivers/common/iavf/iavf_impl.c ++++ b/dpdk/drivers/common/iavf/iavf_impl.c +@@ -6,7 +6,6 @@ + #include + + #include +-#include + #include + #include + +@@ -19,13 +18,15 @@ iavf_allocate_dma_mem_d(__rte_unused struct iavf_hw *hw, + u64 size, + u32 alignment) + { ++ static uint64_t iavf_dma_memzone_id; + const struct rte_memzone *mz = NULL; + char z_name[RTE_MEMZONE_NAMESIZE]; + + if (!mem) + return IAVF_ERR_PARAM; + +- snprintf(z_name, sizeof(z_name), "iavf_dma_%"PRIu64, rte_rand()); ++ snprintf(z_name, sizeof(z_name), "iavf_dma_%" PRIu64, ++ __atomic_fetch_add(&iavf_dma_memzone_id, 1, __ATOMIC_RELAXED)); + mz = rte_memzone_reserve_bounded(z_name, size, SOCKET_ID_ANY, + RTE_MEMZONE_IOVA_CONTIG, alignment, + RTE_PGSIZE_2M); +diff --git a/dpdk/drivers/common/iavf/iavf_type.h b/dpdk/drivers/common/iavf/iavf_type.h +index 0990c9aa33..b154e35c30 100644 +--- a/dpdk/drivers/common/iavf/iavf_type.h ++++ b/dpdk/drivers/common/iavf/iavf_type.h +@@ -1003,7 +1003,7 @@ struct iavf_profile_tlv_section_record { + u8 data[12]; + }; + +-/* Generic AQ section in proflie */ ++/* Generic AQ section in profile */ + struct iavf_profile_aq_section { + u16 opcode; + u16 flags; +diff --git a/dpdk/drivers/common/iavf/virtchnl.h b/dpdk/drivers/common/iavf/virtchnl.h +index b931da61e5..4c34d35ba7 100644 +--- a/dpdk/drivers/common/iavf/virtchnl.h ++++ b/dpdk/drivers/common/iavf/virtchnl.h +@@ -248,9 +248,11 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); + #define VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010 + #define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 + #define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES 0x00000040 +-#define VIRTCHNL_VF_OFFLOAD_CRC 0x00000080 ++/* used to negotiate communicating link speeds in Mbps */ ++#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED 0x00000080 + /* 0X00000100 is reserved */ + #define VIRTCHNL_VF_LARGE_NUM_QPAIRS 0x00000200 ++#define VIRTCHNL_VF_OFFLOAD_CRC 0x00000400 + #define VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 + #define VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 + #define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 +@@ -268,8 +270,6 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); + #define VIRTCHNL_VF_CAP_DCF 0X40000000 + /* 0X80000000 is reserved */ + +-/* Define below the capability flags that are 
not offloads */ +-#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED 0x00000080 + #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ + VIRTCHNL_VF_OFFLOAD_VLAN | \ + VIRTCHNL_VF_OFFLOAD_RSS_PF) +diff --git a/dpdk/drivers/common/mlx5/linux/meson.build b/dpdk/drivers/common/mlx5/linux/meson.build +index fa9686fdaf..7e64583b4a 100644 +--- a/dpdk/drivers/common/mlx5/linux/meson.build ++++ b/dpdk/drivers/common/mlx5/linux/meson.build +@@ -37,7 +37,7 @@ foreach libname:libnames + endforeach + if static_ibverbs or dlopen_ibverbs + # Build without adding shared libs to Requires.private +- ibv_cflags = run_command(pkgconf, '--cflags', 'libibverbs').stdout() ++ ibv_cflags = run_command(pkgconf, '--cflags', 'libibverbs', check: true).stdout() + ext_deps += declare_dependency(compile_args: ibv_cflags.split()) + endif + if static_ibverbs +@@ -94,6 +94,10 @@ has_sym_args = [ + 'IBV_WQ_FLAG_RX_END_PADDING' ], + [ 'HAVE_MLX5DV_DR_DEVX_PORT', 'infiniband/mlx5dv.h', + 'mlx5dv_query_devx_port' ], ++ [ 'HAVE_MLX5DV_DR_DEVX_PORT_V35', 'infiniband/mlx5dv.h', ++ 'mlx5dv_query_port' ], ++ [ 'HAVE_MLX5DV_DR_CREATE_DEST_IB_PORT', 'infiniband/mlx5dv.h', ++ 'mlx5dv_dr_action_create_dest_ib_port' ], + [ 'HAVE_IBV_DEVX_OBJ', 'infiniband/mlx5dv.h', + 'mlx5dv_devx_obj_create' ], + [ 'HAVE_IBV_FLOW_DEVX_COUNTERS', 'infiniband/mlx5dv.h', +@@ -184,6 +188,8 @@ has_sym_args = [ + [ 'HAVE_DEVLINK', 'linux/devlink.h', 'DEVLINK_GENL_NAME' ], + [ 'HAVE_MLX5_DR_CREATE_ACTION_ASO', 'infiniband/mlx5dv.h', + 'mlx5dv_dr_action_create_aso' ], ++ [ 'HAVE_MLX5_IBV_REG_MR_IOVA', 'infiniband/verbs.h', ++ 'ibv_reg_mr_iova' ], + ] + config = configuration_data() + foreach arg:has_sym_args +diff --git a/dpdk/drivers/common/mlx5/linux/mlx5_common_os.c b/dpdk/drivers/common/mlx5/linux/mlx5_common_os.c +index 0edd78ea6d..96e036fc66 100644 +--- a/dpdk/drivers/common/mlx5/linux/mlx5_common_os.c ++++ b/dpdk/drivers/common/mlx5/linux/mlx5_common_os.c +@@ -97,22 +97,34 @@ void + mlx5_translate_port_name(const char *port_name_in, + struct mlx5_switch_info *port_info_out) + { +- char pf_c1, pf_c2, vf_c1, vf_c2, eol; ++ char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol; + char *end; + int sc_items; + +- /* +- * Check for port-name as a string of the form pf0vf0 +- * (support kernel ver >= 5.0 or OFED ver >= 4.6). +- */ ++ sc_items = sscanf(port_name_in, "%c%d", ++ &ctrl, &port_info_out->ctrl_num); ++ if (sc_items == 2 && ctrl == 'c') { ++ port_name_in++; /* 'c' */ ++ port_name_in += snprintf(NULL, 0, "%d", ++ port_info_out->ctrl_num); ++ } ++ /* Check for port-name as a string of the form pf0vf0 or pf0sf0 */ + sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c", + &pf_c1, &pf_c2, &port_info_out->pf_num, + &vf_c1, &vf_c2, &port_info_out->port_name, &eol); +- if (sc_items == 6 && +- pf_c1 == 'p' && pf_c2 == 'f' && +- vf_c1 == 'v' && vf_c2 == 'f') { +- port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF; +- return; ++ if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') { ++ if (vf_c1 == 'v' && vf_c2 == 'f') { ++ /* Kernel ver >= 5.0 or OFED ver >= 4.6 */ ++ port_info_out->name_type = ++ MLX5_PHYS_PORT_NAME_TYPE_PFVF; ++ return; ++ } ++ if (vf_c1 == 's' && vf_c2 == 'f') { ++ /* Kernel ver >= 5.11 or OFED ver >= 5.1 */ ++ port_info_out->name_type = ++ MLX5_PHYS_PORT_NAME_TYPE_PFSF; ++ return; ++ } + } + /* + * Check for port-name as a string of the form p0 +@@ -411,3 +423,59 @@ mlx5_glue_constructor(void) + mlx5_glue = NULL; + } + ++ ++/* ++ * Create direct mkey using the kernel ibv_reg_mr API and wrap it with a new ++ * indirect mkey created by the DevX API. 
++ * This mkey should be used for DevX commands requesting mkey as a parameter. ++ */ ++int ++mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr, ++ size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr) ++{ ++ struct mlx5_klm klm = { ++ .byte_count = length, ++ .address = (uintptr_t)addr, ++ }; ++ struct mlx5_devx_mkey_attr mkey_attr = { ++ .pd = pdn, ++ .klm_array = &klm, ++ .klm_num = 1, ++ }; ++ struct mlx5_devx_obj *mkey; ++ struct ibv_mr *ibv_mr = mlx5_glue->reg_mr(pd, addr, length, ++ IBV_ACCESS_LOCAL_WRITE | ++ (haswell_broadwell_cpu ? 0 : ++ IBV_ACCESS_RELAXED_ORDERING)); ++ ++ if (!ibv_mr) { ++ rte_errno = errno; ++ return -rte_errno; ++ } ++ klm.mkey = ibv_mr->lkey; ++ mkey_attr.addr = (uintptr_t)addr; ++ mkey_attr.size = length; ++ mkey = mlx5_devx_cmd_mkey_create(ctx, &mkey_attr); ++ if (!mkey) { ++ claim_zero(mlx5_glue->dereg_mr(ibv_mr)); ++ return -rte_errno; ++ } ++ pmd_mr->addr = addr; ++ pmd_mr->len = length; ++ pmd_mr->obj = (void *)ibv_mr; ++ pmd_mr->imkey = mkey; ++ pmd_mr->lkey = mkey->id; ++ return 0; ++} ++ ++void ++mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr) ++{ ++ if (!pmd_mr) ++ return; ++ if (pmd_mr->imkey) ++ claim_zero(mlx5_devx_cmd_destroy(pmd_mr->imkey)); ++ if (pmd_mr->obj) ++ claim_zero(mlx5_glue->dereg_mr(pmd_mr->obj)); ++ memset(pmd_mr, 0, sizeof(*pmd_mr)); ++} +diff --git a/dpdk/drivers/common/mlx5/linux/mlx5_common_verbs.c b/dpdk/drivers/common/mlx5/linux/mlx5_common_verbs.c +index 339535dd04..aa560f05f2 100644 +--- a/dpdk/drivers/common/mlx5/linux/mlx5_common_verbs.c ++++ b/dpdk/drivers/common/mlx5/linux/mlx5_common_verbs.c +@@ -37,7 +37,6 @@ mlx5_common_verbs_reg_mr(void *pd, void *addr, size_t length, + { + struct ibv_mr *ibv_mr; + +- memset(pmd_mr, 0, sizeof(*pmd_mr)); + ibv_mr = mlx5_glue->reg_mr(pd, addr, length, + IBV_ACCESS_LOCAL_WRITE | + (haswell_broadwell_cpu ? 
0 : +diff --git a/dpdk/drivers/common/mlx5/linux/mlx5_glue.c b/dpdk/drivers/common/mlx5/linux/mlx5_glue.c +index 8146c79287..97ac5d3b5e 100644 +--- a/dpdk/drivers/common/mlx5/linux/mlx5_glue.c ++++ b/dpdk/drivers/common/mlx5/linux/mlx5_glue.c +@@ -224,6 +224,23 @@ mlx5_glue_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access) + return ibv_reg_mr(pd, addr, length, access); + } + ++static struct ibv_mr * ++mlx5_glue_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length, ++ uint64_t iova, int access) ++{ ++#ifdef HAVE_MLX5_IBV_REG_MR_IOVA ++ return ibv_reg_mr_iova(pd, addr, length, iova, access); ++#else ++ (void)pd; ++ (void)addr; ++ (void)length; ++ (void)iova; ++ (void)access; ++ errno = ENOTSUP; ++ return NULL; ++#endif ++} ++ + static struct ibv_mr * + mlx5_glue_alloc_null_mr(struct ibv_pd *pd) + { +@@ -391,7 +408,7 @@ mlx5_glue_dr_create_flow_action_dest_flow_tbl(void *tbl) + static void * + mlx5_glue_dr_create_flow_action_dest_port(void *domain, uint32_t port) + { +-#ifdef HAVE_MLX5DV_DR_DEVX_PORT ++#ifdef HAVE_MLX5DV_DR_CREATE_DEST_IB_PORT + return mlx5dv_dr_action_create_dest_ib_port(domain, port); + #else + #ifdef HAVE_MLX5DV_DR_ESWITCH +@@ -1068,21 +1085,75 @@ mlx5_glue_devx_qp_query(struct ibv_qp *qp, + } + + static int +-mlx5_glue_devx_port_query(struct ibv_context *ctx, +- uint32_t port_num, +- struct mlx5dv_devx_port *mlx5_devx_port) ++mlx5_glue_devx_wq_query(struct ibv_wq *wq, const void *in, size_t inlen, ++ void *out, size_t outlen) + { +-#ifdef HAVE_MLX5DV_DR_DEVX_PORT +- return mlx5dv_query_devx_port(ctx, port_num, mlx5_devx_port); ++#ifdef HAVE_IBV_DEVX_QP ++ return mlx5dv_devx_wq_query(wq, in, inlen, out, outlen); + #else +- (void)ctx; +- (void)port_num; +- (void)mlx5_devx_port; ++ (void)wq; ++ (void)in; ++ (void)inlen; ++ (void)out; ++ (void)outlen; + errno = ENOTSUP; + return errno; + #endif + } + ++static int ++mlx5_glue_devx_port_query(struct ibv_context *ctx, ++ uint32_t port_num, ++ struct mlx5_port_info *info) ++{ ++ int err = 0; ++ ++ info->query_flags = 0; ++#ifdef HAVE_MLX5DV_DR_DEVX_PORT_V35 ++ /* The DevX port query API is implemented (rdma-core v35 and above). */ ++ struct mlx5_ib_uapi_query_port devx_port; ++ ++ memset(&devx_port, 0, sizeof(devx_port)); ++ err = mlx5dv_query_port(ctx, port_num, &devx_port); ++ if (err) ++ return err; ++ if (devx_port.flags & MLX5DV_QUERY_PORT_VPORT_REG_C0) { ++ info->vport_meta_tag = devx_port.reg_c0.value; ++ info->vport_meta_mask = devx_port.reg_c0.mask; ++ info->query_flags |= MLX5_PORT_QUERY_REG_C0; ++ } ++ if (devx_port.flags & MLX5DV_QUERY_PORT_VPORT) { ++ info->vport_id = devx_port.vport; ++ info->query_flags |= MLX5_PORT_QUERY_VPORT; ++ } ++#else ++#ifdef HAVE_MLX5DV_DR_DEVX_PORT ++ /* The legacy DevX port query API is implemented (prior v35). 
*/ ++ struct mlx5dv_devx_port devx_port = { ++ .comp_mask = MLX5DV_DEVX_PORT_VPORT | ++ MLX5DV_DEVX_PORT_MATCH_REG_C_0 ++ }; ++ ++ err = mlx5dv_query_devx_port(ctx, port_num, &devx_port); ++ if (err) ++ return err; ++ if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) { ++ info->vport_meta_tag = devx_port.reg_c_0.value; ++ info->vport_meta_mask = devx_port.reg_c_0.mask; ++ info->query_flags |= MLX5_PORT_QUERY_REG_C0; ++ } ++ if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) { ++ info->vport_id = devx_port.vport_num; ++ info->query_flags |= MLX5_PORT_QUERY_VPORT; ++ } ++#else ++ RTE_SET_USED(ctx); ++ RTE_SET_USED(port_num); ++#endif /* HAVE_MLX5DV_DR_DEVX_PORT */ ++#endif /* HAVE_MLX5DV_DR_DEVX_PORT_V35 */ ++ return err; ++} ++ + static int + mlx5_glue_dr_dump_domain(FILE *file, void *domain) + { +@@ -1335,6 +1406,7 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue) { + .destroy_qp = mlx5_glue_destroy_qp, + .modify_qp = mlx5_glue_modify_qp, + .reg_mr = mlx5_glue_reg_mr, ++ .reg_mr_iova = mlx5_glue_reg_mr_iova, + .alloc_null_mr = mlx5_glue_alloc_null_mr, + .dereg_mr = mlx5_glue_dereg_mr, + .create_counter_set = mlx5_glue_create_counter_set, +@@ -1403,6 +1475,7 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue) { + .devx_umem_reg = mlx5_glue_devx_umem_reg, + .devx_umem_dereg = mlx5_glue_devx_umem_dereg, + .devx_qp_query = mlx5_glue_devx_qp_query, ++ .devx_wq_query = mlx5_glue_devx_wq_query, + .devx_port_query = mlx5_glue_devx_port_query, + .dr_dump_domain = mlx5_glue_dr_dump_domain, + .dr_reclaim_domain_memory = mlx5_glue_dr_reclaim_domain_memory, +diff --git a/dpdk/drivers/common/mlx5/linux/mlx5_glue.h b/dpdk/drivers/common/mlx5/linux/mlx5_glue.h +index 8be446a902..d48a06d443 100644 +--- a/dpdk/drivers/common/mlx5/linux/mlx5_glue.h ++++ b/dpdk/drivers/common/mlx5/linux/mlx5_glue.h +@@ -84,6 +84,20 @@ struct mlx5dv_dr_action; + struct mlx5dv_devx_port; + #endif + ++#ifndef HAVE_MLX5DV_DR_DEVX_PORT_V35 ++struct mlx5dv_port; ++#endif ++ ++#define MLX5_PORT_QUERY_VPORT (1u << 0) ++#define MLX5_PORT_QUERY_REG_C0 (1u << 1) ++ ++struct mlx5_port_info { ++ uint16_t query_flags; ++ uint16_t vport_id; /* Associated VF vport index (if any). */ ++ uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */ ++ uint32_t vport_meta_mask; /* Used for vport index field match mask. 
*/ ++}; ++ + #ifndef HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER + struct mlx5dv_dr_flow_meter_attr; + #endif +@@ -182,6 +196,9 @@ struct mlx5_glue { + int attr_mask); + struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, + size_t length, int access); ++ struct ibv_mr *(*reg_mr_iova)(struct ibv_pd *pd, void *addr, ++ size_t length, uint64_t iova, ++ int access); + struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd); + int (*dereg_mr)(struct ibv_mr *mr); + struct ibv_counter_set *(*create_counter_set) +@@ -307,9 +324,11 @@ struct mlx5_glue { + int (*devx_qp_query)(struct ibv_qp *qp, + const void *in, size_t inlen, + void *out, size_t outlen); ++ int (*devx_wq_query)(struct ibv_wq *wq, const void *in, size_t inlen, ++ void *out, size_t outlen); + int (*devx_port_query)(struct ibv_context *ctx, + uint32_t port_num, +- struct mlx5dv_devx_port *mlx5_devx_port); ++ struct mlx5_port_info *info); + int (*dr_dump_domain)(FILE *file, void *domain); + int (*devx_query_eqn)(struct ibv_context *context, uint32_t cpus, + uint32_t *eqn); +diff --git a/dpdk/drivers/common/mlx5/linux/mlx5_nl.c b/dpdk/drivers/common/mlx5/linux/mlx5_nl.c +index ef7a521379..15a5f7b303 100644 +--- a/dpdk/drivers/common/mlx5/linux/mlx5_nl.c ++++ b/dpdk/drivers/common/mlx5/linux/mlx5_nl.c +@@ -33,6 +33,8 @@ + #define MLX5_SEND_BUF_SIZE 32768 + /* Receive buffer size for the Netlink socket */ + #define MLX5_RECV_BUF_SIZE 32768 ++/* Maximal physical port name length. */ ++#define MLX5_PHYS_PORT_NAME_MAX 128 + + /** Parameters of VLAN devices created by driver. */ + #define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx" +@@ -178,19 +180,22 @@ uint32_t atomic_sn; + * + * @param protocol + * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA). ++ * @param groups ++ * Groups to listen (e.g. RTMGRP_LINK), can be 0. + * + * @return + * A file descriptor on success, a negative errno value otherwise and + * rte_errno is set. 
+ */ + int +-mlx5_nl_init(int protocol) ++mlx5_nl_init(int protocol, int groups) + { + int fd; +- int sndbuf_size = MLX5_SEND_BUF_SIZE; +- int rcvbuf_size = MLX5_RECV_BUF_SIZE; ++ int buf_size; ++ socklen_t opt_size; + struct sockaddr_nl local = { + .nl_family = AF_NETLINK, ++ .nl_groups = groups, + }; + int ret; + +@@ -199,16 +204,36 @@ mlx5_nl_init(int protocol) + rte_errno = errno; + return -rte_errno; + } +- ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int)); ++ opt_size = sizeof(buf_size); ++ ret = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf_size, &opt_size); + if (ret == -1) { + rte_errno = errno; + goto error; + } +- ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int)); ++ DRV_LOG(DEBUG, "Netlink socket send buffer: %d", buf_size); ++ if (buf_size < MLX5_SEND_BUF_SIZE) { ++ ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, ++ &buf_size, sizeof(buf_size)); ++ if (ret == -1) { ++ rte_errno = errno; ++ goto error; ++ } ++ } ++ opt_size = sizeof(buf_size); ++ ret = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buf_size, &opt_size); + if (ret == -1) { + rte_errno = errno; + goto error; + } ++ DRV_LOG(DEBUG, "Netlink socket recv buffer: %d", buf_size); ++ if (buf_size < MLX5_RECV_BUF_SIZE) { ++ ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, ++ &buf_size, sizeof(buf_size)); ++ if (ret == -1) { ++ rte_errno = errno; ++ goto error; ++ } ++ } + ret = bind(fd, (struct sockaddr *)&local, sizeof(local)); + if (ret == -1) { + rte_errno = errno; +@@ -330,11 +355,7 @@ mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), + void *arg) + { + struct sockaddr_nl sa; +- void *buf = mlx5_malloc(0, MLX5_RECV_BUF_SIZE, 0, SOCKET_ID_ANY); +- struct iovec iov = { +- .iov_base = buf, +- .iov_len = MLX5_RECV_BUF_SIZE, +- }; ++ struct iovec iov; + struct msghdr msg = { + .msg_name = &sa, + .msg_namelen = sizeof(sa), +@@ -342,18 +363,43 @@ mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg), + /* One message at a time */ + .msg_iovlen = 1, + }; ++ void *buf = NULL; + int multipart = 0; + int ret = 0; + +- if (!buf) { +- rte_errno = ENOMEM; +- return -rte_errno; +- } + do { + struct nlmsghdr *nh; +- int recv_bytes = 0; ++ int recv_bytes; + + do { ++ /* Query length of incoming message. */ ++ iov.iov_base = NULL; ++ iov.iov_len = 0; ++ recv_bytes = recvmsg(nlsk_fd, &msg, ++ MSG_PEEK | MSG_TRUNC); ++ if (recv_bytes < 0) { ++ rte_errno = errno; ++ ret = -rte_errno; ++ goto exit; ++ } ++ if (recv_bytes == 0) { ++ rte_errno = ENODATA; ++ ret = -rte_errno; ++ goto exit; ++ } ++ /* Allocate buffer to fetch the message. */ ++ if (recv_bytes < MLX5_RECV_BUF_SIZE) ++ recv_bytes = MLX5_RECV_BUF_SIZE; ++ mlx5_free(buf); ++ buf = mlx5_malloc(0, recv_bytes, 0, SOCKET_ID_ANY); ++ if (!buf) { ++ rte_errno = ENOMEM; ++ ret = -rte_errno; ++ goto exit; ++ } ++ /* Fetch the message. */ ++ iov.iov_base = buf; ++ iov.iov_len = recv_bytes; + recv_bytes = recvmsg(nlsk_fd, &msg, 0); + if (recv_bytes == -1) { + rte_errno = errno; +@@ -746,6 +792,7 @@ mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, + int i; + int ret; + ++ memset(macs, 0, n * sizeof(macs[0])); + ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); + if (ret) + return; +@@ -1158,6 +1205,8 @@ mlx5_nl_check_switch_info(bool num_vf_set, + case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: + /* Fallthrough */ + case MLX5_PHYS_PORT_NAME_TYPE_PFVF: ++ /* Fallthrough */ ++ case MLX5_PHYS_PORT_NAME_TYPE_PFSF: + /* New representors naming schema. 
*/ + switch_info->representor = 1; + break; +@@ -1188,6 +1237,7 @@ mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) + size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + bool switch_id_set = false; + bool num_vf_set = false; ++ int len; + + if (nh->nlmsg_type != RTM_NEWLINK) + goto error; +@@ -1203,7 +1253,24 @@ mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) + num_vf_set = true; + break; + case IFLA_PHYS_PORT_NAME: +- mlx5_translate_port_name((char *)payload, &info); ++ len = RTA_PAYLOAD(ra); ++ /* Some kernels do not pad attributes with zero. */ ++ if (len > 0 && len < MLX5_PHYS_PORT_NAME_MAX) { ++ char name[MLX5_PHYS_PORT_NAME_MAX]; ++ ++ /* ++ * We can't just patch the message with padding ++ * zero - it might corrupt the following items ++ * in the message, we have to copy the string ++ * by attribute length and pad the copied one. ++ */ ++ memcpy(name, payload, len); ++ name[len] = 0; ++ mlx5_translate_port_name(name, &info); ++ } else { ++ info.name_type = ++ MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; ++ } + break; + case IFLA_PHYS_SWITCH_ID: + info.switch_id = 0; +@@ -1734,3 +1801,103 @@ mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, + /* Now, need to reload the driver. */ + return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr); + } ++ ++/** ++ * Try to parse a Netlink message as a link status update. ++ * ++ * @param hdr ++ * Netlink message header. ++ * @param[out] ifindex ++ * Index of the updated interface. ++ * @param[out] flags ++ * New interface flags. ++ * ++ * @return ++ * 0 on success, negative on failure. ++ */ ++int ++mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex) ++{ ++ struct ifinfomsg *info; ++ ++ switch (hdr->nlmsg_type) { ++ case RTM_NEWLINK: ++ case RTM_DELLINK: ++ case RTM_GETLINK: ++ case RTM_SETLINK: ++ info = NLMSG_DATA(hdr); ++ *ifindex = info->ifi_index; ++ return 0; ++ } ++ return -1; ++} ++ ++/** ++ * Read pending events from a Netlink socket. ++ * ++ * @param nlsk_fd ++ * Netlink socket. ++ * @param cb ++ * Callback invoked for each of the events. ++ * @param cb_arg ++ * User data for the callback. ++ * ++ * @return ++ * 0 on success, including the case when there are no events. ++ * Negative on failure and rte_errno is set. 
++ */ ++int ++mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg) ++{ ++ char buf[8192]; ++ struct sockaddr_nl addr; ++ struct iovec iov = { ++ .iov_base = buf, ++ .iov_len = sizeof(buf), ++ }; ++ struct msghdr msg = { ++ .msg_name = &addr, ++ .msg_namelen = sizeof(addr), ++ .msg_iov = &iov, ++ .msg_iovlen = 1, ++ }; ++ struct nlmsghdr *hdr; ++ ssize_t size; ++ ++ while (1) { ++ size = recvmsg(nlsk_fd, &msg, MSG_DONTWAIT); ++ if (size < 0) { ++ if (errno == EAGAIN) ++ return 0; ++ if (errno == EINTR) ++ continue; ++ DRV_LOG(DEBUG, "Failed to receive netlink message: %s", ++ strerror(errno)); ++ rte_errno = errno; ++ return -rte_errno; ++ ++ } ++ hdr = (struct nlmsghdr *)buf; ++ while (size >= (ssize_t)sizeof(*hdr)) { ++ ssize_t msg_len = hdr->nlmsg_len; ++ ssize_t data_len = msg_len - sizeof(*hdr); ++ ssize_t aligned_len; ++ ++ if (data_len < 0) { ++ DRV_LOG(DEBUG, "Netlink message too short"); ++ rte_errno = EINVAL; ++ return -rte_errno; ++ } ++ aligned_len = NLMSG_ALIGN(msg_len); ++ if (aligned_len > size) { ++ DRV_LOG(DEBUG, "Netlink message too long"); ++ rte_errno = EINVAL; ++ return -rte_errno; ++ } ++ cb(hdr, cb_arg); ++ hdr = RTE_PTR_ADD(hdr, aligned_len); ++ size -= aligned_len; ++ } ++ } ++ return 0; ++} +diff --git a/dpdk/drivers/common/mlx5/linux/mlx5_nl.h b/dpdk/drivers/common/mlx5/linux/mlx5_nl.h +index 15129ffdc8..05189e997a 100644 +--- a/dpdk/drivers/common/mlx5/linux/mlx5_nl.h ++++ b/dpdk/drivers/common/mlx5/linux/mlx5_nl.h +@@ -11,6 +11,7 @@ + + #include "mlx5_common.h" + ++typedef void (mlx5_nl_event_cb)(struct nlmsghdr *hdr, void *user_data); + + /* VLAN netdev for VLAN workaround. */ + struct mlx5_nl_vlan_dev { +@@ -30,7 +31,7 @@ struct mlx5_nl_vlan_vmwa_context { + }; + + __rte_internal +-int mlx5_nl_init(int protocol); ++int mlx5_nl_init(int protocol, int groups); + __rte_internal + int mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own, + struct rte_ether_addr *mac, uint32_t index); +@@ -77,4 +78,9 @@ __rte_internal + int mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr, + int enable); + ++__rte_internal ++int mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg); ++__rte_internal ++int mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex); ++ + #endif /* RTE_PMD_MLX5_NL_H_ */ +diff --git a/dpdk/drivers/common/mlx5/mlx5_common.c b/dpdk/drivers/common/mlx5/mlx5_common.c +index 044513223c..a29990d712 100644 +--- a/dpdk/drivers/common/mlx5/mlx5_common.c ++++ b/dpdk/drivers/common/mlx5/mlx5_common.c +@@ -16,8 +16,6 @@ + #include "mlx5_malloc.h" + #include "mlx5_common_pci.h" + +-int mlx5_common_logtype; +- + uint8_t haswell_broadwell_cpu; + + /* In case this is an x86_64 intel processor to check if +@@ -43,17 +41,12 @@ static inline void mlx5_cpu_id(unsigned int level, + } + #endif + +-RTE_INIT_PRIO(mlx5_log_init, LOG) +-{ +- mlx5_common_logtype = rte_log_register("pmd.common.mlx5"); +- if (mlx5_common_logtype >= 0) +- rte_log_set_level(mlx5_common_logtype, RTE_LOG_NOTICE); +-} ++RTE_LOG_REGISTER(mlx5_common_logtype, pmd.common.mlx5, NOTICE) + + static bool mlx5_common_initialized; + + /** +- * One time innitialization routine for run-time dependency on glue library ++ * One time initialization routine for run-time dependency on glue library + * for multiple PMDs. Each mlx5 PMD that depends on mlx5_common module, + * must invoke in its constructor. 
+ */ +@@ -255,11 +248,11 @@ mlx5_release_dbr(struct mlx5_dbr_page_list *head, uint32_t umem_id, + * attributes (if supported by the host), the + * writes to the UAR registers must be followed + * by write memory barrier. +- * MLX5DV_UAR_ALLOC_TYPE_NC - allocate as non-cached nenory, all writes are ++ * MLX5DV_UAR_ALLOC_TYPE_NC - allocate as non-cached memory, all writes are + * promoted to the registers immediately, no + * memory barriers needed. +- * mapping < 0 - the first attempt is performed with MLX5DV_UAR_ALLOC_TYPE_BF, +- * if this fails the next attempt with MLX5DV_UAR_ALLOC_TYPE_NC ++ * mapping < 0 - the first attempt is performed with MLX5DV_UAR_ALLOC_TYPE_NC, ++ * if this fails the next attempt with MLX5DV_UAR_ALLOC_TYPE_BF + * is performed. The drivers specifying negative values should + * always provide the write memory barrier operation after UAR + * register writings. +@@ -291,26 +284,12 @@ mlx5_devx_alloc_uar(void *ctx, int mapping) + #endif + uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping); + #ifdef MLX5DV_UAR_ALLOC_TYPE_NC +- if (!uar && +- mapping < 0 && +- uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) { +- /* +- * In some environments like virtual machine the +- * Write Combining mapped might be not supported and +- * UAR allocation fails. We tried "Non-Cached" mapping +- * for the case. +- */ +- DRV_LOG(WARNING, "Failed to allocate DevX UAR (BF)"); +- uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; +- uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping); +- } else if (!uar && +- mapping < 0 && +- uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) { ++ if (!uar && mapping < 0) { + /* + * If Verbs/kernel does not support "Non-Cached" + * try the "Write-Combining". + */ +- DRV_LOG(WARNING, "Failed to allocate DevX UAR (NC)"); ++ DRV_LOG(DEBUG, "Failed to allocate DevX UAR (NC)"); + uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF; + uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping); + } +@@ -328,7 +307,7 @@ mlx5_devx_alloc_uar(void *ctx, int mapping) + * IB device context, on context closure all UARs + * will be freed, should be no memory/object leakage. + */ +- DRV_LOG(WARNING, "Retrying to allocate DevX UAR"); ++ DRV_LOG(DEBUG, "Retrying to allocate DevX UAR"); + uar = NULL; + } + /* Check whether we finally succeeded with valid UAR allocation. */ +diff --git a/dpdk/drivers/common/mlx5/mlx5_common.h b/dpdk/drivers/common/mlx5/mlx5_common.h +index a484b74b9c..86d690af09 100644 +--- a/dpdk/drivers/common/mlx5/mlx5_common.h ++++ b/dpdk/drivers/common/mlx5/mlx5_common.h +@@ -153,6 +153,7 @@ enum mlx5_nl_phys_port_name_type { + MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */ + MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */ + MLX5_PHYS_PORT_NAME_TYPE_PFHPF, /* pf0, kernel ver >= 5.7, HPF rep */ ++ MLX5_PHYS_PORT_NAME_TYPE_PFSF, /* pf0sf0, kernel ver >= 5.0 */ + MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */ + }; + +@@ -161,6 +162,7 @@ struct mlx5_switch_info { + uint32_t master:1; /**< Master device. */ + uint32_t representor:1; /**< Representor device. */ + enum mlx5_nl_phys_port_name_type name_type; /** < Port name type. */ ++ int32_t ctrl_num; /**< Controller number (valid for c#pf#vf# format). */ + int32_t pf_num; /**< PF number (valid for pfxvfx format only). */ + int32_t port_name; /**< Representor port name. */ + uint64_t switch_id; /**< Switch identifier. */ +@@ -268,4 +270,22 @@ extern uint8_t haswell_broadwell_cpu; + __rte_internal + void mlx5_common_init(void); + ++/* mlx5 PMD wrapped MR struct. 
*/ ++struct mlx5_pmd_wrapped_mr { ++ uint32_t lkey; ++ void *addr; ++ size_t len; ++ void *obj; /* verbs mr object or devx umem object. */ ++ void *imkey; /* DevX indirect mkey object. */ ++}; ++ ++__rte_internal ++int ++mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr, ++ size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr); ++ ++__rte_internal ++void ++mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr); ++ + #endif /* RTE_PMD_MLX5_COMMON_H_ */ +diff --git a/dpdk/drivers/common/mlx5/mlx5_common_mr.c b/dpdk/drivers/common/mlx5/mlx5_common_mr.c +index 7c25541dc4..d01f86837d 100644 +--- a/dpdk/drivers/common/mlx5/mlx5_common_mr.c ++++ b/dpdk/drivers/common/mlx5/mlx5_common_mr.c +@@ -1060,6 +1060,95 @@ mlx5_create_mr_ext(void *pd, uintptr_t addr, size_t len, int socket_id, + return mr; + } + ++/** ++ * Callback for memory free event. Iterate freed memsegs and check whether it ++ * belongs to an existing MR. If found, clear the bit from bitmap of MR. As a ++ * result, the MR would be fragmented. If it becomes empty, the MR will be freed ++ * later by mlx5_mr_garbage_collect(). Even if this callback is called from a ++ * secondary process, the garbage collector will be called in primary process ++ * as the secondary process can't call mlx5_mr_create(). ++ * ++ * The global cache must be rebuilt if there's any change and this event has to ++ * be propagated to dataplane threads to flush the local caches. ++ * ++ * @param share_cache ++ * Pointer to a global shared MR cache. ++ * @param ibdev_name ++ * Name of ibv device. ++ * @param addr ++ * Address of freed memory. ++ * @param len ++ * Size of freed memory. ++ */ ++void ++mlx5_free_mr_by_addr(struct mlx5_mr_share_cache *share_cache, ++ const char *ibdev_name, const void *addr, size_t len) ++{ ++ const struct rte_memseg_list *msl; ++ struct mlx5_mr *mr; ++ int ms_n; ++ int i; ++ int rebuild = 0; ++ ++ DRV_LOG(DEBUG, "device %s free callback: addr=%p, len=%zu", ++ ibdev_name, addr, len); ++ msl = rte_mem_virt2memseg_list(addr); ++ /* addr and len must be page-aligned. */ ++ MLX5_ASSERT((uintptr_t)addr == ++ RTE_ALIGN((uintptr_t)addr, msl->page_sz)); ++ MLX5_ASSERT(len == RTE_ALIGN(len, msl->page_sz)); ++ ms_n = len / msl->page_sz; ++ rte_rwlock_write_lock(&share_cache->rwlock); ++ /* Clear bits of freed memsegs from MR. */ ++ for (i = 0; i < ms_n; ++i) { ++ const struct rte_memseg *ms; ++ struct mr_cache_entry entry; ++ uintptr_t start; ++ int ms_idx; ++ uint32_t pos; ++ ++ /* Find MR having this memseg. */ ++ start = (uintptr_t)addr + i * msl->page_sz; ++ mr = mlx5_mr_lookup_list(share_cache, &entry, start); ++ if (mr == NULL) ++ continue; ++ MLX5_ASSERT(mr->msl); /* Can't be external memory. */ ++ ms = rte_mem_virt2memseg((void *)start, msl); ++ MLX5_ASSERT(ms != NULL); ++ MLX5_ASSERT(msl->page_sz == ms->hugepage_sz); ++ ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); ++ pos = ms_idx - mr->ms_base_idx; ++ MLX5_ASSERT(rte_bitmap_get(mr->ms_bmp, pos)); ++ MLX5_ASSERT(pos < mr->ms_bmp_n); ++ DRV_LOG(DEBUG, "device %s MR(%p): clear bitmap[%u] for addr %p", ++ ibdev_name, (void *)mr, pos, (void *)start); ++ rte_bitmap_clear(mr->ms_bmp, pos); ++ if (--mr->ms_n == 0) { ++ LIST_REMOVE(mr, mr); ++ LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr); ++ DRV_LOG(DEBUG, "device %s remove MR(%p) from list", ++ ibdev_name, (void *)mr); ++ } ++ /* ++ * MR is fragmented or will be freed. the global cache must be ++ * rebuilt. 
++ */ ++ rebuild = 1; ++ } ++ if (rebuild) { ++ mlx5_mr_rebuild_cache(share_cache); ++ /* ++ * No explicit wmb is needed after updating dev_gen due to ++ * store-release ordering in unlock that provides the ++ * implicit barrier at the software visible level. ++ */ ++ ++share_cache->dev_gen; ++ DRV_LOG(DEBUG, "broadcasting local cache flush, gen=%d", ++ share_cache->dev_gen); ++ } ++ rte_rwlock_write_unlock(&share_cache->rwlock); ++} ++ + /** + * Dump all the created MRs and the global cache entries. + * +diff --git a/dpdk/drivers/common/mlx5/mlx5_common_mr.h b/dpdk/drivers/common/mlx5/mlx5_common_mr.h +index da0a0f0c79..09d39ddb5b 100644 +--- a/dpdk/drivers/common/mlx5/mlx5_common_mr.h ++++ b/dpdk/drivers/common/mlx5/mlx5_common_mr.h +@@ -143,6 +143,9 @@ void mlx5_mr_rebuild_cache(struct mlx5_mr_share_cache *share_cache); + __rte_internal + void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl); + __rte_internal ++void mlx5_free_mr_by_addr(struct mlx5_mr_share_cache *share_cache, ++ const char *ibdev_name, const void *addr, size_t len); ++__rte_internal + int + mlx5_mr_insert_cache(struct mlx5_mr_share_cache *share_cache, + struct mlx5_mr *mr); +diff --git a/dpdk/drivers/common/mlx5/mlx5_common_pci.c b/dpdk/drivers/common/mlx5/mlx5_common_pci.c +index 5208972bb6..fa6e89efd3 100644 +--- a/dpdk/drivers/common/mlx5/mlx5_common_pci.c ++++ b/dpdk/drivers/common/mlx5/mlx5_common_pci.c +@@ -203,7 +203,6 @@ drivers_remove(struct mlx5_pci_device *dev, uint32_t enabled_classes) + unsigned int i = 0; + int ret = 0; + +- enabled_classes &= dev->classes_loaded; + while (enabled_classes) { + driver = driver_get(RTE_BIT64(i)); + if (driver) { +@@ -254,9 +253,11 @@ drivers_probe(struct mlx5_pci_device *dev, struct rte_pci_driver *pci_drv, + dev->classes_loaded |= enabled_classes; + return 0; + probe_err: +- /* Only unload drivers which are enabled which were enabled +- * in this probe instance. ++ /* ++ * Need to remove only drivers which were not probed before this probe ++ * instance, but have already been probed before this failure. 
+ */ ++ enabled_classes &= ~dev->classes_loaded; + drivers_remove(dev, enabled_classes); + return ret; + } +diff --git a/dpdk/drivers/common/mlx5/mlx5_devx_cmds.c b/dpdk/drivers/common/mlx5/mlx5_devx_cmds.c +index eafee65f22..ef82981927 100644 +--- a/dpdk/drivers/common/mlx5/mlx5_devx_cmds.c ++++ b/dpdk/drivers/common/mlx5/mlx5_devx_cmds.c +@@ -53,8 +53,8 @@ mlx5_devx_cmd_register_read(void *ctx, uint16_t reg_id, uint32_t arg, + MLX5_SET(access_register_in, in, register_id, reg_id); + MLX5_SET(access_register_in, in, argument, arg); + rc = mlx5_glue->devx_general_cmd(ctx, in, sizeof(in), out, +- MLX5_ST_SZ_DW(access_register_out) * +- sizeof(uint32_t) + dw_cnt); ++ MLX5_ST_SZ_BYTES(access_register_out) + ++ sizeof(uint32_t) * dw_cnt); + if (rc) + goto error; + status = MLX5_GET(access_register_out, out, status); +@@ -518,10 +518,9 @@ mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj, + return ret; + } + +- + struct mlx5_devx_obj * + mlx5_devx_cmd_create_flex_parser(void *ctx, +- struct mlx5_devx_graph_node_attr *data) ++ struct mlx5_devx_graph_node_attr *data) + { + uint32_t in[MLX5_ST_SZ_DW(create_flex_parser_in)] = {0}; + uint32_t out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; +@@ -545,12 +544,18 @@ mlx5_devx_cmd_create_flex_parser(void *ctx, + MLX5_GENERAL_OBJ_TYPE_FLEX_PARSE_GRAPH); + MLX5_SET(parse_graph_flex, flex, header_length_mode, + data->header_length_mode); ++ MLX5_SET64(parse_graph_flex, flex, modify_field_select, ++ data->modify_field_select); + MLX5_SET(parse_graph_flex, flex, header_length_base_value, + data->header_length_base_value); + MLX5_SET(parse_graph_flex, flex, header_length_field_offset, + data->header_length_field_offset); + MLX5_SET(parse_graph_flex, flex, header_length_field_shift, + data->header_length_field_shift); ++ MLX5_SET(parse_graph_flex, flex, next_header_field_offset, ++ data->next_header_field_offset); ++ MLX5_SET(parse_graph_flex, flex, next_header_field_size, ++ data->next_header_field_size); + MLX5_SET(parse_graph_flex, flex, header_length_field_mask, + data->header_length_field_mask); + for (i = 0; i < MLX5_GRAPH_NODE_SAMPLE_NUM; i++) { +@@ -648,6 +653,7 @@ mlx5_devx_cmd_query_hca_attr(void *ctx, + uint32_t out[MLX5_ST_SZ_DW(query_hca_cap_out)] = {0}; + void *hcattr; + int status, syndrome, rc, i; ++ bool hca_cap_2_sup; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, +@@ -667,6 +673,7 @@ mlx5_devx_cmd_query_hca_attr(void *ctx, + return -1; + } + hcattr = MLX5_ADDR_OF(query_hca_cap_out, out, capability); ++ hca_cap_2_sup = MLX5_GET(cmd_hca_cap, hcattr, hca_cap_2); + attr->flow_counter_bulk_alloc_bitmap = + MLX5_GET(cmd_hca_cap, hcattr, flow_counter_bulk_alloc); + attr->flow_counters_dump = MLX5_GET(cmd_hca_cap, hcattr, +@@ -714,6 +721,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx, + device_frequency_khz); + attr->scatter_fcs_w_decap_disable = + MLX5_GET(cmd_hca_cap, hcattr, scatter_fcs_w_decap_disable); ++ attr->roce = MLX5_GET(cmd_hca_cap, hcattr, roce); ++ attr->rq_ts_format = MLX5_GET(cmd_hca_cap, hcattr, rq_ts_format); ++ attr->sq_ts_format = MLX5_GET(cmd_hca_cap, hcattr, sq_ts_format); + attr->regex = MLX5_GET(cmd_hca_cap, hcattr, regexp); + attr->regexp_num_of_engines = MLX5_GET(cmd_hca_cap, hcattr, + regexp_num_of_engines); +@@ -725,6 +735,32 @@ mlx5_devx_cmd_query_hca_attr(void *ctx, + mini_cqe_resp_flow_tag); + attr->mini_cqe_resp_l3_l4_tag = MLX5_GET(cmd_hca_cap, hcattr, + mini_cqe_resp_l3_l4_tag); ++ if (hca_cap_2_sup) { ++ memset(in, 0, sizeof(in)); ++ 
memset(out, 0, sizeof(out)); ++ MLX5_SET(query_hca_cap_in, in, opcode, ++ MLX5_CMD_OP_QUERY_HCA_CAP); ++ MLX5_SET(query_hca_cap_in, in, op_mod, ++ MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 | ++ MLX5_HCA_CAP_OPMOD_GET_CUR); ++ rc = mlx5_glue->devx_general_cmd(ctx, in, sizeof(in), ++ out, sizeof(out)); ++ if (rc) ++ goto error; ++ status = MLX5_GET(query_hca_cap_out, out, status); ++ syndrome = MLX5_GET(query_hca_cap_out, out, syndrome); ++ if (status) { ++ DRV_LOG(DEBUG, ++ "Failed to query DevX HCA capabilities 2," ++ " status %x, syndrome = %x", status, syndrome); ++ return -1; ++ } ++ hcattr = MLX5_ADDR_OF(query_hca_cap_out, out, capability); ++ attr->log_min_stride_wqe_sz = MLX5_GET(cmd_hca_cap_2, hcattr, ++ log_min_stride_wqe_sz); ++ } ++ if (attr->log_min_stride_wqe_sz == 0) ++ attr->log_min_stride_wqe_sz = MLX5_MPRQ_LOG_MIN_STRIDE_WQE_SIZE; + if (attr->qos.sup) { + MLX5_SET(query_hca_cap_in, in, op_mod, + MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP | +@@ -839,9 +875,32 @@ mlx5_devx_cmd_query_hca_attr(void *ctx, + attr->tunnel_stateless_gtp = MLX5_GET + (per_protocol_networking_offload_caps, + hcattr, tunnel_stateless_gtp); +- if (attr->wqe_inline_mode != MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) +- return 0; +- if (attr->eth_virt) { ++ /* Query HCA attribute for ROCE. */ ++ if (attr->roce) { ++ memset(in, 0, sizeof(in)); ++ memset(out, 0, sizeof(out)); ++ MLX5_SET(query_hca_cap_in, in, opcode, ++ MLX5_CMD_OP_QUERY_HCA_CAP); ++ MLX5_SET(query_hca_cap_in, in, op_mod, ++ MLX5_GET_HCA_CAP_OP_MOD_ROCE | ++ MLX5_HCA_CAP_OPMOD_GET_CUR); ++ rc = mlx5_glue->devx_general_cmd(ctx, in, sizeof(in), ++ out, sizeof(out)); ++ if (rc) ++ goto error; ++ status = MLX5_GET(query_hca_cap_out, out, status); ++ syndrome = MLX5_GET(query_hca_cap_out, out, syndrome); ++ if (status) { ++ DRV_LOG(DEBUG, ++ "Failed to query devx HCA ROCE capabilities, " ++ "status %x, syndrome = %x", status, syndrome); ++ return -1; ++ } ++ hcattr = MLX5_ADDR_OF(query_hca_cap_out, out, capability); ++ attr->qp_ts_format = MLX5_GET(roce_caps, hcattr, qp_ts_format); ++ } ++ if (attr->eth_virt && ++ attr->wqe_inline_mode == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) { + rc = mlx5_devx_cmd_query_nic_vport_context(ctx, 0, attr); + if (rc) { + attr->eth_virt = 0; +@@ -982,6 +1041,7 @@ mlx5_devx_cmd_create_rq(void *ctx, + MLX5_SET(rqc, rq_ctx, cqn, rq_attr->cqn); + MLX5_SET(rqc, rq_ctx, counter_set_id, rq_attr->counter_set_id); + MLX5_SET(rqc, rq_ctx, rmpn, rq_attr->rmpn); ++ MLX5_SET(sqc, rq_ctx, ts_format, rq_attr->ts_format); + wq_ctx = MLX5_ADDR_OF(rqc, rq_ctx, wq); + wq_attr = &rq_attr->wq_attr; + devx_cmd_fill_wq_data(wq_ctx, wq_attr); +@@ -1354,6 +1414,7 @@ mlx5_devx_cmd_create_sq(void *ctx, + sq_attr->packet_pacing_rate_limit_index); + MLX5_SET(sqc, sq_ctx, tis_lst_sz, sq_attr->tis_lst_sz); + MLX5_SET(sqc, sq_ctx, tis_num_0, sq_attr->tis_num); ++ MLX5_SET(sqc, sq_ctx, ts_format, sq_attr->ts_format); + wq_ctx = MLX5_ADDR_OF(sqc, sq_ctx, wq); + wq_attr = &sq_attr->wq_attr; + devx_cmd_fill_wq_data(wq_ctx, wq_attr); +@@ -1502,7 +1563,7 @@ mlx5_devx_cmd_create_td(void *ctx) + * Pointer to file stream. + * + * @return +- * 0 on success, a nagative value otherwise. ++ * 0 on success, a negative value otherwise. 
+ */ + int + mlx5_devx_cmd_flow_dump(void *fdb_domain __rte_unused, +@@ -1800,6 +1861,7 @@ mlx5_devx_cmd_create_qp(void *ctx, + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pd, attr->pd); ++ MLX5_SET(qpc, qpc, ts_format, attr->ts_format); + if (attr->uar_index) { + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, uar_page, attr->uar_index); +@@ -1913,7 +1975,7 @@ mlx5_devx_cmd_modify_qp_state(struct mlx5_devx_obj *qp, uint32_t qp_st_mod_op, + case MLX5_CMD_OP_RTR2RTS_QP: + qpc = MLX5_ADDR_OF(rtr2rts_qp_in, &in, qpc); + MLX5_SET(rtr2rts_qp_in, &in, qpn, qp->id); +- MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 14); ++ MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 16); + MLX5_SET(qpc, qpc, log_ack_req_freq, 0); + MLX5_SET(qpc, qpc, retry_count, 7); + MLX5_SET(qpc, qpc, rnr_retry, 7); +@@ -2049,3 +2111,104 @@ mlx5_devx_cmd_create_flow_hit_aso_obj(void *ctx, uint32_t pd) + flow_hit_aso_obj->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return flow_hit_aso_obj; + } ++ ++int ++mlx5_devx_cmd_wq_query(void *wq, uint32_t *counter_set_id) ++{ ++#ifdef HAVE_IBV_FLOW_DV_SUPPORT ++ uint32_t in[MLX5_ST_SZ_DW(query_rq_in)] = {0}; ++ uint32_t out[MLX5_ST_SZ_DW(query_rq_out)] = {0}; ++ int rc; ++ void *rq_ctx; ++ ++ MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ); ++ MLX5_SET(query_rq_in, in, rqn, ((struct ibv_wq *)wq)->wq_num); ++ rc = mlx5_glue->devx_wq_query(wq, in, sizeof(in), out, sizeof(out)); ++ if (rc) { ++ rte_errno = errno; ++ DRV_LOG(ERR, "Failed to query WQ counter set ID using DevX - " ++ "rc = %d, errno = %d.", rc, errno); ++ return -rc; ++ }; ++ rq_ctx = MLX5_ADDR_OF(query_rq_out, out, rq_context); ++ *counter_set_id = MLX5_GET(rqc, rq_ctx, counter_set_id); ++ return 0; ++#else ++ (void)wq; ++ (void)counter_set_id; ++ return -ENOTSUP; ++#endif ++} ++ ++/* ++ * Allocate queue counters via devx interface. ++ * ++ * @param[in] ctx ++ * Context returned from mlx5 open_device() glue function. ++ * ++ * @return ++ * Pointer to counter object on success, a NULL value otherwise and ++ * rte_errno is set. ++ */ ++struct mlx5_devx_obj * ++mlx5_devx_cmd_queue_counter_alloc(void *ctx) ++{ ++ struct mlx5_devx_obj *dcs = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*dcs), 0, ++ SOCKET_ID_ANY); ++ uint32_t in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0}; ++ uint32_t out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0}; ++ ++ if (!dcs) { ++ rte_errno = ENOMEM; ++ return NULL; ++ } ++ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); ++ dcs->obj = mlx5_glue->devx_obj_create(ctx, in, sizeof(in), out, ++ sizeof(out)); ++ if (!dcs->obj) { ++ DRV_LOG(DEBUG, "Can't allocate q counter set by DevX - error " ++ "%d.", errno); ++ rte_errno = errno; ++ mlx5_free(dcs); ++ return NULL; ++ } ++ dcs->id = MLX5_GET(alloc_q_counter_out, out, counter_set_id); ++ return dcs; ++} ++ ++/** ++ * Query queue counters values. ++ * ++ * @param[in] dcs ++ * devx object of the queue counter set. ++ * @param[in] clear ++ * Whether hardware should clear the counters after the query or not. ++ * @param[out] out_of_buffers ++ * Number of dropped occurred due to lack of WQE for the associated QPs/RQs. ++ * ++ * @return ++ * 0 on success, a negative value otherwise. 
++ */ ++int ++mlx5_devx_cmd_queue_counter_query(struct mlx5_devx_obj *dcs, int clear, ++ uint32_t *out_of_buffers) ++{ ++ uint32_t out[MLX5_ST_SZ_BYTES(query_q_counter_out)] = {0}; ++ uint32_t in[MLX5_ST_SZ_DW(query_q_counter_in)] = {0}; ++ int rc; ++ ++ MLX5_SET(query_q_counter_in, in, opcode, ++ MLX5_CMD_OP_QUERY_Q_COUNTER); ++ MLX5_SET(query_q_counter_in, in, op_mod, 0); ++ MLX5_SET(query_q_counter_in, in, counter_set_id, dcs->id); ++ MLX5_SET(query_q_counter_in, in, clear, !!clear); ++ rc = mlx5_glue->devx_obj_query(dcs->obj, in, sizeof(in), out, ++ sizeof(out)); ++ if (rc) { ++ DRV_LOG(ERR, "Failed to query devx q counter set - rc %d", rc); ++ rte_errno = rc; ++ return -rc; ++ } ++ *out_of_buffers = MLX5_GET(query_q_counter_out, out, out_of_buffer); ++ return 0; ++} +diff --git a/dpdk/drivers/common/mlx5/mlx5_devx_cmds.h b/dpdk/drivers/common/mlx5/mlx5_devx_cmds.h +index 78202eba9d..c4ead8a724 100644 +--- a/dpdk/drivers/common/mlx5/mlx5_devx_cmds.h ++++ b/dpdk/drivers/common/mlx5/mlx5_devx_cmds.h +@@ -79,6 +79,7 @@ struct mlx5_hca_attr { + uint32_t eswitch_manager:1; + uint32_t flow_counters_dump:1; + uint32_t log_max_rqt_size:5; ++ uint32_t log_min_stride_wqe_sz:5; + uint32_t parse_graph_flex_node:1; + uint8_t flow_counter_bulk_alloc_bitmap; + uint32_t eth_net_offloads:1; +@@ -112,6 +113,10 @@ struct mlx5_hca_attr { + uint32_t dev_freq_khz; /* Timestamp counter frequency, kHz. */ + uint32_t scatter_fcs_w_decap_disable:1; + uint32_t flow_hit_aso:1; /* General obj type FLOW_HIT_ASO supported. */ ++ uint32_t roce:1; ++ uint32_t rq_ts_format:2; ++ uint32_t sq_ts_format:2; ++ uint32_t qp_ts_format:2; + uint32_t regex:1; + uint32_t regexp_num_of_engines; + uint32_t log_max_ft_sampler_num:8; +@@ -161,6 +166,7 @@ struct mlx5_devx_create_rq_attr { + uint32_t state:4; + uint32_t flush_in_error_en:1; + uint32_t hairpin:1; ++ uint32_t ts_format:2; + uint32_t user_index:24; + uint32_t cqn:24; + uint32_t counter_set_id:8; +@@ -244,6 +250,7 @@ struct mlx5_devx_create_sq_attr { + uint32_t hairpin:1; + uint32_t non_wire:1; + uint32_t static_sq_wq:1; ++ uint32_t ts_format:2; + uint32_t user_index:24; + uint32_t cqn:24; + uint32_t packet_pacing_rate_limit_index:16; +@@ -324,6 +331,7 @@ struct mlx5_devx_qp_attr { + uint32_t rq_size:17; /* Must be power of 2. */ + uint32_t log_rq_stride:3; + uint32_t sq_size:17; /* Must be power of 2. */ ++ uint32_t ts_format:2; + uint32_t dbr_umem_valid:1; + uint32_t dbr_umem_id; + uint64_t dbr_address; +@@ -502,4 +510,13 @@ __rte_internal + struct mlx5_devx_obj *mlx5_devx_cmd_create_flow_hit_aso_obj(void *ctx, + uint32_t pd); + ++ ++__rte_internal ++int mlx5_devx_cmd_wq_query(void *wq, uint32_t *counter_set_id); ++ ++__rte_internal ++struct mlx5_devx_obj *mlx5_devx_cmd_queue_counter_alloc(void *ctx); ++__rte_internal ++int mlx5_devx_cmd_queue_counter_query(struct mlx5_devx_obj *dcs, int clear, ++ uint32_t *out_of_buffers); + #endif /* RTE_PMD_MLX5_DEVX_CMDS_H_ */ +diff --git a/dpdk/drivers/common/mlx5/mlx5_malloc.c b/dpdk/drivers/common/mlx5/mlx5_malloc.c +index 5a3267f730..faba0e2556 100644 +--- a/dpdk/drivers/common/mlx5/mlx5_malloc.c ++++ b/dpdk/drivers/common/mlx5/mlx5_malloc.c +@@ -57,7 +57,7 @@ static struct mlx5_sys_mem mlx5_sys_mem = { + * Check if the address belongs to memory seg list. + * + * @param addr +- * Memory address to be ckeced. ++ * Memory address to be checked. + * @param msl + * Memory seg list. + * +@@ -108,7 +108,7 @@ mlx5_mem_update_msl(void *addr) + * Check if the address belongs to rte memory. 
+ * + * @param addr +- * Memory address to be ckeced. ++ * Memory address to be checked. + * + * @return + * True if it belongs, false otherwise. +diff --git a/dpdk/drivers/common/mlx5/mlx5_malloc.h b/dpdk/drivers/common/mlx5/mlx5_malloc.h +index 8aea414635..fbc129edf1 100644 +--- a/dpdk/drivers/common/mlx5/mlx5_malloc.h ++++ b/dpdk/drivers/common/mlx5/mlx5_malloc.h +@@ -19,7 +19,7 @@ extern "C" { + + enum mlx5_mem_flags { + MLX5_MEM_ANY = 0, +- /* Memory will be allocated dpends on sys_mem_en. */ ++ /* Memory will be allocated depends on sys_mem_en. */ + MLX5_MEM_SYS = 1 << 0, + /* Memory should be allocated from system. */ + MLX5_MEM_RTE = 1 << 1, +diff --git a/dpdk/drivers/common/mlx5/mlx5_prm.h b/dpdk/drivers/common/mlx5/mlx5_prm.h +index 00b425ac85..ca3b2a1a4c 100644 +--- a/dpdk/drivers/common/mlx5/mlx5_prm.h ++++ b/dpdk/drivers/common/mlx5/mlx5_prm.h +@@ -264,6 +264,9 @@ + /* The maximum log value of segments per RQ WQE. */ + #define MLX5_MAX_LOG_RQ_SEGS 5u + ++/* Log 2 of the default size of a WQE for Multi-Packet RQ. */ ++#define MLX5_MPRQ_LOG_MIN_STRIDE_WQE_SIZE 14U ++ + /* The alignment needed for WQ buffer. */ + #define MLX5_WQE_BUF_ALIGNMENT rte_mem_page_size() + +@@ -843,6 +846,8 @@ enum { + MLX5_CMD_OP_SUSPEND_QP = 0x50F, + MLX5_CMD_OP_RESUME_QP = 0x510, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754, ++ MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, ++ MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, + MLX5_CMD_OP_ACCESS_REGISTER = 0x805, + MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN = 0x816, + MLX5_CMD_OP_CREATE_TIR = 0x900, +@@ -851,6 +856,7 @@ enum { + MLX5_CMD_OP_MODIFY_SQ = 0X905, + MLX5_CMD_OP_CREATE_RQ = 0x908, + MLX5_CMD_OP_MODIFY_RQ = 0x909, ++ MLX5_CMD_OP_QUERY_RQ = 0x90b, + MLX5_CMD_OP_CREATE_TIS = 0x912, + MLX5_CMD_OP_QUERY_TIS = 0x915, + MLX5_CMD_OP_CREATE_RQT = 0x916, +@@ -1053,8 +1059,10 @@ enum { + MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0 << 1, + MLX5_GET_HCA_CAP_OP_MOD_ETHERNET_OFFLOAD_CAPS = 0x1 << 1, + MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP = 0xc << 1, ++ MLX5_GET_HCA_CAP_OP_MOD_ROCE = 0x4 << 1, + MLX5_GET_HCA_CAP_OP_MOD_NIC_FLOW_TABLE = 0x7 << 1, + MLX5_GET_HCA_CAP_OP_MOD_VDPA_EMULATION = 0x13 << 1, ++ MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE_2 = 0x20 << 1, + }; + + #define MLX5_GENERAL_OBJ_TYPES_CAP_VIRTQ_NET_Q \ +@@ -1088,6 +1096,20 @@ enum { + MLX5_INLINE_MODE_INNER_TCP_UDP, + }; + ++/* The supported timestamp formats reported in HCA attributes. */ ++enum { ++ MLX5_HCA_CAP_TIMESTAMP_FORMAT_FR = 0x0, ++ MLX5_HCA_CAP_TIMESTAMP_FORMAT_RT = 0x1, ++ MLX5_HCA_CAP_TIMESTAMP_FORMAT_FR_RT = 0x2, ++}; ++ ++/* The timestamp format attributes to configure queues (RQ/SQ/QP). */ ++enum { ++ MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING = 0x0, ++ MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT = 0x1, ++ MLX5_QPC_TIMESTAMP_FORMAT_REAL_TIME = 0x2, ++}; ++ + /* HCA bit masks indicating which Flex parser protocols are already enabled. 
*/ + #define MLX5_HCA_FLEX_IPV4_OVER_VXLAN_ENABLED (1UL << 0) + #define MLX5_HCA_FLEX_IPV6_OVER_VXLAN_ENABLED (1UL << 1) +@@ -1101,7 +1123,9 @@ enum { + #define MLX5_HCA_FLEX_ICMPV6_ENABLED (1UL << 9) + + struct mlx5_ifc_cmd_hca_cap_bits { +- u8 reserved_at_0[0x30]; ++ u8 reserved_at_0[0x20]; ++ u8 hca_cap_2[0x1]; ++ u8 reserved_at_21[0xf]; + u8 vhca_id[0x10]; + u8 reserved_at_40[0x40]; + u8 log_max_srq_sz[0x8]; +@@ -1122,13 +1146,14 @@ struct mlx5_ifc_cmd_hca_cap_bits { + u8 reserved_at_bc[0x4]; + u8 reserved_at_c0[0x8]; + u8 log_max_cq_sz[0x8]; +- u8 reserved_at_d0[0xb]; ++ u8 reserved_at_d0[0x2]; ++ u8 access_register_user[0x1]; ++ u8 reserved_at_d3[0x8]; + u8 log_max_cq[0x5]; + u8 log_max_eq_sz[0x8]; + u8 relaxed_ordering_write[0x1]; + u8 relaxed_ordering_read[0x1]; +- u8 access_register_user[0x1]; +- u8 log_max_mkey[0x5]; ++ u8 log_max_mkey[0x6]; + u8 reserved_at_f0[0x8]; + u8 dump_fill_mkey[0x1]; + u8 reserved_at_f9[0x3]; +@@ -1350,7 +1375,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { + u8 reserved_at_3f8[0x3]; + u8 log_max_current_uc_list[0x5]; + u8 general_obj_types[0x40]; +- u8 reserved_at_440[0x20]; ++ u8 sq_ts_format[0x2]; ++ u8 rq_ts_format[0x2]; ++ u8 reserved_at_444[0x1C]; + u8 reserved_at_460[0x10]; + u8 max_num_eqs[0x10]; + u8 reserved_at_480[0x3]; +@@ -1540,18 +1567,55 @@ struct mlx5_ifc_flow_table_prop_layout_bits { + u8 reserved_at_c0[0x140]; + }; + ++struct mlx5_ifc_roce_caps_bits { ++ u8 reserved_0[0x1e]; ++ u8 qp_ts_format[0x2]; ++ u8 reserved_at_20[0x7e0]; ++}; ++ + struct mlx5_ifc_flow_table_nic_cap_bits { + u8 reserved_at_0[0x200]; + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties; + }; + ++/* ++ * HCA Capabilities 2 ++ */ ++struct mlx5_ifc_cmd_hca_cap_2_bits { ++ u8 reserved_at_0[0x80]; /* End of DW4. */ ++ u8 reserved_at_80[0x3]; ++ u8 max_num_prog_sample_field[0x5]; ++ u8 reserved_at_88[0x3]; ++ u8 log_max_num_reserved_qpn[0x5]; ++ u8 reserved_at_90[0x3]; ++ u8 log_reserved_qpn_granularity[0x5]; ++ u8 reserved_at_98[0x3]; ++ u8 log_reserved_qpn_max_alloc[0x5]; /* End of DW5. */ ++ u8 max_reformat_insert_size[0x8]; ++ u8 max_reformat_insert_offset[0x8]; ++ u8 max_reformat_remove_size[0x8]; ++ u8 max_reformat_remove_offset[0x8]; /* End of DW6. */ ++ u8 reserved_at_c0[0x3]; ++ u8 log_min_stride_wqe_sz[0x5]; ++ u8 reserved_at_c8[0x3]; ++ u8 log_conn_track_granularity[0x5]; ++ u8 reserved_at_d0[0x3]; ++ u8 log_conn_track_max_alloc[0x5]; ++ u8 reserved_at_d8[0x3]; ++ u8 log_max_conn_track_offload[0x5]; ++ u8 reserved_at_e0[0x20]; /* End of DW7. 
*/ ++ u8 reserved_at_100[0x700]; ++}; ++ + union mlx5_ifc_hca_cap_union_bits { + struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap; ++ struct mlx5_ifc_cmd_hca_cap_2_bits cmd_hca_cap_2; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits + per_protocol_networking_offload_caps; + struct mlx5_ifc_qos_cap_bits qos_cap; + struct mlx5_ifc_virtio_emulation_cap_bits vdpa_caps; + struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; ++ struct mlx5_ifc_roce_caps_bits roce_caps; + u8 reserved_at_0[0x8000]; + }; + +@@ -1768,7 +1832,9 @@ struct mlx5_ifc_rqc_bits { + u8 reserved_at_c[0x1]; + u8 flush_in_error_en[0x1]; + u8 hairpin[0x1]; +- u8 reserved_at_f[0x11]; ++ u8 reserved_at_f[0xB]; ++ u8 ts_format[0x02]; ++ u8 reserved_at_1c[0x4]; + u8 reserved_at_20[0x8]; + u8 user_index[0x18]; + u8 reserved_at_40[0x8]; +@@ -1810,6 +1876,24 @@ struct mlx5_ifc_modify_rq_out_bits { + u8 reserved_at_40[0x40]; + }; + ++struct mlx5_ifc_query_rq_out_bits { ++ u8 status[0x8]; ++ u8 reserved_at_8[0x18]; ++ u8 syndrome[0x20]; ++ u8 reserved_at_40[0xc0]; ++ struct mlx5_ifc_rqc_bits rq_context; ++}; ++ ++struct mlx5_ifc_query_rq_in_bits { ++ u8 opcode[0x10]; ++ u8 reserved_at_10[0x10]; ++ u8 reserved_at_20[0x10]; ++ u8 op_mod[0x10]; ++ u8 reserved_at_40[0x8]; ++ u8 rqn[0x18]; ++ u8 reserved_at_60[0x20]; ++}; ++ + struct mlx5_ifc_create_tis_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; +@@ -2056,7 +2140,9 @@ struct mlx5_ifc_sqc_bits { + u8 hairpin[0x1]; + u8 non_wire[0x1]; + u8 static_sq_wq[0x1]; +- u8 reserved_at_11[0xf]; ++ u8 reserved_at_11[0x9]; ++ u8 ts_format[0x02]; ++ u8 reserved_at_1c[0x4]; + u8 reserved_at_20[0x8]; + u8 user_index[0x18]; + u8 reserved_at_40[0x8]; +@@ -2518,7 +2604,9 @@ struct mlx5_ifc_qpc_bits { + u8 log_rq_stride[0x3]; + u8 no_sq[0x1]; + u8 log_sq_size[0x4]; +- u8 reserved_at_55[0x6]; ++ u8 reserved_at_55[0x3]; ++ u8 ts_format[0x2]; ++ u8 reserved_at_5a[0x1]; + u8 rlky[0x1]; + u8 ulp_stateless_offload_mode[0x4]; + u8 counter_set_id[0x8]; +@@ -3015,6 +3103,85 @@ struct mlx5_ifc_query_regexp_register_out_bits { + u8 register_data[0x20]; + }; + ++/* Queue counters. 
*/ ++struct mlx5_ifc_alloc_q_counter_out_bits { ++ u8 status[0x8]; ++ u8 reserved_at_8[0x18]; ++ u8 syndrome[0x20]; ++ u8 reserved_at_40[0x18]; ++ u8 counter_set_id[0x8]; ++ u8 reserved_at_60[0x20]; ++}; ++ ++struct mlx5_ifc_alloc_q_counter_in_bits { ++ u8 opcode[0x10]; ++ u8 uid[0x10]; ++ u8 reserved_at_20[0x10]; ++ u8 op_mod[0x10]; ++ u8 reserved_at_40[0x40]; ++}; ++ ++struct mlx5_ifc_query_q_counter_out_bits { ++ u8 status[0x8]; ++ u8 reserved_at_8[0x18]; ++ u8 syndrome[0x20]; ++ u8 reserved_at_40[0x40]; ++ u8 rx_write_requests[0x20]; ++ u8 reserved_at_a0[0x20]; ++ u8 rx_read_requests[0x20]; ++ u8 reserved_at_e0[0x20]; ++ u8 rx_atomic_requests[0x20]; ++ u8 reserved_at_120[0x20]; ++ u8 rx_dct_connect[0x20]; ++ u8 reserved_at_160[0x20]; ++ u8 out_of_buffer[0x20]; ++ u8 reserved_at_1a0[0x20]; ++ u8 out_of_sequence[0x20]; ++ u8 reserved_at_1e0[0x20]; ++ u8 duplicate_request[0x20]; ++ u8 reserved_at_220[0x20]; ++ u8 rnr_nak_retry_err[0x20]; ++ u8 reserved_at_260[0x20]; ++ u8 packet_seq_err[0x20]; ++ u8 reserved_at_2a0[0x20]; ++ u8 implied_nak_seq_err[0x20]; ++ u8 reserved_at_2e0[0x20]; ++ u8 local_ack_timeout_err[0x20]; ++ u8 reserved_at_320[0xa0]; ++ u8 resp_local_length_error[0x20]; ++ u8 req_local_length_error[0x20]; ++ u8 resp_local_qp_error[0x20]; ++ u8 local_operation_error[0x20]; ++ u8 resp_local_protection[0x20]; ++ u8 req_local_protection[0x20]; ++ u8 resp_cqe_error[0x20]; ++ u8 req_cqe_error[0x20]; ++ u8 req_mw_binding[0x20]; ++ u8 req_bad_response[0x20]; ++ u8 req_remote_invalid_request[0x20]; ++ u8 resp_remote_invalid_request[0x20]; ++ u8 req_remote_access_errors[0x20]; ++ u8 resp_remote_access_errors[0x20]; ++ u8 req_remote_operation_errors[0x20]; ++ u8 req_transport_retries_exceeded[0x20]; ++ u8 cq_overflow[0x20]; ++ u8 resp_cqe_flush_error[0x20]; ++ u8 req_cqe_flush_error[0x20]; ++ u8 reserved_at_620[0x1e0]; ++}; ++ ++struct mlx5_ifc_query_q_counter_in_bits { ++ u8 opcode[0x10]; ++ u8 uid[0x10]; ++ u8 reserved_at_20[0x10]; ++ u8 op_mod[0x10]; ++ u8 reserved_at_40[0x80]; ++ u8 clear[0x1]; ++ u8 reserved_at_c1[0x1f]; ++ u8 reserved_at_e0[0x18]; ++ u8 counter_set_id[0x8]; ++}; ++ + /* CQE format mask. */ + #define MLX5E_CQE_FORMAT_MASK 0xc + +@@ -3161,4 +3328,21 @@ mlx5_flow_mark_get(uint32_t val) + #endif + } + ++/** ++ * Convert a timestamp format to configure settings in the queue context. ++ * ++ * @param val ++ * timestamp format supported by the queue. ++ * ++ * @return ++ * Converted timestamp format settings. ++ */ ++static inline uint32_t ++mlx5_ts_format_conv(uint32_t ts_format) ++{ ++ return ts_format == MLX5_HCA_CAP_TIMESTAMP_FORMAT_FR ? 
++ MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING : ++ MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT; ++} ++ + #endif /* RTE_PMD_MLX5_PRM_H_ */ +diff --git a/dpdk/drivers/common/mlx5/version.map b/dpdk/drivers/common/mlx5/version.map +index 17dd11f635..6c9956c8f1 100644 +--- a/dpdk/drivers/common/mlx5/version.map ++++ b/dpdk/drivers/common/mlx5/version.map +@@ -21,7 +21,7 @@ INTERNAL { + mlx5_devx_cmd_create_tis; + mlx5_devx_cmd_create_virtio_q_counters; + mlx5_devx_cmd_create_virtq; +- mlx5_devx_cmd_create_flow_hit_aso_obj; ++ mlx5_devx_cmd_create_flow_hit_aso_obj; + mlx5_devx_cmd_destroy; + mlx5_devx_cmd_flow_counter_alloc; + mlx5_devx_cmd_flow_counter_query; +@@ -38,7 +38,10 @@ INTERNAL { + mlx5_devx_cmd_query_parse_samples; + mlx5_devx_cmd_query_virtio_q_counters; + mlx5_devx_cmd_query_virtq; ++ mlx5_devx_cmd_queue_counter_alloc; ++ mlx5_devx_cmd_queue_counter_query; + mlx5_devx_cmd_register_read; ++ mlx5_devx_cmd_wq_query; + mlx5_devx_get_out_command_status; + mlx5_devx_alloc_uar; + +@@ -66,6 +69,7 @@ INTERNAL { + mlx5_mr_create_primary; + mlx5_mr_flush_local_cache; + mlx5_mr_free; ++ mlx5_free_mr_by_addr; + + mlx5_nl_allmulti; + mlx5_nl_devlink_family_id_get; +@@ -85,6 +89,9 @@ INTERNAL { + mlx5_nl_vlan_vmwa_create; + mlx5_nl_vlan_vmwa_delete; + ++ mlx5_os_wrapped_mkey_create; ++ mlx5_os_wrapped_mkey_destroy; ++ + mlx5_release_dbr; + + mlx5_translate_port_name; +@@ -96,4 +103,7 @@ INTERNAL { + mlx5_free; + + mlx5_pci_driver_register; ++ ++ mlx5_nl_parse_link_status_update; ++ mlx5_nl_read_events; + }; +diff --git a/dpdk/drivers/common/octeontx2/otx2_dev.h b/dpdk/drivers/common/octeontx2/otx2_dev.h +index cd4fe517db..9d8dcca791 100644 +--- a/dpdk/drivers/common/octeontx2/otx2_dev.h ++++ b/dpdk/drivers/common/octeontx2/otx2_dev.h +@@ -55,6 +55,9 @@ + (RVU_PCI_REV_MINOR(otx2_dev_revid(dev)) == 0x0) && \ + (RVU_PCI_REV_MIDR_ID(otx2_dev_revid(dev)) == 0x0)) + ++#define otx2_dev_is_98xx(dev) \ ++ (RVU_PCI_REV_MIDR_ID(otx2_dev_revid(dev)) == 0x3) ++ + struct otx2_dev; + + /* Link status callback */ +diff --git a/dpdk/drivers/common/octeontx2/otx2_mbox.h b/dpdk/drivers/common/octeontx2/otx2_mbox.h +index f6d884c198..781c18180d 100644 +--- a/dpdk/drivers/common/octeontx2/otx2_mbox.h ++++ b/dpdk/drivers/common/octeontx2/otx2_mbox.h +@@ -90,7 +90,7 @@ struct mbox_msghdr { + #define OTX2_MBOX_RSP_SIG (0xbeef) + /* Signature, for validating corrupted msgs */ + uint16_t __otx2_io sig; +-#define OTX2_MBOX_VERSION (0x000a) ++#define OTX2_MBOX_VERSION (0x000b) + /* Version of msg's structure for this ID */ + uint16_t __otx2_io ver; + /* Offset of next msg within mailbox region */ +@@ -177,6 +177,8 @@ M(SSO_GRP_GET_STATS, 0x609, sso_grp_get_stats, sso_info_req, \ + sso_grp_stats) \ + M(SSO_HWS_GET_STATS, 0x610, sso_hws_get_stats, sso_info_req, \ + sso_hws_stats) \ ++M(SSO_HW_RELEASE_XAQ, 0x611, sso_hw_release_xaq_aura, \ ++ sso_release_xaq, msg_rsp) \ + /* TIM mbox IDs (range 0x800 - 0x9FF) */ \ + M(TIM_LF_ALLOC, 0x800, tim_lf_alloc, tim_lf_alloc_req, \ + tim_lf_alloc_rsp) \ +@@ -366,6 +368,15 @@ struct npc_set_pkind { + #define PKIND_RX BIT_ULL(1) + uint8_t __otx2_io dir; + uint8_t __otx2_io pkind; /* valid only in case custom flag */ ++ uint8_t __otx2_io var_len_off; ++ /* Offset of custom header length field. ++ * Valid only for pkind NPC_RX_CUSTOM_PRE_L2_PKIND ++ */ ++ uint8_t __otx2_io var_len_off_mask; /* Mask for length with in offset */ ++ uint8_t __otx2_io shift_dir; ++ /* Shift direction to get length of the ++ * header at var_len_off ++ */ + }; + + /* Structure for requesting resource provisioning. 
+@@ -1177,6 +1188,11 @@ struct sso_hw_setconfig { + uint16_t __otx2_io hwgrps; + }; + ++struct sso_release_xaq { ++ struct mbox_msghdr hdr; ++ uint16_t __otx2_io hwgrps; ++}; ++ + struct sso_info_req { + struct mbox_msghdr hdr; + union { +diff --git a/dpdk/drivers/common/qat/qat_adf/adf_transport_access_macros.h b/dpdk/drivers/common/qat/qat_adf/adf_transport_access_macros.h +index 504ffb7236..51f8768aca 100644 +--- a/dpdk/drivers/common/qat/qat_adf/adf_transport_access_macros.h ++++ b/dpdk/drivers/common/qat/qat_adf/adf_transport_access_macros.h +@@ -70,7 +70,7 @@ + #define ADF_SIZE_TO_RING_SIZE_IN_BYTES(SIZE) ((1 << (SIZE - 1)) << 7) + #define ADF_RING_SIZE_IN_BYTES_TO_SIZE(SIZE) ((1 << (SIZE - 1)) >> 7) + +-/* Minimum ring bufer size for memory allocation */ ++/* Minimum ring buffer size for memory allocation */ + #define ADF_RING_SIZE_BYTES_MIN(SIZE) ((SIZE < ADF_RING_SIZE_4K) ? \ + ADF_RING_SIZE_4K : SIZE) + #define ADF_RING_SIZE_MODULO(SIZE) (SIZE + 0x6) +diff --git a/dpdk/drivers/common/qat/qat_device.h b/dpdk/drivers/common/qat/qat_device.h +index e6337c688d..9c6a3ca4e6 100644 +--- a/dpdk/drivers/common/qat/qat_device.h ++++ b/dpdk/drivers/common/qat/qat_device.h +@@ -29,7 +29,7 @@ struct qat_dev_cmd_param { + enum qat_comp_num_im_buffers { + QAT_NUM_INTERM_BUFS_GEN1 = 12, + QAT_NUM_INTERM_BUFS_GEN2 = 20, +- QAT_NUM_INTERM_BUFS_GEN3 = 20 ++ QAT_NUM_INTERM_BUFS_GEN3 = 64 + }; + + struct qat_device_info { +diff --git a/dpdk/drivers/common/sfc_efx/base/ef10_filter.c b/dpdk/drivers/common/sfc_efx/base/ef10_filter.c +index 0e5f04fe3b..5158e07cc9 100644 +--- a/dpdk/drivers/common/sfc_efx/base/ef10_filter.c ++++ b/dpdk/drivers/common/sfc_efx/base/ef10_filter.c +@@ -1225,20 +1225,25 @@ efx_mcdi_get_parser_disp_info( + goto fail1; + } + ++ if (req.emr_out_length_used < MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMIN) { ++ rc = EMSGSIZE; ++ goto fail2; ++ } ++ + matches_count = MCDI_OUT_DWORD(req, + GET_PARSER_DISP_INFO_OUT_NUM_SUPPORTED_MATCHES); + + if (req.emr_out_length_used < + MC_CMD_GET_PARSER_DISP_INFO_OUT_LEN(matches_count)) { + rc = EMSGSIZE; +- goto fail2; ++ goto fail3; + } + + *list_lengthp = matches_count; + + if (buffer_length < matches_count) { + rc = ENOSPC; +- goto fail3; ++ goto fail4; + } + + /* +@@ -1258,6 +1263,8 @@ efx_mcdi_get_parser_disp_info( + + return (0); + ++fail4: ++ EFSYS_PROBE(fail4); + fail3: + EFSYS_PROBE(fail3); + fail2: +diff --git a/dpdk/drivers/common/sfc_efx/base/ef10_nic.c b/dpdk/drivers/common/sfc_efx/base/ef10_nic.c +index 9dccde9576..ccce7b7437 100644 +--- a/dpdk/drivers/common/sfc_efx/base/ef10_nic.c ++++ b/dpdk/drivers/common/sfc_efx/base/ef10_nic.c +@@ -491,11 +491,17 @@ efx_mcdi_get_rxdp_config( + req.emr_out_length = MC_CMD_GET_RXDP_CONFIG_OUT_LEN; + + efx_mcdi_execute(enp, &req); ++ + if (req.emr_rc != 0) { + rc = req.emr_rc; + goto fail1; + } + ++ if (req.emr_out_length_used < MC_CMD_GET_RXDP_CONFIG_OUT_LEN) { ++ rc = EMSGSIZE; ++ goto fail2; ++ } ++ + if (MCDI_OUT_DWORD_FIELD(req, GET_RXDP_CONFIG_OUT_DATA, + GET_RXDP_CONFIG_OUT_PAD_HOST_DMA) == 0) { + /* RX DMA end padding is disabled */ +@@ -514,7 +520,7 @@ efx_mcdi_get_rxdp_config( + break; + default: + rc = ENOTSUP; +- goto fail2; ++ goto fail3; + } + } + +@@ -522,6 +528,8 @@ efx_mcdi_get_rxdp_config( + + return (0); + ++fail3: ++ EFSYS_PROBE(fail3); + fail2: + EFSYS_PROBE(fail2); + fail1: +diff --git a/dpdk/drivers/common/sfc_efx/base/efx_mae.c b/dpdk/drivers/common/sfc_efx/base/efx_mae.c +index 338a0013f9..4bed2d4ab7 100644 +--- a/dpdk/drivers/common/sfc_efx/base/efx_mae.c ++++ 
b/dpdk/drivers/common/sfc_efx/base/efx_mae.c +@@ -47,17 +47,20 @@ efx_mae_get_capabilities( + + maep->em_encap_types_supported = 0; + +- if (MCDI_OUT_DWORD(req, MAE_GET_CAPS_OUT_ENCAP_TYPE_VXLAN) == 1) { ++ if (MCDI_OUT_DWORD_FIELD(req, MAE_GET_CAPS_OUT_ENCAP_TYPES_SUPPORTED, ++ MAE_GET_CAPS_OUT_ENCAP_TYPE_VXLAN) != 0) { + maep->em_encap_types_supported |= + (1U << EFX_TUNNEL_PROTOCOL_VXLAN); + } + +- if (MCDI_OUT_DWORD(req, MAE_GET_CAPS_OUT_ENCAP_TYPE_GENEVE) == 1) { ++ if (MCDI_OUT_DWORD_FIELD(req, MAE_GET_CAPS_OUT_ENCAP_TYPES_SUPPORTED, ++ MAE_GET_CAPS_OUT_ENCAP_TYPE_GENEVE) != 0) { + maep->em_encap_types_supported |= + (1U << EFX_TUNNEL_PROTOCOL_GENEVE); + } + +- if (MCDI_OUT_DWORD(req, MAE_GET_CAPS_OUT_ENCAP_TYPE_NVGRE) == 1) { ++ if (MCDI_OUT_DWORD_FIELD(req, MAE_GET_CAPS_OUT_ENCAP_TYPES_SUPPORTED, ++ MAE_GET_CAPS_OUT_ENCAP_TYPE_NVGRE) != 0) { + maep->em_encap_types_supported |= + (1U << EFX_TUNNEL_PROTOCOL_NVGRE); + } +@@ -107,17 +110,22 @@ efx_mae_get_outer_rule_caps( + goto fail2; + } + ++ if (req.emr_out_length_used < MC_CMD_MAE_GET_OR_CAPS_OUT_LENMIN) { ++ rc = EMSGSIZE; ++ goto fail3; ++ } ++ + mcdi_field_ncaps = MCDI_OUT_DWORD(req, MAE_GET_OR_CAPS_OUT_COUNT); + + if (req.emr_out_length_used < + MC_CMD_MAE_GET_OR_CAPS_OUT_LEN(mcdi_field_ncaps)) { + rc = EMSGSIZE; +- goto fail3; ++ goto fail4; + } + + if (mcdi_field_ncaps > field_ncaps) { + rc = EMSGSIZE; +- goto fail4; ++ goto fail5; + } + + for (i = 0; i < mcdi_field_ncaps; ++i) { +@@ -145,6 +153,8 @@ efx_mae_get_outer_rule_caps( + + return (0); + ++fail5: ++ EFSYS_PROBE(fail5); + fail4: + EFSYS_PROBE(fail4); + fail3: +@@ -189,17 +199,22 @@ efx_mae_get_action_rule_caps( + goto fail2; + } + +- mcdi_field_ncaps = MCDI_OUT_DWORD(req, MAE_GET_OR_CAPS_OUT_COUNT); ++ if (req.emr_out_length_used < MC_CMD_MAE_GET_AR_CAPS_OUT_LENMIN) { ++ rc = EMSGSIZE; ++ goto fail3; ++ } ++ ++ mcdi_field_ncaps = MCDI_OUT_DWORD(req, MAE_GET_AR_CAPS_OUT_COUNT); + + if (req.emr_out_length_used < + MC_CMD_MAE_GET_AR_CAPS_OUT_LEN(mcdi_field_ncaps)) { + rc = EMSGSIZE; +- goto fail3; ++ goto fail4; + } + + if (mcdi_field_ncaps > field_ncaps) { + rc = EMSGSIZE; +- goto fail4; ++ goto fail5; + } + + for (i = 0; i < mcdi_field_ncaps; ++i) { +@@ -227,6 +242,8 @@ efx_mae_get_action_rule_caps( + + return (0); + ++fail5: ++ EFSYS_PROBE(fail5); + fail4: + EFSYS_PROBE(fail4); + fail3: +@@ -1647,15 +1664,22 @@ efx_mae_outer_rule_remove( + goto fail2; + } + ++ if (req.emr_out_length_used < MC_CMD_MAE_OUTER_RULE_REMOVE_OUT_LENMIN) { ++ rc = EMSGSIZE; ++ goto fail3; ++ } ++ + if (MCDI_OUT_DWORD(req, MAE_OUTER_RULE_REMOVE_OUT_REMOVED_OR_ID) != + or_idp->id) { + /* Firmware failed to remove the outer rule. */ + rc = EAGAIN; +- goto fail3; ++ goto fail4; + } + + return (0); + ++fail4: ++ EFSYS_PROBE(fail4); + fail3: + EFSYS_PROBE(fail3); + fail2: +@@ -1851,15 +1875,22 @@ efx_mae_action_set_free( + goto fail2; + } + ++ if (req.emr_out_length_used < MC_CMD_MAE_ACTION_SET_FREE_OUT_LENMIN) { ++ rc = EMSGSIZE; ++ goto fail3; ++ } ++ + if (MCDI_OUT_DWORD(req, MAE_ACTION_SET_FREE_OUT_FREED_AS_ID) != + aset_idp->id) { + /* Firmware failed to free the action set. 
*/ + rc = EAGAIN; +- goto fail3; ++ goto fail4; + } + + return (0); + ++fail4: ++ EFSYS_PROBE(fail4); + fail3: + EFSYS_PROBE(fail3); + fail2: +@@ -2001,15 +2032,23 @@ efx_mae_action_rule_remove( + goto fail2; + } + ++ if (req.emr_out_length_used < ++ MC_CMD_MAE_ACTION_RULE_DELETE_OUT_LENMIN) { ++ rc = EMSGSIZE; ++ goto fail3; ++ } ++ + if (MCDI_OUT_DWORD(req, MAE_ACTION_RULE_DELETE_OUT_DELETED_AR_ID) != + ar_idp->id) { + /* Firmware failed to delete the action rule. */ + rc = EAGAIN; +- goto fail3; ++ goto fail4; + } + + return (0); + ++fail4: ++ EFSYS_PROBE(fail4); + fail3: + EFSYS_PROBE(fail3); + fail2: +diff --git a/dpdk/drivers/common/sfc_efx/base/efx_mcdi.c b/dpdk/drivers/common/sfc_efx/base/efx_mcdi.c +index ca44267724..59e884dcc6 100644 +--- a/dpdk/drivers/common/sfc_efx/base/efx_mcdi.c ++++ b/dpdk/drivers/common/sfc_efx/base/efx_mcdi.c +@@ -516,6 +516,9 @@ efx_mcdi_finish_response( + bytes = MIN(emrp->emr_out_length_used, emrp->emr_out_length); + efx_mcdi_read_response(enp, emrp->emr_out_buf, resp_off, bytes); + ++ /* Report bytes copied to caller (response message may be larger) */ ++ emrp->emr_out_length_used = bytes; ++ + #if EFSYS_OPT_MCDI_LOGGING + if (emtp->emt_logger != NULL) { + emtp->emt_logger(emtp->emt_context, +@@ -2291,6 +2294,11 @@ efx_mcdi_get_workarounds( + goto fail1; + } + ++ if (req.emr_out_length_used < MC_CMD_GET_WORKAROUNDS_OUT_LEN) { ++ rc = EMSGSIZE; ++ goto fail2; ++ } ++ + if (implementedp != NULL) { + *implementedp = + MCDI_OUT_DWORD(req, GET_WORKAROUNDS_OUT_IMPLEMENTED); +@@ -2302,6 +2310,8 @@ efx_mcdi_get_workarounds( + + return (0); + ++fail2: ++ EFSYS_PROBE(fail2); + fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + +diff --git a/dpdk/drivers/common/sfc_efx/base/efx_pci.c b/dpdk/drivers/common/sfc_efx/base/efx_pci.c +index 1e7307476f..83c9e0449d 100644 +--- a/dpdk/drivers/common/sfc_efx/base/efx_pci.c ++++ b/dpdk/drivers/common/sfc_efx/base/efx_pci.c +@@ -310,7 +310,7 @@ efx_pci_xilinx_cap_tbl_find( + __in boolean_t skip_first, + __inout efsys_dma_addr_t *entry_offsetp) + { +- efsys_dma_addr_t offset = *entry_offsetp; ++ efsys_dma_addr_t offset; + boolean_t skip = skip_first; + efx_qword_t header; + uint32_t format; +@@ -322,6 +322,7 @@ efx_pci_xilinx_cap_tbl_find( + goto fail1; + } + ++ offset = *entry_offsetp; + rc = ENOENT; + /* + * SF-119689-TC Riverhead Host Interface section 4.2.2. +diff --git a/dpdk/drivers/common/sfc_efx/base/rhead_nic.c b/dpdk/drivers/common/sfc_efx/base/rhead_nic.c +index 92bc6fdfae..27ce1d6674 100644 +--- a/dpdk/drivers/common/sfc_efx/base/rhead_nic.c ++++ b/dpdk/drivers/common/sfc_efx/base/rhead_nic.c +@@ -28,7 +28,6 @@ rhead_board_cfg( + */ + encp->enc_tunnel_encapsulations_supported = + (1u << EFX_TUNNEL_PROTOCOL_VXLAN) | +- (1u << EFX_TUNNEL_PROTOCOL_GENEVE) | + (1u << EFX_TUNNEL_PROTOCOL_NVGRE); + + /* +diff --git a/dpdk/drivers/common/sfc_efx/efsys.h b/dpdk/drivers/common/sfc_efx/efsys.h +index a3ae3137e6..47b10b29cd 100644 +--- a/dpdk/drivers/common/sfc_efx/efsys.h ++++ b/dpdk/drivers/common/sfc_efx/efsys.h +@@ -125,7 +125,7 @@ prefetch_read_once(const volatile void *addr) + /* Enable Riverhead support */ + #define EFSYS_OPT_RIVERHEAD 1 + +-#ifdef RTE_LIBRTE_SFC_EFX_DEBUG ++#ifdef RTE_DEBUG_COMMON_SFC_EFX + #define EFSYS_OPT_CHECK_REG 1 + #else + #define EFSYS_OPT_CHECK_REG 0 +@@ -607,14 +607,14 @@ typedef struct efsys_bar_s { + /* DMA SYNC */ + + /* +- * DPDK does not provide any DMA syncing API, and no PMD drivers ++ * DPDK does not provide any DMA syncing API, and no PMDs + * have any traces of explicit DMA syncing. 
+ * DMA mapping is assumed to be coherent. + */ + + #define EFSYS_DMA_SYNC_FOR_KERNEL(_esmp, _offset, _size) ((void)0) + +-/* Just avoid store and compiler (impliciltly) reordering */ ++/* Just avoid store and compiler (implicitly) reordering */ + #define EFSYS_DMA_SYNC_FOR_DEVICE(_esmp, _offset, _size) rte_wmb() + + /* TIMESTAMP */ +@@ -740,7 +740,7 @@ typedef uint64_t efsys_stat_t; + /* RTE_VERIFY from DPDK treats expressions with % operator incorrectly, + * so we re-implement it here + */ +-#ifdef RTE_LIBRTE_SFC_EFX_DEBUG ++#ifdef RTE_DEBUG_COMMON_SFC_EFX + #define EFSYS_ASSERT(_exp) \ + do { \ + if (unlikely(!(_exp))) \ +diff --git a/dpdk/drivers/common/sfc_efx/meson.build b/dpdk/drivers/common/sfc_efx/meson.build +index 6cb9f0737f..1e17f1fd29 100644 +--- a/dpdk/drivers/common/sfc_efx/meson.build ++++ b/dpdk/drivers/common/sfc_efx/meson.build +@@ -5,7 +5,7 @@ + # This software was jointly developed between OKTET Labs (under contract + # for Solarflare) and Solarflare Communications, Inc. + +-if (arch_subdir != 'x86' or not dpdk_conf.get('RTE_ARCH_64')) and (arch_subdir != 'arm' or not host_machine.cpu_family().startswith('aarch64')) ++if (arch_subdir != 'x86' and arch_subdir != 'arm') or (not dpdk_conf.get('RTE_ARCH_64')) + build = false + reason = 'only supported on x86_64 and aarch64' + endif +diff --git a/dpdk/drivers/compress/octeontx/include/zip_regs.h b/dpdk/drivers/compress/octeontx/include/zip_regs.h +index 96e538bb75..94a48cde66 100644 +--- a/dpdk/drivers/compress/octeontx/include/zip_regs.h ++++ b/dpdk/drivers/compress/octeontx/include/zip_regs.h +@@ -195,7 +195,7 @@ union zip_inst_s { + uint64_t bf : 1; + /** Comp/decomp operation */ + uint64_t op : 2; +- /** Data sactter */ ++ /** Data scatter */ + uint64_t ds : 1; + /** Data gather */ + uint64_t dg : 1; +@@ -376,7 +376,7 @@ union zip_inst_s { + uint64_t bf : 1; + /** Comp/decomp operation */ + uint64_t op : 2; +- /** Data sactter */ ++ /** Data scatter */ + uint64_t ds : 1; + /** Data gather */ + uint64_t dg : 1; +diff --git a/dpdk/drivers/compress/octeontx/otx_zip.h b/dpdk/drivers/compress/octeontx/otx_zip.h +index e43f7f5c3e..118a95d738 100644 +--- a/dpdk/drivers/compress/octeontx/otx_zip.h ++++ b/dpdk/drivers/compress/octeontx/otx_zip.h +@@ -31,7 +31,7 @@ extern int octtx_zip_logtype_driver; + /**< PCI device id of ZIP VF */ + #define PCI_DEVICE_ID_OCTEONTX_ZIPVF 0xA037 + +-/* maxmum number of zip vf devices */ ++/* maximum number of zip vf devices */ + #define ZIP_MAX_VFS 8 + + /* max size of one chunk */ +diff --git a/dpdk/drivers/compress/octeontx/otx_zip_pmd.c b/dpdk/drivers/compress/octeontx/otx_zip_pmd.c +index bee90fc7cd..ff40968244 100644 +--- a/dpdk/drivers/compress/octeontx/otx_zip_pmd.c ++++ b/dpdk/drivers/compress/octeontx/otx_zip_pmd.c +@@ -392,6 +392,8 @@ zip_pmd_qp_setup(struct rte_compressdev *dev, uint16_t qp_id, + } + + name = rte_malloc(NULL, RTE_COMPRESSDEV_NAME_MAX_LEN, 0); ++ if (name == NULL) ++ return (-ENOMEM); + snprintf(name, RTE_COMPRESSDEV_NAME_MAX_LEN, + "zip_pmd_%u_qp_%u", + dev->data->dev_id, qp_id); +@@ -399,8 +401,10 @@ zip_pmd_qp_setup(struct rte_compressdev *dev, uint16_t qp_id, + /* Allocate the queue pair data structure. 
*/ + qp = rte_zmalloc_socket(name, sizeof(*qp), + RTE_CACHE_LINE_SIZE, socket_id); +- if (qp == NULL) ++ if (qp == NULL) { ++ rte_free(name); + return (-ENOMEM); ++ } + + qp->name = name; + +diff --git a/dpdk/drivers/compress/qat/qat_comp.c b/dpdk/drivers/compress/qat/qat_comp.c +index 3a064ec3b2..7ac25a3b4c 100644 +--- a/dpdk/drivers/compress/qat/qat_comp.c ++++ b/dpdk/drivers/compress/qat/qat_comp.c +@@ -191,8 +191,8 @@ qat_comp_build_request(void *in_op, uint8_t *out_msg, + ICP_QAT_FW_COMP_EOP + : ICP_QAT_FW_COMP_NOT_EOP, + ICP_QAT_FW_COMP_NOT_BFINAL, +- ICP_QAT_FW_COMP_NO_CNV, +- ICP_QAT_FW_COMP_NO_CNV_RECOVERY); ++ ICP_QAT_FW_COMP_CNV, ++ ICP_QAT_FW_COMP_CNV_RECOVERY); + } + + /* common for sgl and flat buffers */ +@@ -603,7 +603,8 @@ qat_comp_process_response(void **op, uint8_t *resp, void *op_cookie, + rx_op->status = RTE_COMP_OP_STATUS_ERROR; + rx_op->debug_status = ERR_CODE_QAT_COMP_WRONG_FW; + *op = (void *)rx_op; +- QAT_DP_LOG(ERR, "QAT has wrong firmware"); ++ QAT_DP_LOG(ERR, ++ "This QAT hardware doesn't support compression operation"); + ++(*dequeue_err_count); + return 1; + } +diff --git a/dpdk/drivers/compress/qat/qat_comp_pmd.c b/dpdk/drivers/compress/qat/qat_comp_pmd.c +index 18ecb34ba7..4b8594d76c 100644 +--- a/dpdk/drivers/compress/qat/qat_comp_pmd.c ++++ b/dpdk/drivers/compress/qat/qat_comp_pmd.c +@@ -82,13 +82,13 @@ qat_comp_qp_release(struct rte_compressdev *dev, uint16_t queue_pair_id) + qat_private->qat_dev->qps_in_use[QAT_SERVICE_COMPRESSION][queue_pair_id] + = NULL; + +- for (i = 0; i < qp->nb_descriptors; i++) { +- +- struct qat_comp_op_cookie *cookie = qp->op_cookies[i]; ++ if (qp != NULL) ++ for (i = 0; i < qp->nb_descriptors; i++) { ++ struct qat_comp_op_cookie *cookie = qp->op_cookies[i]; + +- rte_free(cookie->qat_sgl_src_d); +- rte_free(cookie->qat_sgl_dst_d); +- } ++ rte_free(cookie->qat_sgl_src_d); ++ rte_free(cookie->qat_sgl_dst_d); ++ } + + return qat_qp_release((struct qat_qp **) + &(dev->data->queue_pairs[queue_pair_id])); +@@ -198,7 +198,7 @@ qat_comp_setup_inter_buffers(struct qat_comp_dev_private *comp_dev, + struct array_of_ptrs *array_of_pointers; + int size_of_ptr_array; + uint32_t full_size; +- uint32_t offset_of_sgls, offset_of_flat_buffs = 0; ++ uint32_t offset_of_flat_buffs; + int i; + int num_im_sgls = qat_gen_config[ + comp_dev->qat_dev->qat_dev_gen].comp_num_im_bufs_required; +@@ -213,31 +213,31 @@ qat_comp_setup_inter_buffers(struct qat_comp_dev_private *comp_dev, + return memzone; + } + +- /* Create a memzone to hold intermediate buffers and associated +- * meta-data needed by the firmware. The memzone contains 3 parts: ++ /* Create multiple memzones to hold intermediate buffers and associated ++ * meta-data needed by the firmware. 
++ * The first memzone contains: + * - a list of num_im_sgls physical pointers to sgls +- * - the num_im_sgl sgl structures, each pointing to +- * QAT_NUM_BUFS_IN_IM_SGL flat buffers +- * - the flat buffers: num_im_sgl * QAT_NUM_BUFS_IN_IM_SGL +- * buffers, each of buff_size ++ * All other memzones contain: ++ * - the sgl structure, pointing to QAT_NUM_BUFS_IN_IM_SGL flat buffers ++ * - the flat buffers: QAT_NUM_BUFS_IN_IM_SGL buffers, ++ * each of buff_size + * num_im_sgls depends on the hardware generation of the device + * buff_size comes from the user via the config file + */ + + size_of_ptr_array = num_im_sgls * sizeof(phys_addr_t); +- offset_of_sgls = (size_of_ptr_array + (~QAT_64_BYTE_ALIGN_MASK)) +- & QAT_64_BYTE_ALIGN_MASK; +- offset_of_flat_buffs = +- offset_of_sgls + num_im_sgls * sizeof(struct qat_inter_sgl); ++ offset_of_flat_buffs = sizeof(struct qat_inter_sgl); + full_size = offset_of_flat_buffs + +- num_im_sgls * buff_size * QAT_NUM_BUFS_IN_IM_SGL; ++ buff_size * QAT_NUM_BUFS_IN_IM_SGL; + +- memzone = rte_memzone_reserve_aligned(inter_buff_mz_name, full_size, ++ memzone = rte_memzone_reserve_aligned(inter_buff_mz_name, ++ size_of_ptr_array, + comp_dev->compressdev->data->socket_id, + RTE_MEMZONE_IOVA_CONTIG, QAT_64_BYTE_ALIGN); + if (memzone == NULL) { +- QAT_LOG(ERR, "Can't allocate intermediate buffers" +- " for device %s", comp_dev->qat_dev->name); ++ QAT_LOG(ERR, ++ "Can't allocate intermediate buffers for device %s", ++ comp_dev->qat_dev->name); + return NULL; + } + +@@ -246,17 +246,50 @@ qat_comp_setup_inter_buffers(struct qat_comp_dev_private *comp_dev, + QAT_LOG(DEBUG, "Memzone %s: addr = %p, phys = 0x%"PRIx64 + ", size required %d, size created %zu", + inter_buff_mz_name, mz_start, mz_start_phys, +- full_size, memzone->len); ++ size_of_ptr_array, memzone->len); + + array_of_pointers = (struct array_of_ptrs *)mz_start; + for (i = 0; i < num_im_sgls; i++) { +- uint32_t curr_sgl_offset = +- offset_of_sgls + i * sizeof(struct qat_inter_sgl); +- struct qat_inter_sgl *sgl = +- (struct qat_inter_sgl *)(mz_start + curr_sgl_offset); ++ const struct rte_memzone *mz; ++ struct qat_inter_sgl *sgl; + int lb; +- array_of_pointers->pointer[i] = mz_start_phys + curr_sgl_offset; + ++ snprintf(inter_buff_mz_name, RTE_MEMZONE_NAMESIZE, ++ "%s_inter_buff_%d", comp_dev->qat_dev->name, i); ++ mz = rte_memzone_lookup(inter_buff_mz_name); ++ if (mz == NULL) { ++ mz = rte_memzone_reserve_aligned(inter_buff_mz_name, ++ full_size, ++ comp_dev->compressdev->data->socket_id, ++ RTE_MEMZONE_IOVA_CONTIG, ++ QAT_64_BYTE_ALIGN); ++ if (mz == NULL) { ++ QAT_LOG(ERR, ++ "Can't allocate intermediate buffers for device %s", ++ comp_dev->qat_dev->name); ++ while (--i >= 0) { ++ snprintf(inter_buff_mz_name, ++ RTE_MEMZONE_NAMESIZE, ++ "%s_inter_buff_%d", ++ comp_dev->qat_dev->name, ++ i); ++ rte_memzone_free( ++ rte_memzone_lookup( ++ inter_buff_mz_name)); ++ } ++ rte_memzone_free(memzone); ++ return NULL; ++ } ++ } ++ ++ QAT_LOG(DEBUG, "Memzone %s: addr = %p, phys = 0x%"PRIx64 ++ ", size required %d, size created %zu", ++ inter_buff_mz_name, mz->addr, mz->iova, ++ full_size, mz->len); ++ ++ array_of_pointers->pointer[i] = mz->iova; ++ ++ sgl = (struct qat_inter_sgl *) mz->addr; + sgl->num_bufs = QAT_NUM_BUFS_IN_IM_SGL; + sgl->num_mapped_bufs = 0; + sgl->resrvd = 0; +@@ -268,8 +301,8 @@ qat_comp_setup_inter_buffers(struct qat_comp_dev_private *comp_dev, + #endif + for (lb = 0; lb < QAT_NUM_BUFS_IN_IM_SGL; lb++) { + sgl->buffers[lb].addr = +- mz_start_phys + offset_of_flat_buffs + +- (((i * 
QAT_NUM_BUFS_IN_IM_SGL) + lb) * buff_size); ++ mz->iova + offset_of_flat_buffs + ++ lb * buff_size; + sgl->buffers[lb].len = buff_size; + sgl->buffers[lb].resrvd = 0; + #if QAT_IM_BUFFER_DEBUG +@@ -281,7 +314,7 @@ qat_comp_setup_inter_buffers(struct qat_comp_dev_private *comp_dev, + } + #if QAT_IM_BUFFER_DEBUG + QAT_DP_HEXDUMP_LOG(DEBUG, "IM buffer memzone start:", +- mz_start, offset_of_flat_buffs + 32); ++ memzone->addr, size_of_ptr_array); + #endif + return memzone; + } +@@ -429,7 +462,7 @@ qat_comp_create_stream_pool(struct qat_comp_dev_private *comp_dev, + } else if (info.error) { + rte_mempool_obj_iter(mp, qat_comp_stream_destroy, NULL); + QAT_LOG(ERR, +- "Destoying mempool %s as at least one element failed initialisation", ++ "Destroying mempool %s as at least one element failed initialisation", + stream_pool_name); + rte_mempool_free(mp); + mp = NULL; +@@ -444,6 +477,16 @@ _qat_comp_dev_config_clear(struct qat_comp_dev_private *comp_dev) + { + /* Free intermediate buffers */ + if (comp_dev->interm_buff_mz) { ++ char mz_name[RTE_MEMZONE_NAMESIZE]; ++ int i = qat_gen_config[ ++ comp_dev->qat_dev->qat_dev_gen].comp_num_im_bufs_required; ++ ++ while (--i >= 0) { ++ snprintf(mz_name, RTE_MEMZONE_NAMESIZE, ++ "%s_inter_buff_%d", ++ comp_dev->qat_dev->name, i); ++ rte_memzone_free(rte_memzone_lookup(mz_name)); ++ } + rte_memzone_free(comp_dev->interm_buff_mz); + comp_dev->interm_buff_mz = NULL; + } +@@ -607,7 +650,8 @@ qat_comp_pmd_dequeue_first_op_burst(void *qp, struct rte_comp_op **ops, + + tmp_qp->qat_dev->comp_dev->compressdev->dev_ops = + &compress_qat_dummy_ops; +- QAT_LOG(ERR, "QAT PMD detected wrong FW version !"); ++ QAT_LOG(ERR, ++ "This QAT hardware doesn't support compression operation"); + + } else { + tmp_qp->qat_dev->comp_dev->compressdev->dequeue_burst = +@@ -656,11 +700,6 @@ qat_comp_dev_create(struct qat_pci_device *qat_pci_dev, + int i = 0; + struct qat_device_info *qat_dev_instance = + &qat_pci_devs[qat_pci_dev->qat_dev_id]; +- if (qat_pci_dev->qat_dev_gen == QAT_GEN3) { +- QAT_LOG(ERR, "Compression PMD not supported on QAT c4xxx"); +- return 0; +- } +- + struct rte_compressdev_pmd_init_params init_params = { + .name = "", + .socket_id = qat_dev_instance->pci_dev->device.numa_node, +diff --git a/dpdk/drivers/compress/qat/qat_comp_pmd.h b/dpdk/drivers/compress/qat/qat_comp_pmd.h +index 252b4b24e3..ed7e917e62 100644 +--- a/dpdk/drivers/compress/qat/qat_comp_pmd.h ++++ b/dpdk/drivers/compress/qat/qat_comp_pmd.h +@@ -12,7 +12,7 @@ + + #include "qat_device.h" + +-/**< Intel(R) QAT Compression PMD driver name */ ++/**< Intel(R) QAT Compression PMD name */ + #define COMPRESSDEV_NAME_QAT_PMD compress_qat + + /** private data structure for a QAT compression device. 
+diff --git a/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c b/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c +index 0de51202a6..cc4e08512b 100644 +--- a/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c ++++ b/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c +@@ -842,8 +842,14 @@ aesni_gcm_create(const char *name, + init_mb_mgr_avx2(mb_mgr); + break; + case RTE_AESNI_GCM_AVX512: +- dev->feature_flags |= RTE_CRYPTODEV_FF_CPU_AVX512; +- init_mb_mgr_avx512(mb_mgr); ++ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_VAES)) { ++ dev->feature_flags |= RTE_CRYPTODEV_FF_CPU_AVX512; ++ init_mb_mgr_avx512(mb_mgr); ++ } else { ++ dev->feature_flags |= RTE_CRYPTODEV_FF_CPU_AVX2; ++ init_mb_mgr_avx2(mb_mgr); ++ vector_mode = RTE_AESNI_GCM_AVX2; ++ } + break; + default: + AESNI_GCM_LOG(ERR, "Unsupported vector mode %u\n", vector_mode); +diff --git a/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c b/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c +index 03186485f9..94055d8177 100644 +--- a/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c ++++ b/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c +@@ -1057,7 +1057,9 @@ get_session(struct aesni_mb_qp *qp, struct rte_crypto_op *op) + + static inline uint64_t + auth_start_offset(struct rte_crypto_op *op, struct aesni_mb_session *session, +- uint32_t oop) ++ uint32_t oop, const uint32_t auth_offset, ++ const uint32_t cipher_offset, const uint32_t auth_length, ++ const uint32_t cipher_length) + { + struct rte_mbuf *m_src, *m_dst; + uint8_t *p_src, *p_dst; +@@ -1066,7 +1068,7 @@ auth_start_offset(struct rte_crypto_op *op, struct aesni_mb_session *session, + + /* Only cipher then hash needs special calculation. */ + if (!oop || session->chain_order != CIPHER_HASH) +- return op->sym->auth.data.offset; ++ return auth_offset; + + m_src = op->sym->m_src; + m_dst = op->sym->m_dst; +@@ -1074,24 +1076,23 @@ auth_start_offset(struct rte_crypto_op *op, struct aesni_mb_session *session, + p_src = rte_pktmbuf_mtod(m_src, uint8_t *); + p_dst = rte_pktmbuf_mtod(m_dst, uint8_t *); + u_src = (uintptr_t)p_src; +- u_dst = (uintptr_t)p_dst + op->sym->auth.data.offset; ++ u_dst = (uintptr_t)p_dst + auth_offset; + + /** + * Copy the content between cipher offset and auth offset for generating + * correct digest. 
+ */ +- if (op->sym->cipher.data.offset > op->sym->auth.data.offset) +- memcpy(p_dst + op->sym->auth.data.offset, +- p_src + op->sym->auth.data.offset, +- op->sym->cipher.data.offset - +- op->sym->auth.data.offset); +- ++ if (cipher_offset > auth_offset) ++ memcpy(p_dst + auth_offset, ++ p_src + auth_offset, ++ cipher_offset - ++ auth_offset); + /** + * Copy the content between (cipher offset + length) and (auth offset + + * length) for generating correct digest + */ +- cipher_end = op->sym->cipher.data.offset + op->sym->cipher.data.length; +- auth_end = op->sym->auth.data.offset + op->sym->auth.data.length; ++ cipher_end = cipher_offset + cipher_length; ++ auth_end = auth_offset + auth_length; + if (cipher_end < auth_end) + memcpy(p_dst + cipher_end, p_src + cipher_end, + auth_end - cipher_end); +@@ -1246,7 +1247,12 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp, + struct rte_mbuf *m_src = op->sym->m_src, *m_dst; + struct aesni_mb_session *session; + uint32_t m_offset, oop; +- ++#if IMB_VERSION(0, 53, 3) <= IMB_VERSION_NUM ++ uint32_t auth_off_in_bytes; ++ uint32_t ciph_off_in_bytes; ++ uint32_t auth_len_in_bytes; ++ uint32_t ciph_len_in_bytes; ++#endif + session = get_session(qp, op); + if (session == NULL) { + op->status = RTE_CRYPTO_OP_STATUS_INVALID_SESSION; +@@ -1267,6 +1273,18 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp, + + const int aead = is_aead_algo(job->hash_alg, job->cipher_mode); + ++ if (job->cipher_mode == DES3) { ++ job->aes_enc_key_expanded = ++ session->cipher.exp_3des_keys.ks_ptr; ++ job->aes_dec_key_expanded = ++ session->cipher.exp_3des_keys.ks_ptr; ++ } else { ++ job->aes_enc_key_expanded = ++ session->cipher.expanded_aes_keys.encode; ++ job->aes_dec_key_expanded = ++ session->cipher.expanded_aes_keys.decode; ++ } ++ + switch (job->hash_alg) { + case AES_XCBC: + job->u.XCBC._k1_expanded = session->auth.xcbc.k1_expanded; +@@ -1339,17 +1357,6 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp, + job->u.HMAC._hashed_auth_key_xor_ipad = session->auth.pads.inner; + job->u.HMAC._hashed_auth_key_xor_opad = session->auth.pads.outer; + +- if (job->cipher_mode == DES3) { +- job->aes_enc_key_expanded = +- session->cipher.exp_3des_keys.ks_ptr; +- job->aes_dec_key_expanded = +- session->cipher.exp_3des_keys.ks_ptr; +- } else { +- job->aes_enc_key_expanded = +- session->cipher.expanded_aes_keys.encode; +- job->aes_dec_key_expanded = +- session->cipher.expanded_aes_keys.decode; +- } + } + + if (aead) +@@ -1361,6 +1368,7 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp, + if (job->cipher_mode == IMB_CIPHER_ZUC_EEA3) { + job->aes_enc_key_expanded = session->cipher.zuc_cipher_key; + job->aes_dec_key_expanded = session->cipher.zuc_cipher_key; ++ m_offset >>= 3; + } else if (job->cipher_mode == IMB_CIPHER_SNOW3G_UEA2_BITLEN) { + job->enc_keys = &session->cipher.pKeySched_snow3g_cipher; + m_offset = 0; +@@ -1417,9 +1425,6 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp, + + switch (job->hash_alg) { + case AES_CCM: +- job->cipher_start_src_offset_in_bytes = +- op->sym->aead.data.offset; +- job->msg_len_to_cipher_in_bytes = op->sym->aead.data.length; + job->hash_start_src_offset_in_bytes = op->sym->aead.data.offset; + job->msg_len_to_hash_in_bytes = op->sym->aead.data.length; + +@@ -1429,21 +1434,13 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp, + + case AES_GMAC: + if (session->cipher.mode == GCM) { +- job->cipher_start_src_offset_in_bytes = +- op->sym->aead.data.offset; + 
job->hash_start_src_offset_in_bytes = + op->sym->aead.data.offset; +- job->msg_len_to_cipher_in_bytes = +- op->sym->aead.data.length; + job->msg_len_to_hash_in_bytes = + op->sym->aead.data.length; +- } else { +- job->cipher_start_src_offset_in_bytes = +- op->sym->auth.data.offset; +- job->hash_start_src_offset_in_bytes = +- op->sym->auth.data.offset; +- job->msg_len_to_cipher_in_bytes = 0; ++ } else { /* AES-GMAC only, only AAD used */ + job->msg_len_to_hash_in_bytes = 0; ++ job->hash_start_src_offset_in_bytes = 0; + } + + job->iv = rte_crypto_op_ctod_offset(op, uint8_t *, +@@ -1452,10 +1449,7 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp, + + #if IMB_VERSION(0, 54, 3) <= IMB_VERSION_NUM + case IMB_AUTH_CHACHA20_POLY1305: +- job->cipher_start_src_offset_in_bytes = op->sym->aead.data.offset; + job->hash_start_src_offset_in_bytes = op->sym->aead.data.offset; +- job->msg_len_to_cipher_in_bytes = +- op->sym->aead.data.length; + job->msg_len_to_hash_in_bytes = + op->sym->aead.data.length; + +@@ -1463,26 +1457,98 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp, + session->iv.offset); + break; + #endif +- default: +- /* For SNOW3G, length and offsets are already in bits */ +- job->cipher_start_src_offset_in_bytes = +- op->sym->cipher.data.offset; +- job->msg_len_to_cipher_in_bytes = op->sym->cipher.data.length; ++#if IMB_VERSION(0, 53, 3) <= IMB_VERSION_NUM ++ /* ZUC and SNOW3G require length in bits and offset in bytes */ ++ case IMB_AUTH_ZUC_EIA3_BITLEN: ++ case IMB_AUTH_SNOW3G_UIA2_BITLEN: ++ auth_off_in_bytes = op->sym->auth.data.offset >> 3; ++ ciph_off_in_bytes = op->sym->cipher.data.offset >> 3; ++ auth_len_in_bytes = op->sym->auth.data.length >> 3; ++ ciph_len_in_bytes = op->sym->cipher.data.length >> 3; ++ ++ job->hash_start_src_offset_in_bytes = auth_start_offset(op, ++ session, oop, auth_off_in_bytes, ++ ciph_off_in_bytes, auth_len_in_bytes, ++ ciph_len_in_bytes); ++ job->msg_len_to_hash_in_bits = op->sym->auth.data.length; ++ ++ job->iv = rte_crypto_op_ctod_offset(op, uint8_t *, ++ session->iv.offset); ++ break; ++ ++ /* KASUMI requires lengths and offset in bytes */ ++ case IMB_AUTH_KASUMI_UIA1: ++ auth_off_in_bytes = op->sym->auth.data.offset >> 3; ++ ciph_off_in_bytes = op->sym->cipher.data.offset >> 3; ++ auth_len_in_bytes = op->sym->auth.data.length >> 3; ++ ciph_len_in_bytes = op->sym->cipher.data.length >> 3; + + job->hash_start_src_offset_in_bytes = auth_start_offset(op, +- session, oop); ++ session, oop, auth_off_in_bytes, ++ ciph_off_in_bytes, auth_len_in_bytes, ++ ciph_len_in_bytes); ++ job->msg_len_to_hash_in_bytes = auth_len_in_bytes; ++ ++ job->iv = rte_crypto_op_ctod_offset(op, uint8_t *, ++ session->iv.offset); ++ break; ++#endif ++ ++ default: ++ job->hash_start_src_offset_in_bytes = auth_start_offset(op, ++ session, oop, op->sym->auth.data.offset, ++ op->sym->cipher.data.offset, ++ op->sym->auth.data.length, ++ op->sym->cipher.data.length); + job->msg_len_to_hash_in_bytes = op->sym->auth.data.length; + + job->iv = rte_crypto_op_ctod_offset(op, uint8_t *, + session->iv.offset); + } + ++ switch (job->cipher_mode) { + #if IMB_VERSION(0, 53, 3) <= IMB_VERSION_NUM +- if (job->cipher_mode == IMB_CIPHER_ZUC_EEA3) +- job->msg_len_to_cipher_in_bytes >>= 3; +- else if (job->hash_alg == IMB_AUTH_KASUMI_UIA1) +- job->msg_len_to_hash_in_bytes >>= 3; ++ /* ZUC requires length and offset in bytes */ ++ case IMB_CIPHER_ZUC_EEA3: ++ job->cipher_start_src_offset_in_bytes = ++ op->sym->cipher.data.offset >> 3; ++ job->msg_len_to_cipher_in_bytes = ++ 
op->sym->cipher.data.length >> 3; ++ break; ++ /* ZUC and SNOW3G require length and offset in bits */ ++ case IMB_CIPHER_SNOW3G_UEA2_BITLEN: ++ case IMB_CIPHER_KASUMI_UEA1_BITLEN: ++ job->cipher_start_src_offset_in_bits = ++ op->sym->cipher.data.offset; ++ job->msg_len_to_cipher_in_bits = ++ op->sym->cipher.data.length; ++ break; ++#endif ++ case GCM: ++ if (session->cipher.mode == NULL_CIPHER) { ++ /* AES-GMAC only (only AAD used) */ ++ job->msg_len_to_cipher_in_bytes = 0; ++ job->cipher_start_src_offset_in_bytes = 0; ++ } else { ++ job->cipher_start_src_offset_in_bytes = ++ op->sym->aead.data.offset; ++ job->msg_len_to_cipher_in_bytes = ++ op->sym->aead.data.length; ++ } ++ break; ++ case CCM: ++#if IMB_VERSION(0, 54, 3) <= IMB_VERSION_NUM ++ case IMB_CIPHER_CHACHA20_POLY1305: + #endif ++ job->cipher_start_src_offset_in_bytes = ++ op->sym->aead.data.offset; ++ job->msg_len_to_cipher_in_bytes = op->sym->aead.data.length; ++ break; ++ default: ++ job->cipher_start_src_offset_in_bytes = ++ op->sym->cipher.data.offset; ++ job->msg_len_to_cipher_in_bytes = op->sym->cipher.data.length; ++ } + + /* Set user data to be crypto operation data struct */ + job->user_data = op; +diff --git a/dpdk/drivers/crypto/bcmfs/bcmfs_device.h b/dpdk/drivers/crypto/bcmfs/bcmfs_device.h +index e5ca866977..4901a6cfd9 100644 +--- a/dpdk/drivers/crypto/bcmfs/bcmfs_device.h ++++ b/dpdk/drivers/crypto/bcmfs/bcmfs_device.h +@@ -32,7 +32,7 @@ enum bcmfs_device_type { + BCMFS_UNKNOWN + }; + +-/* A table to store registered queue pair opertations */ ++/* A table to store registered queue pair operations */ + struct bcmfs_hw_queue_pair_ops_table { + rte_spinlock_t tl; + /* Number of used ops structs in the table. */ +diff --git a/dpdk/drivers/crypto/bcmfs/bcmfs_logs.c b/dpdk/drivers/crypto/bcmfs/bcmfs_logs.c +index 86f4ff3b53..701da9ecf3 100644 +--- a/dpdk/drivers/crypto/bcmfs/bcmfs_logs.c ++++ b/dpdk/drivers/crypto/bcmfs/bcmfs_logs.c +@@ -8,9 +8,6 @@ + + #include "bcmfs_logs.h" + +-int bcmfs_conf_logtype; +-int bcmfs_dp_logtype; +- + int + bcmfs_hexdump_log(uint32_t level, uint32_t logtype, const char *title, + const void *buf, unsigned int len) +@@ -24,15 +21,5 @@ bcmfs_hexdump_log(uint32_t level, uint32_t logtype, const char *title, + return 0; + } + +-RTE_INIT(bcmfs_device_init_log) +-{ +- /* Configuration and general logs */ +- bcmfs_conf_logtype = rte_log_register("pmd.bcmfs_config"); +- if (bcmfs_conf_logtype >= 0) +- rte_log_set_level(bcmfs_conf_logtype, RTE_LOG_NOTICE); +- +- /* data-path logs */ +- bcmfs_dp_logtype = rte_log_register("pmd.bcmfs_fp"); +- if (bcmfs_dp_logtype >= 0) +- rte_log_set_level(bcmfs_dp_logtype, RTE_LOG_NOTICE); +-} ++RTE_LOG_REGISTER(bcmfs_conf_logtype, pmd.bcmfs_config, NOTICE) ++RTE_LOG_REGISTER(bcmfs_dp_logtype, pmd.bcmfs_fp, NOTICE) +diff --git a/dpdk/drivers/crypto/bcmfs/bcmfs_qp.c b/dpdk/drivers/crypto/bcmfs/bcmfs_qp.c +index cb5ff6c61b..61d457f4e0 100644 +--- a/dpdk/drivers/crypto/bcmfs/bcmfs_qp.c ++++ b/dpdk/drivers/crypto/bcmfs/bcmfs_qp.c +@@ -212,7 +212,7 @@ bcmfs_qp_setup(struct bcmfs_qp **qp_addr, + nb_descriptors = FS_RM_MAX_REQS; + + if (qp_conf->iobase == NULL) { +- BCMFS_LOG(ERR, "IO onfig space null"); ++ BCMFS_LOG(ERR, "IO config space null"); + return -EINVAL; + } + +diff --git a/dpdk/drivers/crypto/bcmfs/bcmfs_sym_defs.h b/dpdk/drivers/crypto/bcmfs/bcmfs_sym_defs.h +index eaefe97e26..9bb8a695a0 100644 +--- a/dpdk/drivers/crypto/bcmfs/bcmfs_sym_defs.h ++++ b/dpdk/drivers/crypto/bcmfs/bcmfs_sym_defs.h +@@ -20,11 +20,11 @@ struct bcmfs_sym_request; + + /** Crypto Request 
processing successful. */ + #define BCMFS_SYM_RESPONSE_SUCCESS (0) +-/** Crypot Request processing protocol failure. */ ++/** Crypto Request processing protocol failure. */ + #define BCMFS_SYM_RESPONSE_PROTO_FAILURE (1) +-/** Crypot Request processing completion failure. */ ++/** Crypto Request processing completion failure. */ + #define BCMFS_SYM_RESPONSE_COMPL_ERROR (2) +-/** Crypot Request processing hash tag check error. */ ++/** Crypto Request processing hash tag check error. */ + #define BCMFS_SYM_RESPONSE_HASH_TAG_ERROR (3) + + /** Maximum threshold length to adjust AAD in continuation +diff --git a/dpdk/drivers/crypto/bcmfs/bcmfs_sym_engine.h b/dpdk/drivers/crypto/bcmfs/bcmfs_sym_engine.h +index d9594246b5..51ff9f75ed 100644 +--- a/dpdk/drivers/crypto/bcmfs/bcmfs_sym_engine.h ++++ b/dpdk/drivers/crypto/bcmfs/bcmfs_sym_engine.h +@@ -12,7 +12,7 @@ + #include "bcmfs_sym_defs.h" + #include "bcmfs_sym_req.h" + +-/* structure to hold element's arrtibutes */ ++/* structure to hold element's attributes */ + struct fsattr { + void *va; + uint64_t pa; +diff --git a/dpdk/drivers/crypto/bcmfs/hw/bcmfs5_rm.c b/dpdk/drivers/crypto/bcmfs/hw/bcmfs5_rm.c +index 86e53051dd..c677c0cd9b 100644 +--- a/dpdk/drivers/crypto/bcmfs/hw/bcmfs5_rm.c ++++ b/dpdk/drivers/crypto/bcmfs/hw/bcmfs5_rm.c +@@ -441,7 +441,7 @@ static void bcmfs5_write_doorbell(struct bcmfs_qp *qp) + { + struct bcmfs_queue *txq = &qp->tx_q; + +- /* sync in bfeore ringing the door-bell */ ++ /* sync in before ringing the door-bell */ + rte_wmb(); + + FS_MMIO_WRITE32(txq->descs_inflight, +diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr.c b/dpdk/drivers/crypto/caam_jr/caam_jr.c +index 3fb3fe0f8a..6fc8adf752 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr.c ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr.c +@@ -1881,8 +1881,9 @@ caam_jr_set_ipsec_session(__rte_unused struct rte_cryptodev *dev, + session->encap_pdb.options = + (IPVERSION << PDBNH_ESP_ENCAP_SHIFT) | + PDBOPTS_ESP_OIHI_PDB_INL | +- PDBOPTS_ESP_IVSRC | +- PDBHMO_ESP_ENCAP_DTTL; ++ PDBOPTS_ESP_IVSRC; ++ if (ipsec_xform->options.dec_ttl) ++ session->encap_pdb.options |= PDBHMO_ESP_ENCAP_DTTL; + if (ipsec_xform->options.esn) + session->encap_pdb.options |= PDBOPTS_ESP_ESN; + session->encap_pdb.spi = ipsec_xform->spi; +diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h b/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h +index bbe8bc3f90..6ee7f7cef3 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h +@@ -376,7 +376,7 @@ struct sec_job_ring_t { + void *register_base_addr; /* Base address for SEC's + * register memory for this job ring. + */ +- uint8_t coalescing_en; /* notifies if coelescing is ++ uint8_t coalescing_en; /* notifies if coalescing is + * enabled for the job ring + */ + sec_job_ring_state_t jr_state; /* The state of this job ring */ +@@ -479,7 +479,7 @@ void hw_job_ring_error_print(struct sec_job_ring_t *job_ring, int code); + + /* @brief Set interrupt coalescing parameters on the Job Ring. + * @param [in] job_ring The job ring +- * @param [in] irq_coalesing_timer Interrupt coalescing timer threshold. ++ * @param [in] irq_coalescing_timer Interrupt coalescing timer threshold. + * This value determines the maximum + * amount of time after processing a + * descriptor before raising an interrupt. 
+diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h b/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h +index 552d6b9b1b..52f872bcd0 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h +@@ -169,7 +169,7 @@ struct sec4_sg_entry { + + /* Structure encompassing a job descriptor which is to be processed + * by SEC. User should also initialise this structure with the callback +- * function pointer which will be called by driver after recieving proccessed ++ * function pointer which will be called by driver after receiving processed + * descriptor from SEC. User data is also passed in this data structure which + * will be sent as an argument to the user callback function. + */ +@@ -288,7 +288,7 @@ int caam_jr_enable_irqs(int uio_fd); + * value that indicates an IRQ disable action into UIO file descriptor + * of this job ring. + * +- * @param [in] uio_fd UIO File descripto ++ * @param [in] uio_fd UIO File descriptor + * @retval 0 for success + * @retval -1 value for error + * +diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c b/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c +index e4ee102344..583ba3b523 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c +@@ -227,7 +227,7 @@ caam_jr_enable_irqs(int uio_fd) + * value that indicates an IRQ disable action into UIO file descriptor + * of this job ring. + * +- * @param [in] uio_fd UIO File descripto ++ * @param [in] uio_fd UIO File descriptor + * @retval 0 for success + * @retval -1 value for error + * +diff --git a/dpdk/drivers/crypto/ccp/ccp_crypto.c b/dpdk/drivers/crypto/ccp/ccp_crypto.c +index db3fb6eff8..e66aa98e2a 100644 +--- a/dpdk/drivers/crypto/ccp/ccp_crypto.c ++++ b/dpdk/drivers/crypto/ccp/ccp_crypto.c +@@ -2,6 +2,8 @@ + * Copyright(c) 2018 Advanced Micro Devices, Inc. All rights reserved. + */ + ++#define OPENSSL_API_COMPAT 0x10100000L ++ + #include + #include + #include +@@ -1282,7 +1284,7 @@ ccp_auth_slot(struct ccp_session *session) + case CCP_AUTH_ALGO_SHA512_HMAC: + /** + * 1. Load PHash1 = H(k ^ ipad); to LSB +- * 2. generate IHash = H(hash on meassage with PHash1 ++ * 2. generate IHash = H(hash on message with PHash1 + * as init values); + * 3. Retrieve IHash 2 slots for 384/512 + * 4. Load Phash2 = H(k ^ opad); to LSB +diff --git a/dpdk/drivers/crypto/ccp/ccp_crypto.h b/dpdk/drivers/crypto/ccp/ccp_crypto.h +index 8e6d03efc8..d307f73ee4 100644 +--- a/dpdk/drivers/crypto/ccp/ccp_crypto.h ++++ b/dpdk/drivers/crypto/ccp/ccp_crypto.h +@@ -70,7 +70,7 @@ + /* Maximum length for digest */ + #define DIGEST_LENGTH_MAX 64 + +-/* SHA LSB intialiazation values */ ++/* SHA LSB initialization values */ + + #define SHA1_H0 0x67452301UL + #define SHA1_H1 0xefcdab89UL +diff --git a/dpdk/drivers/crypto/ccp/ccp_dev.h b/dpdk/drivers/crypto/ccp/ccp_dev.h +index 37e04218ce..1851110081 100644 +--- a/dpdk/drivers/crypto/ccp/ccp_dev.h ++++ b/dpdk/drivers/crypto/ccp/ccp_dev.h +@@ -19,7 +19,7 @@ + #include + #include + +-/**< CCP sspecific */ ++/**< CCP specific */ + #define MAX_HW_QUEUES 5 + #define CCP_MAX_TRNG_RETRIES 10 + #define CCP_ALIGN(x, y) ((((x) + (y - 1)) / y) * y) +diff --git a/dpdk/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c b/dpdk/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c +index 5d91bf910e..839c11ed25 100644 +--- a/dpdk/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c ++++ b/dpdk/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c +@@ -1,7 +1,7 @@ + /* SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 2016 Freescale Semiconductor, Inc. 
All rights reserved. +- * Copyright 2016-2020 NXP ++ * Copyright 2016-2022 NXP + * + */ + +@@ -59,6 +59,27 @@ + + static uint8_t cryptodev_driver_id; + ++static inline void ++free_fle(const struct qbman_fd *fd) ++{ ++ struct qbman_fle *fle; ++ struct rte_crypto_op *op; ++ struct ctxt_priv *priv; ++ ++#ifdef RTE_LIB_SECURITY ++ if (DPAA2_FD_GET_FORMAT(fd) == qbman_fd_single) ++ return; ++#endif ++ fle = (struct qbman_fle *)DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd)); ++ op = (struct rte_crypto_op *)DPAA2_GET_FLE_ADDR((fle - 1)); ++ /* free the fle memory */ ++ if (likely(rte_pktmbuf_is_contiguous(op->sym->m_src))) { ++ priv = (struct ctxt_priv *)(size_t)DPAA2_GET_FLE_CTXT(fle - 1); ++ rte_mempool_put(priv->fle_pool, (void *)(fle-1)); ++ } else ++ rte_free((void *)(fle-1)); ++} ++ + #ifdef RTE_LIB_SECURITY + static inline int + build_proto_compound_sg_fd(dpaa2_sec_session *sess, +@@ -1508,6 +1529,12 @@ dpaa2_sec_enqueue_burst(void *qp, struct rte_crypto_op **ops, + if (retry_count > DPAA2_MAX_TX_RETRY_COUNT) { + num_tx += loop; + nb_ops -= loop; ++ DPAA2_SEC_DP_DEBUG("Enqueue fail\n"); ++ /* freeing the fle buffers */ ++ while (loop < frames_to_send) { ++ free_fle(&fd_arr[loop]); ++ loop++; ++ } + goto skip_tx; + } + } else { +@@ -1534,6 +1561,10 @@ sec_simple_fd_to_mbuf(const struct qbman_fd *fd) + int16_t diff = 0; + dpaa2_sec_session *sess_priv __rte_unused; + ++ if (unlikely(DPAA2_GET_FD_IVP(fd))) { ++ DPAA2_SEC_ERR("error: non inline buffer"); ++ return NULL; ++ } + struct rte_mbuf *mbuf = DPAA2_INLINE_MBUF_FROM_BUF( + DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd)), + rte_dpaa2_bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size); +@@ -1552,6 +1583,14 @@ sec_simple_fd_to_mbuf(const struct qbman_fd *fd) + else + mbuf->data_off += SEC_FLC_DHR_INBOUND; + ++ if (unlikely(fd->simple.frc)) { ++ DPAA2_SEC_ERR("SEC returned Error - %x", ++ fd->simple.frc); ++ op->status = RTE_CRYPTO_OP_STATUS_ERROR; ++ } else { ++ op->status = RTE_CRYPTO_OP_STATUS_SUCCESS; ++ } ++ + return op; + } + #endif +@@ -1580,11 +1619,6 @@ sec_fd_to_mbuf(const struct qbman_fd *fd) + * We can have a better approach to use the inline Mbuf + */ + +- if (unlikely(DPAA2_GET_FD_IVP(fd))) { +- /* TODO complete it. */ +- DPAA2_SEC_ERR("error: non inline buffer"); +- return NULL; +- } + op = (struct rte_crypto_op *)DPAA2_GET_FLE_ADDR((fle - 1)); + + /* Prefeth op */ +@@ -2910,8 +2944,9 @@ dpaa2_sec_set_ipsec_session(struct rte_cryptodev *dev, + encap_pdb.options = (IPVERSION << PDBNH_ESP_ENCAP_SHIFT) | + PDBOPTS_ESP_OIHI_PDB_INL | + PDBOPTS_ESP_IVSRC | +- PDBHMO_ESP_ENCAP_DTTL | + PDBHMO_ESP_SNR; ++ if (ipsec_xform->options.dec_ttl) ++ encap_pdb.options |= PDBHMO_ESP_ENCAP_DTTL; + if (ipsec_xform->options.esn) + encap_pdb.options |= PDBOPTS_ESP_ESN; + encap_pdb.spi = ipsec_xform->spi; +@@ -3534,32 +3569,10 @@ dpaa2_sec_dev_stop(struct rte_cryptodev *dev) + } + + static int +-dpaa2_sec_dev_close(struct rte_cryptodev *dev) ++dpaa2_sec_dev_close(struct rte_cryptodev *dev __rte_unused) + { +- struct dpaa2_sec_dev_private *priv = dev->data->dev_private; +- struct fsl_mc_io *dpseci = (struct fsl_mc_io *)priv->hw; +- int ret; +- + PMD_INIT_FUNC_TRACE(); + +- /* Function is reverse of dpaa2_sec_dev_init. +- * It does the following: +- * 1. Detach a DPSECI from attached resources i.e. buffer pools, dpbp_id +- * 2. Close the DPSECI device +- * 3. Free the allocated resources. 
+- */ +- +- /*Close the device at underlying layer*/ +- ret = dpseci_close(dpseci, CMD_PRI_LOW, priv->token); +- if (ret) { +- DPAA2_SEC_ERR("Failure closing dpseci device: err(%d)", ret); +- return -1; +- } +- +- /*Free the allocated memory for ethernet private data and dpseci*/ +- priv->hw = NULL; +- rte_free(dpseci); +- + return 0; + } + +@@ -3692,7 +3705,7 @@ dpaa2_sec_process_atomic_event(struct qbman_swp *swp __rte_unused, + struct rte_event *ev) + { + uint8_t dqrr_index; +- struct rte_crypto_op *crypto_op = (struct rte_crypto_op *)ev->event_ptr; ++ struct rte_crypto_op *crypto_op; + /* Prefetching mbuf */ + rte_prefetch0((void *)(size_t)(DPAA2_GET_FD_ADDR(fd)- + rte_dpaa2_bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size)); +@@ -3708,12 +3721,13 @@ dpaa2_sec_process_atomic_event(struct qbman_swp *swp __rte_unused, + ev->queue_id = rxq->ev.queue_id; + ev->priority = rxq->ev.priority; + +- ev->event_ptr = sec_fd_to_mbuf(fd); ++ crypto_op = sec_fd_to_mbuf(fd); + dqrr_index = qbman_get_dqrr_idx(dq); + *dpaa2_seqn(crypto_op->sym->m_src) = dqrr_index + 1; + DPAA2_PER_LCORE_DQRR_SIZE++; + DPAA2_PER_LCORE_DQRR_HELD |= 1 << dqrr_index; + DPAA2_PER_LCORE_DQRR_MBUF(dqrr_index) = crypto_op->sym->m_src; ++ ev->event_ptr = crypto_op; + } + + int +@@ -3819,11 +3833,31 @@ static const struct rte_security_ops dpaa2_sec_security_ops = { + static int + dpaa2_sec_uninit(const struct rte_cryptodev *dev) + { +- struct dpaa2_sec_dev_private *internals = dev->data->dev_private; ++ struct dpaa2_sec_dev_private *priv = dev->data->dev_private; ++ struct fsl_mc_io *dpseci = (struct fsl_mc_io *)priv->hw; ++ int ret; + +- rte_free(dev->security_ctx); ++ PMD_INIT_FUNC_TRACE(); ++ ++ /* Function is reverse of dpaa2_sec_dev_init. ++ * It does the following: ++ * 1. Detach a DPSECI from attached resources i.e. buffer pools, dpbp_id ++ * 2. Close the DPSECI device ++ * 3. Free the allocated resources. ++ */ + +- rte_mempool_free(internals->fle_pool); ++ /*Close the device at underlying layer*/ ++ ret = dpseci_close(dpseci, CMD_PRI_LOW, priv->token); ++ if (ret) { ++ DPAA2_SEC_ERR("Failure closing dpseci device: err(%d)", ret); ++ return -1; ++ } ++ ++ /*Free the allocated memory for ethernet private data and dpseci*/ ++ priv->hw = NULL; ++ rte_free(dpseci); ++ rte_free(dev->security_ctx); ++ rte_mempool_free(priv->fle_pool); + + DPAA2_SEC_INFO("Closing DPAA2_SEC device %s on numa socket %u", + dev->data->name, rte_socket_id()); +diff --git a/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.c b/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.c +index 44c742738f..0ce99eecc7 100644 +--- a/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.c ++++ b/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.c +@@ -683,7 +683,7 @@ dpaa_sec_deq(struct dpaa_sec_qp *qp, struct rte_crypto_op **ops, int nb_ops) + } + ops[pkts++] = op; + +- /* report op status to sym->op and then free the ctx memeory */ ++ /* report op status to sym->op and then free the ctx memory */ + rte_mempool_put(ctx->ctx_pool, (void *)ctx); + + qman_dqrr_consume(fq, dq); +@@ -1716,6 +1716,13 @@ dpaa_sec_enqueue_burst(void *qp, struct rte_crypto_op **ops, + uint32_t index, flags[DPAA_SEC_BURST] = {0}; + struct qman_fq *inq[DPAA_SEC_BURST]; + ++ if (unlikely(!DPAA_PER_LCORE_PORTAL)) { ++ if (rte_dpaa_portal_init((void *)0)) { ++ DPAA_SEC_ERR("Failure in affining portal"); ++ return 0; ++ } ++ } ++ + while (nb_ops) { + frames_to_send = (nb_ops > DPAA_SEC_BURST) ? 
+ DPAA_SEC_BURST : nb_ops; +@@ -1916,6 +1923,13 @@ dpaa_sec_dequeue_burst(void *qp, struct rte_crypto_op **ops, + uint16_t num_rx; + struct dpaa_sec_qp *dpaa_qp = (struct dpaa_sec_qp *)qp; + ++ if (unlikely(!DPAA_PER_LCORE_PORTAL)) { ++ if (rte_dpaa_portal_init((void *)0)) { ++ DPAA_SEC_ERR("Failure in affining portal"); ++ return 0; ++ } ++ } ++ + num_rx = dpaa_sec_deq(dpaa_qp, ops, nb_ops); + + dpaa_qp->rx_pkts += num_rx; +@@ -2785,12 +2799,14 @@ dpaa_sec_set_ipsec_session(__rte_unused struct rte_cryptodev *dev, + session->encap_pdb.ip_hdr_len = + sizeof(struct rte_ipv6_hdr); + } ++ + session->encap_pdb.options = + (IPVERSION << PDBNH_ESP_ENCAP_SHIFT) | + PDBOPTS_ESP_OIHI_PDB_INL | + PDBOPTS_ESP_IVSRC | +- PDBHMO_ESP_ENCAP_DTTL | + PDBHMO_ESP_SNR; ++ if (ipsec_xform->options.dec_ttl) ++ session->encap_pdb.options |= PDBHMO_ESP_ENCAP_DTTL; + if (ipsec_xform->options.esn) + session->encap_pdb.options |= PDBOPTS_ESP_ESN; + session->encap_pdb.spi = ipsec_xform->spi; +@@ -2865,11 +2881,13 @@ dpaa_sec_set_pdcp_session(struct rte_cryptodev *dev, + /* find xfrm types */ + if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER) { + cipher_xform = &xform->cipher; +- if (xform->next != NULL) ++ if (xform->next != NULL && ++ xform->next->type == RTE_CRYPTO_SYM_XFORM_AUTH) + auth_xform = &xform->next->auth; + } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AUTH) { + auth_xform = &xform->auth; +- if (xform->next != NULL) ++ if (xform->next != NULL && ++ xform->next->type == RTE_CRYPTO_SYM_XFORM_CIPHER) + cipher_xform = &xform->next->cipher; + } else { + DPAA_SEC_ERR("Invalid crypto type"); +@@ -3427,23 +3445,24 @@ cryptodev_dpaa_sec_probe(struct rte_dpaa_driver *dpaa_drv __rte_unused, + + int retval; + ++ if (rte_eal_process_type() != RTE_PROC_PRIMARY) ++ return 0; ++ + snprintf(cryptodev_name, sizeof(cryptodev_name), "%s", dpaa_dev->name); + + cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, rte_socket_id()); + if (cryptodev == NULL) + return -ENOMEM; + +- if (rte_eal_process_type() == RTE_PROC_PRIMARY) { +- cryptodev->data->dev_private = rte_zmalloc_socket( +- "cryptodev private structure", +- sizeof(struct dpaa_sec_dev_private), +- RTE_CACHE_LINE_SIZE, +- rte_socket_id()); ++ cryptodev->data->dev_private = rte_zmalloc_socket( ++ "cryptodev private structure", ++ sizeof(struct dpaa_sec_dev_private), ++ RTE_CACHE_LINE_SIZE, ++ rte_socket_id()); + +- if (cryptodev->data->dev_private == NULL) +- rte_panic("Cannot allocate memzone for private " +- "device data"); +- } ++ if (cryptodev->data->dev_private == NULL) ++ rte_panic("Cannot allocate memzone for private " ++ "device data"); + + dpaa_dev->crypto_dev = cryptodev; + cryptodev->device = &dpaa_dev->device; +@@ -3483,8 +3502,7 @@ cryptodev_dpaa_sec_probe(struct rte_dpaa_driver *dpaa_drv __rte_unused, + retval = -ENXIO; + out: + /* In case of error, cleanup is done */ +- if (rte_eal_process_type() == RTE_PROC_PRIMARY) +- rte_free(cryptodev->data->dev_private); ++ rte_free(cryptodev->data->dev_private); + + rte_cryptodev_pmd_release_device(cryptodev); + +diff --git a/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.h b/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.h +index 368699678b..2af33a8fad 100644 +--- a/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.h ++++ b/dpdk/drivers/crypto/dpaa_sec/dpaa_sec.h +@@ -1,6 +1,6 @@ + /* SPDX-License-Identifier: BSD-3-Clause + * +- * Copyright 2016-2020 NXP ++ * Copyright 2016-2022 NXP + * + */ + +@@ -208,7 +208,7 @@ struct dpaa_sec_job { + struct qm_sg_entry sg[MAX_JOB_SG_ENTRIES]; + }; + +-#define DPAA_MAX_NB_MAX_DIGEST 32 ++#define 
DPAA_MAX_NB_MAX_DIGEST 64 + struct dpaa_sec_op_ctx { + struct dpaa_sec_job job; + struct rte_crypto_op *op; +diff --git a/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c b/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c +index bec51c9ff6..5712042b95 100644 +--- a/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c ++++ b/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c +@@ -360,6 +360,14 @@ mrvl_crypto_set_aead_session_parameters(struct mrvl_crypto_session *sess, + sess->sam_sess_params.cipher_mode = + aead_map[aead_xform->aead.algo].cipher_mode; + ++ if (sess->sam_sess_params.cipher_mode == SAM_CIPHER_GCM) { ++ /* IV must include nonce for all counter modes */ ++ sess->cipher_iv_offset = aead_xform->cipher.iv.offset; ++ ++ /* Set order of authentication then encryption to 0 in GCM */ ++ sess->sam_sess_params.u.basic.auth_then_encrypt = 0; ++ } ++ + /* Assume IV will be passed together with data. */ + sess->sam_sess_params.cipher_iv = NULL; + +@@ -916,14 +924,14 @@ mrvl_pmd_parse_input_args(struct mrvl_pmd_init_params *params, + ret = rte_kvargs_process(kvlist, + RTE_CRYPTODEV_PMD_NAME_ARG, + &parse_name_arg, +- ¶ms->common); ++ ¶ms->common.name); + if (ret < 0) + goto free_kvlist; + + ret = rte_kvargs_process(kvlist, + MRVL_PMD_MAX_NB_SESS_ARG, + &parse_integer_arg, +- params); ++ ¶ms->max_nb_sessions); + if (ret < 0) + goto free_kvlist; + +diff --git a/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd_ops.c b/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd_ops.c +index c61bdca369..4eb7ec97d1 100644 +--- a/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd_ops.c ++++ b/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd_ops.c +@@ -111,7 +111,7 @@ static const struct rte_cryptodev_capabilities + .increment = 1 + }, + .digest_size = { +- .min = 28, ++ .min = 12, + .max = 28, + .increment = 0 + }, +@@ -232,7 +232,7 @@ static const struct rte_cryptodev_capabilities + }, + .digest_size = { + .min = 12, +- .max = 48, ++ .max = 64, + .increment = 4 + }, + }, } +@@ -252,7 +252,7 @@ static const struct rte_cryptodev_capabilities + }, + .digest_size = { + .min = 12, +- .max = 48, ++ .max = 64, + .increment = 0 + }, + }, } +@@ -336,9 +336,9 @@ static const struct rte_cryptodev_capabilities + .increment = 0 + }, + .aad_size = { +- .min = 8, +- .max = 12, +- .increment = 4 ++ .min = 0, ++ .max = 64, ++ .increment = 1 + }, + .iv_size = { + .min = 12, +@@ -793,7 +793,7 @@ mrvl_crypto_pmd_sym_session_clear(struct rte_cryptodev *dev, + MRVL_LOG(ERR, "Error while destroying session!"); + } + +- memset(sess, 0, sizeof(struct mrvl_crypto_session)); ++ memset(mrvl_sess, 0, sizeof(struct mrvl_crypto_session)); + struct rte_mempool *sess_mp = rte_mempool_from_obj(sess_priv); + set_sym_session_private_data(sess, index, NULL); + rte_mempool_put(sess_mp, sess_priv); +diff --git a/dpdk/drivers/crypto/octeontx/otx_cryptodev.c b/dpdk/drivers/crypto/octeontx/otx_cryptodev.c +index 5ce1cf82fd..36cedb3253 100644 +--- a/dpdk/drivers/crypto/octeontx/otx_cryptodev.c ++++ b/dpdk/drivers/crypto/octeontx/otx_cryptodev.c +@@ -71,6 +71,7 @@ otx_cpt_pci_remove(struct rte_pci_device *pci_dev) + { + struct rte_cryptodev *cryptodev; + char name[RTE_CRYPTODEV_NAME_MAX_LEN]; ++ void *dev_priv; + + if (pci_dev == NULL) + return -EINVAL; +@@ -84,11 +85,13 @@ otx_cpt_pci_remove(struct rte_pci_device *pci_dev) + if (pci_dev->driver == NULL) + return -ENODEV; + ++ dev_priv = cryptodev->data->dev_private; ++ + /* free crypto device */ + rte_cryptodev_pmd_release_device(cryptodev); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) +- rte_free(cryptodev->data->dev_private); ++ rte_free(dev_priv); + + 
cryptodev->device->driver = NULL; + cryptodev->device = NULL; +diff --git a/dpdk/drivers/crypto/octeontx/otx_cryptodev_hw_access.c b/dpdk/drivers/crypto/octeontx/otx_cryptodev_hw_access.c +index 5229e7c8c5..d4ff9e9a7a 100644 +--- a/dpdk/drivers/crypto/octeontx/otx_cryptodev_hw_access.c ++++ b/dpdk/drivers/crypto/octeontx/otx_cryptodev_hw_access.c +@@ -296,7 +296,7 @@ cpt_vq_init(struct cpt_vf *cptvf, uint8_t group) + /* CPT VF device initialization */ + otx_cpt_vfvq_init(cptvf); + +- /* Send msg to PF to assign currnet Q to required group */ ++ /* Send msg to PF to assign current Q to required group */ + cptvf->vfgrp = group; + err = otx_cpt_send_vf_grp_msg(cptvf, group); + if (err) { +diff --git a/dpdk/drivers/crypto/octeontx/otx_cryptodev_mbox.h b/dpdk/drivers/crypto/octeontx/otx_cryptodev_mbox.h +index 508f3afd47..c1eedc1b9e 100644 +--- a/dpdk/drivers/crypto/octeontx/otx_cryptodev_mbox.h ++++ b/dpdk/drivers/crypto/octeontx/otx_cryptodev_mbox.h +@@ -70,7 +70,7 @@ void + otx_cpt_handle_mbox_intr(struct cpt_vf *cptvf); + + /* +- * Checks if VF is able to comminicate with PF ++ * Checks if VF is able to communicate with PF + * and also gets the CPT number this VF is associated to. + */ + int +diff --git a/dpdk/drivers/crypto/octeontx/otx_cryptodev_ops.c b/dpdk/drivers/crypto/octeontx/otx_cryptodev_ops.c +index 0cf760b296..00d6a5c0ff 100644 +--- a/dpdk/drivers/crypto/octeontx/otx_cryptodev_ops.c ++++ b/dpdk/drivers/crypto/octeontx/otx_cryptodev_ops.c +@@ -548,7 +548,7 @@ otx_cpt_enq_single_sym(struct cpt_instance *instance, + &mdata, (void **)&prep_req); + + if (unlikely(ret)) { +- CPT_LOG_DP_ERR("prep cryto req : op %p, cpt_op 0x%x " ++ CPT_LOG_DP_ERR("prep crypto req : op %p, cpt_op 0x%x " + "ret 0x%x", op, (unsigned int)cpt_op, ret); + return ret; + } +@@ -577,8 +577,8 @@ otx_cpt_enq_single_sym_sessless(struct cpt_instance *instance, + int ret; + + /* Create temporary session */ +- +- if (rte_mempool_get(instance->sess_mp, (void **)&sess)) ++ sess = rte_cryptodev_sym_session_create(instance->sess_mp); ++ if (sess == NULL) + return -ENOMEM; + + ret = sym_session_configure(driver_id, sym_op->xform, sess, +diff --git a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_mbox.c b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_mbox.c +index 75e610db5c..d762baf927 100644 +--- a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_mbox.c ++++ b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_mbox.c +@@ -36,7 +36,7 @@ otx2_cpt_hardware_caps_get(const struct rte_cryptodev *dev, + return -EPIPE; + } + +- memcpy(hw_caps, rsp->eng_caps, ++ otx2_mbox_memcpy(hw_caps, rsp->eng_caps, + sizeof(union cpt_eng_caps) * CPT_MAX_ENG_TYPES); + + return 0; +diff --git a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_ops.c b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_ops.c +index 5f2ccc0872..662c8bc001 100644 +--- a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_ops.c ++++ b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_ops.c +@@ -893,10 +893,10 @@ otx2_cpt_sec_post_process(struct rte_crypto_op *cop, uintptr_t *rsp) + + if (word0->s.opcode.major == OTX2_IPSEC_PO_PROCESS_IPSEC_INB) { + data = rte_pktmbuf_mtod(m, char *); ++ ip = (struct rte_ipv4_hdr *)(data + ++ OTX2_IPSEC_PO_INB_RPTR_HDR); + +- if (rsp[4] == RTE_SECURITY_IPSEC_TUNNEL_IPV4) { +- ip = (struct rte_ipv4_hdr *)(data + +- OTX2_IPSEC_PO_INB_RPTR_HDR); ++ if ((ip->version_ihl >> 4) == 4) { + m_len = rte_be_to_cpu_16(ip->total_length); + } else { + ip6 = (struct rte_ipv6_hdr *)(data + +diff --git a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_qp.h 
b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_qp.h +index 96ff4eb41e..499d54e3ed 100644 +--- a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_qp.h ++++ b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_qp.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright (C) 2020 Marvell International Ltd. ++ * Copyright (C) 2020-2021 Marvell. + */ + + #ifndef _OTX2_CRYPTODEV_QP_H_ +@@ -37,6 +37,8 @@ struct otx2_cpt_qp { + */ + uint8_t ca_enable; + /**< Set when queue pair is added to crypto adapter */ ++ uint8_t qp_ev_bind; ++ /**< Set when queue pair is bound to event queue */ + }; + + #endif /* _OTX2_CRYPTODEV_QP_H_ */ +diff --git a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_sec.c b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_sec.c +index bad9c5ca9f..c6a707c646 100644 +--- a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_sec.c ++++ b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_sec.c +@@ -189,9 +189,6 @@ set_session_misc_attributes(struct otx2_sec_session_ipsec_lp *sess, + sess->auth_iv_length = auth_xform->auth.iv.length; + sess->mac_len = auth_xform->auth.digest_length; + } +- +- sess->ucmd_param1 = OTX2_IPSEC_PO_PER_PKT_IV; +- sess->ucmd_param2 = 0; + } + + static int +@@ -228,7 +225,6 @@ crypto_sec_ipsec_outb_session_create(struct rte_cryptodev *crypto_dev, + lp->ip_id = 0; + lp->seq_lo = 1; + lp->seq_hi = 0; +- lp->tunnel_type = ipsec->tunnel.type; + + ret = ipsec_po_sa_ctl_set(ipsec, crypto_xform, ctl); + if (ret) +@@ -327,6 +323,10 @@ crypto_sec_ipsec_outb_session_create(struct rte_cryptodev *crypto_dev, + lp->ucmd_opcode = (lp->ctx_len << 8) | + (OTX2_IPSEC_PO_PROCESS_IPSEC_OUTB); + ++ /* Set per packet IV and IKEv2 bits */ ++ lp->ucmd_param1 = BIT(11) | BIT(9); ++ lp->ucmd_param2 = 0; ++ + set_session_misc_attributes(lp, crypto_xform, + auth_xform, cipher_xform); + +@@ -367,7 +367,6 @@ crypto_sec_ipsec_inb_session_create(struct rte_cryptodev *crypto_dev, + if (ret) + return ret; + +- lp->tunnel_type = ipsec->tunnel.type; + auth_xform = crypto_xform; + cipher_xform = crypto_xform->next; + +@@ -410,6 +409,10 @@ crypto_sec_ipsec_inb_session_create(struct rte_cryptodev *crypto_dev, + lp->cpt_inst_w7 = inst.u64[7]; + lp->ucmd_opcode = (lp->ctx_len << 8) | + (OTX2_IPSEC_PO_PROCESS_IPSEC_INB); ++ lp->ucmd_param1 = 0; ++ ++ /* Set IKEv2 bit */ ++ lp->ucmd_param2 = BIT(12); + + set_session_misc_attributes(lp, crypto_xform, + auth_xform, cipher_xform); +diff --git a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_sec.h b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_sec.h +index 2849c1ab75..ff3329c9c1 100644 +--- a/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_sec.h ++++ b/dpdk/drivers/crypto/octeontx2/otx2_cryptodev_sec.h +@@ -55,8 +55,6 @@ struct otx2_sec_session_ipsec_lp { + uint8_t iv_length; + /** Auth IV length in bytes */ + uint8_t auth_iv_length; +- /** IPsec tunnel type */ +- enum rte_security_ipsec_tunnel_type tunnel_type; + }; + + int otx2_crypto_sec_ctx_create(struct rte_cryptodev *crypto_dev); +diff --git a/dpdk/drivers/crypto/octeontx2/otx2_ipsec_po.h b/dpdk/drivers/crypto/octeontx2/otx2_ipsec_po.h +index da24f6a5d4..520daa5ff2 100644 +--- a/dpdk/drivers/crypto/octeontx2/otx2_ipsec_po.h ++++ b/dpdk/drivers/crypto/octeontx2/otx2_ipsec_po.h +@@ -15,8 +15,6 @@ + #define OTX2_IPSEC_PO_MAX_INB_CTX_LEN 0x22 + #define OTX2_IPSEC_PO_MAX_OUTB_CTX_LEN 0x38 + +-#define OTX2_IPSEC_PO_PER_PKT_IV BIT(11) +- + #define OTX2_IPSEC_PO_WRITE_IPSEC_OUTB 0x20 + #define OTX2_IPSEC_PO_WRITE_IPSEC_INB 0x21 + #define OTX2_IPSEC_PO_PROCESS_IPSEC_OUTB 0x23 +diff --git 
a/dpdk/drivers/crypto/octeontx2/otx2_ipsec_po_ops.h b/dpdk/drivers/crypto/octeontx2/otx2_ipsec_po_ops.h +index bc702d5c79..887d13eb51 100644 +--- a/dpdk/drivers/crypto/octeontx2/otx2_ipsec_po_ops.h ++++ b/dpdk/drivers/crypto/octeontx2/otx2_ipsec_po_ops.h +@@ -25,8 +25,7 @@ otx2_ipsec_po_out_rlen_get(struct otx2_sec_session_ipsec_lp *sess, + } + + static __rte_always_inline struct cpt_request_info * +-alloc_request_struct(char *maddr, void *cop, int mdata_len, +- enum rte_security_ipsec_tunnel_type tunnel_type) ++alloc_request_struct(char *maddr, void *cop, int mdata_len) + { + struct cpt_request_info *req; + struct cpt_meta_info *meta; +@@ -48,7 +47,6 @@ alloc_request_struct(char *maddr, void *cop, int mdata_len, + op[1] = (uintptr_t)cop; + op[2] = (uintptr_t)req; + op[3] = mdata_len; +- op[4] = tunnel_type; + + return req; + } +@@ -61,16 +59,12 @@ process_outb_sa(struct rte_crypto_op *cop, + uint32_t dlen, rlen, extend_head, extend_tail; + struct rte_crypto_sym_op *sym_op = cop->sym; + struct rte_mbuf *m_src = sym_op->m_src; +- struct otx2_ipsec_po_sa_ctl *ctl_wrd; + struct cpt_request_info *req = NULL; + struct otx2_ipsec_po_out_hdr *hdr; +- struct otx2_ipsec_po_out_sa *sa; + int hdr_len, mdata_len, ret = 0; + vq_cmd_word0_t word0; + char *mdata, *data; + +- sa = &sess->out_sa; +- ctl_wrd = &sa->ctl; + hdr_len = sizeof(*hdr); + + dlen = rte_pktmbuf_pkt_len(m_src) + hdr_len; +@@ -88,8 +82,7 @@ process_outb_sa(struct rte_crypto_op *cop, + } + + mdata += extend_tail; /* mdata follows encrypted data */ +- req = alloc_request_struct(mdata, (void *)cop, mdata_len, +- sess->tunnel_type); ++ req = alloc_request_struct(mdata, (void *)cop, mdata_len); + + data = rte_pktmbuf_prepend(m_src, extend_head); + if (unlikely(data == NULL)) { +@@ -107,14 +100,8 @@ process_outb_sa(struct rte_crypto_op *cop, + hdr = (struct otx2_ipsec_po_out_hdr *)rte_pktmbuf_adj(m_src, + RTE_ETHER_HDR_LEN); + +- if (ctl_wrd->enc_type == OTX2_IPSEC_FP_SA_ENC_AES_GCM) { +- memcpy(&hdr->iv[0], &sa->iv.gcm.nonce, 4); +- memcpy(&hdr->iv[4], rte_crypto_op_ctod_offset(cop, uint8_t *, +- sess->iv_offset), sess->iv_length); +- } else if (ctl_wrd->auth_type == OTX2_IPSEC_FP_SA_ENC_AES_CBC) { +- memcpy(&hdr->iv[0], rte_crypto_op_ctod_offset(cop, uint8_t *, +- sess->iv_offset), sess->iv_length); +- } ++ memcpy(&hdr->iv[0], rte_crypto_op_ctod_offset(cop, uint8_t *, ++ sess->iv_offset), sess->iv_length); + + /* Prepare CPT instruction */ + word0.u64 = sess->ucmd_w0; +@@ -159,8 +146,7 @@ process_inb_sa(struct rte_crypto_op *cop, + goto exit; + } + +- req = alloc_request_struct(mdata, (void *)cop, mdata_len, +- sess->tunnel_type); ++ req = alloc_request_struct(mdata, (void *)cop, mdata_len); + + /* Prepare CPT instruction */ + word0.u64 = sess->ucmd_w0; +diff --git a/dpdk/drivers/crypto/octeontx2/otx2_security.h b/dpdk/drivers/crypto/octeontx2/otx2_security.h +index 33d3b15155..7087ea33c7 100644 +--- a/dpdk/drivers/crypto/octeontx2/otx2_security.h ++++ b/dpdk/drivers/crypto/octeontx2/otx2_security.h +@@ -19,14 +19,16 @@ + #define OTX2_SEC_AES_GCM_ROUNDUP_BYTE_LEN 4 + #define OTX2_SEC_AES_CBC_ROUNDUP_BYTE_LEN 16 + +-union otx2_sec_session_ipsec { +- struct otx2_sec_session_ipsec_ip ip; +- struct otx2_sec_session_ipsec_lp lp; ++struct otx2_sec_session_ipsec { ++ union { ++ struct otx2_sec_session_ipsec_ip ip; ++ struct otx2_sec_session_ipsec_lp lp; ++ }; + enum rte_security_ipsec_sa_direction dir; + }; + + struct otx2_sec_session { +- union otx2_sec_session_ipsec ipsec; ++ struct otx2_sec_session_ipsec ipsec; + void *userdata; + /**< 
Userdata registered by the application */ + } __rte_cache_aligned; +diff --git a/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c b/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c +index 7d3959f550..786c87baed 100644 +--- a/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c ++++ b/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c +@@ -2,6 +2,8 @@ + * Copyright(c) 2016-2017 Intel Corporation + */ + ++#define OPENSSL_API_COMPAT 0x10100000L ++ + #include + #include + #include +@@ -1114,7 +1116,7 @@ process_openssl_auth_encryption_ccm(struct rte_mbuf *mbuf_src, int offset, + if (EVP_EncryptUpdate(ctx, NULL, &len, aad + 18, aadlen) <= 0) + goto process_auth_encryption_ccm_err; + +- if (srclen > 0) ++ if (srclen >= 0) + if (process_openssl_encryption_update(mbuf_src, offset, &dst, + srclen, ctx, 0)) + goto process_auth_encryption_ccm_err; +@@ -1197,7 +1199,7 @@ process_openssl_auth_decryption_ccm(struct rte_mbuf *mbuf_src, int offset, + if (EVP_DecryptUpdate(ctx, NULL, &len, aad + 18, aadlen) <= 0) + goto process_auth_decryption_ccm_err; + +- if (srclen > 0) ++ if (srclen >= 0) + if (process_openssl_decryption_update(mbuf_src, offset, &dst, + srclen, ctx, 0)) + return -EFAULT; +diff --git a/dpdk/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/dpdk/drivers/crypto/openssl/rte_openssl_pmd_ops.c +index ed75877581..1b075fd847 100644 +--- a/dpdk/drivers/crypto/openssl/rte_openssl_pmd_ops.c ++++ b/dpdk/drivers/crypto/openssl/rte_openssl_pmd_ops.c +@@ -2,6 +2,8 @@ + * Copyright(c) 2016-2017 Intel Corporation + */ + ++#define OPENSSL_API_COMPAT 0x10100000L ++ + #include + + #include +diff --git a/dpdk/drivers/crypto/qat/qat_asym.c b/dpdk/drivers/crypto/qat/qat_asym.c +index 85973812a8..bd0bf5f0cb 100644 +--- a/dpdk/drivers/crypto/qat/qat_asym.c ++++ b/dpdk/drivers/crypto/qat/qat_asym.c +@@ -65,33 +65,51 @@ static size_t max_of(int n, ...) 
+ } + + static void qat_clear_arrays(struct qat_asym_op_cookie *cookie, +- int in_count, int out_count, int in_size, int out_size) ++ int in_count, int out_count, int alg_size) + { + int i; + + for (i = 0; i < in_count; i++) +- memset(cookie->input_array[i], 0x0, in_size); ++ memset(cookie->input_array[i], 0x0, alg_size); + for (i = 0; i < out_count; i++) +- memset(cookie->output_array[i], 0x0, out_size); ++ memset(cookie->output_array[i], 0x0, alg_size); ++} ++ ++static void qat_clear_arrays_crt(struct qat_asym_op_cookie *cookie, ++ int alg_size) ++{ ++ int i; ++ ++ memset(cookie->input_array[0], 0x0, alg_size); ++ for (i = 1; i < QAT_ASYM_RSA_QT_NUM_IN_PARAMS; i++) ++ memset(cookie->input_array[i], 0x0, alg_size / 2); ++ for (i = 0; i < QAT_ASYM_RSA_NUM_OUT_PARAMS; i++) ++ memset(cookie->output_array[i], 0x0, alg_size); + } + + static void qat_clear_arrays_by_alg(struct qat_asym_op_cookie *cookie, +- enum rte_crypto_asym_xform_type alg, int in_size, int out_size) ++ struct rte_crypto_asym_xform *xform, int alg_size) + { +- if (alg == RTE_CRYPTO_ASYM_XFORM_MODEX) ++ if (xform->xform_type == RTE_CRYPTO_ASYM_XFORM_MODEX) + qat_clear_arrays(cookie, QAT_ASYM_MODEXP_NUM_IN_PARAMS, +- QAT_ASYM_MODEXP_NUM_OUT_PARAMS, in_size, +- out_size); +- else if (alg == RTE_CRYPTO_ASYM_XFORM_MODINV) ++ QAT_ASYM_MODEXP_NUM_OUT_PARAMS, alg_size); ++ else if (xform->xform_type == RTE_CRYPTO_ASYM_XFORM_MODINV) + qat_clear_arrays(cookie, QAT_ASYM_MODINV_NUM_IN_PARAMS, +- QAT_ASYM_MODINV_NUM_OUT_PARAMS, in_size, +- out_size); ++ QAT_ASYM_MODINV_NUM_OUT_PARAMS, alg_size); ++ else if (xform->xform_type == RTE_CRYPTO_ASYM_XFORM_RSA) { ++ if (xform->rsa.key_type == RTE_RSA_KEY_TYPE_QT) ++ qat_clear_arrays_crt(cookie, alg_size); ++ else { ++ qat_clear_arrays(cookie, QAT_ASYM_RSA_NUM_IN_PARAMS, ++ QAT_ASYM_RSA_NUM_OUT_PARAMS, alg_size); ++ } ++ } + } + + static int qat_asym_check_nonzero(rte_crypto_param n) + { + if (n.length < 8) { +- /* Not a case for any cryptograpic function except for DH ++ /* Not a case for any cryptographic function except for DH + * generator which very often can be of one byte length + */ + size_t i; +@@ -352,7 +370,7 @@ qat_asym_fill_arrays(struct rte_crypto_asym_op *asym_op, + return -(EINVAL); + } + } +- if (xform->rsa.key_type == RTE_RSA_KET_TYPE_QT) { ++ if (xform->rsa.key_type == RTE_RSA_KEY_TYPE_QT) { + + qat_req->input_param_count = + QAT_ASYM_RSA_QT_NUM_IN_PARAMS; +@@ -629,6 +647,8 @@ static void qat_asym_collect_response(struct rte_crypto_op *rx_op, + rte_memcpy(rsa_result, + cookie->output_array[0], + alg_size_in_bytes); ++ rx_op->status = ++ RTE_CRYPTO_OP_STATUS_SUCCESS; + break; + default: + QAT_LOG(ERR, "Padding not supported"); +@@ -655,8 +675,7 @@ static void qat_asym_collect_response(struct rte_crypto_op *rx_op, + } + } + } +- qat_clear_arrays_by_alg(cookie, xform->xform_type, alg_size_in_bytes, +- alg_size_in_bytes); ++ qat_clear_arrays_by_alg(cookie, xform, alg_size_in_bytes); + } + + void +diff --git a/dpdk/drivers/crypto/qat/qat_asym_pmd.c b/dpdk/drivers/crypto/qat/qat_asym_pmd.c +index a2c8aca2c1..ef47e28112 100644 +--- a/dpdk/drivers/crypto/qat/qat_asym_pmd.c ++++ b/dpdk/drivers/crypto/qat/qat_asym_pmd.c +@@ -251,6 +251,10 @@ qat_asym_dev_create(struct qat_pci_device *qat_pci_dev, + struct rte_cryptodev *cryptodev; + struct qat_asym_dev_private *internals; + ++ if (qat_pci_dev->qat_dev_gen == QAT_GEN3) { ++ QAT_LOG(ERR, "Asymmetric crypto PMD not supported on QAT c4xxx"); ++ return -EFAULT; ++ } + snprintf(name, RTE_CRYPTODEV_NAME_MAX_LEN, "%s_%s", + qat_pci_dev->name, 
"asym"); + QAT_LOG(DEBUG, "Creating QAT ASYM device %s\n", name); +diff --git a/dpdk/drivers/crypto/qat/qat_asym_pmd.h b/dpdk/drivers/crypto/qat/qat_asym_pmd.h +index 3b5abddec8..f6770bc4fb 100644 +--- a/dpdk/drivers/crypto/qat/qat_asym_pmd.h ++++ b/dpdk/drivers/crypto/qat/qat_asym_pmd.h +@@ -9,7 +9,7 @@ + #include + #include "qat_device.h" + +-/** Intel(R) QAT Asymmetric Crypto PMD driver name */ ++/** Intel(R) QAT Asymmetric Crypto PMD name */ + #define CRYPTODEV_NAME_QAT_ASYM_PMD crypto_qat_asym + + +diff --git a/dpdk/drivers/crypto/qat/qat_sym.c b/dpdk/drivers/crypto/qat/qat_sym.c +index 4b7676deb8..86fc7aff38 100644 +--- a/dpdk/drivers/crypto/qat/qat_sym.c ++++ b/dpdk/drivers/crypto/qat/qat_sym.c +@@ -2,6 +2,8 @@ + * Copyright(c) 2015-2019 Intel Corporation + */ + ++#define OPENSSL_API_COMPAT 0x10100000L ++ + #include + + #include +@@ -162,6 +164,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg, + uint8_t do_sgl = 0; + uint8_t in_place = 1; + int alignment_adjustment = 0; ++ int oop_shift = 0; + struct rte_crypto_op *op = (struct rte_crypto_op *)in_op; + struct qat_sym_op_cookie *cookie = + (struct qat_sym_op_cookie *)op_cookie; +@@ -332,8 +335,10 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg, + } + min_ofs = auth_ofs; + +- auth_param->auth_res_addr = +- op->sym->auth.digest.phys_addr; ++ if (ctx->qat_hash_alg != ICP_QAT_HW_AUTH_ALGO_NULL || ++ ctx->auth_op == ICP_QAT_HW_AUTH_VERIFY) ++ auth_param->auth_res_addr = ++ op->sym->auth.digest.phys_addr; + + } + +@@ -363,7 +368,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg, + ICP_QAT_HW_AUTH_ALGO_AES_CBC_MAC) { + + /* In case of AES-CCM this may point to user selected +- * memory or iv offset in cypto_op ++ * memory or iv offset in crypto_op + */ + uint8_t *aad_data = op->sym->aead.aad.data; + /* This is true AAD length, it not includes 18 bytes of +@@ -472,6 +477,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg, + rte_pktmbuf_iova_offset(op->sym->m_src, min_ofs); + dst_buf_start = + rte_pktmbuf_iova_offset(op->sym->m_dst, min_ofs); ++ oop_shift = min_ofs; + + } else { + /* In-place operation +@@ -532,7 +538,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg, + /* First find the end of the data */ + if (do_sgl) { + uint32_t remaining_off = auth_param->auth_off + +- auth_param->auth_len + alignment_adjustment; ++ auth_param->auth_len + alignment_adjustment + oop_shift; + struct rte_mbuf *sgl_buf = + (in_place ? 
+ op->sym->m_src : op->sym->m_dst); +diff --git a/dpdk/drivers/crypto/qat/qat_sym_pmd.h b/dpdk/drivers/crypto/qat/qat_sym_pmd.h +index e0992cbe27..c449924194 100644 +--- a/dpdk/drivers/crypto/qat/qat_sym_pmd.h ++++ b/dpdk/drivers/crypto/qat/qat_sym_pmd.h +@@ -16,7 +16,7 @@ + #include "qat_sym_capabilities.h" + #include "qat_device.h" + +-/** Intel(R) QAT Symmetric Crypto PMD driver name */ ++/** Intel(R) QAT Symmetric Crypto PMD name */ + #define CRYPTODEV_NAME_QAT_SYM_PMD crypto_qat + + /* Internal capabilities */ +diff --git a/dpdk/drivers/crypto/qat/qat_sym_session.c b/dpdk/drivers/crypto/qat/qat_sym_session.c +index 23d059bf84..2a9b2712f6 100644 +--- a/dpdk/drivers/crypto/qat/qat_sym_session.c ++++ b/dpdk/drivers/crypto/qat/qat_sym_session.c +@@ -2,6 +2,8 @@ + * Copyright(c) 2015-2019 Intel Corporation + */ + ++#define OPENSSL_API_COMPAT 0x10100000L ++ + #include /* Needed to calculate pre-compute values */ + #include /* Needed to calculate pre-compute values */ + #include /* Needed to calculate pre-compute values */ +@@ -101,8 +103,10 @@ bpi_cipher_ctx_init(enum rte_crypto_cipher_algorithm cryptodev_algo, + return 0; + + ctx_init_err: +- if (*ctx != NULL) ++ if (*ctx != NULL) { + EVP_CIPHER_CTX_free(*ctx); ++ *ctx = NULL; ++ } + return ret; + } + +@@ -1190,6 +1194,9 @@ static int partial_hash_compute(enum icp_qat_hw_auth_algo hash_alg, + uint64_t *hash_state_out_be64; + int i; + ++ /* Initialize to avoid gcc warning */ ++ memset(digest, 0, sizeof(digest)); ++ + digest_size = qat_hash_get_digest_size(hash_alg); + if (digest_size <= 0) + return -EFAULT; +diff --git a/dpdk/drivers/crypto/scheduler/scheduler_failover.c b/dpdk/drivers/crypto/scheduler/scheduler_failover.c +index 844312dd1b..7c82bc5464 100644 +--- a/dpdk/drivers/crypto/scheduler/scheduler_failover.c ++++ b/dpdk/drivers/crypto/scheduler/scheduler_failover.c +@@ -157,6 +157,9 @@ scheduler_start(struct rte_cryptodev *dev) + ((struct scheduler_qp_ctx *) + dev->data->queue_pairs[i])->private_qp_ctx; + ++ sched_ctx->workers[PRIMARY_WORKER_IDX].qp_id = i; ++ sched_ctx->workers[SECONDARY_WORKER_IDX].qp_id = i; ++ + rte_memcpy(&qp_ctx->primary_worker, + &sched_ctx->workers[PRIMARY_WORKER_IDX], + sizeof(struct scheduler_worker)); +diff --git a/dpdk/drivers/crypto/virtio/virtio_rxtx.c b/dpdk/drivers/crypto/virtio/virtio_rxtx.c +index e1cb4ad104..8b391a4fce 100644 +--- a/dpdk/drivers/crypto/virtio/virtio_rxtx.c ++++ b/dpdk/drivers/crypto/virtio/virtio_rxtx.c +@@ -264,6 +264,9 @@ virtqueue_crypto_sym_enqueue_xmit( + if (cop->phys_addr) + desc[idx].addr = cop->phys_addr + session->iv.offset; + else { ++ if (session->iv.length > VIRTIO_CRYPTO_MAX_IV_SIZE) ++ return -ENOMEM; ++ + rte_memcpy(crypto_op_cookie->iv, + rte_crypto_op_ctod_offset(cop, + uint8_t *, session->iv.offset), +diff --git a/dpdk/drivers/crypto/virtio/virtqueue.h b/dpdk/drivers/crypto/virtio/virtqueue.h +index bf10c6579b..c96ca62992 100644 +--- a/dpdk/drivers/crypto/virtio/virtqueue.h ++++ b/dpdk/drivers/crypto/virtio/virtqueue.h +@@ -145,7 +145,7 @@ virtqueue_notify(struct virtqueue *vq) + { + /* + * Ensure updated avail->idx is visible to host. +- * For virtio on IA, the notificaiton is through io port operation ++ * For virtio on IA, the notification is through io port operation + * which is a serialization instruction itself. 
+ */ + VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq); +diff --git a/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c b/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c +index a9ff318281..2f8a34e4f1 100644 +--- a/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c ++++ b/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c +@@ -11,7 +11,6 @@ + #include + + #include "zuc_pmd_private.h" +-#define ZUC_MAX_BURST 16 + #define BYTE_LEN 8 + + static uint8_t cryptodev_driver_id; +@@ -238,11 +237,11 @@ process_zuc_hash_op(struct zuc_qp *qp, struct rte_crypto_op **ops, + { + unsigned int i; + uint8_t processed_ops = 0; +- uint8_t *src[ZUC_MAX_BURST]; ++ uint8_t *src[ZUC_MAX_BURST] = { 0 }; + uint32_t *dst[ZUC_MAX_BURST]; +- uint32_t length_in_bits[ZUC_MAX_BURST]; +- uint8_t *iv[ZUC_MAX_BURST]; +- const void *hash_keys[ZUC_MAX_BURST]; ++ uint32_t length_in_bits[ZUC_MAX_BURST] = { 0 }; ++ uint8_t *iv[ZUC_MAX_BURST] = { 0 }; ++ const void *hash_keys[ZUC_MAX_BURST] = { 0 }; + struct zuc_session *sess; + + for (i = 0; i < num_ops; i++) { +@@ -264,7 +263,7 @@ process_zuc_hash_op(struct zuc_qp *qp, struct rte_crypto_op **ops, + + hash_keys[i] = sess->pKey_hash; + if (sess->auth_op == RTE_CRYPTO_AUTH_OP_VERIFY) +- dst[i] = (uint32_t *)qp->temp_digest; ++ dst[i] = (uint32_t *)qp->temp_digest[i]; + else + dst[i] = (uint32_t *)ops[i]->sym->auth.digest.data; + +@@ -359,11 +358,11 @@ static uint16_t + zuc_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops, + uint16_t nb_ops) + { +- struct rte_crypto_op *c_ops[ZUC_MAX_BURST]; + struct rte_crypto_op *curr_c_op; + + struct zuc_session *curr_sess; + struct zuc_session *sessions[ZUC_MAX_BURST]; ++ struct rte_crypto_op *int_c_ops[ZUC_MAX_BURST]; + enum zuc_operation prev_zuc_op = ZUC_OP_NOT_SUPPORTED; + enum zuc_operation curr_zuc_op; + struct zuc_qp *qp = queue_pair; +@@ -390,11 +389,11 @@ zuc_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops, + */ + if (burst_size == 0) { + prev_zuc_op = curr_zuc_op; +- c_ops[0] = curr_c_op; ++ int_c_ops[0] = curr_c_op; + sessions[0] = curr_sess; + burst_size++; + } else if (curr_zuc_op == prev_zuc_op) { +- c_ops[burst_size] = curr_c_op; ++ int_c_ops[burst_size] = curr_c_op; + sessions[burst_size] = curr_sess; + burst_size++; + /* +@@ -402,7 +401,7 @@ zuc_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops, + * process them, and start a new batch. + */ + if (burst_size == ZUC_MAX_BURST) { +- processed_ops = process_ops(c_ops, curr_zuc_op, ++ processed_ops = process_ops(int_c_ops, curr_zuc_op, + sessions, qp, burst_size, + &enqueued_ops); + if (processed_ops < burst_size) { +@@ -417,7 +416,7 @@ zuc_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops, + * Different operation type, process the ops + * of the previous type. + */ +- processed_ops = process_ops(c_ops, prev_zuc_op, ++ processed_ops = process_ops(int_c_ops, prev_zuc_op, + sessions, qp, burst_size, + &enqueued_ops); + if (processed_ops < burst_size) { +@@ -428,7 +427,7 @@ zuc_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops, + burst_size = 0; + prev_zuc_op = curr_zuc_op; + +- c_ops[0] = curr_c_op; ++ int_c_ops[0] = curr_c_op; + sessions[0] = curr_sess; + burst_size++; + } +@@ -436,7 +435,7 @@ zuc_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops, + + if (burst_size != 0) { + /* Process the crypto ops of the last operation type. 
*/ +- processed_ops = process_ops(c_ops, prev_zuc_op, ++ processed_ops = process_ops(int_c_ops, prev_zuc_op, + sessions, qp, burst_size, + &enqueued_ops); + } +diff --git a/dpdk/drivers/crypto/zuc/zuc_pmd_private.h b/dpdk/drivers/crypto/zuc/zuc_pmd_private.h +index d8684891ee..e01f1378c6 100644 +--- a/dpdk/drivers/crypto/zuc/zuc_pmd_private.h ++++ b/dpdk/drivers/crypto/zuc/zuc_pmd_private.h +@@ -19,6 +19,7 @@ extern int zuc_logtype_driver; + + #define ZUC_IV_KEY_LENGTH 16 + #define ZUC_DIGEST_LENGTH 4 ++#define ZUC_MAX_BURST 16 + + /** private data structure for each virtual ZUC device */ + struct zuc_private { +@@ -42,7 +43,7 @@ struct zuc_qp { + /**< Session Private Data Mempool */ + struct rte_cryptodev_stats qp_stats; + /**< Queue pair statistics */ +- uint8_t temp_digest[ZUC_DIGEST_LENGTH]; ++ uint8_t temp_digest[ZUC_MAX_BURST][ZUC_DIGEST_LENGTH]; + /**< Buffer used to store the digest generated + * by the driver when verifying a digest provided + * by the user (using authentication verify operation) +diff --git a/dpdk/drivers/event/dlb/dlb.c b/dpdk/drivers/event/dlb/dlb.c +index e2d5d43da7..35f4777750 100644 +--- a/dpdk/drivers/event/dlb/dlb.c ++++ b/dpdk/drivers/event/dlb/dlb.c +@@ -10,7 +10,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +diff --git a/dpdk/drivers/event/dlb/pf/dlb_pf.c b/dpdk/drivers/event/dlb/pf/dlb_pf.c +index 3aeef6f91d..876c68e51d 100644 +--- a/dpdk/drivers/event/dlb/pf/dlb_pf.c ++++ b/dpdk/drivers/event/dlb/pf/dlb_pf.c +@@ -6,12 +6,13 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include + #include + #include ++ + #include + #include + #include +diff --git a/dpdk/drivers/event/dlb2/dlb2.c b/dpdk/drivers/event/dlb2/dlb2.c +index 86724863f2..97ae8a8b7e 100644 +--- a/dpdk/drivers/event/dlb2/dlb2.c ++++ b/dpdk/drivers/event/dlb2/dlb2.c +@@ -11,7 +11,7 @@ + #include + #include + #include +-#include ++#include + + #include + #include +@@ -1858,7 +1858,7 @@ dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2, + } + + /* This is expected with eventdev API! +- * It blindly attemmpts to unmap all queues. ++ * It blindly attempts to unmap all queues. 
+ */ + if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) { + DLB2_LOG_DBG("dlb2: ignoring LB QID %d not mapped for qm_port %d.\n", +diff --git a/dpdk/drivers/event/dlb2/dlb2_priv.h b/dpdk/drivers/event/dlb2/dlb2_priv.h +index b73cf3ff14..b0ec2f3172 100644 +--- a/dpdk/drivers/event/dlb2/dlb2_priv.h ++++ b/dpdk/drivers/event/dlb2/dlb2_priv.h +@@ -27,7 +27,6 @@ + #define DLB2_MAX_NUM_EVENTS "max_num_events" + #define DLB2_NUM_DIR_CREDITS "num_dir_credits" + #define DEV_ID_ARG "dev_id" +-#define DLB2_DEFER_SCHED_ARG "defer_sched" + #define DLB2_QID_DEPTH_THRESH_ARG "qid_depth_thresh" + #define DLB2_COS_ARG "cos" + +@@ -435,7 +434,7 @@ struct dlb2_eventdev_port { + bool setup_done; + /* enq_configured is set when the qm port is created */ + bool enq_configured; +- uint8_t implicit_release; /* release events before dequeueing */ ++ uint8_t implicit_release; /* release events before dequeuing */ + } __rte_cache_aligned; + + struct dlb2_queue { +@@ -498,7 +497,6 @@ struct dlb2_eventdev { + uint16_t num_dir_ports; /* total num of dir ports requested */ + bool umwait_allowed; + bool global_dequeue_wait; /* Not using per dequeue wait if true */ +- bool defer_sched; + enum dlb2_cq_poll_modes poll_mode; + uint8_t revision; + bool configured; +@@ -523,7 +521,6 @@ struct dlb2_devargs { + int max_num_events; + int num_dir_credits_override; + int dev_id; +- int defer_sched; + struct dlb2_qid_depth_thresholds qid_depth_thresholds; + enum dlb2_cos cos_id; + }; +diff --git a/dpdk/drivers/event/dlb2/dlb2_selftest.c b/dpdk/drivers/event/dlb2/dlb2_selftest.c +index 5cf66c552c..1863ffe049 100644 +--- a/dpdk/drivers/event/dlb2/dlb2_selftest.c ++++ b/dpdk/drivers/event/dlb2/dlb2_selftest.c +@@ -223,7 +223,7 @@ test_stop_flush(struct test *t) /* test to check we can properly flush events */ + 0, + RTE_EVENT_PORT_ATTR_DEQ_DEPTH, + &dequeue_depth)) { +- printf("%d: Error retrieveing dequeue depth\n", __LINE__); ++ printf("%d: Error retrieving dequeue depth\n", __LINE__); + goto err; + } + +@@ -1354,7 +1354,7 @@ test_delayed_pop(void) + } + + /* Release one more event. This will trigger the token pop, and +- * dequeue_depth - 1 more events will be scheduled to the device. ++ * dequeue_depth more events will be scheduled to the device. + */ + ev.op = RTE_EVENT_OP_RELEASE; + +@@ -1366,7 +1366,7 @@ test_delayed_pop(void) + + timeout = 0xFFFFFFFFF; + +- for (i = 0; i < port_conf.dequeue_depth - 1; i++) { ++ for (i = 0; i < port_conf.dequeue_depth; i++) { + if (rte_event_dequeue_burst(evdev, 0, &ev, 1, timeout) != 1) { + printf("%d: event dequeue expected to succeed\n", + __LINE__); +@@ -1374,14 +1374,6 @@ test_delayed_pop(void) + } + } + +- timeout = 0x10000; +- +- if (rte_event_dequeue_burst(evdev, 0, &ev, 1, timeout) != 0) { +- printf("%d: event dequeue expected to fail\n", +- __LINE__); +- goto err; +- } +- + cleanup(); + return 0; + +diff --git a/dpdk/drivers/event/dlb2/pf/dlb2_pf.c b/dpdk/drivers/event/dlb2/pf/dlb2_pf.c +index 632c4e099f..eed789a38d 100644 +--- a/dpdk/drivers/event/dlb2/pf/dlb2_pf.c ++++ b/dpdk/drivers/event/dlb2/pf/dlb2_pf.c +@@ -6,12 +6,13 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include + #include + #include ++ + #include + #include + #include +diff --git a/dpdk/drivers/event/dlb2/rte_pmd_dlb2.h b/dpdk/drivers/event/dlb2/rte_pmd_dlb2.h +index 74399db018..1dbd885a16 100644 +--- a/dpdk/drivers/event/dlb2/rte_pmd_dlb2.h ++++ b/dpdk/drivers/event/dlb2/rte_pmd_dlb2.h +@@ -24,7 +24,7 @@ extern "C" { + * Selects the token pop mode for a DLB2 port. 
+ */ + enum dlb2_token_pop_mode { +- /* Pop the CQ tokens immediately after dequeueing. */ ++ /* Pop the CQ tokens immediately after dequeuing. */ + AUTO_POP, + /* Pop CQ tokens after (dequeue_depth - 1) events are released. + * Supported on load-balanced ports only. +diff --git a/dpdk/drivers/event/dpaa2/dpaa2_eventdev_logs.h b/dpdk/drivers/event/dpaa2/dpaa2_eventdev_logs.h +index 5da85c60f0..66c8c77274 100644 +--- a/dpdk/drivers/event/dpaa2/dpaa2_eventdev_logs.h ++++ b/dpdk/drivers/event/dpaa2/dpaa2_eventdev_logs.h +@@ -38,7 +38,5 @@ extern int dpaa2_logtype_event; + #define dpaa2_evdev_info(fmt, ...) DPAA2_EVENTDEV_LOG(INFO, fmt, ##__VA_ARGS__) + #define dpaa2_evdev_dbg(fmt, ...) DPAA2_EVENTDEV_LOG(DEBUG, fmt, ##__VA_ARGS__) + #define dpaa2_evdev_err(fmt, ...) DPAA2_EVENTDEV_LOG(ERR, fmt, ##__VA_ARGS__) +-#define dpaa2_evdev__func_trace dpaa2_evdev_dbg +-#define dpaa2_evdev_selftest dpaa2_evdev_info + + #endif /* _DPAA2_EVENTDEV_LOGS_H_ */ +diff --git a/dpdk/drivers/event/dpaa2/dpaa2_eventdev_selftest.c b/dpdk/drivers/event/dpaa2/dpaa2_eventdev_selftest.c +index cd7311a94d..7c6d55bc2b 100644 +--- a/dpdk/drivers/event/dpaa2/dpaa2_eventdev_selftest.c ++++ b/dpdk/drivers/event/dpaa2/dpaa2_eventdev_selftest.c +@@ -118,7 +118,7 @@ _eventdev_setup(int mode) + struct rte_event_dev_info info; + const char *pool_name = "evdev_dpaa2_test_pool"; + +- /* Create and destrory pool for each test case to make it standalone */ ++ /* Create and destroy pool for each test case to make it standalone */ + eventdev_test_mempool = rte_pktmbuf_pool_create(pool_name, + MAX_EVENTS, + 0 /*MBUF_CACHE_SIZE*/, +diff --git a/dpdk/drivers/event/dsw/dsw_evdev.h b/dpdk/drivers/event/dsw/dsw_evdev.h +index 6cb77cfc44..6513d35ee7 100644 +--- a/dpdk/drivers/event/dsw/dsw_evdev.h ++++ b/dpdk/drivers/event/dsw/dsw_evdev.h +@@ -22,7 +22,7 @@ + /* Multiple 24-bit flow ids will map to the same DSW-level flow. The + * number of DSW flows should be high enough make it unlikely that + * flow ids of several large flows hash to the same DSW-level flow. +- * Such collisions will limit parallism and thus the number of cores ++ * Such collisions will limit parallelism and thus the number of cores + * that may be utilized. However, configuring a large number of DSW + * flows might potentially, depending on traffic and actual + * application flow id value range, result in each such DSW-level flow +@@ -102,7 +102,7 @@ + /* Only one outstanding migration per port is allowed */ + #define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS*DSW_MAX_FLOWS_PER_MIGRATION) + +-/* Enough room for paus request/confirm and unpaus request/confirm for ++/* Enough room for pause request/confirm and unpaus request/confirm for + * all possible senders. + */ + #define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4) +diff --git a/dpdk/drivers/event/dsw/dsw_event.c b/dpdk/drivers/event/dsw/dsw_event.c +index 611b36781f..8b81dc5c56 100644 +--- a/dpdk/drivers/event/dsw/dsw_event.c ++++ b/dpdk/drivers/event/dsw/dsw_event.c +@@ -1090,7 +1090,7 @@ dsw_port_ctl_process(struct dsw_evdev *dsw, struct dsw_port *port) + static void + dsw_port_note_op(struct dsw_port *port, uint16_t num_events) + { +- /* To pull the control ring reasonbly often on busy ports, ++ /* To pull the control ring reasonably often on busy ports, + * each dequeued/enqueued event is considered an 'op' too. + */ + port->ops_since_bg_task += (num_events+1); +@@ -1174,7 +1174,7 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port, + * addition, a port cannot be left "unattended" (e.g. 
unused) + * for long periods of time, since that would stall + * migration. Eventdev API extensions to provide a cleaner way +- * to archieve both of these functions should be ++ * to archive both of these functions should be + * considered. + */ + if (unlikely(events_len == 0)) { +diff --git a/dpdk/drivers/event/octeontx/ssovf_evdev.h b/dpdk/drivers/event/octeontx/ssovf_evdev.h +index 10163151cd..0f9537d310 100644 +--- a/dpdk/drivers/event/octeontx/ssovf_evdev.h ++++ b/dpdk/drivers/event/octeontx/ssovf_evdev.h +@@ -88,7 +88,7 @@ + + /* + * In Cavium OCTEON TX SoC, all accesses to the device registers are +- * implictly strongly ordered. So, The relaxed version of IO operation is ++ * implicitly strongly ordered. So, The relaxed version of IO operation is + * safe to use with out any IO memory barriers. + */ + #define ssovf_read64 rte_read64_relaxed +diff --git a/dpdk/drivers/event/octeontx/ssovf_evdev_selftest.c b/dpdk/drivers/event/octeontx/ssovf_evdev_selftest.c +index 528f99dd84..e35720e9b6 100644 +--- a/dpdk/drivers/event/octeontx/ssovf_evdev_selftest.c ++++ b/dpdk/drivers/event/octeontx/ssovf_evdev_selftest.c +@@ -151,7 +151,7 @@ _eventdev_setup(int mode) + struct rte_event_dev_info info; + const char *pool_name = "evdev_octeontx_test_pool"; + +- /* Create and destrory pool for each test case to make it standalone */ ++ /* Create and destroy pool for each test case to make it standalone */ + eventdev_test_mempool = rte_pktmbuf_pool_create(pool_name, + MAX_EVENTS, + 0 /*MBUF_CACHE_SIZE*/, +diff --git a/dpdk/drivers/event/octeontx2/otx2_evdev.c b/dpdk/drivers/event/octeontx2/otx2_evdev.c +index 0fe014c24a..dd75b9f859 100644 +--- a/dpdk/drivers/event/octeontx2/otx2_evdev.c ++++ b/dpdk/drivers/event/octeontx2/otx2_evdev.c +@@ -883,29 +883,27 @@ sso_configure_ports(const struct rte_eventdev *event_dev) + struct otx2_ssogws *ws; + uintptr_t base; + +- /* Free memory prior to re-allocation if needed */ + if (event_dev->data->ports[i] != NULL) { + ws = event_dev->data->ports[i]; +- rte_free(ssogws_get_cookie(ws)); +- ws = NULL; +- } ++ } else { ++ /* Allocate event port memory */ ++ ws = rte_zmalloc_socket("otx2_sso_ws", ++ sizeof(struct otx2_ssogws) + ++ RTE_CACHE_LINE_SIZE, ++ RTE_CACHE_LINE_SIZE, ++ event_dev->data->socket_id); ++ if (ws == NULL) { ++ otx2_err("Failed to alloc memory for port=%d", ++ i); ++ rc = -ENOMEM; ++ break; ++ } + +- /* Allocate event port memory */ +- ws = rte_zmalloc_socket("otx2_sso_ws", +- sizeof(struct otx2_ssogws) + +- RTE_CACHE_LINE_SIZE, +- RTE_CACHE_LINE_SIZE, +- event_dev->data->socket_id); +- if (ws == NULL) { +- otx2_err("Failed to alloc memory for port=%d", i); +- rc = -ENOMEM; +- break; ++ /* First cache line is reserved for cookie */ ++ ws = (struct otx2_ssogws *) ++ ((uint8_t *)ws + RTE_CACHE_LINE_SIZE); + } + +- /* First cache line is reserved for cookie */ +- ws = (struct otx2_ssogws *) +- ((uint8_t *)ws + RTE_CACHE_LINE_SIZE); +- + ws->port = i; + base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12); + sso_set_port_ops(ws, base); +@@ -983,7 +981,7 @@ sso_xaq_allocate(struct otx2_sso_evdev *dev) + + dev->fc_iova = mz->iova; + dev->fc_mem = mz->addr; +- ++ *dev->fc_mem = 0; + aura = (struct npa_aura_s *)((uintptr_t)dev->fc_mem + OTX2_ALIGN); + memset(aura, 0, sizeof(struct npa_aura_s)); + +@@ -1059,6 +1057,19 @@ sso_ggrp_alloc_xaq(struct otx2_sso_evdev *dev) + return otx2_mbox_process(mbox); + } + ++static int ++sso_ggrp_free_xaq(struct otx2_sso_evdev *dev) ++{ ++ struct otx2_mbox *mbox = dev->mbox; ++ struct sso_release_xaq *req; ++ ++ 
otx2_sso_dbg("Freeing XAQ for GGRPs"); ++ req = otx2_mbox_alloc_msg_sso_hw_release_xaq_aura(mbox); ++ req->hwgrps = dev->nb_event_queues; ++ ++ return otx2_mbox_process(mbox); ++} ++ + static void + sso_lf_teardown(struct otx2_sso_evdev *dev, + enum otx2_sso_lf_type lf_type) +@@ -1449,6 +1460,8 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) + ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]); + ws->swtag_req = 0; + ws->vws = 0; ++ ws->fc_mem = dev->fc_mem; ++ ws->xaq_lmt = dev->xaq_lmt; + ws->ws_state[0].cur_grp = 0; + ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY; + ws->ws_state[1].cur_grp = 0; +@@ -1459,6 +1472,8 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) + ws = event_dev->data->ports[i]; + ssogws_reset(ws); + ws->swtag_req = 0; ++ ws->fc_mem = dev->fc_mem; ++ ws->xaq_lmt = dev->xaq_lmt; + ws->cur_grp = 0; + ws->cur_tt = SSO_SYNC_EMPTY; + } +@@ -1505,28 +1520,30 @@ int + sso_xae_reconfigure(struct rte_eventdev *event_dev) + { + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); +- struct rte_mempool *prev_xaq_pool; + int rc = 0; + + if (event_dev->data->dev_started) + sso_cleanup(event_dev, 0); + +- prev_xaq_pool = dev->xaq_pool; ++ rc = sso_ggrp_free_xaq(dev); ++ if (rc < 0) { ++ otx2_err("Failed to free XAQ\n"); ++ return rc; ++ } ++ ++ rte_mempool_free(dev->xaq_pool); + dev->xaq_pool = NULL; + rc = sso_xaq_allocate(dev); + if (rc < 0) { + otx2_err("Failed to alloc xaq pool %d", rc); +- rte_mempool_free(prev_xaq_pool); + return rc; + } + rc = sso_ggrp_alloc_xaq(dev); + if (rc < 0) { + otx2_err("Failed to alloc xaq to ggrp %d", rc); +- rte_mempool_free(prev_xaq_pool); + return rc; + } + +- rte_mempool_free(prev_xaq_pool); + rte_mb(); + if (event_dev->data->dev_started) + sso_cleanup(event_dev, 1); +diff --git a/dpdk/drivers/event/octeontx2/otx2_evdev_adptr.c b/dpdk/drivers/event/octeontx2/otx2_evdev_adptr.c +index d69f269df6..d85c3665ca 100644 +--- a/dpdk/drivers/event/octeontx2/otx2_evdev_adptr.c ++++ b/dpdk/drivers/event/octeontx2/otx2_evdev_adptr.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(C) 2019 Marvell International Ltd. ++ * Copyright(C) 2019-2021 Marvell. + */ + + #include "otx2_evdev.h" +diff --git a/dpdk/drivers/event/octeontx2/otx2_evdev_crypto_adptr.c b/dpdk/drivers/event/octeontx2/otx2_evdev_crypto_adptr.c +index 7197815ae6..82bccd34ec 100644 +--- a/dpdk/drivers/event/octeontx2/otx2_evdev_crypto_adptr.c ++++ b/dpdk/drivers/event/octeontx2/otx2_evdev_crypto_adptr.c +@@ -1,10 +1,11 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright (C) 2020 Marvell International Ltd. ++ * Copyright (C) 2020-2021 Marvell. 
+ */ + + #include + #include + ++#include "otx2_cryptodev.h" + #include "otx2_cryptodev_hw_access.h" + #include "otx2_cryptodev_qp.h" + #include "otx2_cryptodev_mbox.h" +@@ -23,34 +24,78 @@ otx2_ca_caps_get(const struct rte_eventdev *dev, + return 0; + } + +-int +-otx2_ca_qp_add(const struct rte_eventdev *dev, const struct rte_cryptodev *cdev, +- int32_t queue_pair_id, const struct rte_event *event) ++static int ++otx2_ca_qp_sso_link(const struct rte_cryptodev *cdev, struct otx2_cpt_qp *qp, ++ uint16_t sso_pf_func) + { +- struct otx2_sso_evdev *sso_evdev = sso_pmd_priv(dev); + union otx2_cpt_af_lf_ctl2 af_lf_ctl2; +- struct otx2_cpt_qp *qp; + int ret; + +- qp = cdev->data->queue_pairs[queue_pair_id]; +- +- qp->ca_enable = 1; +- rte_memcpy(&qp->ev, event, sizeof(struct rte_event)); +- + ret = otx2_cpt_af_reg_read(cdev, OTX2_CPT_AF_LF_CTL2(qp->id), +- &af_lf_ctl2.u); ++ &af_lf_ctl2.u); + if (ret) + return ret; + +- af_lf_ctl2.s.sso_pf_func = otx2_sso_pf_func_get(); ++ af_lf_ctl2.s.sso_pf_func = sso_pf_func; + ret = otx2_cpt_af_reg_write(cdev, OTX2_CPT_AF_LF_CTL2(qp->id), +- af_lf_ctl2.u); +- if (ret) +- return ret; ++ af_lf_ctl2.u); ++ return ret; ++} ++ ++static void ++otx2_ca_qp_init(struct otx2_cpt_qp *qp, const struct rte_event *event) ++{ ++ if (event) { ++ qp->qp_ev_bind = 1; ++ rte_memcpy(&qp->ev, event, sizeof(struct rte_event)); ++ } else { ++ qp->qp_ev_bind = 0; ++ } ++ qp->ca_enable = 1; ++} ++ ++int ++otx2_ca_qp_add(const struct rte_eventdev *dev, const struct rte_cryptodev *cdev, ++ int32_t queue_pair_id, const struct rte_event *event) ++{ ++ struct otx2_sso_evdev *sso_evdev = sso_pmd_priv(dev); ++ struct otx2_cpt_vf *vf = cdev->data->dev_private; ++ uint16_t sso_pf_func = otx2_sso_pf_func_get(); ++ struct otx2_cpt_qp *qp; ++ uint8_t qp_id; ++ int ret; ++ ++ if (queue_pair_id == -1) { ++ for (qp_id = 0; qp_id < vf->nb_queues; qp_id++) { ++ qp = cdev->data->queue_pairs[qp_id]; ++ ret = otx2_ca_qp_sso_link(cdev, qp, sso_pf_func); ++ if (ret) { ++ uint8_t qp_tmp; ++ for (qp_tmp = 0; qp_tmp < qp_id; qp_tmp++) ++ otx2_ca_qp_del(dev, cdev, qp_tmp); ++ return ret; ++ } ++ otx2_ca_qp_init(qp, event); ++ } ++ } else { ++ qp = cdev->data->queue_pairs[queue_pair_id]; ++ ret = otx2_ca_qp_sso_link(cdev, qp, sso_pf_func); ++ if (ret) ++ return ret; ++ otx2_ca_qp_init(qp, event); ++ } + + sso_evdev->rx_offloads |= NIX_RX_OFFLOAD_SECURITY_F; + sso_fastpath_fns_set((struct rte_eventdev *)(uintptr_t)dev); + ++ /* Update crypto adapter xae count */ ++ if (queue_pair_id == -1) ++ sso_evdev->adptr_xae_cnt += ++ vf->nb_queues * OTX2_CPT_DEFAULT_CMD_QLEN; ++ else ++ sso_evdev->adptr_xae_cnt += OTX2_CPT_DEFAULT_CMD_QLEN; ++ sso_xae_reconfigure((struct rte_eventdev *)(uintptr_t)dev); ++ + return 0; + } + +@@ -58,24 +103,29 @@ int + otx2_ca_qp_del(const struct rte_eventdev *dev, const struct rte_cryptodev *cdev, + int32_t queue_pair_id) + { +- union otx2_cpt_af_lf_ctl2 af_lf_ctl2; ++ struct otx2_cpt_vf *vf = cdev->data->dev_private; + struct otx2_cpt_qp *qp; ++ uint8_t qp_id; + int ret; + + RTE_SET_USED(dev); + +- qp = cdev->data->queue_pairs[queue_pair_id]; +- qp->ca_enable = 0; +- memset(&qp->ev, 0, sizeof(struct rte_event)); ++ ret = 0; ++ if (queue_pair_id == -1) { ++ for (qp_id = 0; qp_id < vf->nb_queues; qp_id++) { ++ qp = cdev->data->queue_pairs[qp_id]; ++ ret = otx2_ca_qp_sso_link(cdev, qp, 0); ++ if (ret) ++ return ret; ++ qp->ca_enable = 0; ++ } ++ } else { ++ qp = cdev->data->queue_pairs[queue_pair_id]; ++ ret = otx2_ca_qp_sso_link(cdev, qp, 0); ++ if (ret) ++ return ret; ++ qp->ca_enable = 0; 
++ } + +- ret = otx2_cpt_af_reg_read(cdev, OTX2_CPT_AF_LF_CTL2(qp->id), +- &af_lf_ctl2.u); +- if (ret) +- return ret; +- +- af_lf_ctl2.s.sso_pf_func = 0; +- ret = otx2_cpt_af_reg_write(cdev, OTX2_CPT_AF_LF_CTL2(qp->id), +- af_lf_ctl2.u); +- +- return ret; ++ return 0; + } +diff --git a/dpdk/drivers/event/octeontx2/otx2_evdev_selftest.c b/dpdk/drivers/event/octeontx2/otx2_evdev_selftest.c +index 48bfaf893d..a89637d60f 100644 +--- a/dpdk/drivers/event/octeontx2/otx2_evdev_selftest.c ++++ b/dpdk/drivers/event/octeontx2/otx2_evdev_selftest.c +@@ -139,7 +139,7 @@ _eventdev_setup(int mode) + struct rte_event_dev_info info; + int i, ret; + +- /* Create and destrory pool for each test case to make it standalone */ ++ /* Create and destroy pool for each test case to make it standalone */ + eventdev_test_mempool = rte_pktmbuf_pool_create(pool_name, MAX_EVENTS, + 0, 0, 512, + rte_socket_id()); +diff --git a/dpdk/drivers/event/octeontx2/otx2_worker.h b/dpdk/drivers/event/octeontx2/otx2_worker.h +index 0a7d6671c4..89dc79d9c9 100644 +--- a/dpdk/drivers/event/octeontx2/otx2_worker.h ++++ b/dpdk/drivers/event/octeontx2/otx2_worker.h +@@ -272,7 +272,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m, + uint64_t *cmd, const uint32_t flags) + { + otx2_lmt_mov(cmd, txq->cmd, otx2_nix_tx_ext_subs(flags)); +- otx2_nix_xmit_prepare(m, cmd, flags); ++ otx2_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt); + } + + static __rte_always_inline uint16_t +diff --git a/dpdk/drivers/event/octeontx2/otx2_worker_dual.h b/dpdk/drivers/event/octeontx2/otx2_worker_dual.h +index 6e60618217..ffe5f7ac3e 100644 +--- a/dpdk/drivers/event/octeontx2/otx2_worker_dual.h ++++ b/dpdk/drivers/event/octeontx2/otx2_worker_dual.h +@@ -76,7 +76,7 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws, + event.flow_id, flags, lookup_mem); + /* Extracting tstamp, if PTP enabled. CGX will prepend + * the timestamp at starting of packet data and it can +- * be derieved from WQE 9 dword which corresponds to SG ++ * be derived from WQE 9 dword which corresponds to SG + * iova. 
+ * rte_pktmbuf_mtod_offset can be used for this purpose + * but it brings down the performance as it reads +diff --git a/dpdk/drivers/event/opdl/opdl_evdev.c b/dpdk/drivers/event/opdl/opdl_evdev.c +index 3050578ffd..fa1b9852a3 100644 +--- a/dpdk/drivers/event/opdl/opdl_evdev.c ++++ b/dpdk/drivers/event/opdl/opdl_evdev.c +@@ -702,7 +702,7 @@ opdl_probe(struct rte_vdev_device *vdev) + } + + PMD_DRV_LOG(INFO, "DEV_ID:[%02d] : " +- "Success - creating eventdev device %s, numa_node:[%d], do_valdation:[%s]" ++ "Success - creating eventdev device %s, numa_node:[%d], do_validation:[%s]" + " , self_test:[%s]\n", + dev->data->dev_id, + name, +diff --git a/dpdk/drivers/event/opdl/opdl_test.c b/dpdk/drivers/event/opdl/opdl_test.c +index e4fc70a440..24b92df476 100644 +--- a/dpdk/drivers/event/opdl/opdl_test.c ++++ b/dpdk/drivers/event/opdl/opdl_test.c +@@ -864,7 +864,7 @@ qid_basic(struct test *t) + } + + +- /* Start the devicea */ ++ /* Start the device */ + if (!err) { + if (rte_event_dev_start(evdev) < 0) { + PMD_DRV_LOG(ERR, "%s:%d: Error with start call\n", +diff --git a/dpdk/drivers/event/sw/sw_evdev.c b/dpdk/drivers/event/sw/sw_evdev.c +index 0d8013adf7..54c7abbf93 100644 +--- a/dpdk/drivers/event/sw/sw_evdev.c ++++ b/dpdk/drivers/event/sw/sw_evdev.c +@@ -712,7 +712,6 @@ sw_dump(struct rte_eventdev *dev, FILE *f) + continue; + } + int affinities_per_port[SW_PORTS_MAX] = {0}; +- uint32_t inflights = 0; + + fprintf(f, " Queue %d (%s)\n", i, q_type_strings[qid->type]); + fprintf(f, "\trx %"PRIu64"\tdrop %"PRIu64"\ttx %"PRIu64"\n", +@@ -733,7 +732,6 @@ sw_dump(struct rte_eventdev *dev, FILE *f) + for (flow = 0; flow < RTE_DIM(qid->fids); flow++) + if (qid->fids[flow].cq != -1) { + affinities_per_port[qid->fids[flow].cq]++; +- inflights += qid->fids[flow].pcount; + } + + uint32_t port; +diff --git a/dpdk/drivers/event/sw/sw_evdev.h b/dpdk/drivers/event/sw/sw_evdev.h +index 1fc07b64fc..87d2a5d50e 100644 +--- a/dpdk/drivers/event/sw/sw_evdev.h ++++ b/dpdk/drivers/event/sw/sw_evdev.h +@@ -180,7 +180,7 @@ struct sw_port { + uint16_t outstanding_releases __rte_cache_aligned; + uint16_t inflight_max; /* app requested max inflights for this port */ + uint16_t inflight_credits; /* num credits this port has right now */ +- uint8_t implicit_release; /* release events before dequeueing */ ++ uint8_t implicit_release; /* release events before dequeuing */ + + uint16_t last_dequeue_burst_sz; /* how big the burst was */ + uint64_t last_dequeue_ticks; /* used to track burst processing time */ +diff --git a/dpdk/drivers/event/sw/sw_evdev_selftest.c b/dpdk/drivers/event/sw/sw_evdev_selftest.c +index e4bfb3a0f1..782e76db5b 100644 +--- a/dpdk/drivers/event/sw/sw_evdev_selftest.c ++++ b/dpdk/drivers/event/sw/sw_evdev_selftest.c +@@ -1109,7 +1109,7 @@ xstats_tests(struct test *t) + NULL, + 0); + +- /* Verify that the resetable stats are reset, and others are not */ ++ /* Verify that the resettable stats are reset, and others are not */ + static const uint64_t queue_expected_zero[] = { + 0 /* rx */, + 0 /* tx */, +diff --git a/dpdk/drivers/mempool/dpaa/dpaa_mempool.c b/dpdk/drivers/mempool/dpaa/dpaa_mempool.c +index e6b06f0575..0e08ac13e7 100644 +--- a/dpdk/drivers/mempool/dpaa/dpaa_mempool.c ++++ b/dpdk/drivers/mempool/dpaa/dpaa_mempool.c +@@ -257,7 +257,7 @@ dpaa_mbuf_alloc_bulk(struct rte_mempool *pool, + } + /* assigning mbuf from the acquired objects */ + for (i = 0; (i < ret) && bufs[i].addr; i++) { +- /* TODO-errata - objerved that bufs may be null ++ /* TODO-errata - observed that bufs may be null + * i.e. 
first buffer is valid, remaining 6 buffers + * may be null. + */ +diff --git a/dpdk/drivers/mempool/octeontx/octeontx_fpavf.c b/dpdk/drivers/mempool/octeontx/octeontx_fpavf.c +index 94dc5cd815..8fd9edced2 100644 +--- a/dpdk/drivers/mempool/octeontx/octeontx_fpavf.c ++++ b/dpdk/drivers/mempool/octeontx/octeontx_fpavf.c +@@ -669,7 +669,7 @@ octeontx_fpa_bufpool_destroy(uintptr_t handle, int node_id) + break; + } + +- /* Imsert it into an ordered linked list */ ++ /* Insert it into an ordered linked list */ + for (curr = &head; curr[0] != NULL; curr = curr[0]) { + if ((uintptr_t)node <= (uintptr_t)curr[0]) + break; +@@ -705,7 +705,7 @@ octeontx_fpa_bufpool_destroy(uintptr_t handle, int node_id) + + ret = octeontx_fpapf_aura_detach(gpool); + if (ret) { +- fpavf_log_err("Failed to dettach gaura %u. error code=%d\n", ++ fpavf_log_err("Failed to detach gaura %u. error code=%d\n", + gpool, ret); + } + +diff --git a/dpdk/drivers/mempool/octeontx2/otx2_mempool_ops.c b/dpdk/drivers/mempool/octeontx2/otx2_mempool_ops.c +index 9ff71bcf6b..d827fd8c7b 100644 +--- a/dpdk/drivers/mempool/octeontx2/otx2_mempool_ops.c ++++ b/dpdk/drivers/mempool/octeontx2/otx2_mempool_ops.c +@@ -611,7 +611,8 @@ npa_lf_aura_pool_pair_alloc(struct otx2_npa_lf *lf, const uint32_t block_size, + /* Update aura fields */ + aura->pool_addr = pool_id;/* AF will translate to associated poolctx */ + aura->ena = 1; +- aura->shift = __builtin_clz(block_count) - 8; ++ aura->shift = rte_log2_u32(block_count); ++ aura->shift = aura->shift < 8 ? 0 : aura->shift - 8; + aura->limit = block_count; + aura->pool_caching = 1; + aura->err_int_ena = BIT(NPA_AURA_ERR_INT_AURA_ADD_OVER); +@@ -626,7 +627,8 @@ npa_lf_aura_pool_pair_alloc(struct otx2_npa_lf *lf, const uint32_t block_size, + pool->ena = 1; + pool->buf_size = block_size / OTX2_ALIGN; + pool->stack_max_pages = stack_size; +- pool->shift = __builtin_clz(block_count) - 8; ++ pool->shift = rte_log2_u32(block_count); ++ pool->shift = pool->shift < 8 ? 0 : pool->shift - 8; + pool->ptr_start = 0; + pool->ptr_end = ~0; + pool->stack_caching = 1; +diff --git a/dpdk/drivers/meson.build b/dpdk/drivers/meson.build +index f9febc579e..f7db4329bf 100644 +--- a/dpdk/drivers/meson.build ++++ b/dpdk/drivers/meson.build +@@ -19,7 +19,7 @@ subdirs = [ + ] + + disabled_drivers = run_command(list_dir_globs, get_option('disable_drivers'), +- ).stdout().split() ++ check: true).stdout().split() + + default_cflags = machine_args + default_cflags += ['-DALLOW_EXPERIMENTAL_API'] +diff --git a/dpdk/drivers/net/af_packet/rte_eth_af_packet.c b/dpdk/drivers/net/af_packet/rte_eth_af_packet.c +index 671ee87ae2..f0945f22cc 100644 +--- a/dpdk/drivers/net/af_packet/rte_eth_af_packet.c ++++ b/dpdk/drivers/net/af_packet/rte_eth_af_packet.c +@@ -212,8 +212,30 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + } + + /* point at the next incoming frame */ +- if ((ppd->tp_status != TP_STATUS_AVAILABLE) && +- (poll(&pfd, 1, -1) < 0)) ++ if (ppd->tp_status != TP_STATUS_AVAILABLE) { ++ if (poll(&pfd, 1, -1) < 0) ++ break; ++ ++ /* poll() can return POLLERR if the interface is down */ ++ if (pfd.revents & POLLERR) ++ break; ++ } ++ ++ /* ++ * poll() will almost always return POLLOUT, even if there ++ * are no extra buffers available ++ * ++ * This happens, because packet_poll() calls datagram_poll() ++ * which checks the space left in the socket buffer and, ++ * in the case of packet_mmap, the default socket buffer length ++ * doesn't match the requested size for the tx_ring. 
++ * As such, there is almost always space left in socket buffer, ++ * which doesn't seem to be correlated to the requested size ++ * for the tx_ring in packet_mmap. ++ * ++ * This results in poll() returning POLLOUT. ++ */ ++ if (ppd->tp_status != TP_STATUS_AVAILABLE) + break; + + /* copy the tx frame data */ +diff --git a/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c b/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c +index 7fc70df713..efe7b80e7a 100644 +--- a/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c ++++ b/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c +@@ -490,7 +490,6 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + + if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) { + rte_pktmbuf_free(local_mbuf); +- kick_tx(txq, cq); + goto out; + } + +@@ -514,10 +513,9 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + tx_bytes += mbuf->pkt_len; + } + +- kick_tx(txq, cq); +- + out: + xsk_ring_prod__submit(&txq->tx, count); ++ kick_tx(txq, cq); + + txq->stats.tx_pkts += count; + txq->stats.tx_bytes += tx_bytes; +@@ -635,67 +633,6 @@ find_internal_resource(struct pmd_internals *port_int) + return list; + } + +-/* Check if the netdev,qid context already exists */ +-static inline bool +-ctx_exists(struct pkt_rx_queue *rxq, const char *ifname, +- struct pkt_rx_queue *list_rxq, const char *list_ifname) +-{ +- bool exists = false; +- +- if (rxq->xsk_queue_idx == list_rxq->xsk_queue_idx && +- !strncmp(ifname, list_ifname, IFNAMSIZ)) { +- AF_XDP_LOG(ERR, "ctx %s,%i already exists, cannot share umem\n", +- ifname, rxq->xsk_queue_idx); +- exists = true; +- } +- +- return exists; +-} +- +-/* Get a pointer to an existing UMEM which overlays the rxq's mb_pool */ +-static inline int +-get_shared_umem(struct pkt_rx_queue *rxq, const char *ifname, +- struct xsk_umem_info **umem) +-{ +- struct internal_list *list; +- struct pmd_internals *internals; +- int i = 0, ret = 0; +- struct rte_mempool *mb_pool = rxq->mb_pool; +- +- if (mb_pool == NULL) +- return ret; +- +- pthread_mutex_lock(&internal_list_lock); +- +- TAILQ_FOREACH(list, &internal_list, next) { +- internals = list->eth_dev->data->dev_private; +- for (i = 0; i < internals->queue_cnt; i++) { +- struct pkt_rx_queue *list_rxq = +- &internals->rx_queues[i]; +- if (rxq == list_rxq) +- continue; +- if (mb_pool == internals->rx_queues[i].mb_pool) { +- if (ctx_exists(rxq, ifname, list_rxq, +- internals->if_name)) { +- ret = -1; +- goto out; +- } +- if (__atomic_load_n( +- &internals->rx_queues[i].umem->refcnt, +- __ATOMIC_ACQUIRE)) { +- *umem = internals->rx_queues[i].umem; +- goto out; +- } +- } +- } +- } +- +-out: +- pthread_mutex_unlock(&internal_list_lock); +- +- return ret; +-} +- + static int + eth_dev_configure(struct rte_eth_dev *dev) + { +@@ -922,6 +859,66 @@ static inline uintptr_t get_base_addr(struct rte_mempool *mp, uint64_t *align) + return aligned_addr; + } + ++/* Check if the netdev,qid context already exists */ ++static inline bool ++ctx_exists(struct pkt_rx_queue *rxq, const char *ifname, ++ struct pkt_rx_queue *list_rxq, const char *list_ifname) ++{ ++ bool exists = false; ++ ++ if (rxq->xsk_queue_idx == list_rxq->xsk_queue_idx && ++ !strncmp(ifname, list_ifname, IFNAMSIZ)) { ++ AF_XDP_LOG(ERR, "ctx %s,%i already exists, cannot share umem\n", ++ ifname, rxq->xsk_queue_idx); ++ exists = true; ++ } ++ ++ return exists; ++} ++ ++/* Get a pointer to an existing UMEM which overlays the rxq's mb_pool */ ++static inline int ++get_shared_umem(struct pkt_rx_queue *rxq, const char *ifname, ++ struct xsk_umem_info **umem) ++{ 
++ struct internal_list *list; ++ struct pmd_internals *internals; ++ int i = 0, ret = 0; ++ struct rte_mempool *mb_pool = rxq->mb_pool; ++ ++ if (mb_pool == NULL) ++ return ret; ++ ++ pthread_mutex_lock(&internal_list_lock); ++ ++ TAILQ_FOREACH(list, &internal_list, next) { ++ internals = list->eth_dev->data->dev_private; ++ for (i = 0; i < internals->queue_cnt; i++) { ++ struct pkt_rx_queue *list_rxq = ++ &internals->rx_queues[i]; ++ if (rxq == list_rxq) ++ continue; ++ if (mb_pool == internals->rx_queues[i].mb_pool) { ++ if (ctx_exists(rxq, ifname, list_rxq, ++ internals->if_name)) { ++ ret = -1; ++ goto out; ++ } ++ if (__atomic_load_n(&internals->rx_queues[i].umem->refcnt, ++ __ATOMIC_ACQUIRE)) { ++ *umem = internals->rx_queues[i].umem; ++ goto out; ++ } ++ } ++ } ++ } ++ ++out: ++ pthread_mutex_unlock(&internal_list_lock); ++ ++ return ret; ++} ++ + static struct + xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + struct pkt_rx_queue *rxq) +@@ -961,7 +958,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, + rte_socket_id()); + if (umem == NULL) { +- AF_XDP_LOG(ERR, "Failed to allocate umem info"); ++ AF_XDP_LOG(ERR, "Failed to allocate umem info\n"); + return NULL; + } + +@@ -974,7 +971,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + ret = xsk_umem__create(&umem->umem, base_addr, umem_size, + &rxq->fq, &rxq->cq, &usr_config); + if (ret) { +- AF_XDP_LOG(ERR, "Failed to create umem"); ++ AF_XDP_LOG(ERR, "Failed to create umem\n"); + goto err; + } + umem->buffer = base_addr; +@@ -1008,7 +1005,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + + umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, rte_socket_id()); + if (umem == NULL) { +- AF_XDP_LOG(ERR, "Failed to allocate umem info"); ++ AF_XDP_LOG(ERR, "Failed to allocate umem info\n"); + return NULL; + } + +@@ -1044,7 +1041,7 @@ xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals, + &usr_config); + + if (ret) { +- AF_XDP_LOG(ERR, "Failed to create umem"); ++ AF_XDP_LOG(ERR, "Failed to create umem\n"); + goto err; + } + umem->mz = mz; +@@ -1128,7 +1125,7 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + if (ret) { + AF_XDP_LOG(ERR, "Failed to load custom XDP program %s\n", + internals->prog_path); +- goto err; ++ goto out_umem; + } + internals->custom_prog_configured = 1; + } +@@ -1144,25 +1141,27 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq, + + if (ret) { + AF_XDP_LOG(ERR, "Failed to create xsk socket.\n"); +- goto err; ++ goto out_umem; + } + + #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG) +- if (rte_pktmbuf_alloc_bulk(rxq->umem->mb_pool, fq_bufs, reserve_size)) { ++ ret = rte_pktmbuf_alloc_bulk(rxq->umem->mb_pool, fq_bufs, reserve_size); ++ if (ret) { + AF_XDP_LOG(DEBUG, "Failed to get enough buffers for fq.\n"); +- goto err; ++ goto out_xsk; + } + #endif + ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq); + if (ret) { +- xsk_socket__delete(rxq->xsk); + AF_XDP_LOG(ERR, "Failed to reserve fill queue.\n"); +- goto err; ++ goto out_xsk; + } + + return 0; + +-err: ++out_xsk: ++ xsk_socket__delete(rxq->xsk); ++out_umem: + if (__atomic_sub_fetch(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE) == 0) + xdp_umem_destroy(rxq->umem); + +@@ -1598,16 +1597,11 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev) + rte_vdev_device_name(dev)); + + name = rte_vdev_device_name(dev); +- if (rte_eal_process_type() == 
RTE_PROC_SECONDARY && +- strlen(rte_vdev_device_args(dev)) == 0) { +- eth_dev = rte_eth_dev_attach_secondary(name); +- if (eth_dev == NULL) { +- AF_XDP_LOG(ERR, "Failed to probe %s\n", name); +- return -EINVAL; +- } +- eth_dev->dev_ops = &ops; +- rte_eth_dev_probing_finish(eth_dev); +- return 0; ++ if (rte_eal_process_type() == RTE_PROC_SECONDARY) { ++ AF_XDP_LOG(ERR, "Failed to probe %s. " ++ "AF_XDP PMD does not support secondary processes.\n", ++ name); ++ return -ENOTSUP; + } + + kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments); +diff --git a/dpdk/drivers/net/ark/ark_ethdev.c b/dpdk/drivers/net/ark/ark_ethdev.c +index a658993512..c307880ec1 100644 +--- a/dpdk/drivers/net/ark/ark_ethdev.c ++++ b/dpdk/drivers/net/ark/ark_ethdev.c +@@ -2,6 +2,7 @@ + * Copyright (c) 2015-2018 Atomic Rules LLC + */ + ++#include + #include + #include + #include +@@ -321,6 +322,7 @@ eth_ark_dev_init(struct rte_eth_dev *dev) + ark->rqpacing = + (struct ark_rqpace_t *)(ark->bar0 + ARK_RCPACING_BASE); + ark->started = 0; ++ ark->pkt_dir_v = ARK_PKT_DIR_INIT_VAL; + + ARK_PMD_LOG(INFO, "Sys Ctrl Const = 0x%x HW Commit_ID: %08x\n", + ark->sysctrl.t32[4], +@@ -584,6 +586,7 @@ delay_pg_start(void *arg) + * perform a blind sleep here to ensure that the external test + * application has time to setup the test before we generate packets + */ ++ pthread_detach(pthread_self()); + usleep(100000); + ark_pktgen_run(ark->pg); + return NULL; +diff --git a/dpdk/drivers/net/ark/ark_ethdev_rx.c b/dpdk/drivers/net/ark/ark_ethdev_rx.c +index d29d3db783..8e55b851a2 100644 +--- a/dpdk/drivers/net/ark/ark_ethdev_rx.c ++++ b/dpdk/drivers/net/ark/ark_ethdev_rx.c +@@ -26,9 +26,6 @@ static uint32_t eth_ark_rx_jumbo(struct ark_rx_queue *queue, + struct rte_mbuf *mbuf0, + uint32_t cons_index); + static inline int eth_ark_rx_seed_mbufs(struct ark_rx_queue *queue); +-static int eth_ark_rx_seed_recovery(struct ark_rx_queue *queue, +- uint32_t *pnb, +- struct rte_mbuf **mbufs); + + /* ************************************************************************* */ + struct ark_rx_queue { +@@ -54,7 +51,7 @@ struct ark_rx_queue { + /* The queue Index is used within the dpdk device structures */ + uint16_t queue_index; + +- uint32_t last_cons; ++ uint32_t unused; + + /* separate cache line */ + /* second cache line - fields only used in slow path */ +@@ -105,9 +102,8 @@ static inline void + eth_ark_rx_update_cons_index(struct ark_rx_queue *queue, uint32_t cons_index) + { + queue->cons_index = cons_index; +- eth_ark_rx_seed_mbufs(queue); +- if (((cons_index - queue->last_cons) >= 64U)) { +- queue->last_cons = cons_index; ++ if ((cons_index + queue->queue_size - queue->seed_index) >= 64U) { ++ eth_ark_rx_seed_mbufs(queue); + ark_mpu_set_producer(queue->mpu, queue->seed_index); + } + } +@@ -321,9 +317,7 @@ eth_ark_recv_pkts(void *rx_queue, + break; + } + +- if (unlikely(nb != 0)) +- /* report next free to FPGA */ +- eth_ark_rx_update_cons_index(queue, cons_index); ++ eth_ark_rx_update_cons_index(queue, cons_index); + + return nb; + } +@@ -458,11 +452,13 @@ eth_ark_rx_seed_mbufs(struct ark_rx_queue *queue) + int status = rte_pktmbuf_alloc_bulk(queue->mb_pool, mbufs, nb); + + if (unlikely(status != 0)) { +- /* Try to recover from lack of mbufs in pool */ +- status = eth_ark_rx_seed_recovery(queue, &nb, mbufs); +- if (unlikely(status != 0)) { +- return -1; +- } ++ ARK_PMD_LOG(NOTICE, ++ "Could not allocate %u mbufs from pool" ++ " for RX queue %u;" ++ " %u free buffers remaining in queue\n", ++ nb, queue->queue_index, ++ 
queue->seed_index - queue->cons_index); ++ return -1; + } + + if (ARK_DEBUG_CORE) { /* DEBUG */ +@@ -511,29 +507,6 @@ eth_ark_rx_seed_mbufs(struct ark_rx_queue *queue) + return 0; + } + +-int +-eth_ark_rx_seed_recovery(struct ark_rx_queue *queue, +- uint32_t *pnb, +- struct rte_mbuf **mbufs) +-{ +- int status = -1; +- +- /* Ignore small allocation failures */ +- if (*pnb <= 64) +- return -1; +- +- *pnb = 64U; +- status = rte_pktmbuf_alloc_bulk(queue->mb_pool, mbufs, *pnb); +- if (status != 0) { +- ARK_PMD_LOG(NOTICE, +- "ARK: Could not allocate %u mbufs from pool for RX queue %u;" +- " %u free buffers remaining in queue\n", +- *pnb, queue->queue_index, +- queue->seed_index - queue->cons_index); +- } +- return status; +-} +- + void + eth_ark_rx_dump_queue(struct rte_eth_dev *dev, uint16_t queue_id, + const char *msg) +diff --git a/dpdk/drivers/net/ark/ark_pktdir.c b/dpdk/drivers/net/ark/ark_pktdir.c +index 25e1218310..dbfd2924bd 100644 +--- a/dpdk/drivers/net/ark/ark_pktdir.c ++++ b/dpdk/drivers/net/ark/ark_pktdir.c +@@ -22,7 +22,7 @@ ark_pktdir_init(void *base) + return inst; + } + inst->regs = (struct ark_pkt_dir_regs *)base; +- inst->regs->ctrl = 0x00110110; /* POR state */ ++ inst->regs->ctrl = ARK_PKT_DIR_INIT_VAL; /* POR state */ + return inst; + } + +diff --git a/dpdk/drivers/net/ark/ark_pktdir.h b/dpdk/drivers/net/ark/ark_pktdir.h +index 4afd128f95..b5577cebb3 100644 +--- a/dpdk/drivers/net/ark/ark_pktdir.h ++++ b/dpdk/drivers/net/ark/ark_pktdir.h +@@ -7,7 +7,7 @@ + + #include + +-#define ARK_PKTDIR_BASE_ADR 0xa0000 ++#define ARK_PKT_DIR_INIT_VAL 0x0110 + + typedef void *ark_pkt_dir_t; + +diff --git a/dpdk/drivers/net/atlantic/atl_ethdev.c b/dpdk/drivers/net/atlantic/atl_ethdev.c +index b0716773ad..9e7273cd03 100644 +--- a/dpdk/drivers/net/atlantic/atl_ethdev.c ++++ b/dpdk/drivers/net/atlantic/atl_ethdev.c +@@ -1073,7 +1073,7 @@ atl_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) + { + struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t fw_ver = 0; +- unsigned int ret = 0; ++ int ret = 0; + + ret = hw_atl_utils_get_fw_version(hw, &fw_ver); + if (ret) +@@ -1081,10 +1081,11 @@ atl_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) + + ret = snprintf(fw_version, fw_size, "%u.%u.%u", fw_ver >> 24, + (fw_ver >> 16) & 0xFFU, fw_ver & 0xFFFFU); ++ if (ret < 0) ++ return -EINVAL; + + ret += 1; /* add string null-terminator */ +- +- if (fw_size < ret) ++ if (fw_size < (size_t)ret) + return ret; + + return 0; +@@ -1427,7 +1428,7 @@ atl_dev_interrupt_action(struct rte_eth_dev *dev, + * @param handle + * Pointer to interrupt handle. + * @param param +- * The address of parameter (struct rte_eth_dev *) regsitered before. ++ * The address of parameter (struct rte_eth_dev *) registered before. + * + * @return + * void +diff --git a/dpdk/drivers/net/atlantic/atl_rxtx.c b/dpdk/drivers/net/atlantic/atl_rxtx.c +index 449ffd454d..8b3c74ece0 100644 +--- a/dpdk/drivers/net/atlantic/atl_rxtx.c ++++ b/dpdk/drivers/net/atlantic/atl_rxtx.c +@@ -1099,7 +1099,7 @@ atl_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + * register. + * Update the RDT with the value of the last processed RX descriptor + * minus 1, to guarantee that the RDT register is never equal to the +- * RDH register, which creates a "full" ring situtation from the ++ * RDH register, which creates a "full" ring situation from the + * hardware point of view... 
+ */ + nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold); +diff --git a/dpdk/drivers/net/atlantic/hw_atl/hw_atl_b0.c b/dpdk/drivers/net/atlantic/hw_atl/hw_atl_b0.c +index 7d0e724019..d0eb4af928 100644 +--- a/dpdk/drivers/net/atlantic/hw_atl/hw_atl_b0.c ++++ b/dpdk/drivers/net/atlantic/hw_atl/hw_atl_b0.c +@@ -281,7 +281,7 @@ int hw_atl_b0_hw_init_rx_path(struct aq_hw_s *self) + hw_atl_rpf_vlan_outer_etht_set(self, 0x88A8U); + hw_atl_rpf_vlan_inner_etht_set(self, 0x8100U); + +- /* VLAN proimisc bu defauld */ ++ /* VLAN promisc by default */ + hw_atl_rpf_vlan_prom_mode_en_set(self, 1); + + /* Rx Interrupts */ +diff --git a/dpdk/drivers/net/axgbe/axgbe_dev.c b/dpdk/drivers/net/axgbe/axgbe_dev.c +index af62eae3bb..ea1964a145 100644 +--- a/dpdk/drivers/net/axgbe/axgbe_dev.c ++++ b/dpdk/drivers/net/axgbe/axgbe_dev.c +@@ -892,7 +892,7 @@ static int axgbe_config_rx_threshold(struct axgbe_port *pdata, + return 0; + } + +-/*Distrubting fifo size */ ++/* Distributing FIFO size */ + static void axgbe_config_rx_fifo_size(struct axgbe_port *pdata) + { + unsigned int fifo_size; +diff --git a/dpdk/drivers/net/axgbe/axgbe_ethdev.c b/dpdk/drivers/net/axgbe/axgbe_ethdev.c +index 9cd056d04a..586cf6974c 100644 +--- a/dpdk/drivers/net/axgbe/axgbe_ethdev.c ++++ b/dpdk/drivers/net/axgbe/axgbe_ethdev.c +@@ -10,6 +10,8 @@ + #include "axgbe_regs.h" + #include "rte_time.h" + ++#include "eal_filesystem.h" ++ + static int eth_axgbe_dev_init(struct rte_eth_dev *eth_dev); + static int axgbe_dev_configure(struct rte_eth_dev *dev); + static int axgbe_dev_start(struct rte_eth_dev *dev); +@@ -273,7 +275,7 @@ static int axgbe_phy_reset(struct axgbe_port *pdata) + * @param handle + * Pointer to interrupt handle. + * @param param +- * The address of parameter (struct rte_eth_dev *) regsitered before. ++ * The address of parameter (struct rte_eth_dev *) registered before. 
+ * + * @return + * void +@@ -982,18 +984,18 @@ axgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats, + struct axgbe_port *pdata = dev->data->dev_private; + unsigned int i; + +- if (!stats) +- return 0; ++ if (n < AXGBE_XSTATS_COUNT) ++ return AXGBE_XSTATS_COUNT; + + axgbe_read_mmc_stats(pdata); + +- for (i = 0; i < n && i < AXGBE_XSTATS_COUNT; i++) { ++ for (i = 0; i < AXGBE_XSTATS_COUNT; i++) { + stats[i].id = i; + stats[i].value = *(u64 *)((uint8_t *)&pdata->mmc_stats + + axgbe_xstats_strings[i].offset); + } + +- return i; ++ return AXGBE_XSTATS_COUNT; + } + + static int +@@ -1923,28 +1925,27 @@ static void axgbe_default_config(struct axgbe_port *pdata) + pdata->power_down = 0; + } + +-static int +-pci_device_cmp(const struct rte_device *dev, const void *_pci_id) ++/* ++ * Return PCI root complex device id on success else 0 ++ */ ++static uint16_t ++get_pci_rc_devid(void) + { +- const struct rte_pci_device *pdev = RTE_DEV_TO_PCI_CONST(dev); +- const struct rte_pci_id *pcid = _pci_id; ++ char pci_sysfs[PATH_MAX]; ++ const struct rte_pci_addr pci_rc_addr = {0, 0, 0, 0}; ++ unsigned long device_id; + +- if (pdev->id.vendor_id == AMD_PCI_VENDOR_ID && +- pdev->id.device_id == pcid->device_id) +- return 0; +- return 1; +-} ++ snprintf(pci_sysfs, sizeof(pci_sysfs), "%s/" PCI_PRI_FMT "/device", ++ rte_pci_get_sysfs_path(), pci_rc_addr.domain, ++ pci_rc_addr.bus, pci_rc_addr.devid, pci_rc_addr.function); + +-static bool +-pci_search_device(int device_id) +-{ +- struct rte_bus *pci_bus; +- struct rte_pci_id dev_id; ++ /* get device id */ ++ if (eal_parse_sysfs_value(pci_sysfs, &device_id) < 0) { ++ PMD_INIT_LOG(ERR, "Error in reading PCI sysfs\n"); ++ return 0; ++ } + +- dev_id.device_id = device_id; +- pci_bus = rte_bus_find_by_name("pci"); +- return (pci_bus != NULL) && +- (pci_bus->find_device(NULL, pci_device_cmp, &dev_id) != NULL); ++ return (uint16_t)device_id; + } + + /* +@@ -1986,7 +1987,7 @@ eth_axgbe_dev_init(struct rte_eth_dev *eth_dev) + /* + * Use root complex device ID to differentiate RV AXGBE vs SNOWY AXGBE + */ +- if (pci_search_device(AMD_PCI_RV_ROOT_COMPLEX_ID)) { ++ if ((get_pci_rc_devid()) == AMD_PCI_RV_ROOT_COMPLEX_ID) { + pdata->xpcs_window_def_reg = PCS_V2_RV_WINDOW_DEF; + pdata->xpcs_window_sel_reg = PCS_V2_RV_WINDOW_SELECT; + } else { +diff --git a/dpdk/drivers/net/axgbe/axgbe_ethdev.h b/dpdk/drivers/net/axgbe/axgbe_ethdev.h +index ac9210f2c8..6eda605422 100644 +--- a/dpdk/drivers/net/axgbe/axgbe_ethdev.h ++++ b/dpdk/drivers/net/axgbe/axgbe_ethdev.h +@@ -629,7 +629,7 @@ struct axgbe_port { + + unsigned int kr_redrv; + +- /* Auto-negotiation atate machine support */ ++ /* Auto-negotiation state machine support */ + unsigned int an_int; + unsigned int an_status; + enum axgbe_an an_result; +diff --git a/dpdk/drivers/net/axgbe/axgbe_i2c.c b/dpdk/drivers/net/axgbe/axgbe_i2c.c +index ab3738a12e..a2798f484e 100644 +--- a/dpdk/drivers/net/axgbe/axgbe_i2c.c ++++ b/dpdk/drivers/net/axgbe/axgbe_i2c.c +@@ -233,6 +233,7 @@ static int axgbe_i2c_xfer(struct axgbe_port *pdata, struct axgbe_i2c_op *op) + ret = axgbe_i2c_disable(pdata); + if (ret) { + PMD_DRV_LOG(ERR, "failed to disable i2c master\n"); ++ pthread_mutex_unlock(&pdata->i2c_mutex); + return ret; + } + +@@ -249,6 +250,7 @@ static int axgbe_i2c_xfer(struct axgbe_port *pdata, struct axgbe_i2c_op *op) + ret = axgbe_i2c_enable(pdata); + if (ret) { + PMD_DRV_LOG(ERR, "failed to enable i2c master\n"); ++ pthread_mutex_unlock(&pdata->i2c_mutex); + return ret; + } + +diff --git 
a/dpdk/drivers/net/axgbe/axgbe_phy_impl.c b/dpdk/drivers/net/axgbe/axgbe_phy_impl.c +index 02236ec192..72104f8a3f 100644 +--- a/dpdk/drivers/net/axgbe/axgbe_phy_impl.c ++++ b/dpdk/drivers/net/axgbe/axgbe_phy_impl.c +@@ -347,7 +347,7 @@ static int axgbe_phy_i2c_read(struct axgbe_port *pdata, unsigned int target, + + retry = 1; + again2: +- /* Read the specfied register */ ++ /* Read the specified register */ + i2c_op.cmd = AXGBE_I2C_CMD_READ; + i2c_op.target = target; + i2c_op.len = val_len; +@@ -1093,7 +1093,7 @@ static int axgbe_phy_an_config(struct axgbe_port *pdata __rte_unused) + { + return 0; + /* Dummy API since there is no case to support +- * external phy devices registred through kerenl apis ++ * external phy devices registered through kernel APIs + */ + } + +diff --git a/dpdk/drivers/net/axgbe/axgbe_rxtx_vec_sse.c b/dpdk/drivers/net/axgbe/axgbe_rxtx_vec_sse.c +index 1c962b9333..0d94fdba18 100644 +--- a/dpdk/drivers/net/axgbe/axgbe_rxtx_vec_sse.c ++++ b/dpdk/drivers/net/axgbe/axgbe_rxtx_vec_sse.c +@@ -11,7 +11,7 @@ + #include + #include + +-/* Useful to avoid shifting for every descriptor prepration*/ ++/* Useful to avoid shifting for every descriptor preparation */ + #define TX_DESC_CTRL_FLAGS 0xb000000000000000 + #define TX_DESC_CTRL_FLAG_TMST 0x40000000 + #define TX_FREE_BULK 8 +diff --git a/dpdk/drivers/net/bnx2x/bnx2x.c b/dpdk/drivers/net/bnx2x/bnx2x.c +index 8eb6d609bd..5c204e76c3 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x.c ++++ b/dpdk/drivers/net/bnx2x/bnx2x.c +@@ -922,7 +922,7 @@ storm_memset_eq_prod(struct bnx2x_softc *sc, uint16_t eq_prod, uint16_t pfid) + * block. + * + * RAMROD_CMD_ID_ETH_UPDATE +- * Used to update the state of the leading connection, usually to udpate ++ * Used to update the state of the leading connection, usually to update + * the RSS indirection table. Completes on the RCQ of the leading + * connection. (Not currently used under FreeBSD until OS support becomes + * available.) +@@ -937,7 +937,7 @@ storm_memset_eq_prod(struct bnx2x_softc *sc, uint16_t eq_prod, uint16_t pfid) + * the RCQ of the leading connection. + * + * RAMROD_CMD_ID_ETH_CFC_DEL +- * Used when tearing down a conneciton prior to driver unload. Completes ++ * Used when tearing down a connection prior to driver unload. Completes + * on the RCQ of the leading connection (since the current connection + * has been completely removed from controller memory). + * +@@ -1068,7 +1068,7 @@ bnx2x_sp_post(struct bnx2x_softc *sc, int command, int cid, uint32_t data_hi, + + /* + * It's ok if the actual decrement is issued towards the memory +- * somewhere between the lock and unlock. Thus no more explict ++ * somewhere between the lock and unlock. Thus no more explicit + * memory barrier is needed. + */ + if (common) { +@@ -1186,7 +1186,7 @@ bnx2x_sp_event(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, + break; + + case (RAMROD_CMD_ID_ETH_TERMINATE): +- PMD_DRV_LOG(DEBUG, sc, "got MULTI[%d] teminate ramrod", cid); ++ PMD_DRV_LOG(DEBUG, sc, "got MULTI[%d] terminate ramrod", cid); + drv_cmd = ECORE_Q_CMD_TERMINATE; + break; + +@@ -1472,7 +1472,7 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode, + case BNX2X_RX_MODE_ALLMULTI_PROMISC: + case BNX2X_RX_MODE_PROMISC: + /* +- * According to deffinition of SI mode, iface in promisc mode ++ * According to definition of SI mode, iface in promisc mode + * should receive matched and unmatched (in resolution of port) + * unicast packets. 
+ */ +@@ -1940,7 +1940,7 @@ static void bnx2x_disable_close_the_gate(struct bnx2x_softc *sc) + + /* + * Cleans the object that have internal lists without sending +- * ramrods. Should be run when interrutps are disabled. ++ * ramrods. Should be run when interrupts are disabled. + */ + static void bnx2x_squeeze_objects(struct bnx2x_softc *sc) + { +@@ -2039,7 +2039,7 @@ bnx2x_nic_unload(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_link + + /* + * Nothing to do during unload if previous bnx2x_nic_load() +- * did not completed successfully - all resourses are released. ++ * did not complete successfully - all resources are released. + */ + if ((sc->state == BNX2X_STATE_CLOSED) || (sc->state == BNX2X_STATE_ERROR)) { + return 0; +@@ -2080,7 +2080,7 @@ bnx2x_nic_unload(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_link + /* + * Prevent transactions to host from the functions on the + * engine that doesn't reset global blocks in case of global +- * attention once gloabl blocks are reset and gates are opened ++ * attention once global blocks are reset and gates are opened + * (the engine which leader will perform the recovery + * last). + */ +@@ -2097,7 +2097,7 @@ bnx2x_nic_unload(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_link + + /* + * At this stage no more interrupts will arrive so we may safely clean +- * the queue'able objects here in case they failed to get cleaned so far. ++ * the queueable objects here in case they failed to get cleaned so far. + */ + if (IS_PF(sc)) { + bnx2x_squeeze_objects(sc); +@@ -2147,7 +2147,7 @@ bnx2x_nic_unload(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_link + } + + /* +- * Encapsulte an mbuf cluster into the tx bd chain and makes the memory ++ * Encapsulate an mbuf cluster into the Tx BD chain and makes the memory + * visible to the controller. + * + * If an mbuf is submitted to this routine and cannot be given to the +@@ -2715,7 +2715,7 @@ static uint8_t bnx2x_clear_pf_load(struct bnx2x_softc *sc) + return val1 != 0; + } + +-/* send load requrest to mcp and analyze response */ ++/* send load request to MCP and analyze response */ + static int bnx2x_nic_load_request(struct bnx2x_softc *sc, uint32_t * load_code) + { + PMD_INIT_FUNC_TRACE(sc); +@@ -5321,7 +5321,7 @@ static void bnx2x_func_init(struct bnx2x_softc *sc, struct bnx2x_func_init_param + * sum of vn_min_rates. + * or + * 0 - if all the min_rates are 0. +- * In the later case fainess algorithm should be deactivated. ++ * In the later case fairness algorithm should be deactivated. + * If all min rates are not zero then those that are zeroes will be set to 1. + */ + static void bnx2x_calc_vn_min(struct bnx2x_softc *sc, struct cmng_init_input *input) +@@ -6560,7 +6560,7 @@ bnx2x_pf_tx_q_prep(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, + txq_init->fw_sb_id = fp->fw_sb_id; + + /* +- * set the TSS leading client id for TX classfication to the ++ * set the TSS leading client id for Tx classification to the + * leading RSS client id + */ + txq_init->tss_leading_cl_id = BNX2X_FP(sc, 0, cl_id); +@@ -7630,8 +7630,8 @@ static uint8_t bnx2x_is_pcie_pending(struct bnx2x_softc *sc) + } + + /* +-* Walk the PCI capabiites list for the device to find what features are +-* supported. These capabilites may be enabled/disabled by firmware so it's ++* Walk the PCI capabilities list for the device to find what features are ++* supported. These capabilities may be enabled/disabled by firmware so it's + * best to walk the list rather than make assumptions. 
+ */ + static void bnx2x_probe_pci_caps(struct bnx2x_softc *sc) +@@ -8421,7 +8421,7 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc) + } else { + sc->devinfo.int_block = INT_BLOCK_IGU; + +-/* do not allow device reset during IGU info preocessing */ ++/* do not allow device reset during IGU info processing */ + bnx2x_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RESET); + + val = REG_RD(sc, IGU_REG_BLOCK_CONFIGURATION); +@@ -9772,7 +9772,7 @@ int bnx2x_attach(struct bnx2x_softc *sc) + + sc->igu_base_addr = IS_VF(sc) ? PXP_VF_ADDR_IGU_START : BAR_IGU_INTMEM; + +- /* get PCI capabilites */ ++ /* get PCI capabilities */ + bnx2x_probe_pci_caps(sc); + + if (sc->devinfo.pcie_msix_cap_reg != 0) { +@@ -10291,7 +10291,7 @@ static int bnx2x_init_hw_common(struct bnx2x_softc *sc) + * stay set) + * f. If this is VNIC 3 of a port then also init + * first_timers_ilt_entry to zero and last_timers_ilt_entry +- * to the last enrty in the ILT. ++ * to the last entry in the ILT. + * + * Notes: + * Currently the PF error in the PGLC is non recoverable. +@@ -11097,7 +11097,7 @@ static void bnx2x_hw_enable_status(struct bnx2x_softc *sc) + /** + * bnx2x_pf_flr_clnup + * a. re-enable target read on the PF +- * b. poll cfc per function usgae counter ++ * b. poll cfc per function usage counter + * c. poll the qm perfunction usage counter + * d. poll the tm per function usage counter + * e. poll the tm per function scan-done indication +diff --git a/dpdk/drivers/net/bnx2x/bnx2x.h b/dpdk/drivers/net/bnx2x/bnx2x.h +index 69cc1430a4..15e85d5131 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x.h ++++ b/dpdk/drivers/net/bnx2x/bnx2x.h +@@ -681,13 +681,13 @@ struct bnx2x_slowpath { + }; /* struct bnx2x_slowpath */ + + /* +- * Port specifc data structure. ++ * Port specific data structure. + */ + struct bnx2x_port { + /* + * Port Management Function (for 57711E only). + * When this field is set the driver instance is +- * responsible for managing port specifc ++ * responsible for managing port specific + * configurations such as handling link attentions. + */ + uint32_t pmf; +@@ -732,7 +732,7 @@ struct bnx2x_port { + + /* + * MCP scratchpad address for port specific statistics. +- * The device is responsible for writing statistcss ++ * The device is responsible for writing statistics + * back to the MCP for use with management firmware such + * as UMP/NC-SI. + */ +@@ -937,8 +937,8 @@ struct bnx2x_devinfo { + * already registered for this port (which means that the user wants storage + * services). + * 2. During cnic-related load, to know if offload mode is already configured +- * in the HW or needs to be configrued. Since the transition from nic-mode to +- * offload-mode in HW causes traffic coruption, nic-mode is configured only ++ * in the HW or needs to be configured. Since the transition from nic-mode to ++ * offload-mode in HW causes traffic corruption, nic-mode is configured only + * in ports on which storage services where never requested. 
+ */ + #define CONFIGURE_NIC_MODE(sc) (!CHIP_IS_E1x(sc) && !CNIC_ENABLED(sc)) +@@ -1902,18 +1902,19 @@ bnx2x_hc_ack_sb(struct bnx2x_softc *sc, uint8_t sb_id, uint8_t storm, + { + uint32_t hc_addr = (HC_REG_COMMAND_REG + SC_PORT(sc) * 32 + + COMMAND_REG_INT_ACK); +- struct igu_ack_register igu_ack; +- uint32_t *val = NULL; ++ union { ++ struct igu_ack_register igu_ack; ++ uint32_t val; ++ } val; + +- igu_ack.status_block_index = index; +- igu_ack.sb_id_and_flags = ++ val.igu_ack.status_block_index = index; ++ val.igu_ack.sb_id_and_flags = + ((sb_id << IGU_ACK_REGISTER_STATUS_BLOCK_ID_SHIFT) | + (storm << IGU_ACK_REGISTER_STORM_ID_SHIFT) | + (update << IGU_ACK_REGISTER_UPDATE_INDEX_SHIFT) | + (op << IGU_ACK_REGISTER_INTERRUPT_MODE_SHIFT)); + +- val = (uint32_t *)&igu_ack; +- REG_WR(sc, hc_addr, *val); ++ REG_WR(sc, hc_addr, val.val); + + /* Make sure that ACK is written */ + mb(); +diff --git a/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c b/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c +index d35c75a2e2..28f8aaeb4d 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c ++++ b/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c +@@ -648,7 +648,6 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf) + } + + rte_eth_copy_pci_info(eth_dev, pci_dev); +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + + sc->pcie_bus = pci_dev->addr.bus; + sc->pcie_device = pci_dev->addr.devid; +diff --git a/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c b/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c +index 57e2ce5045..2b17602290 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c ++++ b/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c +@@ -321,14 +321,15 @@ static inline void + bnx2x_upd_rx_prod_fast(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, + uint16_t rx_bd_prod, uint16_t rx_cq_prod) + { +- struct ustorm_eth_rx_producers rx_prods = { 0 }; +- uint32_t *val = NULL; ++ union { ++ struct ustorm_eth_rx_producers rx_prods; ++ uint32_t val; ++ } val = { {0} }; + +- rx_prods.bd_prod = rx_bd_prod; +- rx_prods.cqe_prod = rx_cq_prod; ++ val.rx_prods.bd_prod = rx_bd_prod; ++ val.rx_prods.cqe_prod = rx_cq_prod; + +- val = (uint32_t *)&rx_prods; +- REG_WR(sc, fp->ustorm_rx_prods_offset, val[0]); ++ REG_WR(sc, fp->ustorm_rx_prods_offset, val.val); + } + + static uint16_t +diff --git a/dpdk/drivers/net/bnx2x/bnx2x_stats.c b/dpdk/drivers/net/bnx2x/bnx2x_stats.c +index 1cd972591a..c07b01510a 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x_stats.c ++++ b/dpdk/drivers/net/bnx2x/bnx2x_stats.c +@@ -1358,7 +1358,7 @@ bnx2x_prep_fw_stats_req(struct bnx2x_softc *sc) + + /* + * Prepare the first stats ramrod (will be completed with +- * the counters equal to zero) - init counters to somethig different. ++ * the counters equal to zero) - init counters to something different. 
+ */ + memset(&sc->fw_stats_data->storm_counters, 0xff, + sizeof(struct stats_counter)); +diff --git a/dpdk/drivers/net/bnx2x/bnx2x_stats.h b/dpdk/drivers/net/bnx2x/bnx2x_stats.h +index 635412bdd3..11ddab5039 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x_stats.h ++++ b/dpdk/drivers/net/bnx2x/bnx2x_stats.h +@@ -314,7 +314,7 @@ struct bnx2x_eth_stats_old { + }; + + struct bnx2x_eth_q_stats_old { +- /* Fields to perserve over fw reset*/ ++ /* Fields to preserve over FW reset */ + uint32_t total_unicast_bytes_received_hi; + uint32_t total_unicast_bytes_received_lo; + uint32_t total_broadcast_bytes_received_hi; +@@ -328,7 +328,7 @@ struct bnx2x_eth_q_stats_old { + uint32_t total_multicast_bytes_transmitted_hi; + uint32_t total_multicast_bytes_transmitted_lo; + +- /* Fields to perserve last of */ ++ /* Fields to preserve last of */ + uint32_t total_bytes_received_hi; + uint32_t total_bytes_received_lo; + uint32_t total_bytes_transmitted_hi; +diff --git a/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c b/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c +index 097ccfee19..6db5b0ba99 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c ++++ b/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c +@@ -73,7 +73,7 @@ bnx2x_add_tlv(__rte_unused struct bnx2x_softc *sc, void *tlvs_list, + tl->length = length; + } + +-/* Initiliaze header of the first tlv and clear mailbox*/ ++/* Initialize header of the first TLV and clear mailbox */ + static void + bnx2x_vf_prep(struct bnx2x_softc *sc, struct vf_first_tlv *first_tlv, + uint16_t type, uint16_t length) +diff --git a/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h b/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h +index 7aab8b101a..09f662b08e 100644 +--- a/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h ++++ b/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h +@@ -243,7 +243,7 @@ struct vf_close_tlv { + uint8_t pad[2]; + }; + +-/* rlease the VF's acquired resources */ ++/* release the VF's acquired resources */ + struct vf_release_tlv { + struct vf_first_tlv first_tlv; + uint16_t vf_id; /* for debug */ +diff --git a/dpdk/drivers/net/bnx2x/ecore_fw_defs.h b/dpdk/drivers/net/bnx2x/ecore_fw_defs.h +index 5397a701aa..36c287d1f5 100644 +--- a/dpdk/drivers/net/bnx2x/ecore_fw_defs.h ++++ b/dpdk/drivers/net/bnx2x/ecore_fw_defs.h +@@ -373,7 +373,7 @@ + /* temporarily used for RTT */ + #define XSEMI_CLK1_RESUL_CHIP (1e-3) + +-/* used for Host Coallescing */ ++/* used for Host Coalescing */ + #define SDM_TIMER_TICK_RESUL_CHIP (4 * (1e-6)) + #define TSDM_TIMER_TICK_RESUL_CHIP (1 * (1e-6)) + +diff --git a/dpdk/drivers/net/bnx2x/ecore_hsi.h b/dpdk/drivers/net/bnx2x/ecore_hsi.h +index 5508c53639..eda79408e9 100644 +--- a/dpdk/drivers/net/bnx2x/ecore_hsi.h ++++ b/dpdk/drivers/net/bnx2x/ecore_hsi.h +@@ -1062,7 +1062,7 @@ struct port_feat_cfg { /* port 0: 0x454 port 1: 0x4c8 */ + #define PORT_FEATURE_MBA_LINK_SPEED_20G 0x20000000 + + /* Secondary MBA configuration, +- * see mba_config for the fileds defination. ++ * see mba_config for the fields definition. + */ + uint32_t mba_config2; + +@@ -1075,7 +1075,7 @@ struct port_feat_cfg { /* port 0: 0x454 port 1: 0x4c8 */ + #define PORT_FEATURE_BOFM_CFGD_VEN 0x00080000 + + /* Secondary MBA configuration, +- * see mba_vlan_cfg for the fileds defination. ++ * see mba_vlan_cfg for the fields definition. 
+ */ + uint32_t mba_vlan_cfg2; + +@@ -1429,7 +1429,7 @@ struct extended_dev_info_shared_cfg { /* NVRAM OFFSET */ + #define EXTENDED_DEV_INFO_SHARED_CFG_DBG_GEN3_COMPLI_ENA 0x00080000 + + /* Override Rx signal detect threshold when enabled the threshold +- * will be set staticaly ++ * will be set statically + */ + #define EXTENDED_DEV_INFO_SHARED_CFG_OVERRIDE_RX_SIG_MASK 0x00100000 + #define EXTENDED_DEV_INFO_SHARED_CFG_OVERRIDE_RX_SIG_SHIFT 20 +@@ -2189,9 +2189,9 @@ struct eee_remote_vals { + * elements on a per byte or word boundary. + * + * example: an array with 8 entries each 4 bit wide. This array will fit into +- * a single dword. The diagrmas below show the array order of the nibbles. ++ * a single dword. The diagrams below show the array order of the nibbles. + * +- * SHMEM_ARRAY_BITPOS(i, 4, 4) defines the stadard ordering: ++ * SHMEM_ARRAY_BITPOS(i, 4, 4) defines the standard ordering: + * + * | | | | + * 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | +@@ -2519,17 +2519,17 @@ struct shmem_lfa { + }; + + /* +- * Used to suppoert NSCI get OS driver version ++ * Used to support NSCI get OS driver version + * On driver load the version value will be set + * On driver unload driver value of 0x0 will be set + */ + struct os_drv_ver { + #define DRV_VER_NOT_LOADED 0 +- /*personalites orrder is importent */ ++ /* personalities order is important */ + #define DRV_PERS_ETHERNET 0 + #define DRV_PERS_ISCSI 1 + #define DRV_PERS_FCOE 2 +- /*shmem2 struct is constatnt can't add more personalites here*/ ++ /* shmem2 struct is constant can't add more personalities here */ + #define MAX_DRV_PERS 3 + uint32_t versions[MAX_DRV_PERS]; + }; +@@ -2821,7 +2821,7 @@ struct shmem2_region { + /* Flag to the driver that PF's drv_info_host_addr buffer was read */ + uint32_t mfw_drv_indication; /* Offset 0x19c */ + +- /* We use inidcation for each PF (0..3) */ ++ /* We use indication for each PF (0..3) */ + #define MFW_DRV_IND_READ_DONE_OFFSET(_pf_) (1 << (_pf_)) + + union { /* For various OEMs */ /* Offset 0x1a0 */ +@@ -6195,7 +6195,7 @@ struct hc_sb_data { + + + /* +- * Segment types for host coaslescing ++ * Segment types for host coalescing + */ + enum hc_segment { + HC_REGULAR_SEGMENT, +@@ -6242,7 +6242,7 @@ struct hc_status_block_data_e2 { + + + /* +- * IGU block operartion modes (in Everest2) ++ * IGU block operation modes (in Everest2) + */ + enum igu_mode { + HC_IGU_BC_MODE, +@@ -6508,7 +6508,7 @@ struct stats_query_header { + + + /* +- * Types of statistcis query entry ++ * Types of statistics query entry + */ + enum stats_query_type { + STATS_TYPE_QUEUE, +@@ -6542,7 +6542,7 @@ enum storm_id { + + + /* +- * Taffic types used in ETS and flow control algorithms ++ * Traffic types used in ETS and flow control algorithms + */ + enum traffic_type { + LLFC_TRAFFIC_TYPE_NW, +diff --git a/dpdk/drivers/net/bnx2x/ecore_init_ops.h b/dpdk/drivers/net/bnx2x/ecore_init_ops.h +index 0945e79993..4ed811fdd4 100644 +--- a/dpdk/drivers/net/bnx2x/ecore_init_ops.h ++++ b/dpdk/drivers/net/bnx2x/ecore_init_ops.h +@@ -534,7 +534,7 @@ static void ecore_init_pxp_arb(struct bnx2x_softc *sc, int r_order, + REG_WR(sc, PXP2_REG_WR_CDU_MPS, val); + } + +- /* Validate number of tags suppoted by device */ ++ /* Validate number of tags supported by device */ + #define PCIE_REG_PCIER_TL_HDR_FC_ST 0x2980 + val = REG_RD(sc, PCIE_REG_PCIER_TL_HDR_FC_ST); + val &= 0xFF; +@@ -714,7 +714,7 @@ static void ecore_ilt_client_init_op_ilt(struct bnx2x_softc *sc, + for (i = ilt_cli->start; i <= ilt_cli->end; i++) + ecore_ilt_line_init_op(sc, ilt, i, initop); + 
+- /* init/clear the ILT boundries */ ++ /* init/clear the ILT boundaries */ + ecore_ilt_boundary_init_op(sc, ilt_cli, ilt->start_line, initop); + } + +@@ -765,7 +765,7 @@ static void ecore_ilt_init_client_psz(struct bnx2x_softc *sc, int cli_num, + + /* + * called during init common stage, ilt clients should be initialized +- * prioir to calling this function ++ * prior to calling this function + */ + static void ecore_ilt_init_page_size(struct bnx2x_softc *sc, uint8_t initop) + { +diff --git a/dpdk/drivers/net/bnx2x/ecore_reg.h b/dpdk/drivers/net/bnx2x/ecore_reg.h +index bb92d131f8..6f7b0522f2 100644 +--- a/dpdk/drivers/net/bnx2x/ecore_reg.h ++++ b/dpdk/drivers/net/bnx2x/ecore_reg.h +@@ -19,7 +19,7 @@ + #define ATC_ATC_INT_STS_REG_ATC_RCPL_TO_EMPTY_CNT (0x1 << 3) + #define ATC_ATC_INT_STS_REG_ATC_TCPL_ERROR (0x1 << 4) + #define ATC_ATC_INT_STS_REG_ATC_TCPL_TO_NOT_PEND (0x1 << 1) +-/* [R 1] ATC initalization done */ ++/* [R 1] ATC initialization done */ + #define ATC_REG_ATC_INIT_DONE 0x1100bc + /* [RW 6] Interrupt mask register #0 read/write */ + #define ATC_REG_ATC_INT_MASK 0x1101c8 +@@ -56,7 +56,7 @@ + #define BRB1_REG_PAUSE_HIGH_THRESHOLD_0 0x60078 + /* [RW 10] Write client 0: Assert pause threshold. Not Functional */ + #define BRB1_REG_PAUSE_LOW_THRESHOLD_0 0x60068 +-/* [R 24] The number of full blocks occpied by port. */ ++/* [R 24] The number of full blocks occupied by port. */ + #define BRB1_REG_PORT_NUM_OCC_BLOCKS_0 0x60094 + /* [R 5] Used to read the value of the XX protection CAM occupancy counter. */ + #define CCM_REG_CAM_OCCUP 0xd0188 +@@ -456,7 +456,7 @@ + #define IGU_REG_PCI_PF_MSIX_FUNC_MASK 0x130148 + #define IGU_REG_PCI_PF_MSI_EN 0x130140 + /* [WB_R 32] Each bit represent the pending bits status for that SB. 0 = no +- * pending; 1 = pending. Pendings means interrupt was asserted; and write ++ * pending; 1 = pending. Pending means interrupt was asserted; and write + * done was not received. Data valid only in addresses 0-4. all the rest are + * zero. + */ +@@ -1059,14 +1059,14 @@ + /* [R 28] this field hold the last information that caused reserved + * attention. bits [19:0] - address; [22:20] function; [23] reserved; + * [27:24] the master that caused the attention - according to the following +- * encodeing:1 = pxp; 2 = mcp; 3 = usdm; 4 = tsdm; 5 = xsdm; 6 = csdm; 7 = ++ * encoding:1 = pxp; 2 = mcp; 3 = usdm; 4 = tsdm; 5 = xsdm; 6 = csdm; 7 = + * dbu; 8 = dmae + */ + #define MISC_REG_GRC_RSV_ATTN 0xa3c0 + /* [R 28] this field hold the last information that caused timeout + * attention. bits [19:0] - address; [22:20] function; [23] reserved; + * [27:24] the master that caused the attention - according to the following +- * encodeing:1 = pxp; 2 = mcp; 3 = usdm; 4 = tsdm; 5 = xsdm; 6 = csdm; 7 = ++ * encoding:1 = pxp; 2 = mcp; 3 = usdm; 4 = tsdm; 5 = xsdm; 6 = csdm; 7 = + * dbu; 8 = dmae + */ + #define MISC_REG_GRC_TIMEOUT_ATTN 0xa3c4 +@@ -1567,7 +1567,7 @@ + * MAC DA 2. The reset default is set to mask out all parameters. + */ + #define NIG_REG_P0_LLH_PTP_PARAM_MASK 0x187a0 +-/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set ++/* [RW 14] Mask register for the rules used in detecting PTP packets. Set + * each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} . + * 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP + * 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1; +@@ -1672,7 +1672,7 @@ + * MAC DA 2. The reset default is set to mask out all parameters. 
+ */ + #define NIG_REG_P0_TLLH_PTP_PARAM_MASK 0x187f0 +-/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set ++/* [RW 14] Mask register for the rules used in detecting PTP packets. Set + * each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} . + * 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP + * 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1; +@@ -1839,7 +1839,7 @@ + * MAC DA 2. The reset default is set to mask out all parameters. + */ + #define NIG_REG_P1_LLH_PTP_PARAM_MASK 0x187c8 +-/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set ++/* [RW 14] Mask register for the rules used in detecting PTP packets. Set + * each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} . + * 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP + * 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1; +@@ -1926,7 +1926,7 @@ + * MAC DA 2. The reset default is set to mask out all parameters. + */ + #define NIG_REG_P1_TLLH_PTP_PARAM_MASK 0x187f8 +-/* [RW 14] Mask regiser for the rules used in detecting PTP packets. Set ++/* [RW 14] Mask register for the rules used in detecting PTP packets. Set + * each bit to 1 to mask out that particular rule. 0-{IPv4 DA 0; UDP DP 0} . + * 1-{IPv4 DA 0; UDP DP 1} . 2-{IPv4 DA 1; UDP DP 0} . 3-{IPv4 DA 1; UDP DP + * 1} . 4-{IPv6 DA 0; UDP DP 0} . 5-{IPv6 DA 0; UDP DP 1} . 6-{IPv6 DA 1; +@@ -2306,7 +2306,7 @@ + #define PBF_REG_HDRS_AFTER_BASIC 0x15c0a8 + /* [RW 6] Bit-map indicating which L2 hdrs may appear after L2 tag 0 */ + #define PBF_REG_HDRS_AFTER_TAG_0 0x15c0b8 +-/* [R 1] Removed for E3 B0 - Indicates which COS is conncted to the highest ++/* [R 1] Removed for E3 B0 - Indicates which COS is connected to the highest + * priority in the command arbiter. + */ + #define PBF_REG_HIGH_PRIORITY_COS_NUM 0x15c04c +@@ -2366,7 +2366,7 @@ + */ + #define PBF_REG_NUM_STRICT_ARB_SLOTS 0x15c064 + /* [R 11] Removed for E3 B0 - Port 0 threshold used by arbiter in 16 byte +- * lines used when pause not suppoterd. ++ * lines used when pause not supported. + */ + #define PBF_REG_P0_ARB_THRSH 0x1400e4 + /* [R 11] Removed for E3 B0 - Current credit for port 0 in the tx port +@@ -3503,7 +3503,7 @@ + * queues. + */ + #define QM_REG_OVFERROR 0x16805c +-/* [RC 6] the Q were the qverflow occurs */ ++/* [RC 6] the Q were the overflow occurs */ + #define QM_REG_OVFQNUM 0x168058 + /* [R 16] Pause state for physical queues 15-0 */ + #define QM_REG_PAUSESTATE0 0x168410 +@@ -4890,7 +4890,7 @@ + if set, generate pcie_err_attn output when this error is seen. WC \ + */ + #define PXPCS_TL_FUNC345_STAT_ERR_MASTER_ABRT2 \ +- (1 << 3) /* Receive UR Statusfor Function 2. If set, generate \ ++ (1 << 3) /* Receive UR Status for Function 2. If set, generate \ + pcie_err_attn output when this error is seen. WC */ + #define PXPCS_TL_FUNC345_STAT_ERR_CPL_TIMEOUT2 \ + (1 << 2) /* Completer Timeout Status Status for Function 2, if \ +@@ -4986,7 +4986,7 @@ + if set, generate pcie_err_attn output when this error is seen. WC \ + */ + #define PXPCS_TL_FUNC678_STAT_ERR_MASTER_ABRT5 \ +- (1 << 3) /* Receive UR Statusfor Function 5. If set, generate \ ++ (1 << 3) /* Receive UR Status for Function 5. If set, generate \ + pcie_err_attn output when this error is seen. 
WC */ + #define PXPCS_TL_FUNC678_STAT_ERR_CPL_TIMEOUT5 \ + (1 << 2) /* Completer Timeout Status Status for Function 5, if \ +diff --git a/dpdk/drivers/net/bnx2x/ecore_sp.c b/dpdk/drivers/net/bnx2x/ecore_sp.c +index 61f99c6408..36d6c42580 100644 +--- a/dpdk/drivers/net/bnx2x/ecore_sp.c ++++ b/dpdk/drivers/net/bnx2x/ecore_sp.c +@@ -1338,7 +1338,7 @@ static int __ecore_vlan_mac_execute_step(struct bnx2x_softc *sc, + if (rc != ECORE_SUCCESS) { + __ecore_vlan_mac_h_pend(sc, o, *ramrod_flags); + +- /** Calling function should not diffrentiate between this case ++ /** Calling function should not differentiate between this case + * and the case in which there is already a pending ramrod + */ + rc = ECORE_PENDING; +@@ -2246,7 +2246,7 @@ struct ecore_pending_mcast_cmd { + union { + ecore_list_t macs_head; + uint32_t macs_num; /* Needed for DEL command */ +- int next_bin; /* Needed for RESTORE flow with aprox match */ ++ int next_bin; /* Needed for RESTORE flow with approx match */ + } data; + + int done; /* set to TRUE, when the command has been handled, +@@ -3424,7 +3424,7 @@ void ecore_init_mac_credit_pool(struct bnx2x_softc *sc, + } else { + + /* +- * CAM credit is equaly divided between all active functions ++ * CAM credit is equally divided between all active functions + * on the PATH. + */ + if (func_num > 0) { +diff --git a/dpdk/drivers/net/bnx2x/ecore_sp.h b/dpdk/drivers/net/bnx2x/ecore_sp.h +index d58072dac0..1f4d5a3ebe 100644 +--- a/dpdk/drivers/net/bnx2x/ecore_sp.h ++++ b/dpdk/drivers/net/bnx2x/ecore_sp.h +@@ -430,7 +430,7 @@ enum { + RAMROD_RESTORE, + /* Execute the next command now */ + RAMROD_EXEC, +- /* Don't add a new command and continue execution of posponed ++ /* Don't add a new command and continue execution of postponed + * commands. If not set a new command will be added to the + * pending commands list. + */ +@@ -1173,7 +1173,7 @@ struct ecore_rss_config_obj { + /* Last configured indirection table */ + uint8_t ind_table[T_ETH_INDIRECTION_TABLE_SIZE]; + +- /* flags for enabling 4-tupple hash on UDP */ ++ /* flags for enabling 4-tuple hash on UDP */ + uint8_t udp_rss_v4; + uint8_t udp_rss_v6; + +@@ -1285,7 +1285,7 @@ enum ecore_q_type { + #define ECORE_MULTI_TX_COS_E3B0 3 + #define ECORE_MULTI_TX_COS 3 /* Maximum possible */ + #define MAC_PAD (ECORE_ALIGN(ETH_ALEN, sizeof(uint32_t)) - ETH_ALEN) +-/* DMAE channel to be used by FW for timesync workaroun. A driver that sends ++/* DMAE channel to be used by FW for timesync workaround. A driver that sends + * timesync-related ramrods must not use this DMAE command ID. + */ + #define FW_DMAE_CMD_ID 6 +diff --git a/dpdk/drivers/net/bnx2x/elink.c b/dpdk/drivers/net/bnx2x/elink.c +index b65126d718..43fbf04ece 100644 +--- a/dpdk/drivers/net/bnx2x/elink.c ++++ b/dpdk/drivers/net/bnx2x/elink.c +@@ -1460,7 +1460,7 @@ static void elink_ets_e3b0_pbf_disabled(const struct elink_params *params) + } + /****************************************************************************** + * Description: +- * E3B0 disable will return basicly the values to init values. ++ * E3B0 disable will return basically the values to init values. + *. 
+ ******************************************************************************/ + static elink_status_t elink_ets_e3b0_disabled(const struct elink_params *params, +@@ -1483,7 +1483,7 @@ static elink_status_t elink_ets_e3b0_disabled(const struct elink_params *params, + + /****************************************************************************** + * Description: +- * Disable will return basicly the values to init values. ++ * Disable will return basically the values to init values. + * + ******************************************************************************/ + elink_status_t elink_ets_disabled(struct elink_params *params, +@@ -1506,7 +1506,7 @@ elink_status_t elink_ets_disabled(struct elink_params *params, + + /****************************************************************************** + * Description +- * Set the COS mappimg to SP and BW until this point all the COS are not ++ * Set the COS mapping to SP and BW until this point all the COS are not + * set as SP or BW. + ******************************************************************************/ + static elink_status_t elink_ets_e3b0_cli_map(const struct elink_params *params, +@@ -1652,7 +1652,7 @@ static elink_status_t elink_ets_e3b0_get_total_bw( + } + ELINK_DEBUG_P0(sc, + "elink_ets_E3B0_config total BW should be 100"); +- /* We can handle a case whre the BW isn't 100 this can happen ++ /* We can handle a case where the BW isn't 100 this can happen + * if the TC are joined. + */ + } +@@ -2608,7 +2608,7 @@ static elink_status_t elink_emac_enable(struct elink_params *params, + REG_WR(sc, NIG_REG_EGRESS_EMAC0_PORT + port * 4, 1); + + #ifdef ELINK_INCLUDE_EMUL +- /* for paladium */ ++ /* for palladium */ + if (CHIP_REV_IS_EMUL(sc)) { + /* Use lane 1 (of lanes 0-3) */ + REG_WR(sc, NIG_REG_XGXS_LANE_SEL_P0 + port * 4, 1); +@@ -2850,7 +2850,7 @@ static void elink_update_pfc_bmac2(struct elink_params *params, + + /* Set Time (based unit is 512 bit time) between automatic + * re-sending of PP packets amd enable automatic re-send of +- * Per-Priroity Packet as long as pp_gen is asserted and ++ * Per-Priority Packet as long as pp_gen is asserted and + * pp_disable is low. + */ + val = 0x8000; +@@ -3369,7 +3369,7 @@ static elink_status_t elink_pbf_update(struct elink_params *params, + } + + /** +- * elink_get_emac_base - retrive emac base address ++ * elink_get_emac_base - retrieve emac base address + * + * @bp: driver handle + * @mdc_mdio_access: access type +@@ -4518,7 +4518,7 @@ static void elink_warpcore_enable_AN_KR2(struct elink_phy *phy, + elink_cl45_write(sc, phy, reg_set[i].devad, reg_set[i].reg, + reg_set[i].val); + +- /* Start KR2 work-around timer which handles BNX2X8073 link-parner */ ++ /* Start KR2 work-around timer which handles BNX2X8073 link-partner */ + params->link_attr_sync |= LINK_ATTR_SYNC_KR2_ENABLE; + elink_update_link_attr(params, params->link_attr_sync); + } +@@ -7824,7 +7824,7 @@ elink_status_t elink_link_update(struct elink_params *params, + * hence its link is expected to be down + * - SECOND_PHY means that first phy should not be able + * to link up by itself (using configuration) +- * - DEFAULT should be overridden during initialiazation ++ * - DEFAULT should be overridden during initialization + */ + ELINK_DEBUG_P1(sc, "Invalid link indication" + " mpc=0x%x. 
DISABLING LINK !!!", +@@ -10991,7 +10991,7 @@ static elink_status_t elink_84858_cmd_hdlr(struct elink_phy *phy, + ELINK_DEBUG_P0(sc, "FW cmd failed."); + return ELINK_STATUS_ERROR; + } +- /* Step5: Once the command has completed, read the specficied DATA ++ /* Step5: Once the command has completed, read the specified DATA + * registers for any saved results for the command, if applicable + */ + +@@ -15013,7 +15013,7 @@ static void elink_check_kr2_wa(struct elink_params *params, + + /* Once KR2 was disabled, wait 5 seconds before checking KR2 recovery + * Since some switches tend to reinit the AN process and clear the +- * the advertised BP/NP after ~2 seconds causing the KR2 to be disabled ++ * advertised BP/NP after ~2 seconds causing the KR2 to be disabled + * and recovered many times + */ + if (vars->check_kr2_recovery_cnt > 0) { +diff --git a/dpdk/drivers/net/bnxt/bnxt.h b/dpdk/drivers/net/bnxt/bnxt.h +index b912fd8564..4354c0f55e 100644 +--- a/dpdk/drivers/net/bnxt/bnxt.h ++++ b/dpdk/drivers/net/bnxt/bnxt.h +@@ -82,8 +82,7 @@ + #define BROADCOM_DEV_ID_58802_VF 0xd800 + + #define BROADCOM_DEV_957508_N2100 0x5208 +-#define IS_BNXT_DEV_957508_N2100(bp) \ +- ((bp)->pdev->id.subsystem_device_id == BROADCOM_DEV_957508_N2100) ++#define BROADCOM_DEV_957414_N225 0x4145 + + #define BNXT_MAX_MTU 9574 + #define VLAN_TAG_SIZE 4 +@@ -299,7 +298,7 @@ struct bnxt_link_info { + uint8_t link_signal_mode; + uint16_t force_pam4_link_speed; + uint16_t support_pam4_speeds; +- uint16_t auto_pam4_link_speeds; ++ uint16_t auto_pam4_link_speed_mask; + uint16_t support_pam4_auto_speeds; + uint8_t req_signal_mode; + }; +@@ -316,9 +315,11 @@ struct rte_flow { + struct bnxt_vnic_info *vnic; + }; + ++#define BNXT_PTP_RX_PND_CNT 10 + #define BNXT_PTP_FLAGS_PATH_TX 0x0 + #define BNXT_PTP_FLAGS_PATH_RX 0x1 + #define BNXT_PTP_FLAGS_CURRENT_TIME 0x2 ++#define BNXT_PTP_CURRENT_TIME_MASK 0xFFFF00000000ULL + + struct bnxt_ptp_cfg { + #define BNXT_GRCPF_REG_WINDOW_BASE_OUT 0x400 +@@ -368,6 +369,7 @@ struct bnxt_ptp_cfg { + + /* On Thor, the Rx timestamp is present in the Rx completion record */ + uint64_t rx_timestamp; ++ uint64_t current_time; + }; + + struct bnxt_coal { +@@ -564,40 +566,6 @@ struct bnxt_rep_info { + ETH_RSS_NONFRAG_IPV6_UDP | \ + ETH_RSS_LEVEL_MASK) + +-#define BNXT_DEV_TX_OFFLOAD_SUPPORT (DEV_TX_OFFLOAD_VLAN_INSERT | \ +- DEV_TX_OFFLOAD_IPV4_CKSUM | \ +- DEV_TX_OFFLOAD_TCP_CKSUM | \ +- DEV_TX_OFFLOAD_UDP_CKSUM | \ +- DEV_TX_OFFLOAD_TCP_TSO | \ +- DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | \ +- DEV_TX_OFFLOAD_VXLAN_TNL_TSO | \ +- DEV_TX_OFFLOAD_GRE_TNL_TSO | \ +- DEV_TX_OFFLOAD_IPIP_TNL_TSO | \ +- DEV_TX_OFFLOAD_GENEVE_TNL_TSO | \ +- DEV_TX_OFFLOAD_QINQ_INSERT | \ +- DEV_TX_OFFLOAD_MULTI_SEGS) +- +-#define BNXT_DEV_RX_OFFLOAD_SUPPORT (DEV_RX_OFFLOAD_VLAN_FILTER | \ +- DEV_RX_OFFLOAD_VLAN_STRIP | \ +- DEV_RX_OFFLOAD_IPV4_CKSUM | \ +- DEV_RX_OFFLOAD_UDP_CKSUM | \ +- DEV_RX_OFFLOAD_TCP_CKSUM | \ +- DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | \ +- DEV_RX_OFFLOAD_OUTER_UDP_CKSUM | \ +- DEV_RX_OFFLOAD_JUMBO_FRAME | \ +- DEV_RX_OFFLOAD_KEEP_CRC | \ +- DEV_RX_OFFLOAD_VLAN_EXTEND | \ +- DEV_RX_OFFLOAD_TCP_LRO | \ +- DEV_RX_OFFLOAD_SCATTER | \ +- DEV_RX_OFFLOAD_RSS_HASH) +- +-#define MAX_TABLE_SUPPORT 4 +-#define MAX_DIR_SUPPORT 2 +-struct bnxt_dmabuf_info { +- uint32_t entry_num; +- int fd[MAX_DIR_SUPPORT][MAX_TABLE_SUPPORT]; +-}; +- + #define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) + + struct bnxt_flow_stat_info { +@@ -609,6 +577,49 @@ struct bnxt_flow_stat_info { + struct bnxt_ctx_mem_buf_info tx_fc_out_tbl; + }; + 
++struct bnxt_ring_stats { ++ /* Number of transmitted unicast packets */ ++ uint64_t tx_ucast_pkts; ++ /* Number of transmitted multicast packets */ ++ uint64_t tx_mcast_pkts; ++ /* Number of transmitted broadcast packets */ ++ uint64_t tx_bcast_pkts; ++ /* Number of packets discarded in transmit path */ ++ uint64_t tx_discard_pkts; ++ /* Number of packets in transmit path with error */ ++ uint64_t tx_error_pkts; ++ /* Number of transmitted bytes for unicast traffic */ ++ uint64_t tx_ucast_bytes; ++ /* Number of transmitted bytes for multicast traffic */ ++ uint64_t tx_mcast_bytes; ++ /* Number of transmitted bytes for broadcast traffic */ ++ uint64_t tx_bcast_bytes; ++ /* Number of received unicast packets */ ++ uint64_t rx_ucast_pkts; ++ /* Number of received multicast packets */ ++ uint64_t rx_mcast_pkts; ++ /* Number of received broadcast packets */ ++ uint64_t rx_bcast_pkts; ++ /* Number of packets discarded in receive path */ ++ uint64_t rx_discard_pkts; ++ /* Number of packets in receive path with errors */ ++ uint64_t rx_error_pkts; ++ /* Number of received bytes for unicast traffic */ ++ uint64_t rx_ucast_bytes; ++ /* Number of received bytes for multicast traffic */ ++ uint64_t rx_mcast_bytes; ++ /* Number of received bytes for broadcast traffic */ ++ uint64_t rx_bcast_bytes; ++ /* Number of aggregated unicast packets */ ++ uint64_t rx_agg_pkts; ++ /* Number of aggregated unicast bytes */ ++ uint64_t rx_agg_bytes; ++ /* Number of aggregation events */ ++ uint64_t rx_agg_events; ++ /* Number of aborted aggregations */ ++ uint64_t rx_agg_aborts; ++}; ++ + struct bnxt { + void *bar0; + +@@ -622,7 +633,6 @@ struct bnxt { + #define BNXT_FLAG_PORT_STATS BIT(2) + #define BNXT_FLAG_JUMBO BIT(3) + #define BNXT_FLAG_SHORT_CMD BIT(4) +-#define BNXT_FLAG_UPDATE_HASH BIT(5) + #define BNXT_FLAG_PTP_SUPPORTED BIT(6) + #define BNXT_FLAG_MULTI_HOST BIT(7) + #define BNXT_FLAG_EXT_RX_PORT_STATS BIT(8) +@@ -646,6 +656,8 @@ struct bnxt { + #define BNXT_FLAG_DFLT_MAC_SET BIT(26) + #define BNXT_FLAG_TRUFLOW_EN BIT(27) + #define BNXT_FLAG_GFID_ENABLE BIT(28) ++#define BNXT_FLAGS_PTP_TIMESYNC_ENABLED BIT(29) ++#define BNXT_FLAGS_PTP_ALARM_SCHEDULED BIT(30) + #define BNXT_PF(bp) (!((bp)->flags & BNXT_FLAG_VF)) + #define BNXT_VF(bp) ((bp)->flags & BNXT_FLAG_VF) + #define BNXT_NPAR(bp) ((bp)->flags & BNXT_FLAG_NPAR_PF) +@@ -662,6 +674,8 @@ struct bnxt { + #define BNXT_HAS_DFLT_MAC_SET(bp) ((bp)->flags & BNXT_FLAG_DFLT_MAC_SET) + #define BNXT_TRUFLOW_EN(bp) ((bp)->flags & BNXT_FLAG_TRUFLOW_EN) + #define BNXT_GFID_ENABLED(bp) ((bp)->flags & BNXT_FLAG_GFID_ENABLE) ++#define BNXT_THOR_PTP_TIMESYNC_ENABLED(bp) \ ++ ((bp)->flags & BNXT_FLAGS_PTP_TIMESYNC_ENABLED) + + uint32_t fw_cap; + #define BNXT_FW_CAP_HOT_RESET BIT(0) +@@ -672,12 +686,15 @@ struct bnxt { + #define BNXT_FW_CAP_ADV_FLOW_MGMT BIT(5) + #define BNXT_FW_CAP_ADV_FLOW_COUNTERS BIT(6) + #define BNXT_FW_CAP_LINK_ADMIN BIT(7) ++#define BNXT_FW_CAP_VLAN_TX_INSERT BIT(9) + + pthread_mutex_t flow_lock; + + uint32_t vnic_cap_flags; + #define BNXT_VNIC_CAP_COS_CLASSIFY BIT(0) + #define BNXT_VNIC_CAP_OUTER_RSS BIT(1) ++#define BNXT_VNIC_CAP_VLAN_RX_STRIP BIT(3) ++#define BNXT_RX_VLAN_STRIP_EN(bp) ((bp)->vnic_cap_flags & BNXT_VNIC_CAP_VLAN_RX_STRIP) + unsigned int rx_nr_rings; + unsigned int rx_cp_nr_rings; + unsigned int rx_num_qs_per_vnic; +@@ -705,7 +722,7 @@ struct bnxt { + uint32_t max_ring_grps; + struct bnxt_ring_grp_info *grp_info; + +- unsigned int nr_vnics; ++ uint16_t nr_vnics; + + #define BNXT_GET_DEFAULT_VNIC(bp) (&(bp)->vnic_info[0]) + struct 
bnxt_vnic_info *vnic_info; +@@ -754,7 +771,7 @@ struct bnxt { + uint16_t max_rx_rings; + #define MAX_STINGRAY_RINGS 128U + +-#define BNXT_MAX_VF_REP_RINGS 8 ++#define BNXT_MAX_VF_REP_RINGS 8U + + uint16_t max_nq_rings; + uint16_t max_l2_ctx; +@@ -803,12 +820,22 @@ struct bnxt { + uint16_t port_svif; + + struct tf tfp; +- struct bnxt_dmabuf_info dmabuf; + struct bnxt_ulp_context *ulp_ctx; + struct bnxt_flow_stat_info *flow_stat; + uint8_t flow_xstat; + uint16_t max_num_kflows; + uint16_t tx_cfa_action; ++ struct bnxt_ring_stats *prev_rx_ring_stats; ++ struct bnxt_ring_stats *prev_tx_ring_stats; ++ ++#define BNXT_MAX_MC_ADDRS ((bp)->max_mcast_addr) ++ struct rte_ether_addr *mcast_addr_list; ++ rte_iova_t mc_list_dma_addr; ++ uint32_t nb_mc_addr; ++ uint32_t max_mcast_addr; /* maximum number of mcast filters supported */ ++ ++ struct rte_eth_rss_conf rss_conf; /* RSS configuration. */ ++ uint16_t tunnel_disable_flag; /* tunnel stateless offloads status */ + }; + + static +@@ -831,6 +858,12 @@ inline uint16_t bnxt_max_rings(struct bnxt *bp) + bp->max_stat_ctx / 2U); + } + ++ /* RSS table size in Thor is 512. ++ * Cap max Rx rings to the same value for RSS. ++ */ ++ if (BNXT_CHIP_THOR(bp)) ++ max_rx_rings = RTE_MIN(max_rx_rings, BNXT_RSS_TBL_SIZE_THOR); ++ + max_tx_rings = RTE_MIN(max_tx_rings, max_rx_rings); + if (max_cp_rings > BNXT_NUM_ASYNC_CPR(bp)) + max_cp_rings -= BNXT_NUM_ASYNC_CPR(bp); +@@ -967,6 +1000,9 @@ void bnxt_flow_cnt_alarm_cb(void *arg); + int bnxt_flow_stats_req(struct bnxt *bp); + int bnxt_flow_stats_cnt(struct bnxt *bp); + uint32_t bnxt_get_speed_capabilities(struct bnxt *bp); ++int bnxt_dev_start_op(struct rte_eth_dev *eth_dev); ++int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev); ++void bnxt_handle_vf_cfg_change(void *arg); + + int + bnxt_filter_ctrl_op(struct rte_eth_dev *dev, +diff --git a/dpdk/drivers/net/bnxt/bnxt_cpr.c b/dpdk/drivers/net/bnxt/bnxt_cpr.c +index ee96ae81bf..50c18ee6fc 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_cpr.c ++++ b/dpdk/drivers/net/bnxt/bnxt_cpr.c +@@ -91,6 +91,26 @@ bnxt_process_default_vnic_change(struct bnxt *bp, + bnxt_rep_dev_start_op(eth_dev); + } + ++void bnxt_handle_vf_cfg_change(void *arg) ++{ ++ struct bnxt *bp = arg; ++ struct rte_eth_dev *eth_dev = bp->eth_dev; ++ int rc; ++ ++ /* Free and recreate filters with default VLAN */ ++ if (eth_dev->data->dev_started) { ++ rc = bnxt_dev_stop_op(eth_dev); ++ if (rc != 0) { ++ PMD_DRV_LOG(ERR, "Failed to stop Port:%u\n", eth_dev->data->port_id); ++ return; ++ } ++ ++ rc = bnxt_dev_start_op(eth_dev); ++ if (rc != 0) ++ PMD_DRV_LOG(ERR, "Failed to start Port:%u\n", eth_dev->data->port_id); ++ } ++} ++ + /* + * Async event handling + */ +@@ -109,6 +129,8 @@ void bnxt_handle_async_event(struct bnxt *bp, + case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE: + /* FALLTHROUGH */ + bnxt_link_update_op(bp->eth_dev, 0); ++ rte_eth_dev_callback_process(bp->eth_dev, ++ RTE_ETH_EVENT_INTR_LSC, NULL); + break; + case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD: + PMD_DRV_LOG(INFO, "Async event: PF driver unloaded\n"); +@@ -116,6 +138,8 @@ void bnxt_handle_async_event(struct bnxt *bp, + case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE: + PMD_DRV_LOG(INFO, "Async event: VF config changed\n"); + bnxt_hwrm_func_qcfg(bp, NULL); ++ if (BNXT_VF(bp)) ++ rte_eal_alarm_set(1, bnxt_handle_vf_cfg_change, (void *)bp); + break; + case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED: + PMD_DRV_LOG(INFO, "Port conn async event\n"); +diff --git a/dpdk/drivers/net/bnxt/bnxt_cpr.h 
b/dpdk/drivers/net/bnxt/bnxt_cpr.h +index ff9697f4c8..fedfb47f2f 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_cpr.h ++++ b/dpdk/drivers/net/bnxt/bnxt_cpr.h +@@ -8,13 +8,10 @@ + #include + + #include ++#include "hsi_struct_def_dpdk.h" + + struct bnxt_db_info; + +-#define CMP_VALID(cmp, raw_cons, ring) \ +- (!!(rte_le_to_cpu_32(((struct cmpl_base *)(cmp))->info3_v) & \ +- CMPL_BASE_V) == !((raw_cons) & ((ring)->ring_size))) +- + #define CMPL_VALID(cmp, v) \ + (!!(rte_le_to_cpu_32(((struct cmpl_base *)(cmp))->info3_v) & \ + CMPL_BASE_V) == !(v)) +@@ -26,6 +23,10 @@ struct bnxt_db_info; + #define CMP_TYPE(cmp) \ + (((struct cmpl_base *)cmp)->type & CMPL_BASE_TYPE_MASK) + ++/* Get completion length from completion type, in 16-byte units. */ ++#define CMP_LEN(cmp_type) (((cmp_type) & 1) + 1) ++ ++ + #define ADV_RAW_CMP(idx, n) ((idx) + (n)) + #define NEXT_RAW_CMP(idx) ADV_RAW_CMP(idx, 1) + #define RING_CMP(ring, idx) ((idx) & (ring)->ring_mask) +@@ -127,4 +128,35 @@ bool bnxt_is_recovery_enabled(struct bnxt *bp); + bool bnxt_is_master_func(struct bnxt *bp); + + void bnxt_stop_rxtx(struct bnxt *bp); ++ ++/** ++ * Check validity of a completion ring entry. If the entry is valid, include a ++ * C11 __ATOMIC_ACQUIRE fence to ensure that subsequent loads of fields in the ++ * completion are not hoisted by the compiler or by the CPU to come before the ++ * loading of the "valid" field. ++ * ++ * Note: the caller must not access any fields in the specified completion ++ * entry prior to calling this function. ++ * ++ * @param cmpl ++ * Pointer to an entry in the completion ring. ++ * @param raw_cons ++ * Raw consumer index of entry in completion ring. ++ * @param ring_size ++ * Size of completion ring. ++ */ ++static __rte_always_inline bool ++bnxt_cpr_cmp_valid(const void *cmpl, uint32_t raw_cons, uint32_t ring_size) ++{ ++ const struct cmpl_base *c = cmpl; ++ bool expected, valid; ++ ++ expected = !(raw_cons & ring_size); ++ valid = !!(rte_le_to_cpu_32(c->info3_v) & CMPL_BASE_V); ++ if (valid == expected) { ++ rte_atomic_thread_fence(__ATOMIC_ACQUIRE); ++ return true; ++ } ++ return false; ++} + #endif +diff --git a/dpdk/drivers/net/bnxt/bnxt_ethdev.c b/dpdk/drivers/net/bnxt/bnxt_ethdev.c +index 3aa346d45c..0ca79229e6 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_ethdev.c ++++ b/dpdk/drivers/net/bnxt/bnxt_ethdev.c +@@ -188,6 +188,7 @@ static int bnxt_restore_vlan_filters(struct bnxt *bp); + static void bnxt_dev_recover(void *arg); + static void bnxt_free_error_recovery_info(struct bnxt *bp); + static void bnxt_free_rep_info(struct bnxt *bp); ++static int bnxt_check_fw_ready(struct bnxt *bp); + + int is_bnxt_in_error(struct bnxt *bp) + { +@@ -229,16 +230,19 @@ uint16_t bnxt_rss_hash_tbl_size(const struct bnxt *bp) + static void bnxt_free_parent_info(struct bnxt *bp) + { + rte_free(bp->parent); ++ bp->parent = NULL; + } + + static void bnxt_free_pf_info(struct bnxt *bp) + { + rte_free(bp->pf); ++ bp->pf = NULL; + } + + static void bnxt_free_link_info(struct bnxt *bp) + { + rte_free(bp->link_info); ++ bp->link_info = NULL; + } + + static void bnxt_free_leds_info(struct bnxt *bp) +@@ -259,7 +263,9 @@ static void bnxt_free_flow_stats_info(struct bnxt *bp) + static void bnxt_free_cos_queues(struct bnxt *bp) + { + rte_free(bp->rx_cos_queue); ++ bp->rx_cos_queue = NULL; + rte_free(bp->tx_cos_queue); ++ bp->tx_cos_queue = NULL; + } + + static void bnxt_free_mem(struct bnxt *bp, bool reconfig) +@@ -374,7 +380,7 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool reconfig) + if (rc) + goto alloc_mem_err; + +- rc = 
bnxt_alloc_vnic_attributes(bp); ++ rc = bnxt_alloc_vnic_attributes(bp, reconfig); + if (rc) + goto alloc_mem_err; + +@@ -427,12 +433,13 @@ static int bnxt_setup_one_vnic(struct bnxt *bp, uint16_t vnic_id) + if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS) { + int j, nr_ctxs = bnxt_rss_ctxts(bp); + ++ /* RSS table size in Thor is 512. ++ * Cap max Rx rings to same value ++ */ + if (bp->rx_nr_rings > BNXT_RSS_TBL_SIZE_THOR) { + PMD_DRV_LOG(ERR, "RxQ cnt %d > reta_size %d\n", + bp->rx_nr_rings, BNXT_RSS_TBL_SIZE_THOR); +- PMD_DRV_LOG(ERR, +- "Only queues 0-%d will be in RSS table\n", +- BNXT_RSS_TBL_SIZE_THOR - 1); ++ goto err_out; + } + + rc = 0; +@@ -489,10 +496,11 @@ static int bnxt_setup_one_vnic(struct bnxt *bp, uint16_t vnic_id) + + bnxt_hwrm_vnic_plcmode_cfg(bp, vnic); + +- if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) +- bnxt_hwrm_vnic_tpa_cfg(bp, vnic, 1); +- else +- bnxt_hwrm_vnic_tpa_cfg(bp, vnic, 0); ++ rc = bnxt_hwrm_vnic_tpa_cfg(bp, vnic, ++ (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) ? ++ true : false); ++ if (rc) ++ goto err_out; + + return 0; + err_out: +@@ -576,13 +584,14 @@ static int bnxt_register_fc_ctx_mem(struct bnxt *bp) + return rc; + } + +-static int bnxt_alloc_ctx_mem_buf(char *type, size_t size, ++static int bnxt_alloc_ctx_mem_buf(struct bnxt *bp, char *type, size_t size, + struct bnxt_ctx_mem_buf_info *ctx) + { + if (!ctx) + return -EINVAL; + +- ctx->va = rte_zmalloc(type, size, 0); ++ ctx->va = rte_zmalloc_socket(type, size, 0, ++ bp->eth_dev->device->numa_node); + if (ctx->va == NULL) + return -ENOMEM; + rte_mem_lock_page(ctx->va); +@@ -606,7 +615,7 @@ static int bnxt_init_fc_ctx_mem(struct bnxt *bp) + sprintf(type, "bnxt_rx_fc_in_" PCI_PRI_FMT, pdev->addr.domain, + pdev->addr.bus, pdev->addr.devid, pdev->addr.function); + /* 4 bytes for each counter-id */ +- rc = bnxt_alloc_ctx_mem_buf(type, ++ rc = bnxt_alloc_ctx_mem_buf(bp, type, + max_fc * 4, + &bp->flow_stat->rx_fc_in_tbl); + if (rc) +@@ -615,7 +624,7 @@ static int bnxt_init_fc_ctx_mem(struct bnxt *bp) + sprintf(type, "bnxt_rx_fc_out_" PCI_PRI_FMT, pdev->addr.domain, + pdev->addr.bus, pdev->addr.devid, pdev->addr.function); + /* 16 bytes for each counter - 8 bytes pkt_count, 8 bytes byte_count */ +- rc = bnxt_alloc_ctx_mem_buf(type, ++ rc = bnxt_alloc_ctx_mem_buf(bp, type, + max_fc * 16, + &bp->flow_stat->rx_fc_out_tbl); + if (rc) +@@ -624,7 +633,7 @@ static int bnxt_init_fc_ctx_mem(struct bnxt *bp) + sprintf(type, "bnxt_tx_fc_in_" PCI_PRI_FMT, pdev->addr.domain, + pdev->addr.bus, pdev->addr.devid, pdev->addr.function); + /* 4 bytes for each counter-id */ +- rc = bnxt_alloc_ctx_mem_buf(type, ++ rc = bnxt_alloc_ctx_mem_buf(bp, type, + max_fc * 4, + &bp->flow_stat->tx_fc_in_tbl); + if (rc) +@@ -633,7 +642,7 @@ static int bnxt_init_fc_ctx_mem(struct bnxt *bp) + sprintf(type, "bnxt_tx_fc_out_" PCI_PRI_FMT, pdev->addr.domain, + pdev->addr.bus, pdev->addr.devid, pdev->addr.function); + /* 16 bytes for each counter - 8 bytes pkt_count, 8 bytes byte_count */ +- rc = bnxt_alloc_ctx_mem_buf(type, ++ rc = bnxt_alloc_ctx_mem_buf(bp, type, + max_fc * 16, + &bp->flow_stat->tx_fc_out_tbl); + if (rc) +@@ -662,6 +671,19 @@ static int bnxt_init_ctx_mem(struct bnxt *bp) + return rc; + } + ++static inline bool bnxt_force_link_config(struct bnxt *bp) ++{ ++ uint16_t subsystem_device_id = bp->pdev->id.subsystem_device_id; ++ ++ switch (subsystem_device_id) { ++ case BROADCOM_DEV_957508_N2100: ++ case BROADCOM_DEV_957414_N225: ++ return true; ++ default: ++ return false; ++ } ++} ++ + static int bnxt_update_phy_setting(struct bnxt *bp) + { + 
struct rte_eth_link new; +@@ -674,11 +696,12 @@ static int bnxt_update_phy_setting(struct bnxt *bp) + } + + /* +- * On BCM957508-N2100 adapters, FW will not allow any user other +- * than BMC to shutdown the port. bnxt_get_hwrm_link_config() call +- * always returns link up. Force phy update always in that case. ++ * Device is not obliged link down in certain scenarios, even ++ * when forced. When FW does not allow any user other than BMC ++ * to shutdown the port, bnxt_get_hwrm_link_config() call always ++ * returns link up. Force phy update always in that case. + */ +- if (!new.link_status || IS_BNXT_DEV_957508_N2100(bp)) { ++ if (!new.link_status || bnxt_force_link_config(bp)) { + rc = bnxt_set_hwrm_link_config(bp, true); + if (rc) { + PMD_DRV_LOG(ERR, "Failed to update PHY settings\n"); +@@ -689,6 +712,38 @@ static int bnxt_update_phy_setting(struct bnxt *bp) + return rc; + } + ++static void bnxt_free_prev_ring_stats(struct bnxt *bp) ++{ ++ rte_free(bp->prev_rx_ring_stats); ++ rte_free(bp->prev_tx_ring_stats); ++ ++ bp->prev_rx_ring_stats = NULL; ++ bp->prev_tx_ring_stats = NULL; ++} ++ ++static int bnxt_alloc_prev_ring_stats(struct bnxt *bp) ++{ ++ bp->prev_rx_ring_stats = rte_zmalloc("bnxt_prev_rx_ring_stats", ++ sizeof(struct bnxt_ring_stats) * ++ bp->rx_cp_nr_rings, ++ 0); ++ if (bp->prev_rx_ring_stats == NULL) ++ return -ENOMEM; ++ ++ bp->prev_tx_ring_stats = rte_zmalloc("bnxt_prev_tx_ring_stats", ++ sizeof(struct bnxt_ring_stats) * ++ bp->tx_cp_nr_rings, ++ 0); ++ if (bp->tx_cp_nr_rings > 0 && bp->prev_tx_ring_stats == NULL) ++ goto error; ++ ++ return 0; ++ ++error: ++ bnxt_free_prev_ring_stats(bp); ++ return -ENOMEM; ++} ++ + static int bnxt_start_nic(struct bnxt *bp) + { + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(bp->eth_dev); +@@ -765,6 +820,16 @@ static int bnxt_start_nic(struct bnxt *bp) + goto err_out; + } + ++ for (j = 0; j < bp->tx_nr_rings; j++) { ++ struct bnxt_tx_queue *txq = bp->tx_queues[j]; ++ ++ if (!txq->tx_deferred_start) { ++ bp->eth_dev->data->tx_queue_state[j] = ++ RTE_ETH_QUEUE_STATE_STARTED; ++ txq->tx_started = true; ++ } ++ } ++ + rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, &bp->vnic_info[0], 0, NULL); + if (rc) { + PMD_DRV_LOG(ERR, +@@ -797,7 +862,7 @@ static int bnxt_start_nic(struct bnxt *bp) + PMD_DRV_LOG(ERR, "Failed to allocate %d rx_queues" + " intr_vec", bp->eth_dev->data->nb_rx_queues); + rc = -ENOMEM; +- goto err_disable; ++ goto err_out; + } + PMD_DRV_LOG(DEBUG, "intr_handle->intr_vec = %p " + "intr_handle->nb_efd = %d intr_handle->max_intr = %d\n", +@@ -817,12 +882,12 @@ static int bnxt_start_nic(struct bnxt *bp) + #ifndef RTE_EXEC_ENV_FREEBSD + /* In FreeBSD OS, nic_uio driver does not support interrupts */ + if (rc) +- goto err_free; ++ goto err_out; + #endif + + rc = bnxt_update_phy_setting(bp); + if (rc) +- goto err_free; ++ goto err_out; + + bp->mark_table = rte_zmalloc("bnxt_mark_table", BNXT_MARK_TABLE_SZ, 0); + if (!bp->mark_table) +@@ -830,10 +895,6 @@ static int bnxt_start_nic(struct bnxt *bp) + + return 0; + +-err_free: +- rte_free(intr_handle->intr_vec); +-err_disable: +- rte_intr_efd_disable(intr_handle); + err_out: + /* Some of the error status returned by FW may not be from errno.h */ + if (rc > 0) +@@ -856,12 +917,18 @@ static int bnxt_shutdown_nic(struct bnxt *bp) + + uint32_t bnxt_get_speed_capabilities(struct bnxt *bp) + { +- uint32_t link_speed = bp->link_info->support_speeds; ++ uint32_t pam4_link_speed = 0; ++ uint32_t link_speed = 0; + uint32_t speed_capa = 0; + ++ if (bp->link_info == NULL) ++ return 0; ++ ++ 
link_speed = bp->link_info->support_speeds; ++ + /* If PAM4 is configured, use PAM4 supported speed */ +- if (link_speed == 0 && bp->link_info->support_pam4_speeds > 0) +- link_speed = bp->link_info->support_pam4_speeds; ++ if (bp->link_info->support_pam4_speeds > 0) ++ pam4_link_speed = bp->link_info->support_pam4_speeds; + + if (link_speed & HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100MB) + speed_capa |= ETH_LINK_SPEED_100M; +@@ -883,11 +950,11 @@ uint32_t bnxt_get_speed_capabilities(struct bnxt *bp) + speed_capa |= ETH_LINK_SPEED_50G; + if (link_speed & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100GB) + speed_capa |= ETH_LINK_SPEED_100G; +- if (link_speed & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_PAM4_SPEEDS_50G) ++ if (pam4_link_speed & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_PAM4_SPEEDS_50G) + speed_capa |= ETH_LINK_SPEED_50G; +- if (link_speed & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_PAM4_SPEEDS_100G) ++ if (pam4_link_speed & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_PAM4_SPEEDS_100G) + speed_capa |= ETH_LINK_SPEED_100G; +- if (link_speed & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_PAM4_SPEEDS_200G) ++ if (pam4_link_speed & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_PAM4_SPEEDS_200G) + speed_capa |= ETH_LINK_SPEED_200G; + + if (bp->link_info->auto_mode == +@@ -913,7 +980,7 @@ static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev, + return rc; + + /* MAC Specifics */ +- dev_info->max_mac_addrs = bp->max_l2_ctx; ++ dev_info->max_mac_addrs = RTE_MIN(bp->max_l2_ctx, ETH_NUM_RECEIVE_MAC_ADDR); + dev_info->max_hash_mac_addrs = 0; + + /* PF/VF specifics */ +@@ -925,7 +992,7 @@ static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev, + dev_info->max_rx_queues = max_rx_rings; + dev_info->max_tx_queues = max_rx_rings; + dev_info->reta_size = bnxt_rss_hash_tbl_size(bp); +- dev_info->hash_key_size = 40; ++ dev_info->hash_key_size = HW_HASH_KEY_SIZE; + max_vnics = bp->max_vnics; + + /* MTU specifics */ +@@ -936,17 +1003,14 @@ static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev, + dev_info->min_rx_bufsize = 1; + dev_info->max_rx_pktlen = BNXT_MAX_PKT_LEN; + +- dev_info->rx_offload_capa = BNXT_DEV_RX_OFFLOAD_SUPPORT; +- if (bp->flags & BNXT_FLAG_PTP_SUPPORTED) +- dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TIMESTAMP; ++ dev_info->rx_offload_capa = bnxt_get_rx_port_offloads(bp); + dev_info->tx_queue_offload_capa = DEV_TX_OFFLOAD_MBUF_FAST_FREE; +- dev_info->tx_offload_capa = BNXT_DEV_TX_OFFLOAD_SUPPORT | ++ dev_info->tx_offload_capa = bnxt_get_tx_port_offloads(bp) | + dev_info->tx_queue_offload_capa; + dev_info->flow_type_rss_offloads = BNXT_ETH_RSS_SUPPORT; + + dev_info->speed_capa = bnxt_get_speed_capabilities(bp); + +- /* *INDENT-OFF* */ + dev_info->default_rxconf = (struct rte_eth_rxconf) { + .rx_thresh = { + .pthresh = 8, +@@ -968,7 +1032,6 @@ static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev, + }; + eth_dev->data->dev_conf.intr_conf.lsc = 1; + +- eth_dev->data->dev_conf.intr_conf.rxq = 1; + dev_info->rx_desc_lim.nb_min = BNXT_MIN_RING_DESC; + dev_info->rx_desc_lim.nb_max = BNXT_MAX_RX_RING_DESC; + dev_info->tx_desc_lim.nb_min = BNXT_MIN_RING_DESC; +@@ -982,8 +1045,6 @@ static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev, + BNXT_SWITCH_PORT_ID_TRUSTED_VF; + } + +- /* *INDENT-ON* */ +- + /* + * TODO: default_rxconf, default_txconf, rx_desc_lim, and tx_desc_lim + * need further investigation. 
+@@ -1023,6 +1084,7 @@ static int bnxt_dev_configure_op(struct rte_eth_dev *eth_dev) + { + struct bnxt *bp = eth_dev->data->dev_private; + uint64_t rx_offloads = eth_dev->data->dev_conf.rxmode.offloads; ++ struct rte_eth_rss_conf *rss_conf = ð_dev->data->dev_conf.rx_adv_conf.rss_conf; + int rc; + + bp->rx_queues = (void *)eth_dev->data->rx_queues; +@@ -1104,6 +1166,18 @@ static int bnxt_dev_configure_op(struct rte_eth_dev *eth_dev) + BNXT_NUM_VLANS; + bnxt_mtu_set_op(eth_dev, eth_dev->data->mtu); + } ++ ++ /* application provides the hash key to program */ ++ if (rss_conf->rss_key != NULL) { ++ if (rss_conf->rss_key_len != HW_HASH_KEY_SIZE) ++ PMD_DRV_LOG(WARNING, "port %u RSS key len must be %d bytes long", ++ eth_dev->data->port_id, HW_HASH_KEY_SIZE); ++ else ++ memcpy(bp->rss_conf.rss_key, rss_conf->rss_key, HW_HASH_KEY_SIZE); ++ } ++ bp->rss_conf.rss_key_len = HW_HASH_KEY_SIZE; ++ bp->rss_conf.rss_hf = rss_conf->rss_hf; ++ + return 0; + + resource_error: +@@ -1249,6 +1323,11 @@ static int bnxt_handle_if_change_status(struct bnxt *bp) + + /* clear fatal flag so that re-init happens */ + bp->flags &= ~BNXT_FLAG_FATAL_ERROR; ++ ++ rc = bnxt_check_fw_ready(bp); ++ if (rc) ++ return rc; ++ + rc = bnxt_init_resources(bp, true); + + bp->flags &= ~BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE; +@@ -1261,6 +1340,9 @@ static int bnxt_dev_set_link_up_op(struct rte_eth_dev *eth_dev) + struct bnxt *bp = eth_dev->data->dev_private; + int rc = 0; + ++ if (!BNXT_SINGLE_PF(bp)) ++ return -ENOTSUP; ++ + if (!bp->link_info->link_up) + rc = bnxt_set_hwrm_link_config(bp, true); + if (!rc) +@@ -1274,6 +1356,9 @@ static int bnxt_dev_set_link_down_op(struct rte_eth_dev *eth_dev) + { + struct bnxt *bp = eth_dev->data->dev_private; + ++ if (!BNXT_SINGLE_PF(bp)) ++ return -ENOTSUP; ++ + eth_dev->data->dev_link.link_status = 0; + bnxt_set_hwrm_link_config(bp, false); + bp->link_info->link_up = 0; +@@ -1285,16 +1370,84 @@ static void bnxt_free_switch_domain(struct bnxt *bp) + { + int rc = 0; + +- if (bp->switch_domain_id) { +- rc = rte_eth_switch_domain_free(bp->switch_domain_id); +- if (rc) +- PMD_DRV_LOG(ERR, "free switch domain:%d fail: %d\n", +- bp->switch_domain_id, rc); ++ if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) ++ return; ++ ++ rc = rte_eth_switch_domain_free(bp->switch_domain_id); ++ if (rc) ++ PMD_DRV_LOG(ERR, "free switch domain:%d fail: %d\n", ++ bp->switch_domain_id, rc); ++} ++ ++static void bnxt_ptp_get_current_time(void *arg) ++{ ++ struct bnxt *bp = arg; ++ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; ++ int rc; ++ ++ rc = is_bnxt_in_error(bp); ++ if (rc) ++ return; ++ ++ if (!ptp) ++ return; ++ ++ bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_CURRENT_TIME, ++ &ptp->current_time); ++ ++ rc = rte_eal_alarm_set(US_PER_S, bnxt_ptp_get_current_time, (void *)bp); ++ if (rc != 0) { ++ PMD_DRV_LOG(ERR, "Failed to re-schedule PTP alarm\n"); ++ bp->flags &= ~BNXT_FLAGS_PTP_ALARM_SCHEDULED; ++ } ++} ++ ++static int bnxt_schedule_ptp_alarm(struct bnxt *bp) ++{ ++ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; ++ int rc; ++ ++ if (bp->flags & BNXT_FLAGS_PTP_ALARM_SCHEDULED) ++ return 0; ++ ++ bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_CURRENT_TIME, ++ &ptp->current_time); ++ ++ rc = rte_eal_alarm_set(US_PER_S, bnxt_ptp_get_current_time, (void *)bp); ++ return rc; ++} ++ ++static void bnxt_cancel_ptp_alarm(struct bnxt *bp) ++{ ++ if (bp->flags & BNXT_FLAGS_PTP_ALARM_SCHEDULED) { ++ rte_eal_alarm_cancel(bnxt_ptp_get_current_time, (void *)bp); ++ bp->flags &= ~BNXT_FLAGS_PTP_ALARM_SCHEDULED; + } + } + ++static void 
bnxt_ptp_stop(struct bnxt *bp) ++{ ++ bnxt_cancel_ptp_alarm(bp); ++ bp->flags &= ~BNXT_FLAGS_PTP_TIMESYNC_ENABLED; ++} ++ ++static int bnxt_ptp_start(struct bnxt *bp) ++{ ++ int rc; ++ ++ rc = bnxt_schedule_ptp_alarm(bp); ++ if (rc != 0) { ++ PMD_DRV_LOG(ERR, "Failed to schedule PTP alarm\n"); ++ } else { ++ bp->flags |= BNXT_FLAGS_PTP_TIMESYNC_ENABLED; ++ bp->flags |= BNXT_FLAGS_PTP_ALARM_SCHEDULED; ++ } ++ ++ return rc; ++} ++ + /* Unload the driver, release resources */ +-static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) ++int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) + { + struct bnxt *bp = eth_dev->data->dev_private; + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); +@@ -1324,6 +1477,9 @@ static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) + + bnxt_cancel_fw_health_check(bp); + ++ if (BNXT_THOR_PTP_TIMESYNC_ENABLED(bp)) ++ bnxt_cancel_ptp_alarm(bp); ++ + /* Do not bring link down during reset recovery */ + if (!is_bnxt_in_error(bp)) { + bnxt_dev_set_link_down_op(eth_dev); +@@ -1350,6 +1506,7 @@ static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) + bnxt_shutdown_nic(bp); + bnxt_hwrm_if_change(bp, false); + ++ bnxt_free_prev_ring_stats(bp); + rte_free(bp->mark_table); + bp->mark_table = NULL; + +@@ -1362,18 +1519,13 @@ static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) + return 0; + } + +-static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) ++int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) + { + struct bnxt *bp = eth_dev->data->dev_private; + uint64_t rx_offloads = eth_dev->data->dev_conf.rxmode.offloads; + int vlan_mask = 0; + int rc, retry_cnt = BNXT_IF_CHANGE_RETRY_COUNT; + +- if (!eth_dev->data->nb_tx_queues || !eth_dev->data->nb_rx_queues) { +- PMD_DRV_LOG(ERR, "Queues are not configured yet!\n"); +- return -EINVAL; +- } +- + if (bp->rx_cp_nr_rings > RTE_ETHDEV_QUEUE_STAT_CNTRS) + PMD_DRV_LOG(ERR, + "RxQ cnt %d > RTE_ETHDEV_QUEUE_STAT_CNTRS %d\n", +@@ -1404,6 +1556,10 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) + if (rc) + goto error; + ++ rc = bnxt_alloc_prev_ring_stats(bp); ++ if (rc) ++ goto error; ++ + eth_dev->data->dev_started = 1; + + bnxt_link_update_op(eth_dev, 1); +@@ -1426,6 +1582,9 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) + + bnxt_schedule_fw_health_check(bp); + ++ if (BNXT_THOR_PTP_TIMESYNC_ENABLED(bp)) ++ bnxt_schedule_ptp_alarm(bp); ++ + return 0; + + error: +@@ -1447,11 +1606,9 @@ bnxt_uninit_locks(struct bnxt *bp) + + static void bnxt_drv_uninit(struct bnxt *bp) + { +- bnxt_free_switch_domain(bp); + bnxt_free_leds_info(bp); + bnxt_free_cos_queues(bp); + bnxt_free_link_info(bp); +- bnxt_free_pf_info(bp); + bnxt_free_parent_info(bp); + bnxt_uninit_locks(bp); + +@@ -1460,7 +1617,8 @@ static void bnxt_drv_uninit(struct bnxt *bp) + rte_memzone_free((const struct rte_memzone *)bp->rx_mem_zone); + bp->rx_mem_zone = NULL; + +- bnxt_hwrm_free_vf_info(bp); ++ bnxt_free_vf_info(bp); ++ bnxt_free_pf_info(bp); + + rte_free(bp->grp_info); + bp->grp_info = NULL; +@@ -1478,6 +1636,7 @@ static int bnxt_dev_close_op(struct rte_eth_dev *eth_dev) + rte_eal_alarm_cancel(bnxt_dev_reset_and_resume, (void *)bp); + rte_eal_alarm_cancel(bnxt_dev_recover, (void *)bp); + bnxt_cancel_fc_thread(bp); ++ rte_eal_alarm_cancel(bnxt_handle_vf_cfg_change, (void *)bp); + + if (eth_dev->data->dev_started) + ret = bnxt_dev_stop_op(eth_dev); +@@ -1613,6 +1772,18 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete) + return rc; + + memset(&new, 0, sizeof(new)); ++ ++ if (bp->link_info == NULL) ++ 
goto out; ++ ++ /* Only single function PF can bring the phy down. ++ * In certain scenarios, device is not obliged link down even when forced. ++ * When port is stopped, report link down in those cases. ++ */ ++ if (!eth_dev->data->dev_started && ++ (!BNXT_SINGLE_PF(bp) || bnxt_force_link_config(bp))) ++ goto out; ++ + do { + /* Retrieve link info from hardware */ + rc = bnxt_get_hwrm_link_config(bp, &new); +@@ -1630,22 +1801,11 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete) + rte_delay_ms(BNXT_LINK_WAIT_INTERVAL); + } while (cnt--); + +- /* Only single function PF can bring phy down. +- * When port is stopped, report link down for VF/MH/NPAR functions. +- */ +- if (!BNXT_SINGLE_PF(bp) && !eth_dev->data->dev_started) +- memset(&new, 0, sizeof(new)); +- + out: + /* Timed out or success */ + if (new.link_status != eth_dev->data->dev_link.link_status || + new.link_speed != eth_dev->data->dev_link.link_speed) { + rte_eth_linkstatus_set(eth_dev, &new); +- +- rte_eth_dev_callback_process(eth_dev, +- RTE_ETH_EVENT_INTR_LSC, +- NULL); +- + bnxt_print_link_info(eth_dev); + } + +@@ -1871,7 +2031,6 @@ static int bnxt_reta_query_op(struct rte_eth_dev *eth_dev, + if (rc) + return rc; + +- /* Retrieve from the default VNIC */ + if (!vnic) + return -EINVAL; + if (!vnic->rss_table) +@@ -1932,11 +2091,6 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev, + return -EINVAL; + } + +- bp->flags |= BNXT_FLAG_UPDATE_HASH; +- memcpy(ð_dev->data->dev_conf.rx_adv_conf.rss_conf, +- rss_conf, +- sizeof(*rss_conf)); +- + /* Update the default RSS VNIC(s) */ + vnic = BNXT_GET_DEFAULT_VNIC(bp); + vnic->hash_type = bnxt_rte_to_hwrm_hash_types(rss_conf->rss_hf); +@@ -1944,6 +2098,9 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev, + bnxt_rte_to_hwrm_hash_level(bp, rss_conf->rss_hf, + ETH_RSS_LEVEL(rss_conf->rss_hf)); + ++ /* Cache the hash function */ ++ bp->rss_conf.rss_hf = rss_conf->rss_hf; ++ + /* + * If hashkey is not specified, use the previously configured + * hashkey +@@ -1953,11 +2110,15 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev, + + if (rss_conf->rss_key_len != HW_HASH_KEY_SIZE) { + PMD_DRV_LOG(ERR, +- "Invalid hashkey length, should be 16 bytes\n"); ++ "Invalid hashkey length, should be %d bytes\n", ++ HW_HASH_KEY_SIZE); + return -EINVAL; + } + memcpy(vnic->rss_hash_key, rss_conf->rss_key, rss_conf->rss_key_len); + ++ /* Cache the hash key */ ++ memcpy(bp->rss_conf.rss_key, rss_conf->rss_key, HW_HASH_KEY_SIZE); ++ + rss_config: + rc = bnxt_hwrm_vnic_rss_cfg(bp, vnic); + return rc; +@@ -2075,8 +2236,9 @@ static int bnxt_flow_ctrl_set_op(struct rte_eth_dev *dev, + if (rc) + return rc; + +- if (!BNXT_SINGLE_PF(bp) || BNXT_VF(bp)) { +- PMD_DRV_LOG(ERR, "Flow Control Settings cannot be modified\n"); ++ if (!BNXT_SINGLE_PF(bp)) { ++ PMD_DRV_LOG(ERR, ++ "Flow Control Settings cannot be modified on VF or on shared PF\n"); + return -ENOTSUP; + } + +@@ -2151,7 +2313,6 @@ bnxt_udp_tunnel_port_add_op(struct rte_eth_dev *eth_dev, + } + tunnel_type = + HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_VXLAN; +- bp->vxlan_port_cnt++; + break; + case RTE_TUNNEL_TYPE_GENEVE: + if (bp->geneve_port_cnt) { +@@ -2166,7 +2327,6 @@ bnxt_udp_tunnel_port_add_op(struct rte_eth_dev *eth_dev, + } + tunnel_type = + HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_GENEVE; +- bp->geneve_port_cnt++; + break; + default: + PMD_DRV_LOG(ERR, "Tunnel type is not supported\n"); +@@ -2174,6 +2334,18 @@ bnxt_udp_tunnel_port_add_op(struct rte_eth_dev *eth_dev, + } + rc = 
bnxt_hwrm_tunnel_dst_port_alloc(bp, udp_tunnel->udp_port, + tunnel_type); ++ ++ if (rc != 0) ++ return rc; ++ ++ if (tunnel_type == ++ HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_VXLAN) ++ bp->vxlan_port_cnt++; ++ ++ if (tunnel_type == ++ HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_GENEVE) ++ bp->geneve_port_cnt++; ++ + return rc; + } + +@@ -2425,6 +2597,10 @@ static int bnxt_free_one_vnic(struct bnxt *bp, uint16_t vnic_id) + } + bnxt_del_dflt_mac_filter(bp, vnic); + ++ rc = bnxt_hwrm_vnic_ctx_free(bp, vnic); ++ if (rc) ++ return rc; ++ + rc = bnxt_hwrm_vnic_free(bp, vnic); + if (rc) + return rc; +@@ -2622,9 +2798,8 @@ bnxt_dev_set_mc_addr_list_op(struct rte_eth_dev *eth_dev, + uint32_t nb_mc_addr) + { + struct bnxt *bp = eth_dev->data->dev_private; +- char *mc_addr_list = (char *)mc_addr_set; + struct bnxt_vnic_info *vnic; +- uint32_t off = 0, i = 0; ++ uint32_t i = 0; + int rc; + + rc = is_bnxt_in_error(bp); +@@ -2633,6 +2808,8 @@ bnxt_dev_set_mc_addr_list_op(struct rte_eth_dev *eth_dev, + + vnic = BNXT_GET_DEFAULT_VNIC(bp); + ++ bp->nb_mc_addr = nb_mc_addr; ++ + if (nb_mc_addr > BNXT_MAX_MC_ADDRS) { + vnic->flags |= BNXT_VNIC_INFO_ALLMULTI; + goto allmulti; +@@ -2640,14 +2817,10 @@ bnxt_dev_set_mc_addr_list_op(struct rte_eth_dev *eth_dev, + + /* TODO Check for Duplicate mcast addresses */ + vnic->flags &= ~BNXT_VNIC_INFO_ALLMULTI; +- for (i = 0; i < nb_mc_addr; i++) { +- memcpy(vnic->mc_list + off, &mc_addr_list[i], +- RTE_ETHER_ADDR_LEN); +- off += RTE_ETHER_ADDR_LEN; +- } ++ for (i = 0; i < nb_mc_addr; i++) ++ rte_ether_addr_copy(&mc_addr_set[i], &bp->mcast_addr_list[i]); + +- vnic->mc_addr_cnt = i; +- if (vnic->mc_addr_cnt) ++ if (bp->nb_mc_addr) + vnic->flags |= BNXT_VNIC_INFO_MCAST; + else + vnic->flags &= ~BNXT_VNIC_INFO_MCAST; +@@ -2668,9 +2841,11 @@ bnxt_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) + + ret = snprintf(fw_version, fw_size, "%d.%d.%d.%d", + fw_major, fw_minor, fw_updt, fw_rsvd); ++ if (ret < 0) ++ return -EINVAL; + + ret += 1; /* add the size of '\0' */ +- if (fw_size < (uint32_t)ret) ++ if (fw_size < (size_t)ret) + return ret; + else + return 0; +@@ -2866,9 +3041,8 @@ bnxt_vlan_pvid_set_op(struct rte_eth_dev *dev, uint16_t pvid, int on) + if (rc) + return rc; + +- if (!BNXT_SINGLE_PF(bp) || BNXT_VF(bp)) { +- PMD_DRV_LOG(ERR, +- "PVID cannot be modified for this function\n"); ++ if (!BNXT_SINGLE_PF(bp)) { ++ PMD_DRV_LOG(ERR, "PVID cannot be modified on VF or on shared PF\n"); + return -ENOTSUP; + } + bp->vlan = on ? 
pvid : 0; +@@ -2909,8 +3083,8 @@ static uint32_t + bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id) + { + struct bnxt *bp = (struct bnxt *)dev->data->dev_private; +- uint32_t desc = 0, raw_cons = 0, cons; + struct bnxt_cp_ring_info *cpr; ++ uint32_t desc = 0, raw_cons, cp_ring_size; + struct bnxt_rx_queue *rxq; + struct rx_pkt_cmpl *rxcmp; + int rc; +@@ -2922,17 +3096,46 @@ bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id) + rxq = dev->data->rx_queues[rx_queue_id]; + cpr = rxq->cp_ring; + raw_cons = cpr->cp_raw_cons; ++ cp_ring_size = cpr->cp_ring_struct->ring_size; + + while (1) { ++ uint32_t agg_cnt, cons, cmpl_type; ++ + cons = RING_CMP(cpr->cp_ring_struct, raw_cons); +- rte_prefetch0(&cpr->cp_desc_ring[cons]); + rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons]; + +- if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct)) { ++ if (!bnxt_cpr_cmp_valid(rxcmp, raw_cons, cp_ring_size)) + break; +- } else { +- raw_cons++; ++ ++ cmpl_type = CMP_TYPE(rxcmp); ++ ++ switch (cmpl_type) { ++ case CMPL_BASE_TYPE_RX_L2: ++ case CMPL_BASE_TYPE_RX_L2_V2: ++ agg_cnt = BNXT_RX_L2_AGG_BUFS(rxcmp); ++ raw_cons = raw_cons + CMP_LEN(cmpl_type) + agg_cnt; ++ desc++; ++ break; ++ ++ case CMPL_BASE_TYPE_RX_TPA_END: ++ if (BNXT_CHIP_THOR(rxq->bp)) { ++ struct rx_tpa_v2_end_cmpl_hi *p5_tpa_end; ++ ++ p5_tpa_end = (void *)rxcmp; ++ agg_cnt = BNXT_TPA_END_AGG_BUFS_TH(p5_tpa_end); ++ } else { ++ struct rx_tpa_end_cmpl *tpa_end; ++ ++ tpa_end = (void *)rxcmp; ++ agg_cnt = BNXT_TPA_END_AGG_BUFS(tpa_end); ++ } ++ ++ raw_cons = raw_cons + CMP_LEN(cmpl_type) + agg_cnt; + desc++; ++ break; ++ ++ default: ++ raw_cons += CMP_LEN(cmpl_type); + } + } + +@@ -2942,42 +3145,110 @@ bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id) + static int + bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset) + { +- struct bnxt_rx_queue *rxq = (struct bnxt_rx_queue *)rx_queue; +- struct bnxt_rx_ring_info *rxr; ++ struct bnxt_rx_queue *rxq = rx_queue; + struct bnxt_cp_ring_info *cpr; +- struct rte_mbuf *rx_buf; ++ struct bnxt_rx_ring_info *rxr; ++ uint32_t desc, raw_cons, cp_ring_size; ++ struct bnxt *bp = rxq->bp; + struct rx_pkt_cmpl *rxcmp; +- uint32_t cons, cp_cons; + int rc; + +- if (!rxq) +- return -EINVAL; +- +- rc = is_bnxt_in_error(rxq->bp); ++ rc = is_bnxt_in_error(bp); + if (rc) + return rc; + +- cpr = rxq->cp_ring; +- rxr = rxq->rx_ring; +- + if (offset >= rxq->nb_rx_desc) + return -EINVAL; + +- cons = RING_CMP(cpr->cp_ring_struct, offset); +- cp_cons = cpr->cp_raw_cons; +- rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons]; ++ rxr = rxq->rx_ring; ++ cpr = rxq->cp_ring; ++ cp_ring_size = cpr->cp_ring_struct->ring_size; + +- if (cons > cp_cons) { +- if (CMPL_VALID(rxcmp, cpr->valid)) +- return RTE_ETH_RX_DESC_DONE; +- } else { +- if (CMPL_VALID(rxcmp, !cpr->valid)) ++ /* ++ * For the vector receive case, the completion at the requested ++ * offset can be indexed directly. ++ */ ++#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) ++ if (bp->flags & BNXT_FLAG_RX_VECTOR_PKT_MODE) { ++ struct rx_pkt_cmpl *rxcmp; ++ uint32_t cons; ++ ++ /* Check status of completion descriptor. */ ++ raw_cons = cpr->cp_raw_cons + ++ offset * CMP_LEN(CMPL_BASE_TYPE_RX_L2); ++ cons = RING_CMP(cpr->cp_ring_struct, raw_cons); ++ rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons]; ++ ++ if (bnxt_cpr_cmp_valid(rxcmp, raw_cons, cp_ring_size)) + return RTE_ETH_RX_DESC_DONE; ++ ++ /* Check whether rx desc has an mbuf attached. 
*/ ++ cons = RING_CMP(rxr->rx_ring_struct, raw_cons / 2); ++ if (cons >= rxq->rxrearm_start && ++ cons < rxq->rxrearm_start + rxq->rxrearm_nb) { ++ return RTE_ETH_RX_DESC_UNAVAIL; ++ } ++ ++ return RTE_ETH_RX_DESC_AVAIL; + } +- rx_buf = rxr->rx_buf_ring[cons]; +- if (rx_buf == NULL || rx_buf == &rxq->fake_mbuf) +- return RTE_ETH_RX_DESC_UNAVAIL; ++#endif + ++ /* ++ * For the non-vector receive case, scan the completion ring to ++ * locate the completion descriptor for the requested offset. ++ */ ++ raw_cons = cpr->cp_raw_cons; ++ desc = 0; ++ while (1) { ++ uint32_t agg_cnt, cons, cmpl_type; ++ ++ cons = RING_CMP(cpr->cp_ring_struct, raw_cons); ++ rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons]; ++ ++ if (!bnxt_cpr_cmp_valid(rxcmp, raw_cons, cp_ring_size)) ++ break; ++ ++ cmpl_type = CMP_TYPE(rxcmp); ++ ++ switch (cmpl_type) { ++ case CMPL_BASE_TYPE_RX_L2: ++ case CMPL_BASE_TYPE_RX_L2_V2: ++ if (desc == offset) { ++ cons = rxcmp->opaque; ++ if (rxr->rx_buf_ring[cons]) ++ return RTE_ETH_RX_DESC_DONE; ++ else ++ return RTE_ETH_RX_DESC_UNAVAIL; ++ } ++ agg_cnt = BNXT_RX_L2_AGG_BUFS(rxcmp); ++ raw_cons = raw_cons + CMP_LEN(cmpl_type) + agg_cnt; ++ desc++; ++ break; ++ ++ case CMPL_BASE_TYPE_RX_TPA_END: ++ if (desc == offset) ++ return RTE_ETH_RX_DESC_DONE; ++ ++ if (BNXT_CHIP_THOR(rxq->bp)) { ++ struct rx_tpa_v2_end_cmpl_hi *p5_tpa_end; ++ ++ p5_tpa_end = (void *)rxcmp; ++ agg_cnt = BNXT_TPA_END_AGG_BUFS_TH(p5_tpa_end); ++ } else { ++ struct rx_tpa_end_cmpl *tpa_end; ++ ++ tpa_end = (void *)rxcmp; ++ agg_cnt = BNXT_TPA_END_AGG_BUFS(tpa_end); ++ } ++ ++ raw_cons = raw_cons + CMP_LEN(cmpl_type) + agg_cnt; ++ desc++; ++ break; ++ ++ default: ++ raw_cons += CMP_LEN(cmpl_type); ++ } ++ } + + return RTE_ETH_RX_DESC_AVAIL; + } +@@ -2986,41 +3257,47 @@ static int + bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset) + { + struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue; +- struct bnxt_tx_ring_info *txr; +- struct bnxt_cp_ring_info *cpr; +- struct bnxt_sw_tx_bd *tx_buf; +- struct tx_pkt_cmpl *txcmp; +- uint32_t cons, cp_cons; ++ struct bnxt_cp_ring_info *cpr = txq->cp_ring; ++ uint32_t ring_mask, raw_cons, nb_tx_pkts = 0; ++ struct cmpl_base *cp_desc_ring; + int rc; + +- if (!txq) +- return -EINVAL; +- + rc = is_bnxt_in_error(txq->bp); + if (rc) + return rc; + +- cpr = txq->cp_ring; +- txr = txq->tx_ring; +- + if (offset >= txq->nb_tx_desc) + return -EINVAL; + +- cons = RING_CMP(cpr->cp_ring_struct, offset); +- txcmp = (struct tx_pkt_cmpl *)&cpr->cp_desc_ring[cons]; +- cp_cons = cpr->cp_raw_cons; ++ /* Return "desc done" if descriptor is available for use. */ ++ if (bnxt_tx_bds_in_hw(txq) <= offset) ++ return RTE_ETH_TX_DESC_DONE; + +- if (cons > cp_cons) { +- if (CMPL_VALID(txcmp, cpr->valid)) +- return RTE_ETH_TX_DESC_UNAVAIL; +- } else { +- if (CMPL_VALID(txcmp, !cpr->valid)) +- return RTE_ETH_TX_DESC_UNAVAIL; ++ raw_cons = cpr->cp_raw_cons; ++ cp_desc_ring = cpr->cp_desc_ring; ++ ring_mask = cpr->cp_ring_struct->ring_mask; ++ ++ /* Check to see if hw has posted a completion for the descriptor. 
*/ ++ while (1) { ++ struct tx_cmpl *txcmp; ++ uint32_t cons; ++ ++ cons = RING_CMPL(ring_mask, raw_cons); ++ txcmp = (struct tx_cmpl *)&cp_desc_ring[cons]; ++ ++ if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1)) ++ break; ++ ++ if (CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2) ++ nb_tx_pkts += rte_le_to_cpu_32(txcmp->opaque); ++ ++ if (nb_tx_pkts > offset) ++ return RTE_ETH_TX_DESC_DONE; ++ ++ raw_cons = NEXT_RAW_CMP(raw_cons); + } +- tx_buf = &txr->tx_buf_ring[cons]; +- if (tx_buf->mbuf == NULL) +- return RTE_ETH_TX_DESC_DONE; + ++ /* Descriptor is pending transmit, not yet completed by hardware. */ + return RTE_ETH_TX_DESC_FULL; + } + +@@ -3176,19 +3453,49 @@ static int bnxt_get_tx_ts(struct bnxt *bp, uint64_t *ts) + ptp->tx_mapped_regs[BNXT_PTP_TX_TS_L])); + *ts |= (uint64_t)rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + + ptp->tx_mapped_regs[BNXT_PTP_TX_TS_H])) << 32; ++ rte_read32((uint8_t *)bp->bar0 + ptp->tx_mapped_regs[BNXT_PTP_TX_SEQ]); + + return 0; + } + +-static int bnxt_get_rx_ts(struct bnxt *bp, uint64_t *ts) ++static int bnxt_clr_rx_ts(struct bnxt *bp, uint64_t *last_ts) + { + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + struct bnxt_pf_info *pf = bp->pf; + uint16_t port_id; ++ int i = 0; + uint32_t fifo; + +- if (!ptp) +- return -ENODEV; ++ if (!ptp || (bp->flags & BNXT_FLAG_THOR_CHIP)) ++ return -EINVAL; ++ ++ port_id = pf->port_id; ++ fifo = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + ++ ptp->rx_mapped_regs[BNXT_PTP_RX_FIFO])); ++ while ((fifo & BNXT_PTP_RX_FIFO_PENDING) && (i < BNXT_PTP_RX_PND_CNT)) { ++ rte_write32(1 << port_id, (uint8_t *)bp->bar0 + ++ ptp->rx_mapped_regs[BNXT_PTP_RX_FIFO_ADV]); ++ fifo = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + ++ ptp->rx_mapped_regs[BNXT_PTP_RX_FIFO])); ++ *last_ts = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + ++ ptp->rx_mapped_regs[BNXT_PTP_RX_TS_L])); ++ *last_ts |= (uint64_t)rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + ++ ptp->rx_mapped_regs[BNXT_PTP_RX_TS_H])) << 32; ++ i++; ++ } ++ ++ if (i >= BNXT_PTP_RX_PND_CNT) ++ return -EBUSY; ++ ++ return 0; ++} ++ ++static int bnxt_get_rx_ts(struct bnxt *bp, uint64_t *ts) ++{ ++ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; ++ struct bnxt_pf_info *pf = bp->pf; ++ uint16_t port_id; ++ uint32_t fifo; + + fifo = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + + ptp->rx_mapped_regs[BNXT_PTP_RX_FIFO])); +@@ -3201,10 +3508,8 @@ static int bnxt_get_rx_ts(struct bnxt *bp, uint64_t *ts) + + fifo = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + + ptp->rx_mapped_regs[BNXT_PTP_RX_FIFO])); +- if (fifo & BNXT_PTP_RX_FIFO_PENDING) { +-/* bnxt_clr_rx_ts(bp); TBD */ +- return -EBUSY; +- } ++ if (fifo & BNXT_PTP_RX_FIFO_PENDING) ++ return bnxt_clr_rx_ts(bp, ts); + + *ts = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + + ptp->rx_mapped_regs[BNXT_PTP_RX_TS_L])); +@@ -3222,11 +3527,13 @@ bnxt_timesync_write_time(struct rte_eth_dev *dev, const struct timespec *ts) + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + + if (!ptp) +- return 0; ++ return -ENOTSUP; + + ns = rte_timespec_to_ns(ts); + /* Set the timecounters to a new value. 
*/ + ptp->tc.nsec = ns; ++ ptp->tx_tstamp_tc.nsec = ns; ++ ptp->rx_tstamp_tc.nsec = ns; + + return 0; + } +@@ -3240,7 +3547,7 @@ bnxt_timesync_read_time(struct rte_eth_dev *dev, struct timespec *ts) + int rc = 0; + + if (!ptp) +- return 0; ++ return -ENOTSUP; + + if (BNXT_CHIP_THOR(bp)) + rc = bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_CURRENT_TIME, +@@ -3262,7 +3569,7 @@ bnxt_timesync_enable(struct rte_eth_dev *dev) + int rc; + + if (!ptp) +- return 0; ++ return -ENOTSUP; + + ptp->rx_filter = 1; + ptp->tx_tstamp_en = 1; +@@ -3290,8 +3597,10 @@ bnxt_timesync_enable(struct rte_eth_dev *dev) + + if (!BNXT_CHIP_THOR(bp)) + bnxt_map_ptp_regs(bp); ++ else ++ rc = bnxt_ptp_start(bp); + +- return 0; ++ return rc; + } + + static int +@@ -3301,7 +3610,7 @@ bnxt_timesync_disable(struct rte_eth_dev *dev) + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + + if (!ptp) +- return 0; ++ return -ENOTSUP; + + ptp->rx_filter = 0; + ptp->tx_tstamp_en = 0; +@@ -3311,6 +3620,8 @@ bnxt_timesync_disable(struct rte_eth_dev *dev) + + if (!BNXT_CHIP_THOR(bp)) + bnxt_unmap_ptp_regs(bp); ++ else ++ bnxt_ptp_stop(bp); + + return 0; + } +@@ -3326,7 +3637,7 @@ bnxt_timesync_read_rx_timestamp(struct rte_eth_dev *dev, + uint64_t ns; + + if (!ptp) +- return 0; ++ return -ENOTSUP; + + if (BNXT_CHIP_THOR(bp)) + rx_tstamp_cycles = ptp->rx_timestamp; +@@ -3349,7 +3660,7 @@ bnxt_timesync_read_tx_timestamp(struct rte_eth_dev *dev, + int rc = 0; + + if (!ptp) +- return 0; ++ return -ENOTSUP; + + if (BNXT_CHIP_THOR(bp)) + rc = bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_PATH_TX, +@@ -3370,9 +3681,11 @@ bnxt_timesync_adjust_time(struct rte_eth_dev *dev, int64_t delta) + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + + if (!ptp) +- return 0; ++ return -ENOTSUP; + + ptp->tc.nsec += delta; ++ ptp->tx_tstamp_tc.nsec += delta; ++ ptp->rx_tstamp_tc.nsec += delta; + + return 0; + } +@@ -3651,13 +3964,19 @@ static void bnxt_write_fw_reset_reg(struct bnxt *bp, uint32_t index) + uint32_t val = info->reset_reg_val[index]; + uint32_t reg = info->reset_reg[index]; + uint32_t type, offset; ++ int ret; + + type = BNXT_FW_STATUS_REG_TYPE(reg); + offset = BNXT_FW_STATUS_REG_OFF(reg); + + switch (type) { + case BNXT_FW_STATUS_REG_TYPE_CFG: +- rte_pci_write_config(bp->pdev, &val, sizeof(val), offset); ++ ret = rte_pci_write_config(bp->pdev, &val, sizeof(val), offset); ++ if (ret < 0) { ++ PMD_DRV_LOG(ERR, "Failed to write %#x at PCI offset %#x", ++ val, offset); ++ return; ++ } + break; + case BNXT_FW_STATUS_REG_TYPE_GRC: + offset = bnxt_map_reset_regs(bp, offset); +@@ -3682,6 +4001,32 @@ static void bnxt_dev_cleanup(struct bnxt *bp) + bnxt_uninit_resources(bp, true); + } + ++static int ++bnxt_check_fw_reset_done(struct bnxt *bp) ++{ ++ int timeout = bp->fw_reset_max_msecs; ++ uint16_t val = 0; ++ int rc; ++ ++ do { ++ rc = rte_pci_read_config(bp->pdev, &val, sizeof(val), PCI_SUBSYSTEM_ID_OFFSET); ++ if (rc < 0) { ++ PMD_DRV_LOG(ERR, "Failed to read PCI offset 0x%x", PCI_SUBSYSTEM_ID_OFFSET); ++ return rc; ++ } ++ if (val != 0xffff) ++ break; ++ rte_delay_ms(1); ++ } while (timeout--); ++ ++ if (val == 0xffff) { ++ PMD_DRV_LOG(ERR, "Firmware reset aborted, PCI config space invalid\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + static int bnxt_restore_vlan_filters(struct bnxt *bp) + { + struct rte_eth_dev *dev = bp->eth_dev; +@@ -3712,7 +4057,7 @@ static int bnxt_restore_mac_filters(struct bnxt *bp) + struct rte_ether_addr *addr; + uint64_t pool_mask; + uint32_t pool = 0; +- uint16_t i; ++ uint32_t i; + int rc; + + if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp)) +@@ 
-3747,6 +4092,18 @@ static int bnxt_restore_mac_filters(struct bnxt *bp) + return 0; + } + ++static int bnxt_restore_mcast_mac_filters(struct bnxt *bp) ++{ ++ int ret = 0; ++ ++ ret = bnxt_dev_set_mc_addr_list_op(bp->eth_dev, bp->mcast_addr_list, ++ bp->nb_mc_addr); ++ if (ret) ++ PMD_DRV_LOG(ERR, "Failed to restore multicast MAC addreeses\n"); ++ ++ return ret; ++} ++ + static int bnxt_restore_filters(struct bnxt *bp) + { + struct rte_eth_dev *dev = bp->eth_dev; +@@ -3767,33 +4124,56 @@ static int bnxt_restore_filters(struct bnxt *bp) + if (ret) + return ret; + ++ /* if vlans are already programmed, this can fail with -EEXIST */ + ret = bnxt_restore_vlan_filters(bp); +- /* TODO restore other filters as well */ ++ if (ret && ret != -EEXIST) ++ return ret; ++ ++ ret = bnxt_restore_mcast_mac_filters(bp); ++ if (ret) ++ return ret; ++ + return ret; + } + +-static void bnxt_dev_recover(void *arg) ++static int bnxt_check_fw_ready(struct bnxt *bp) + { +- struct bnxt *bp = arg; +- int timeout = bp->fw_reset_max_msecs; ++ int timeout = bp->fw_reset_max_msecs ? : BNXT_MAX_FW_RESET_TIMEOUT; + int rc = 0; + +- /* Clear Error flag so that device re-init should happen */ +- bp->flags &= ~BNXT_FLAG_FATAL_ERROR; +- + do { +- rc = bnxt_hwrm_ver_get(bp, SHORT_HWRM_CMD_TIMEOUT); ++ rc = bnxt_hwrm_poll_ver_get(bp); + if (rc == 0) + break; + rte_delay_ms(BNXT_FW_READY_WAIT_INTERVAL); + timeout -= BNXT_FW_READY_WAIT_INTERVAL; +- } while (rc && timeout); ++ } while (rc && timeout > 0); + +- if (rc) { ++ if (rc) + PMD_DRV_LOG(ERR, "FW is not Ready after reset\n"); +- goto err; ++ ++ return rc; ++} ++ ++static void bnxt_dev_recover(void *arg) ++{ ++ struct bnxt *bp = arg; ++ int rc = 0; ++ ++ ++ if (!bp->fw_reset_min_msecs) { ++ rc = bnxt_check_fw_reset_done(bp); ++ if (rc) ++ goto err; + } + ++ /* Clear Error flag so that device re-init should happen */ ++ bp->flags &= ~BNXT_FLAG_FATAL_ERROR; ++ ++ rc = bnxt_check_fw_ready(bp); ++ if (rc) ++ goto err; ++ + rc = bnxt_init_resources(bp, true); + if (rc) { + PMD_DRV_LOG(ERR, +@@ -3820,20 +4200,43 @@ static void bnxt_dev_recover(void *arg) + err: + bp->flags |= BNXT_FLAG_FATAL_ERROR; + bnxt_uninit_resources(bp, false); ++ if (bp->eth_dev->data->dev_conf.intr_conf.rmv) ++ rte_eth_dev_callback_process(bp->eth_dev, ++ RTE_ETH_EVENT_INTR_RMV, ++ NULL); + PMD_DRV_LOG(ERR, "Failed to recover from FW reset\n"); + } + + void bnxt_dev_reset_and_resume(void *arg) + { + struct bnxt *bp = arg; ++ uint32_t us = US_PER_MS * bp->fw_reset_min_msecs; ++ uint16_t val = 0; + int rc; + + bnxt_dev_cleanup(bp); + + bnxt_wait_for_device_shutdown(bp); + +- rc = rte_eal_alarm_set(US_PER_MS * bp->fw_reset_min_msecs, +- bnxt_dev_recover, (void *)bp); ++ /* During some fatal firmware error conditions, the PCI config space ++ * register 0x2e which normally contains the subsystem ID will become ++ * 0xffff. This register will revert back to the normal value after ++ * the chip has completed core reset. If we detect this condition, ++ * we can poll this config register immediately for the value to revert. 
++ */ ++ if (bp->flags & BNXT_FLAG_FATAL_ERROR) { ++ rc = rte_pci_read_config(bp->pdev, &val, sizeof(val), PCI_SUBSYSTEM_ID_OFFSET); ++ if (rc < 0) { ++ PMD_DRV_LOG(ERR, "Failed to read PCI offset 0x%x", PCI_SUBSYSTEM_ID_OFFSET); ++ return; ++ } ++ if (val == 0xffff) { ++ bp->fw_reset_min_msecs = 0; ++ us = 1; ++ } ++ } ++ ++ rc = rte_eal_alarm_set(us, bnxt_dev_recover, (void *)bp); + if (rc) + PMD_DRV_LOG(ERR, "Error setting recovery alarm"); + } +@@ -3843,13 +4246,17 @@ uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index) + struct bnxt_error_recovery_info *info = bp->recovery_info; + uint32_t reg = info->status_regs[index]; + uint32_t type, offset, val = 0; ++ int ret = 0; + + type = BNXT_FW_STATUS_REG_TYPE(reg); + offset = BNXT_FW_STATUS_REG_OFF(reg); + + switch (type) { + case BNXT_FW_STATUS_REG_TYPE_CFG: +- rte_pci_read_config(bp->pdev, &val, sizeof(val), offset); ++ ret = rte_pci_read_config(bp->pdev, &val, sizeof(val), offset); ++ if (ret < 0) ++ PMD_DRV_LOG(ERR, "Failed to read PCI offset %#x", ++ offset); + break; + case BNXT_FW_STATUS_REG_TYPE_GRC: + offset = info->mapped_status_regs[index]; +@@ -3955,6 +4362,8 @@ static void bnxt_check_fw_health(void *arg) + bp->flags |= BNXT_FLAG_FATAL_ERROR; + bp->flags |= BNXT_FLAG_FW_RESET; + ++ bnxt_stop_rxtx(bp); ++ + PMD_DRV_LOG(ERR, "Detected FW dead condition\n"); + + if (bnxt_is_master_func(bp)) +@@ -3990,9 +4399,6 @@ void bnxt_schedule_fw_health_check(struct bnxt *bp) + + static void bnxt_cancel_fw_health_check(struct bnxt *bp) + { +- if (!bnxt_is_recovery_enabled(bp)) +- return; +- + rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp); + bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED; + } +@@ -4107,7 +4513,7 @@ static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp, + if (!mz) { + mz = rte_memzone_reserve_aligned(mz_name, + rmem->nr_pages * 8, +- SOCKET_ID_ANY, ++ bp->eth_dev->device->numa_node, + RTE_MEMZONE_2MB | + RTE_MEMZONE_SIZE_HINT_ONLY | + RTE_MEMZONE_IOVA_CONTIG, +@@ -4130,7 +4536,7 @@ static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp, + if (!mz) { + mz = rte_memzone_reserve_aligned(mz_name, + mem_size, +- SOCKET_ID_ANY, ++ bp->eth_dev->device->numa_node, + RTE_MEMZONE_1GB | + RTE_MEMZONE_SIZE_HINT_ONLY | + RTE_MEMZONE_IOVA_CONTIG, +@@ -4386,11 +4792,15 @@ static int bnxt_alloc_stats_mem(struct bnxt *bp) + static int bnxt_setup_mac_addr(struct rte_eth_dev *eth_dev) + { + struct bnxt *bp = eth_dev->data->dev_private; ++ size_t max_mac_addr = RTE_MIN(bp->max_l2_ctx, ETH_NUM_RECEIVE_MAC_ADDR); + int rc = 0; + ++ if (bp->max_l2_ctx > ETH_NUM_RECEIVE_MAC_ADDR) ++ PMD_DRV_LOG(INFO, "Max number of MAC addrs supported is %d, but will be limited to %d\n", ++ bp->max_l2_ctx, ETH_NUM_RECEIVE_MAC_ADDR); ++ + eth_dev->data->mac_addrs = rte_zmalloc("bnxt_mac_addr_tbl", +- RTE_ETHER_ADDR_LEN * +- bp->max_l2_ctx, ++ RTE_ETHER_ADDR_LEN * max_mac_addr, + 0); + if (eth_dev->data->mac_addrs == NULL) { + PMD_DRV_LOG(ERR, "Failed to alloc MAC addr tbl\n"); +@@ -4417,6 +4827,23 @@ static int bnxt_setup_mac_addr(struct rte_eth_dev *eth_dev) + /* Copy the permanent MAC from the FUNC_QCAPS response */ + memcpy(ð_dev->data->mac_addrs[0], bp->mac_addr, RTE_ETHER_ADDR_LEN); + ++ /* ++ * Allocate memory to hold multicast mac addresses added. 
++ * Used to restore them during reset recovery ++ */ ++ bp->mcast_addr_list = rte_zmalloc("bnxt_mcast_addr_tbl", ++ sizeof(struct rte_ether_addr) * ++ BNXT_MAX_MC_ADDRS, 0); ++ if (bp->mcast_addr_list == NULL) { ++ PMD_DRV_LOG(ERR, "Failed to allocate multicast addr table\n"); ++ return -ENOMEM; ++ } ++ bp->mc_list_dma_addr = rte_malloc_virt2iova(bp->mcast_addr_list); ++ if (bp->mc_list_dma_addr == RTE_BAD_IOVA) { ++ PMD_DRV_LOG(ERR, "Fail to map mcast_addr_list to physical memory\n"); ++ return -ENOMEM; ++ } ++ + return rc; + } + +@@ -4763,6 +5190,25 @@ bnxt_init_locks(struct bnxt *bp) + return err; + } + ++/* This should be called after we have queried trusted VF cap */ ++static int bnxt_alloc_switch_domain(struct bnxt *bp) ++{ ++ int rc = 0; ++ ++ if (BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp)) { ++ rc = rte_eth_switch_domain_alloc(&bp->switch_domain_id); ++ if (rc) ++ PMD_DRV_LOG(ERR, ++ "Failed to alloc switch domain: %d\n", rc); ++ else ++ PMD_DRV_LOG(INFO, ++ "Switch domain allocated %d\n", ++ bp->switch_domain_id); ++ } ++ ++ return rc; ++} ++ + static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev) + { + int rc = 0; +@@ -4771,6 +5217,10 @@ static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev) + if (rc) + return rc; + ++ rc = bnxt_alloc_switch_domain(bp); ++ if (rc) ++ return rc; ++ + if (!reconfig_dev) { + rc = bnxt_setup_mac_addr(bp->eth_dev); + if (rc) +@@ -4806,6 +5256,16 @@ static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev) + } + } + ++ if (!reconfig_dev) { ++ bp->rss_conf.rss_key = rte_zmalloc("bnxt_rss_key", ++ HW_HASH_KEY_SIZE, 0); ++ if (bp->rss_conf.rss_key == NULL) { ++ PMD_DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory", ++ bp->eth_dev->data->port_id); ++ return -ENOMEM; ++ } ++ } ++ + rc = bnxt_alloc_mem(bp, reconfig_dev); + if (rc) + return rc; +@@ -5160,58 +5620,49 @@ bnxt_parse_devarg_rep_fc_f2r(__rte_unused const char *key, + return 0; + } + +-static void ++static int + bnxt_parse_dev_args(struct bnxt *bp, struct rte_devargs *devargs) + { + struct rte_kvargs *kvlist; ++ int ret; + + if (devargs == NULL) +- return; ++ return 0; + + kvlist = rte_kvargs_parse(devargs->args, bnxt_dev_args); + if (kvlist == NULL) +- return; ++ return -EINVAL; + + /* + * Handler for "truflow" devarg. + * Invoked as for ex: "-a 0000:00:0d.0,host-based-truflow=1" + */ +- rte_kvargs_process(kvlist, BNXT_DEVARG_TRUFLOW, +- bnxt_parse_devarg_truflow, bp); ++ ret = rte_kvargs_process(kvlist, BNXT_DEVARG_TRUFLOW, ++ bnxt_parse_devarg_truflow, bp); ++ if (ret) ++ goto err; + + /* + * Handler for "flow_xstat" devarg. + * Invoked as for ex: "-a 0000:00:0d.0,flow_xstat=1" + */ +- rte_kvargs_process(kvlist, BNXT_DEVARG_FLOW_XSTAT, +- bnxt_parse_devarg_flow_xstat, bp); ++ ret = rte_kvargs_process(kvlist, BNXT_DEVARG_FLOW_XSTAT, ++ bnxt_parse_devarg_flow_xstat, bp); ++ if (ret) ++ goto err; + + /* + * Handler for "max_num_kflows" devarg. 
+ * Invoked as for ex: "-a 000:00:0d.0,max_num_kflows=32" + */ +- rte_kvargs_process(kvlist, BNXT_DEVARG_MAX_NUM_KFLOWS, +- bnxt_parse_devarg_max_num_kflows, bp); ++ ret = rte_kvargs_process(kvlist, BNXT_DEVARG_MAX_NUM_KFLOWS, ++ bnxt_parse_devarg_max_num_kflows, bp); ++ if (ret) ++ goto err; + ++err: + rte_kvargs_free(kvlist); +-} +- +-static int bnxt_alloc_switch_domain(struct bnxt *bp) +-{ +- int rc = 0; +- +- if (BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp)) { +- rc = rte_eth_switch_domain_alloc(&bp->switch_domain_id); +- if (rc) +- PMD_DRV_LOG(ERR, +- "Failed to alloc switch domain: %d\n", rc); +- else +- PMD_DRV_LOG(INFO, +- "Switch domain allocated %d\n", +- bp->switch_domain_id); +- } +- +- return rc; ++ return ret; + } + + /* Allocate and initialize various fields in bnxt struct that +@@ -5275,7 +5726,7 @@ static int bnxt_drv_init(struct rte_eth_dev *eth_dev) + rc = bnxt_alloc_hwrm_resources(bp); + if (rc) { + PMD_DRV_LOG(ERR, +- "Failed to allocate hwrm resource rc: %x\n", rc); ++ "Failed to allocate response buffer rc: %x\n", rc); + return rc; + } + rc = bnxt_alloc_leds_info(bp); +@@ -5290,10 +5741,6 @@ static int bnxt_drv_init(struct rte_eth_dev *eth_dev) + if (rc) + return rc; + +- rc = bnxt_alloc_switch_domain(bp); +- if (rc) +- return rc; +- + return rc; + } + +@@ -5328,7 +5775,9 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev, void *params __rte_unused) + bp = eth_dev->data->dev_private; + + /* Parse dev arguments passed on when starting the DPDK application. */ +- bnxt_parse_dev_args(bp, pci_dev->device.devargs); ++ rc = bnxt_parse_dev_args(bp, pci_dev->device.devargs); ++ if (rc) ++ goto error_free; + + rc = bnxt_drv_init(eth_dev); + if (rc) +@@ -5343,7 +5792,8 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev, void *params __rte_unused) + goto error_free; + + PMD_DRV_LOG(INFO, +- DRV_MODULE_NAME "found at mem %" PRIX64 ", node addr %pM\n", ++ "Found %s device at mem %" PRIX64 ", node addr %pM\n", ++ DRV_MODULE_NAME, + pci_dev->mem_resource[0].phys_addr, + pci_dev->mem_resource[0].addr); + +@@ -5432,19 +5882,28 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev) + bnxt_free_mem(bp, reconfig_dev); + + bnxt_hwrm_func_buf_unrgtr(bp); +- rte_free(bp->pf->vf_req_buf); ++ if (bp->pf != NULL) { ++ rte_free(bp->pf->vf_req_buf); ++ bp->pf->vf_req_buf = NULL; ++ } + +- rc = bnxt_hwrm_func_driver_unregister(bp, 0); ++ rc = bnxt_hwrm_func_driver_unregister(bp); + bp->flags &= ~BNXT_FLAG_REGISTERED; + bnxt_free_ctx_mem(bp); + if (!reconfig_dev) { + bnxt_free_hwrm_resources(bp); + bnxt_free_error_recovery_info(bp); ++ rte_free(bp->mcast_addr_list); ++ bp->mcast_addr_list = NULL; ++ rte_free(bp->rss_conf.rss_key); ++ bp->rss_conf.rss_key = NULL; + } + + bnxt_uninit_ctx_mem(bp); + + bnxt_free_flow_stats_info(bp); ++ if (bp->rep_info != NULL) ++ bnxt_free_switch_domain(bp); + bnxt_free_rep_info(bp); + rte_free(bp->ptp_cfg); + bp->ptp_cfg = NULL; +@@ -5792,6 +6251,7 @@ static int bnxt_pci_remove(struct rte_pci_device *pci_dev) + static struct rte_pci_driver bnxt_rte_pmd = { + .id_table = bnxt_pci_id_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | ++ RTE_PCI_DRV_INTR_RMV | + RTE_PCI_DRV_PROBE_AGAIN, /* Needed in case of VF-REPs + * and OVS-DPDK + */ +diff --git a/dpdk/drivers/net/bnxt/bnxt_filter.c b/dpdk/drivers/net/bnxt/bnxt_filter.c +index 6d85983242..5e3e5b8034 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_filter.c ++++ b/dpdk/drivers/net/bnxt/bnxt_filter.c +@@ -99,6 +99,8 @@ void bnxt_free_all_filters(struct bnxt *bp) + bnxt_filter_info, next); + 
STAILQ_INSERT_TAIL(&bp->free_filter_list, + filter, next); ++ if (filter->vnic) ++ filter->vnic = NULL; + filter = temp_filter; + } + STAILQ_INIT(&vnic->filter); +diff --git a/dpdk/drivers/net/bnxt/bnxt_flow.c b/dpdk/drivers/net/bnxt/bnxt_flow.c +index 127d51c45b..646d8dc54f 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_flow.c ++++ b/dpdk/drivers/net/bnxt/bnxt_flow.c +@@ -188,11 +188,15 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp, + PMD_DRV_LOG(DEBUG, "Parse inner header\n"); + break; + case RTE_FLOW_ITEM_TYPE_ETH: +- if (!item->spec || !item->mask) ++ if (!item->spec) + break; + + eth_spec = item->spec; +- eth_mask = item->mask; ++ ++ if (item->mask) ++ eth_mask = item->mask; ++ else ++ eth_mask = &rte_flow_item_eth_mask; + + /* Source MAC address mask cannot be partially set. + * Should be All 0's or all 1's. +@@ -281,7 +285,12 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp, + break; + case RTE_FLOW_ITEM_TYPE_VLAN: + vlan_spec = item->spec; +- vlan_mask = item->mask; ++ ++ if (item->mask) ++ vlan_mask = item->mask; ++ else ++ vlan_mask = &rte_flow_item_vlan_mask; ++ + if (en & en_ethertype) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, +@@ -324,11 +333,15 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp, + case RTE_FLOW_ITEM_TYPE_IPV4: + /* If mask is not involved, we could use EM filters. */ + ipv4_spec = item->spec; +- ipv4_mask = item->mask; + +- if (!item->spec || !item->mask) ++ if (!item->spec) + break; + ++ if (item->mask) ++ ipv4_mask = item->mask; ++ else ++ ipv4_mask = &rte_flow_item_ipv4_mask; ++ + /* Only IP DST and SRC fields are maskable. */ + if (ipv4_mask->hdr.version_ihl || + ipv4_mask->hdr.type_of_service || +@@ -385,11 +398,15 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp, + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + ipv6_spec = item->spec; +- ipv6_mask = item->mask; + +- if (!item->spec || !item->mask) ++ if (!item->spec) + break; + ++ if (item->mask) ++ ipv6_mask = item->mask; ++ else ++ ipv6_mask = &rte_flow_item_ipv6_mask; ++ + /* Only IP DST and SRC fields are maskable. */ + if (ipv6_mask->hdr.vtc_flow || + ipv6_mask->hdr.payload_len || +@@ -437,11 +454,15 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp, + break; + case RTE_FLOW_ITEM_TYPE_TCP: + tcp_spec = item->spec; +- tcp_mask = item->mask; + +- if (!item->spec || !item->mask) ++ if (!item->spec) + break; + ++ if (item->mask) ++ tcp_mask = item->mask; ++ else ++ tcp_mask = &rte_flow_item_tcp_mask; ++ + /* Check TCP mask. 
Only DST & SRC ports are maskable */ + if (tcp_mask->hdr.sent_seq || + tcp_mask->hdr.recv_ack || +@@ -482,11 +503,15 @@ bnxt_validate_and_parse_flow_type(struct bnxt *bp, + break; + case RTE_FLOW_ITEM_TYPE_UDP: + udp_spec = item->spec; +- udp_mask = item->mask; + +- if (!item->spec || !item->mask) ++ if (!item->spec) + break; + ++ if (item->mask) ++ udp_mask = item->mask; ++ else ++ udp_mask = &rte_flow_item_udp_mask; ++ + if (udp_mask->hdr.dgram_len || + udp_mask->hdr.dgram_cksum) { + rte_flow_error_set(error, +@@ -894,33 +919,59 @@ bnxt_get_l2_filter(struct bnxt *bp, struct bnxt_filter_info *nf, + return l2_filter; + } + +-static int bnxt_vnic_prep(struct bnxt *bp, struct bnxt_vnic_info *vnic) ++static void bnxt_vnic_cleanup(struct bnxt *bp, struct bnxt_vnic_info *vnic) ++{ ++ if (vnic->rx_queue_cnt > 1) ++ bnxt_hwrm_vnic_ctx_free(bp, vnic); ++ ++ bnxt_hwrm_vnic_free(bp, vnic); ++ ++ rte_free(vnic->fw_grp_ids); ++ vnic->fw_grp_ids = NULL; ++ ++ vnic->rx_queue_cnt = 0; ++} ++ ++static int bnxt_vnic_prep(struct bnxt *bp, struct bnxt_vnic_info *vnic, ++ const struct rte_flow_action *act, ++ struct rte_flow_error *error) + { + struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf; + uint64_t rx_offloads = dev_conf->rxmode.offloads; + int rc; + ++ if (bp->nr_vnics > bp->max_vnics - 1) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ATTR_GROUP, ++ NULL, ++ "Group id is invalid"); ++ + rc = bnxt_vnic_grp_alloc(bp, vnic); + if (rc) +- goto ret; ++ return rte_flow_error_set(error, -rc, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ act, ++ "Failed to alloc VNIC group"); + + rc = bnxt_hwrm_vnic_alloc(bp, vnic); + if (rc) { +- PMD_DRV_LOG(ERR, "HWRM vnic alloc failure rc: %x\n", rc); ++ rte_flow_error_set(error, -rc, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ act, ++ "Failed to alloc VNIC"); + goto ret; + } +- bp->nr_vnics++; + + /* RSS context is required only when there is more than one RSS ring */ + if (vnic->rx_queue_cnt > 1) { +- rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, 0 /* ctx_idx 0 */); ++ rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, 0); + if (rc) { +- PMD_DRV_LOG(ERR, +- "HWRM vnic ctx alloc failure: %x\n", rc); ++ rte_flow_error_set(error, -rc, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ act, ++ "Failed to alloc VNIC context"); + goto ret; + } +- } else { +- PMD_DRV_LOG(DEBUG, "No RSS context required\n"); + } + + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP) +@@ -929,12 +980,29 @@ static int bnxt_vnic_prep(struct bnxt *bp, struct bnxt_vnic_info *vnic) + vnic->vlan_strip = false; + + rc = bnxt_hwrm_vnic_cfg(bp, vnic); +- if (rc) ++ if (rc) { ++ rte_flow_error_set(error, -rc, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ act, ++ "Failed to configure VNIC"); + goto ret; ++ } + +- bnxt_hwrm_vnic_plcmode_cfg(bp, vnic); ++ rc = bnxt_hwrm_vnic_plcmode_cfg(bp, vnic); ++ if (rc) { ++ rte_flow_error_set(error, -rc, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ act, ++ "Failed to configure VNIC plcmode"); ++ goto ret; ++ } ++ ++ bp->nr_vnics++; ++ ++ return 0; + + ret: ++ bnxt_vnic_cleanup(bp, vnic); + return rc; + } + +@@ -1107,16 +1175,9 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev, + + PMD_DRV_LOG(DEBUG, "VNIC found\n"); + +- rc = bnxt_vnic_prep(bp, vnic); +- if (rc) { +- rte_flow_error_set(error, +- EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, +- act, +- "VNIC prep fail"); +- rc = -rte_errno; ++ rc = bnxt_vnic_prep(bp, vnic, act, error); ++ if (rc) + goto ret; +- } + + PMD_DRV_LOG(DEBUG, + "vnic[%d] = %p vnic->fw_grp_ids = %p\n", +@@ -1327,16 +1388,9 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev, + vnic->end_grp_id 
= rss->queue[rss->queue_num - 1]; + vnic->func_default = 0; //This is not a default VNIC. + +- rc = bnxt_vnic_prep(bp, vnic); +- if (rc) { +- rte_flow_error_set(error, +- EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, +- act, +- "VNIC prep fail"); +- rc = -rte_errno; ++ rc = bnxt_vnic_prep(bp, vnic, act, error); ++ if (rc) + goto ret; +- } + + PMD_DRV_LOG(DEBUG, + "vnic[%d] = %p vnic->fw_grp_ids = %p\n", +@@ -1372,8 +1426,8 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev, + /* If hash key has not been specified, + * use random hash key. + */ +- prandom_bytes(vnic->rss_hash_key, +- HW_HASH_KEY_SIZE); ++ bnxt_prandom_bytes(vnic->rss_hash_key, ++ HW_HASH_KEY_SIZE); + } else { + if (rss->key_len > HW_HASH_KEY_SIZE) + memcpy(vnic->rss_hash_key, +@@ -1501,9 +1555,11 @@ bnxt_flow_validate(struct rte_eth_dev *dev, + + filter = bnxt_get_unused_filter(bp); + if (filter == NULL) { +- PMD_DRV_LOG(ERR, "Not enough resources for a new flow.\n"); ++ rte_flow_error_set(error, ENOSPC, ++ RTE_FLOW_ERROR_TYPE_HANDLE, NULL, ++ "Not enough resources for a new flow"); + bnxt_release_flow_lock(bp); +- return -ENOMEM; ++ return -ENOSPC; + } + + ret = bnxt_validate_and_parse_flow(dev, pattern, actions, attr, +@@ -1514,10 +1570,8 @@ bnxt_flow_validate(struct rte_eth_dev *dev, + vnic = find_matching_vnic(bp, filter); + if (vnic) { + if (STAILQ_EMPTY(&vnic->filter)) { +- rte_free(vnic->fw_grp_ids); +- bnxt_hwrm_vnic_ctx_free(bp, vnic); +- bnxt_hwrm_vnic_free(bp, vnic); +- vnic->rx_queue_cnt = 0; ++ bnxt_vnic_cleanup(bp, vnic); ++ bp->nr_vnics--; + PMD_DRV_LOG(DEBUG, "Free VNIC\n"); + } + } +@@ -1902,12 +1956,20 @@ static int bnxt_handle_tunnel_redirect_destroy(struct bnxt *bp, + /* Tunnel doesn't belong to this VF, so don't send HWRM + * cmd, just delete the flow from driver + */ +- if (bp->fw_fid != (tun_dst_fid + bp->first_vf_id)) ++ if (bp->fw_fid != (tun_dst_fid + bp->first_vf_id)) { + PMD_DRV_LOG(ERR, + "Tunnel does not belong to this VF, skip hwrm_tunnel_redirect_free\n"); +- else ++ } else { + ret = bnxt_hwrm_tunnel_redirect_free(bp, + filter->tunnel_type); ++ if (ret) { ++ rte_flow_error_set(error, -ret, ++ RTE_FLOW_ERROR_TYPE_HANDLE, ++ NULL, ++ "Unable to free tunnel redirection"); ++ return ret; ++ } ++ } + } + return ret; + } +@@ -1970,12 +2032,8 @@ _bnxt_flow_destroy(struct bnxt *bp, + */ + if (vnic && !vnic->func_default && + STAILQ_EMPTY(&vnic->flow_list)) { +- rte_free(vnic->fw_grp_ids); +- if (vnic->rx_queue_cnt > 1) +- bnxt_hwrm_vnic_ctx_free(bp, vnic); +- +- bnxt_hwrm_vnic_free(bp, vnic); +- vnic->rx_queue_cnt = 0; ++ bnxt_vnic_cleanup(bp, vnic); ++ bp->nr_vnics--; + } + } else { + rte_flow_error_set(error, -ret, +diff --git a/dpdk/drivers/net/bnxt/bnxt_hwrm.c b/dpdk/drivers/net/bnxt/bnxt_hwrm.c +index 344895843b..8b50e44fcf 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_hwrm.c ++++ b/dpdk/drivers/net/bnxt/bnxt_hwrm.c +@@ -391,8 +391,8 @@ int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, + mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; + } else if (vnic->flags & BNXT_VNIC_INFO_MCAST) { + mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_MCAST; +- req.num_mc_entries = rte_cpu_to_le_32(vnic->mc_addr_cnt); +- req.mc_tbl_addr = rte_cpu_to_le_64(vnic->mc_list_dma_addr); ++ req.num_mc_entries = rte_cpu_to_le_32(bp->nb_mc_addr); ++ req.mc_tbl_addr = rte_cpu_to_le_64(bp->mc_list_dma_addr); + } + if (vlan_table) { + if (!(mask & HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_VLAN_NONVLAN)) +@@ -635,9 +635,13 @@ static int bnxt_hwrm_ptp_qcfg(struct bnxt *bp) + + HWRM_CHECK_RESULT(); + +- if (!BNXT_CHIP_THOR(bp) && +- 
!(resp->flags & HWRM_PORT_MAC_PTP_QCFG_OUTPUT_FLAGS_DIRECT_ACCESS)) +- return 0; ++ if (BNXT_CHIP_THOR(bp)) { ++ if (!(resp->flags & HWRM_PORT_MAC_PTP_QCFG_OUTPUT_FLAGS_HWRM_ACCESS)) ++ return 0; ++ } else { ++ if (!(resp->flags & HWRM_PORT_MAC_PTP_QCFG_OUTPUT_FLAGS_DIRECT_ACCESS)) ++ return 0; ++ } + + if (resp->flags & HWRM_PORT_MAC_PTP_QCFG_OUTPUT_FLAGS_ONE_STEP_TX_TS) + bp->flags |= BNXT_FLAG_FW_CAP_ONE_STEP_TX_TS; +@@ -673,10 +677,16 @@ static int bnxt_hwrm_ptp_qcfg(struct bnxt *bp) + return 0; + } + +-void bnxt_hwrm_free_vf_info(struct bnxt *bp) ++void bnxt_free_vf_info(struct bnxt *bp) + { + int i; + ++ if (bp->pf == NULL) ++ return; ++ ++ if (bp->pf->vf_info == NULL) ++ return; ++ + for (i = 0; i < bp->pf->max_vfs; i++) { + rte_free(bp->pf->vf_info[i].vlan_table); + bp->pf->vf_info[i].vlan_table = NULL; +@@ -687,6 +697,50 @@ void bnxt_hwrm_free_vf_info(struct bnxt *bp) + bp->pf->vf_info = NULL; + } + ++static int bnxt_alloc_vf_info(struct bnxt *bp, uint16_t max_vfs) ++{ ++ struct bnxt_child_vf_info *vf_info = bp->pf->vf_info; ++ int i; ++ ++ if (vf_info) ++ bnxt_free_vf_info(bp); ++ ++ vf_info = rte_zmalloc("bnxt_vf_info", sizeof(*vf_info) * max_vfs, 0); ++ if (vf_info == NULL) { ++ PMD_DRV_LOG(ERR, "Failed to alloc vf info\n"); ++ return -ENOMEM; ++ } ++ ++ bp->pf->max_vfs = max_vfs; ++ for (i = 0; i < max_vfs; i++) { ++ vf_info[i].fid = bp->pf->first_vf_id + i; ++ vf_info[i].vlan_table = rte_zmalloc("VF VLAN table", ++ getpagesize(), getpagesize()); ++ if (vf_info[i].vlan_table == NULL) { ++ PMD_DRV_LOG(ERR, "Failed to alloc VLAN table for VF %d\n", i); ++ goto err; ++ } ++ rte_mem_lock_page(vf_info[i].vlan_table); ++ ++ vf_info[i].vlan_as_table = rte_zmalloc("VF VLAN AS table", ++ getpagesize(), getpagesize()); ++ if (vf_info[i].vlan_as_table == NULL) { ++ PMD_DRV_LOG(ERR, "Failed to alloc VLAN AS table for VF %d\n", i); ++ goto err; ++ } ++ rte_mem_lock_page(vf_info[i].vlan_as_table); ++ ++ STAILQ_INIT(&vf_info[i].filter); ++ } ++ ++ bp->pf->vf_info = vf_info; ++ ++ return 0; ++err: ++ bnxt_free_vf_info(bp); ++ return -ENOMEM; ++} ++ + static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) + { + int rc = 0; +@@ -694,7 +748,6 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) + struct hwrm_func_qcaps_output *resp = bp->hwrm_cmd_resp_addr; + uint16_t new_max_vfs; + uint32_t flags; +- int i; + + HWRM_PREP(&req, HWRM_FUNC_QCAPS, BNXT_USE_CHIMP_MB); + +@@ -712,43 +765,9 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) + bp->pf->total_vfs = rte_le_to_cpu_16(resp->max_vfs); + new_max_vfs = bp->pdev->max_vfs; + if (new_max_vfs != bp->pf->max_vfs) { +- if (bp->pf->vf_info) +- bnxt_hwrm_free_vf_info(bp); +- bp->pf->vf_info = rte_zmalloc("bnxt_vf_info", +- sizeof(bp->pf->vf_info[0]) * new_max_vfs, 0); +- if (bp->pf->vf_info == NULL) { +- PMD_DRV_LOG(ERR, "Alloc vf info fail\n"); +- HWRM_UNLOCK(); +- return -ENOMEM; +- } +- bp->pf->max_vfs = new_max_vfs; +- for (i = 0; i < new_max_vfs; i++) { +- bp->pf->vf_info[i].fid = +- bp->pf->first_vf_id + i; +- bp->pf->vf_info[i].vlan_table = +- rte_zmalloc("VF VLAN table", +- getpagesize(), +- getpagesize()); +- if (bp->pf->vf_info[i].vlan_table == NULL) +- PMD_DRV_LOG(ERR, +- "Fail to alloc VLAN table for VF %d\n", +- i); +- else +- rte_mem_lock_page( +- bp->pf->vf_info[i].vlan_table); +- bp->pf->vf_info[i].vlan_as_table = +- rte_zmalloc("VF VLAN AS table", +- getpagesize(), +- getpagesize()); +- if (bp->pf->vf_info[i].vlan_as_table == NULL) +- PMD_DRV_LOG(ERR, +- "Alloc VLAN AS table for VF %d fail\n", +- i); +- else +- rte_mem_lock_page( +- 
bp->pf->vf_info[i].vlan_as_table); +- STAILQ_INIT(&bp->pf->vf_info[i].filter); +- } ++ rc = bnxt_alloc_vf_info(bp, new_max_vfs); ++ if (rc) ++ goto unlock; + } + } + +@@ -768,18 +787,12 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) + bp->max_l2_ctx = rte_le_to_cpu_16(resp->max_l2_ctxs); + if (!BNXT_CHIP_THOR(bp) && !bp->pdev->max_vfs) + bp->max_l2_ctx += bp->max_rx_em_flows; +- /* TODO: For now, do not support VMDq/RFS on VFs. */ +- if (BNXT_PF(bp)) { +- if (bp->pf->max_vfs) +- bp->max_vnics = 1; +- else +- bp->max_vnics = rte_le_to_cpu_16(resp->max_vnics); +- } else { +- bp->max_vnics = 1; +- } ++ bp->max_vnics = rte_le_to_cpu_16(resp->max_vnics); + PMD_DRV_LOG(DEBUG, "Max l2_cntxts is %d vnics is %d\n", + bp->max_l2_ctx, bp->max_vnics); + bp->max_stat_ctx = rte_le_to_cpu_16(resp->max_stat_ctx); ++ bp->max_mcast_addr = rte_le_to_cpu_32(resp->max_mcast_filters); ++ + if (BNXT_PF(bp)) { + bp->pf->total_vnics = rte_le_to_cpu_16(resp->max_vnics); + if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_PTP_SUPPORTED) { +@@ -807,6 +820,15 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) + if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_LINK_ADMIN_STATUS_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_LINK_ADMIN; + ++ if (!(flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_VLAN_ACCELERATION_TX_DISABLED)) { ++ bp->fw_cap |= BNXT_FW_CAP_VLAN_TX_INSERT; ++ PMD_DRV_LOG(DEBUG, "VLAN acceleration for TX is enabled\n"); ++ } ++ bp->tunnel_disable_flag = rte_le_to_cpu_16(resp->tunnel_disable_flag); ++ if (bp->tunnel_disable_flag) ++ PMD_DRV_LOG(DEBUG, "Tunnel parsing capability is disabled, flags : %#x\n", ++ bp->tunnel_disable_flag); ++unlock: + HWRM_UNLOCK(); + + return rc; +@@ -817,6 +839,9 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) + int rc; + + rc = __bnxt_hwrm_func_qcaps(bp); ++ if (rc == -ENOMEM) ++ return rc; ++ + if (!rc && bp->hwrm_spec_code >= HWRM_SPEC_CODE_1_8_3) { + rc = bnxt_alloc_ctx_mem(bp); + if (rc) +@@ -860,6 +885,11 @@ int bnxt_hwrm_vnic_qcaps(struct bnxt *bp) + if (flags & HWRM_VNIC_QCAPS_OUTPUT_FLAGS_OUTERMOST_RSS_CAP) + bp->vnic_cap_flags |= BNXT_VNIC_CAP_OUTER_RSS; + ++ ++ if (flags & HWRM_VNIC_QCAPS_OUTPUT_FLAGS_VLAN_STRIP_CAP) { ++ bp->vnic_cap_flags |= BNXT_VNIC_CAP_VLAN_RX_STRIP; ++ PMD_DRV_LOG(DEBUG, "Rx VLAN strip capability enabled\n"); ++ } + bp->max_tpa_v2 = rte_le_to_cpu_16(resp->max_aggs_supported); + + HWRM_UNLOCK(); +@@ -909,9 +939,9 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp) + HWRM_PREP(&req, HWRM_FUNC_DRV_RGTR, BNXT_USE_CHIMP_MB); + req.enables = rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER | + HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_ASYNC_EVENT_FWD); +- req.ver_maj = RTE_VER_YEAR; +- req.ver_min = RTE_VER_MONTH; +- req.ver_upd = RTE_VER_MINOR; ++ req.ver_maj_8b = RTE_VER_YEAR; ++ req.ver_min_8b = RTE_VER_MONTH; ++ req.ver_upd_8b = RTE_VER_MINOR; + + if (BNXT_PF(bp)) { + req.enables |= rte_cpu_to_le_32( +@@ -1101,9 +1131,9 @@ int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout) + resp->hwrm_intf_upd_8b, resp->hwrm_fw_maj_8b, + resp->hwrm_fw_min_8b, resp->hwrm_fw_bld_8b, + resp->hwrm_fw_rsvd_8b); +- bp->fw_ver = (resp->hwrm_fw_maj_8b << 24) | +- (resp->hwrm_fw_min_8b << 16) | +- (resp->hwrm_fw_bld_8b << 8) | ++ bp->fw_ver = ((uint32_t)resp->hwrm_fw_maj_8b << 24) | ++ ((uint32_t)resp->hwrm_fw_min_8b << 16) | ++ ((uint32_t)resp->hwrm_fw_bld_8b << 8) | + resp->hwrm_fw_rsvd_8b; + PMD_DRV_LOG(INFO, "Driver HWRM version: %d.%d.%d\n", + HWRM_VERSION_MAJOR, HWRM_VERSION_MINOR, HWRM_VERSION_UPDATE); +@@ -1129,6 +1159,7 @@ int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout) + if 
(bp->max_req_len > resp->max_req_win_len) { + PMD_DRV_LOG(ERR, "Unsupported request length\n"); + rc = -EINVAL; ++ goto error; + } + bp->max_req_len = rte_le_to_cpu_16(resp->max_req_win_len); + bp->hwrm_max_ext_req_len = rte_le_to_cpu_16(resp->max_ext_req_len); +@@ -1138,28 +1169,8 @@ int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout) + max_resp_len = rte_le_to_cpu_16(resp->max_resp_len); + dev_caps_cfg = rte_le_to_cpu_32(resp->dev_caps_cfg); + +- if (bp->max_resp_len != max_resp_len) { +- sprintf(type, "bnxt_hwrm_" PCI_PRI_FMT, +- bp->pdev->addr.domain, bp->pdev->addr.bus, +- bp->pdev->addr.devid, bp->pdev->addr.function); +- +- rte_free(bp->hwrm_cmd_resp_addr); +- +- bp->hwrm_cmd_resp_addr = rte_malloc(type, max_resp_len, 0); +- if (bp->hwrm_cmd_resp_addr == NULL) { +- rc = -ENOMEM; +- goto error; +- } +- bp->hwrm_cmd_resp_dma_addr = +- rte_malloc_virt2iova(bp->hwrm_cmd_resp_addr); +- if (bp->hwrm_cmd_resp_dma_addr == RTE_BAD_IOVA) { +- PMD_DRV_LOG(ERR, +- "Unable to map response buffer to physical memory.\n"); +- rc = -ENOMEM; +- goto error; +- } +- bp->max_resp_len = max_resp_len; +- } ++ RTE_VERIFY(max_resp_len <= bp->max_resp_len); ++ bp->max_resp_len = max_resp_len; + + if ((dev_caps_cfg & + HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_SHORT_CMD_SUPPORTED) && +@@ -1222,7 +1233,7 @@ int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout) + return rc; + } + +-int bnxt_hwrm_func_driver_unregister(struct bnxt *bp, uint32_t flags) ++int bnxt_hwrm_func_driver_unregister(struct bnxt *bp) + { + int rc; + struct hwrm_func_drv_unrgtr_input req = {.req_type = 0 }; +@@ -1232,7 +1243,6 @@ int bnxt_hwrm_func_driver_unregister(struct bnxt *bp, uint32_t flags) + return 0; + + HWRM_PREP(&req, HWRM_FUNC_DRV_UNRGTR, BNXT_USE_CHIMP_MB); +- req.flags = flags; + + rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB); + +@@ -1280,20 +1290,21 @@ static int bnxt_hwrm_port_phy_cfg(struct bnxt *bp, struct bnxt_link_info *conf) + } + } + /* AutoNeg - Advertise speeds specified. 
*/ +- if (conf->auto_link_speed_mask && ++ if ((conf->auto_link_speed_mask || conf->auto_pam4_link_speed_mask) && + !(conf->phy_flags & HWRM_PORT_PHY_CFG_INPUT_FLAGS_FORCE)) { + req.auto_mode = + HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_SPEED_MASK; +- req.auto_link_speed_mask = +- conf->auto_link_speed_mask; +- if (conf->auto_pam4_link_speeds) { ++ if (conf->auto_pam4_link_speed_mask) { + enables |= + HWRM_PORT_PHY_CFG_IN_EN_AUTO_PAM4_LINK_SPD_MASK; + req.auto_link_pam4_speed_mask = +- conf->auto_pam4_link_speeds; +- } else { ++ rte_cpu_to_le_16(conf->auto_pam4_link_speed_mask); ++ } ++ if (conf->auto_link_speed_mask) { + enables |= + HWRM_PORT_PHY_CFG_IN_EN_AUTO_LINK_SPEED_MASK; ++ req.auto_link_speed_mask = ++ rte_cpu_to_le_16(conf->auto_link_speed_mask); + } + } + if (conf->auto_link_speed && +@@ -1354,18 +1365,19 @@ static int bnxt_hwrm_port_phy_qcfg(struct bnxt *bp, + + link_info->support_speeds = rte_le_to_cpu_16(resp->support_speeds); + link_info->auto_link_speed = rte_le_to_cpu_16(resp->auto_link_speed); ++ link_info->auto_link_speed_mask = rte_le_to_cpu_16(resp->auto_link_speed_mask); + link_info->preemphasis = rte_le_to_cpu_32(resp->preemphasis); + link_info->force_link_speed = rte_le_to_cpu_16(resp->force_link_speed); + link_info->phy_ver[0] = resp->phy_maj; + link_info->phy_ver[1] = resp->phy_min; + link_info->phy_ver[2] = resp->phy_bld; + link_info->link_signal_mode = +- rte_le_to_cpu_16(resp->active_fec_signal_mode); ++ resp->active_fec_signal_mode & HWRM_PORT_PHY_QCFG_OUTPUT_SIGNAL_MODE_MASK; + link_info->force_pam4_link_speed = + rte_le_to_cpu_16(resp->force_pam4_link_speed); + link_info->support_pam4_speeds = + rte_le_to_cpu_16(resp->support_pam4_speeds); +- link_info->auto_pam4_link_speeds = ++ link_info->auto_pam4_link_speed_mask = + rte_le_to_cpu_16(resp->auto_pam4_link_speed_mask); + HWRM_UNLOCK(); + +@@ -1375,7 +1387,7 @@ static int bnxt_hwrm_port_phy_qcfg(struct bnxt *bp, + link_info->support_speeds, link_info->force_link_speed); + PMD_DRV_LOG(DEBUG, "Link Signal:%d,PAM::Auto:%x,Support:%x,Force:%x\n", + link_info->link_signal_mode, +- link_info->auto_pam4_link_speeds, ++ link_info->auto_pam4_link_speed_mask, + link_info->support_pam4_speeds, + link_info->force_pam4_link_speed); + return rc; +@@ -1395,7 +1407,7 @@ int bnxt_hwrm_port_phy_qcaps(struct bnxt *bp) + + rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB); + +- HWRM_CHECK_RESULT(); ++ HWRM_CHECK_RESULT_SILENT(); + + bp->port_cnt = resp->port_cnt; + if (resp->supported_speeds_auto_mode) +@@ -1407,6 +1419,12 @@ int bnxt_hwrm_port_phy_qcaps(struct bnxt *bp) + + HWRM_UNLOCK(); + ++ /* Older firmware does not have supported_auto_speeds, so assume ++ * that all supported speeds can be autonegotiated. 
++ */ ++ if (link_info->auto_link_speed_mask && !link_info->support_auto_speeds) ++ link_info->support_auto_speeds = link_info->support_speeds; ++ + return 0; + } + +@@ -1660,12 +1678,16 @@ int bnxt_hwrm_ring_free(struct bnxt *bp, + struct hwrm_ring_free_input req = {.req_type = 0 }; + struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr; + ++ if (ring->fw_ring_id == INVALID_HW_RING_ID) ++ return -EINVAL; ++ + HWRM_PREP(&req, HWRM_RING_FREE, BNXT_USE_CHIMP_MB); + + req.ring_type = ring_type; + req.ring_id = rte_cpu_to_le_16(ring->fw_ring_id); + + rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB); ++ ring->fw_ring_id = INVALID_HW_RING_ID; + + if (rc || resp->error_code) { + if (rc == 0 && resp->error_code) +@@ -1708,6 +1730,10 @@ int bnxt_hwrm_ring_grp_alloc(struct bnxt *bp, unsigned int idx) + struct hwrm_ring_grp_alloc_input req = {.req_type = 0 }; + struct hwrm_ring_grp_alloc_output *resp = bp->hwrm_cmd_resp_addr; + ++ /* Don't attempt to re-create the ring group if it is already created */ ++ if (bp->grp_info[idx].fw_grp_id != INVALID_HW_RING_ID) ++ return 0; ++ + HWRM_PREP(&req, HWRM_RING_GRP_ALLOC, BNXT_USE_CHIMP_MB); + + req.cr = rte_cpu_to_le_16(bp->grp_info[idx].cp_fw_ring_id); +@@ -1732,6 +1758,9 @@ int bnxt_hwrm_ring_grp_free(struct bnxt *bp, unsigned int idx) + struct hwrm_ring_grp_free_input req = {.req_type = 0 }; + struct hwrm_ring_grp_free_output *resp = bp->hwrm_cmd_resp_addr; + ++ if (bp->grp_info[idx].fw_grp_id == INVALID_HW_RING_ID) ++ return 0; ++ + HWRM_PREP(&req, HWRM_RING_GRP_FREE, BNXT_USE_CHIMP_MB); + + req.ring_group_id = rte_cpu_to_le_16(bp->grp_info[idx].fw_grp_id); +@@ -1751,7 +1780,7 @@ int bnxt_hwrm_stat_clear(struct bnxt *bp, struct bnxt_cp_ring_info *cpr) + struct hwrm_stat_ctx_clr_stats_input req = {.req_type = 0 }; + struct hwrm_stat_ctx_clr_stats_output *resp = bp->hwrm_cmd_resp_addr; + +- if (cpr->hw_stats_ctx_id == (uint32_t)HWRM_NA_SIGNATURE) ++ if (cpr->hw_stats_ctx_id == HWRM_NA_SIGNATURE) + return rc; + + HWRM_PREP(&req, HWRM_STAT_CTX_CLR_STATS, BNXT_USE_CHIMP_MB); +@@ -1766,13 +1795,15 @@ int bnxt_hwrm_stat_clear(struct bnxt *bp, struct bnxt_cp_ring_info *cpr) + return rc; + } + +-int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, +- unsigned int idx __rte_unused) ++static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp, struct bnxt_cp_ring_info *cpr) + { + int rc; + struct hwrm_stat_ctx_alloc_input req = {.req_type = 0 }; + struct hwrm_stat_ctx_alloc_output *resp = bp->hwrm_cmd_resp_addr; + ++ if (cpr->hw_stats_ctx_id != HWRM_NA_SIGNATURE) ++ return 0; ++ + HWRM_PREP(&req, HWRM_STAT_CTX_ALLOC, BNXT_USE_CHIMP_MB); + + req.update_period_ms = rte_cpu_to_le_32(0); +@@ -1790,13 +1821,15 @@ int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + return rc; + } + +-int bnxt_hwrm_stat_ctx_free(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, +- unsigned int idx __rte_unused) ++static int bnxt_hwrm_stat_ctx_free(struct bnxt *bp, struct bnxt_cp_ring_info *cpr) + { + int rc; + struct hwrm_stat_ctx_free_input req = {.req_type = 0 }; + struct hwrm_stat_ctx_free_output *resp = bp->hwrm_cmd_resp_addr; + ++ if (cpr->hw_stats_ctx_id == HWRM_NA_SIGNATURE) ++ return 0; ++ + HWRM_PREP(&req, HWRM_STAT_CTX_FREE, BNXT_USE_CHIMP_MB); + + req.stat_ctx_id = rte_cpu_to_le_32(cpr->hw_stats_ctx_id); +@@ -1806,6 +1839,8 @@ int bnxt_hwrm_stat_ctx_free(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, + HWRM_CHECK_RESULT(); + HWRM_UNLOCK(); + ++ cpr->hw_stats_ctx_id = HWRM_NA_SIGNATURE; ++ + return rc; + } + 
+@@ -1999,12 +2034,6 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic) + if (vnic->bd_stall) + req.flags |= + rte_cpu_to_le_32(HWRM_VNIC_CFG_INPUT_FLAGS_BD_STALL_MODE); +- if (vnic->roce_dual) +- req.flags |= rte_cpu_to_le_32( +- HWRM_VNIC_QCFG_OUTPUT_FLAGS_ROCE_DUAL_VNIC_MODE); +- if (vnic->roce_only) +- req.flags |= rte_cpu_to_le_32( +- HWRM_VNIC_QCFG_OUTPUT_FLAGS_ROCE_ONLY_VNIC_MODE); + if (vnic->rss_dflt_cr) + req.flags |= rte_cpu_to_le_32( + HWRM_VNIC_QCFG_OUTPUT_FLAGS_RSS_DFLT_CR_MODE); +@@ -2052,10 +2081,6 @@ int bnxt_hwrm_vnic_qcfg(struct bnxt *bp, struct bnxt_vnic_info *vnic, + HWRM_VNIC_QCFG_OUTPUT_FLAGS_VLAN_STRIP_MODE; + vnic->bd_stall = rte_le_to_cpu_32(resp->flags) & + HWRM_VNIC_QCFG_OUTPUT_FLAGS_BD_STALL_MODE; +- vnic->roce_dual = rte_le_to_cpu_32(resp->flags) & +- HWRM_VNIC_QCFG_OUTPUT_FLAGS_ROCE_DUAL_VNIC_MODE; +- vnic->roce_only = rte_le_to_cpu_32(resp->flags) & +- HWRM_VNIC_QCFG_OUTPUT_FLAGS_ROCE_ONLY_VNIC_MODE; + vnic->rss_dflt_cr = rte_le_to_cpu_32(resp->flags) & + HWRM_VNIC_QCFG_OUTPUT_FLAGS_RSS_DFLT_CR_MODE; + +@@ -2456,48 +2481,54 @@ bnxt_free_all_hwrm_stat_ctxs(struct bnxt *bp) + unsigned int i; + struct bnxt_cp_ring_info *cpr; + +- for (i = 0; i < bp->rx_cp_nr_rings + bp->tx_cp_nr_rings; i++) { ++ for (i = 0; i < bp->rx_cp_nr_rings; i++) { + +- if (i >= bp->rx_cp_nr_rings) { +- cpr = bp->tx_queues[i - bp->rx_cp_nr_rings]->cp_ring; +- } else { +- cpr = bp->rx_queues[i]->cp_ring; +- if (BNXT_HAS_RING_GRPS(bp)) +- bp->grp_info[i].fw_stats_ctx = -1; +- } +- if (cpr->hw_stats_ctx_id != HWRM_NA_SIGNATURE) { +- rc = bnxt_hwrm_stat_ctx_free(bp, cpr, i); +- cpr->hw_stats_ctx_id = HWRM_NA_SIGNATURE; +- if (rc) +- return rc; +- } ++ cpr = bp->rx_queues[i]->cp_ring; ++ if (BNXT_HAS_RING_GRPS(bp)) ++ bp->grp_info[i].fw_stats_ctx = -1; ++ rc = bnxt_hwrm_stat_ctx_free(bp, cpr); ++ if (rc) ++ return rc; ++ } ++ ++ for (i = 0; i < bp->tx_cp_nr_rings; i++) { ++ cpr = bp->tx_queues[i]->cp_ring; ++ rc = bnxt_hwrm_stat_ctx_free(bp, cpr); ++ if (rc) ++ return rc; + } ++ + return 0; + } + + int bnxt_alloc_all_hwrm_stat_ctxs(struct bnxt *bp) + { ++ struct bnxt_cp_ring_info *cpr; + unsigned int i; + int rc = 0; + +- for (i = 0; i < bp->rx_cp_nr_rings + bp->tx_cp_nr_rings; i++) { +- struct bnxt_tx_queue *txq; +- struct bnxt_rx_queue *rxq; +- struct bnxt_cp_ring_info *cpr; ++ for (i = 0; i < bp->rx_cp_nr_rings; i++) { ++ struct bnxt_rx_queue *rxq = bp->rx_queues[i]; + +- if (i >= bp->rx_cp_nr_rings) { +- txq = bp->tx_queues[i - bp->rx_cp_nr_rings]; +- cpr = txq->cp_ring; +- } else { +- rxq = bp->rx_queues[i]; +- cpr = rxq->cp_ring; ++ cpr = rxq->cp_ring; ++ if (cpr->hw_stats_ctx_id == HWRM_NA_SIGNATURE) { ++ rc = bnxt_hwrm_stat_ctx_alloc(bp, cpr); ++ if (rc) ++ return rc; + } ++ } + +- rc = bnxt_hwrm_stat_ctx_alloc(bp, cpr, i); ++ for (i = 0; i < bp->tx_cp_nr_rings; i++) { ++ struct bnxt_tx_queue *txq = bp->tx_queues[i]; + +- if (rc) +- return rc; ++ cpr = txq->cp_ring; ++ if (cpr->hw_stats_ctx_id == HWRM_NA_SIGNATURE) { ++ rc = bnxt_hwrm_stat_ctx_alloc(bp, cpr); ++ if (rc) ++ return rc; ++ } + } ++ + return rc; + } + +@@ -2530,10 +2561,9 @@ void bnxt_free_nq_ring(struct bnxt *bp, struct bnxt_cp_ring_info *cpr) + bnxt_hwrm_ring_free(bp, cp_ring, + HWRM_RING_FREE_INPUT_RING_TYPE_NQ); + cp_ring->fw_ring_id = INVALID_HW_RING_ID; +- memset(cpr->cp_desc_ring, 0, cpr->cp_ring_struct->ring_size * +- sizeof(*cpr->cp_desc_ring)); ++ memset(cpr->cp_desc_ring, 0, ++ cpr->cp_ring_struct->ring_size * sizeof(*cpr->cp_desc_ring)); + cpr->cp_raw_cons = 0; +- cpr->valid = 0; + } + + 
void bnxt_free_cp_ring(struct bnxt *bp, struct bnxt_cp_ring_info *cpr) +@@ -2541,12 +2571,11 @@ void bnxt_free_cp_ring(struct bnxt *bp, struct bnxt_cp_ring_info *cpr) + struct bnxt_ring *cp_ring = cpr->cp_ring_struct; + + bnxt_hwrm_ring_free(bp, cp_ring, +- HWRM_RING_FREE_INPUT_RING_TYPE_L2_CMPL); ++ HWRM_RING_FREE_INPUT_RING_TYPE_L2_CMPL); + cp_ring->fw_ring_id = INVALID_HW_RING_ID; +- memset(cpr->cp_desc_ring, 0, cpr->cp_ring_struct->ring_size * +- sizeof(*cpr->cp_desc_ring)); ++ memset(cpr->cp_desc_ring, 0, ++ cpr->cp_ring_struct->ring_size * sizeof(*cpr->cp_desc_ring)); + cpr->cp_raw_cons = 0; +- cpr->valid = 0; + } + + void bnxt_free_hwrm_rx_ring(struct bnxt *bp, int queue_index) +@@ -2556,31 +2585,49 @@ void bnxt_free_hwrm_rx_ring(struct bnxt *bp, int queue_index) + struct bnxt_ring *ring = rxr->rx_ring_struct; + struct bnxt_cp_ring_info *cpr = rxq->cp_ring; + +- if (ring->fw_ring_id != INVALID_HW_RING_ID) { +- bnxt_hwrm_ring_free(bp, ring, +- HWRM_RING_FREE_INPUT_RING_TYPE_RX); +- ring->fw_ring_id = INVALID_HW_RING_ID; +- if (BNXT_HAS_RING_GRPS(bp)) +- bp->grp_info[queue_index].rx_fw_ring_id = +- INVALID_HW_RING_ID; +- } ++ if (BNXT_HAS_RING_GRPS(bp)) ++ bnxt_hwrm_ring_grp_free(bp, queue_index); ++ ++ bnxt_hwrm_ring_free(bp, ring, ++ HWRM_RING_FREE_INPUT_RING_TYPE_RX); ++ if (BNXT_HAS_RING_GRPS(bp)) ++ bp->grp_info[queue_index].rx_fw_ring_id = ++ INVALID_HW_RING_ID; ++ + ring = rxr->ag_ring_struct; +- if (ring->fw_ring_id != INVALID_HW_RING_ID) { +- bnxt_hwrm_ring_free(bp, ring, +- BNXT_CHIP_THOR(bp) ? +- HWRM_RING_FREE_INPUT_RING_TYPE_RX_AGG : +- HWRM_RING_FREE_INPUT_RING_TYPE_RX); +- if (BNXT_HAS_RING_GRPS(bp)) +- bp->grp_info[queue_index].ag_fw_ring_id = +- INVALID_HW_RING_ID; +- } +- if (cpr->cp_ring_struct->fw_ring_id != INVALID_HW_RING_ID) +- bnxt_free_cp_ring(bp, cpr); ++ bnxt_hwrm_ring_free(bp, ring, ++ BNXT_CHIP_THOR(bp) ? 
++ HWRM_RING_FREE_INPUT_RING_TYPE_RX_AGG : ++ HWRM_RING_FREE_INPUT_RING_TYPE_RX); ++ if (BNXT_HAS_RING_GRPS(bp)) ++ bp->grp_info[queue_index].ag_fw_ring_id = ++ INVALID_HW_RING_ID; ++ ++ bnxt_free_cp_ring(bp, cpr); + + if (BNXT_HAS_RING_GRPS(bp)) + bp->grp_info[queue_index].cp_fw_ring_id = INVALID_HW_RING_ID; + } + ++int bnxt_hwrm_rx_ring_reset(struct bnxt *bp, int queue_index) ++{ ++ int rc; ++ struct hwrm_ring_reset_input req = {.req_type = 0 }; ++ struct hwrm_ring_reset_output *resp = bp->hwrm_cmd_resp_addr; ++ ++ HWRM_PREP(&req, HWRM_RING_RESET, BNXT_USE_CHIMP_MB); ++ ++ req.ring_type = HWRM_RING_RESET_INPUT_RING_TYPE_RX_RING_GRP; ++ req.ring_id = rte_cpu_to_le_16(bp->grp_info[queue_index].fw_grp_id); ++ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB); ++ ++ HWRM_CHECK_RESULT(); ++ ++ HWRM_UNLOCK(); ++ ++ return rc; ++} ++ + static int + bnxt_free_all_hwrm_rings(struct bnxt *bp) + { +@@ -2655,7 +2702,7 @@ int bnxt_alloc_hwrm_resources(struct bnxt *bp) + + sprintf(type, "bnxt_hwrm_" PCI_PRI_FMT, pdev->addr.domain, + pdev->addr.bus, pdev->addr.devid, pdev->addr.function); +- bp->max_resp_len = HWRM_MAX_RESP_LEN; ++ bp->max_resp_len = BNXT_PAGE_SIZE; + bp->hwrm_cmd_resp_addr = rte_malloc(type, bp->max_resp_len, 0); + if (bp->hwrm_cmd_resp_addr == NULL) + return -ENOMEM; +@@ -2814,7 +2861,7 @@ static uint16_t bnxt_check_eth_link_autoneg(uint32_t conf_link) + } + + static uint16_t bnxt_parse_eth_link_speed(uint32_t conf_link_speed, +- uint16_t pam4_link) ++ struct bnxt_link_info *link_info) + { + uint16_t eth_link_speed = 0; + +@@ -2853,18 +2900,29 @@ static uint16_t bnxt_parse_eth_link_speed(uint32_t conf_link_speed, + HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_40GB; + break; + case ETH_LINK_SPEED_50G: +- eth_link_speed = pam4_link ? +- HWRM_PORT_PHY_CFG_INPUT_FORCE_PAM4_LINK_SPEED_50GB : +- HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_50GB; ++ if (link_info->support_pam4_speeds & ++ HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_PAM4_SPEEDS_50G) { ++ eth_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_PAM4_LINK_SPEED_50GB; ++ link_info->link_signal_mode = BNXT_SIG_MODE_PAM4; ++ } else { ++ eth_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_50GB; ++ link_info->link_signal_mode = BNXT_SIG_MODE_NRZ; ++ } + break; + case ETH_LINK_SPEED_100G: +- eth_link_speed = pam4_link ? +- HWRM_PORT_PHY_CFG_INPUT_FORCE_PAM4_LINK_SPEED_100GB : +- HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100GB; ++ if (link_info->support_pam4_speeds & ++ HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_PAM4_SPEEDS_100G) { ++ eth_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_PAM4_LINK_SPEED_100GB; ++ link_info->link_signal_mode = BNXT_SIG_MODE_PAM4; ++ } else { ++ eth_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100GB; ++ link_info->link_signal_mode = BNXT_SIG_MODE_NRZ; ++ } + break; + case ETH_LINK_SPEED_200G: + eth_link_speed = + HWRM_PORT_PHY_CFG_INPUT_FORCE_PAM4_LINK_SPEED_200GB; ++ link_info->link_signal_mode = BNXT_SIG_MODE_PAM4; + break; + default: + PMD_DRV_LOG(ERR, +@@ -3082,26 +3140,22 @@ int bnxt_set_hwrm_link_config(struct bnxt *bp, bool link_up) + /* No auto speeds and no auto_pam4_link. 
Disable autoneg */ + if (bp->link_info->auto_link_speed == 0 && + bp->link_info->link_signal_mode && +- bp->link_info->auto_pam4_link_speeds == 0) ++ bp->link_info->auto_pam4_link_speed_mask == 0) + autoneg = 0; + + speed = bnxt_parse_eth_link_speed(dev_conf->link_speeds, +- bp->link_info->link_signal_mode); ++ bp->link_info); + link_req.phy_flags = HWRM_PORT_PHY_CFG_INPUT_FLAGS_RESET_PHY; +- /* Autoneg can be done only when the FW allows. +- * When user configures fixed speed of 40G and later changes to +- * any other speed, auto_link_speed/force_link_speed is still set +- * to 40G until link comes up at new speed. +- */ ++ /* Autoneg can be done only when the FW allows. */ + if (autoneg == 1 && +- !(!BNXT_CHIP_THOR(bp) && +- (bp->link_info->auto_link_speed || +- bp->link_info->force_link_speed))) { ++ (bp->link_info->support_auto_speeds || bp->link_info->support_pam4_auto_speeds)) { + link_req.phy_flags |= + HWRM_PORT_PHY_CFG_INPUT_FLAGS_RESTART_AUTONEG; + link_req.auto_link_speed_mask = + bnxt_parse_eth_link_speed_mask(bp, + dev_conf->link_speeds); ++ link_req.auto_pam4_link_speed_mask = ++ bp->link_info->auto_pam4_link_speed_mask; + } else { + if (bp->link_info->phy_type == + HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET || +@@ -3120,9 +3174,9 @@ int bnxt_set_hwrm_link_config(struct bnxt *bp, bool link_up) + else if (bp->link_info->force_pam4_link_speed) + link_req.link_speed = + bp->link_info->force_pam4_link_speed; +- else if (bp->link_info->auto_pam4_link_speeds) ++ else if (bp->link_info->auto_pam4_link_speed_mask) + link_req.link_speed = +- bp->link_info->auto_pam4_link_speeds; ++ bp->link_info->auto_pam4_link_speed_mask; + else if (bp->link_info->support_pam4_speeds) + link_req.link_speed = + bp->link_info->support_pam4_speeds; +@@ -3134,7 +3188,7 @@ int bnxt_set_hwrm_link_config(struct bnxt *bp, bool link_up) + * zero. Use the auto_link_speed. + */ + if (bp->link_info->auto_link_speed != 0 && +- bp->link_info->auto_pam4_link_speeds == 0) ++ bp->link_info->auto_pam4_link_speed_mask == 0) + link_req.link_speed = bp->link_info->auto_link_speed; + } + link_req.duplex = bnxt_parse_eth_link_duplex(dev_conf->link_speeds); +@@ -3152,7 +3206,6 @@ int bnxt_set_hwrm_link_config(struct bnxt *bp, bool link_up) + return rc; + } + +-/* JIRA 22088 */ + int bnxt_hwrm_func_qcfg(struct bnxt *bp, uint16_t *mtu) + { + struct hwrm_func_qcfg_input req = {0}; +@@ -3169,8 +3222,7 @@ int bnxt_hwrm_func_qcfg(struct bnxt *bp, uint16_t *mtu) + + HWRM_CHECK_RESULT(); + +- /* Hard Coded.. 0xfff VLAN ID mask */ +- bp->vlan = rte_le_to_cpu_16(resp->vlan) & 0xfff; ++ bp->vlan = rte_le_to_cpu_16(resp->vlan) & ETH_VLAN_ID_MAX; + + svif_info = rte_le_to_cpu_16(resp->svif_info); + if (svif_info & HWRM_FUNC_QCFG_OUTPUT_SVIF_INFO_SVIF_VALID) +@@ -3233,23 +3285,13 @@ int bnxt_hwrm_parent_pf_qcfg(struct bnxt *bp) + + rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB); + +- HWRM_CHECK_RESULT(); ++ HWRM_CHECK_RESULT_SILENT(); + + memcpy(bp->parent->mac_addr, resp->mac_address, RTE_ETHER_ADDR_LEN); + bp->parent->vnic = rte_le_to_cpu_16(resp->dflt_vnic_id); + bp->parent->fid = rte_le_to_cpu_16(resp->fid); + bp->parent->port_id = rte_le_to_cpu_16(resp->port_id); + +- /* FIXME: Temporary workaround - remove when firmware issue is fixed. */ +- if (bp->parent->vnic == 0) { +- PMD_DRV_LOG(ERR, "Error: parent VNIC unavailable.\n"); +- /* Use hard-coded values appropriate for current Wh+ fw. 
*/ +- if (bp->parent->fid == 2) +- bp->parent->vnic = 0x100; +- else +- bp->parent->vnic = 1; +- } +- + HWRM_UNLOCK(); + + return 0; +@@ -3335,7 +3377,7 @@ static int bnxt_hwrm_pf_func_cfg(struct bnxt *bp, + rte_cpu_to_le_16(pf_resc->num_hw_ring_grps); + } else if (BNXT_HAS_NQ(bp)) { + enables |= HWRM_FUNC_CFG_INPUT_ENABLES_NUM_MSIX; +- req.num_msix = rte_cpu_to_le_16(bp->max_nq_rings); ++ req.num_msix = rte_cpu_to_le_16(pf_resc->num_nq_rings); + } + + req.flags = rte_cpu_to_le_32(bp->pf->func_cfg_flags); +@@ -3347,7 +3389,7 @@ static int bnxt_hwrm_pf_func_cfg(struct bnxt *bp, + req.num_tx_rings = rte_cpu_to_le_16(pf_resc->num_tx_rings); + req.num_rx_rings = rte_cpu_to_le_16(pf_resc->num_rx_rings); + req.num_l2_ctxs = rte_cpu_to_le_16(pf_resc->num_l2_ctxs); +- req.num_vnics = rte_cpu_to_le_16(bp->max_vnics); ++ req.num_vnics = rte_cpu_to_le_16(pf_resc->num_vnics); + req.fid = rte_cpu_to_le_16(0xffff); + req.enables = rte_cpu_to_le_32(enables); + +@@ -3384,14 +3426,12 @@ bnxt_fill_vf_func_cfg_req_new(struct bnxt *bp, + req->min_rx_rings = req->max_rx_rings; + req->max_l2_ctxs = rte_cpu_to_le_16(bp->max_l2_ctx / (num_vfs + 1)); + req->min_l2_ctxs = req->max_l2_ctxs; +- /* TODO: For now, do not support VMDq/RFS on VFs. */ +- req->max_vnics = rte_cpu_to_le_16(1); ++ req->max_vnics = rte_cpu_to_le_16(bp->max_vnics / (num_vfs + 1)); + req->min_vnics = req->max_vnics; + req->max_hw_ring_grps = rte_cpu_to_le_16(bp->max_ring_grps / + (num_vfs + 1)); + req->min_hw_ring_grps = req->max_hw_ring_grps; +- req->flags = +- rte_cpu_to_le_16(HWRM_FUNC_VF_RESOURCE_CFG_INPUT_FLAGS_MIN_GUARANTEED); ++ req->max_msix = rte_cpu_to_le_16(bp->max_nq_rings / (num_vfs + 1)); + } + + static void +@@ -3451,6 +3491,8 @@ static int bnxt_update_max_resources(struct bnxt *bp, + bp->max_rx_rings -= rte_le_to_cpu_16(resp->alloc_rx_rings); + bp->max_l2_ctx -= rte_le_to_cpu_16(resp->alloc_l2_ctx); + bp->max_ring_grps -= rte_le_to_cpu_16(resp->alloc_hw_ring_grps); ++ bp->max_nq_rings -= rte_le_to_cpu_16(resp->alloc_msix); ++ bp->max_vnics -= rte_le_to_cpu_16(resp->alloc_vnics); + + HWRM_UNLOCK(); + +@@ -3524,6 +3566,8 @@ static int bnxt_query_pf_resources(struct bnxt *bp, + pf_resc->num_rx_rings = rte_le_to_cpu_16(resp->alloc_rx_rings); + pf_resc->num_l2_ctxs = rte_le_to_cpu_16(resp->alloc_l2_ctx); + pf_resc->num_hw_ring_grps = rte_le_to_cpu_32(resp->alloc_hw_ring_grps); ++ pf_resc->num_nq_rings = rte_le_to_cpu_32(resp->alloc_msix); ++ pf_resc->num_vnics = rte_le_to_cpu_16(resp->alloc_vnics); + bp->pf->evb_mode = resp->evb_mode; + + HWRM_UNLOCK(); +@@ -3544,6 +3588,8 @@ bnxt_calculate_pf_resources(struct bnxt *bp, + pf_resc->num_rx_rings = bp->max_rx_rings; + pf_resc->num_l2_ctxs = bp->max_l2_ctx; + pf_resc->num_hw_ring_grps = bp->max_ring_grps; ++ pf_resc->num_nq_rings = bp->max_nq_rings; ++ pf_resc->num_vnics = bp->max_vnics; + + return; + } +@@ -3562,6 +3608,10 @@ bnxt_calculate_pf_resources(struct bnxt *bp, + bp->max_l2_ctx % (num_vfs + 1); + pf_resc->num_hw_ring_grps = bp->max_ring_grps / (num_vfs + 1) + + bp->max_ring_grps % (num_vfs + 1); ++ pf_resc->num_nq_rings = bp->max_nq_rings / (num_vfs + 1) + ++ bp->max_nq_rings % (num_vfs + 1); ++ pf_resc->num_vnics = bp->max_vnics / (num_vfs + 1) + ++ bp->max_vnics % (num_vfs + 1); + } + + int bnxt_hwrm_allocate_pf_only(struct bnxt *bp) +@@ -3570,7 +3620,7 @@ int bnxt_hwrm_allocate_pf_only(struct bnxt *bp) + int rc; + + if (!BNXT_PF(bp)) { +- PMD_DRV_LOG(ERR, "Attempt to allcoate VFs on a VF!\n"); ++ PMD_DRV_LOG(ERR, "Attempt to allocate VFs on a VF!\n"); + return -EINVAL; + } + 
+@@ -3737,6 +3787,8 @@ bnxt_update_pf_resources(struct bnxt *bp, + bp->max_tx_rings = pf_resc->num_tx_rings; + bp->max_rx_rings = pf_resc->num_rx_rings; + bp->max_ring_grps = pf_resc->num_hw_ring_grps; ++ bp->max_nq_rings = pf_resc->num_nq_rings; ++ bp->max_vnics = pf_resc->num_vnics; + } + + static int32_t +@@ -4152,8 +4204,20 @@ int bnxt_hwrm_exec_fwd_resp(struct bnxt *bp, uint16_t target_id, + return rc; + } + +-int bnxt_hwrm_ctx_qstats(struct bnxt *bp, uint32_t cid, int idx, +- struct rte_eth_stats *stats, uint8_t rx) ++static void bnxt_update_prev_stat(uint64_t *cntr, uint64_t *prev_cntr) ++{ ++ /* One of the HW stat values that make up this counter was zero as ++ * returned by HW in this iteration, so use the previous ++ * iteration's counter value ++ */ ++ if (*prev_cntr && *cntr == 0) ++ *cntr = *prev_cntr; ++ else ++ *prev_cntr = *cntr; ++} ++ ++int bnxt_hwrm_ring_stats(struct bnxt *bp, uint32_t cid, int idx, ++ struct bnxt_ring_stats *ring_stats, bool rx) + { + int rc = 0; + struct hwrm_stat_ctx_query_input req = {.req_type = 0}; +@@ -4168,21 +4232,85 @@ int bnxt_hwrm_ctx_qstats(struct bnxt *bp, uint32_t cid, int idx, + HWRM_CHECK_RESULT(); + + if (rx) { +- stats->q_ipackets[idx] = rte_le_to_cpu_64(resp->rx_ucast_pkts); +- stats->q_ipackets[idx] += rte_le_to_cpu_64(resp->rx_mcast_pkts); +- stats->q_ipackets[idx] += rte_le_to_cpu_64(resp->rx_bcast_pkts); +- stats->q_ibytes[idx] = rte_le_to_cpu_64(resp->rx_ucast_bytes); +- stats->q_ibytes[idx] += rte_le_to_cpu_64(resp->rx_mcast_bytes); +- stats->q_ibytes[idx] += rte_le_to_cpu_64(resp->rx_bcast_bytes); +- stats->q_errors[idx] = rte_le_to_cpu_64(resp->rx_discard_pkts); +- stats->q_errors[idx] += rte_le_to_cpu_64(resp->rx_error_pkts); ++ struct bnxt_ring_stats *prev_stats = &bp->prev_rx_ring_stats[idx]; ++ ++ ring_stats->rx_ucast_pkts = rte_le_to_cpu_64(resp->rx_ucast_pkts); ++ bnxt_update_prev_stat(&ring_stats->rx_ucast_pkts, ++ &prev_stats->rx_ucast_pkts); ++ ++ ring_stats->rx_mcast_pkts = rte_le_to_cpu_64(resp->rx_mcast_pkts); ++ bnxt_update_prev_stat(&ring_stats->rx_mcast_pkts, ++ &prev_stats->rx_mcast_pkts); ++ ++ ring_stats->rx_bcast_pkts = rte_le_to_cpu_64(resp->rx_bcast_pkts); ++ bnxt_update_prev_stat(&ring_stats->rx_bcast_pkts, ++ &prev_stats->rx_bcast_pkts); ++ ++ ring_stats->rx_ucast_bytes = rte_le_to_cpu_64(resp->rx_ucast_bytes); ++ bnxt_update_prev_stat(&ring_stats->rx_ucast_bytes, ++ &prev_stats->rx_ucast_bytes); ++ ++ ring_stats->rx_mcast_bytes = rte_le_to_cpu_64(resp->rx_mcast_bytes); ++ bnxt_update_prev_stat(&ring_stats->rx_mcast_bytes, ++ &prev_stats->rx_mcast_bytes); ++ ++ ring_stats->rx_bcast_bytes = rte_le_to_cpu_64(resp->rx_bcast_bytes); ++ bnxt_update_prev_stat(&ring_stats->rx_bcast_bytes, ++ &prev_stats->rx_bcast_bytes); ++ ++ ring_stats->rx_discard_pkts = rte_le_to_cpu_64(resp->rx_discard_pkts); ++ bnxt_update_prev_stat(&ring_stats->rx_discard_pkts, ++ &prev_stats->rx_discard_pkts); ++ ++ ring_stats->rx_error_pkts = rte_le_to_cpu_64(resp->rx_error_pkts); ++ bnxt_update_prev_stat(&ring_stats->rx_error_pkts, ++ &prev_stats->rx_error_pkts); ++ ++ ring_stats->rx_agg_pkts = rte_le_to_cpu_64(resp->rx_agg_pkts); ++ bnxt_update_prev_stat(&ring_stats->rx_agg_pkts, ++ &prev_stats->rx_agg_pkts); ++ ++ ring_stats->rx_agg_bytes = rte_le_to_cpu_64(resp->rx_agg_bytes); ++ bnxt_update_prev_stat(&ring_stats->rx_agg_bytes, ++ &prev_stats->rx_agg_bytes); ++ ++ ring_stats->rx_agg_events = rte_le_to_cpu_64(resp->rx_agg_events); ++ bnxt_update_prev_stat(&ring_stats->rx_agg_events, ++ &prev_stats->rx_agg_events); ++ ++ 
ring_stats->rx_agg_aborts = rte_le_to_cpu_64(resp->rx_agg_aborts); ++ bnxt_update_prev_stat(&ring_stats->rx_agg_aborts, ++ &prev_stats->rx_agg_aborts); + } else { +- stats->q_opackets[idx] = rte_le_to_cpu_64(resp->tx_ucast_pkts); +- stats->q_opackets[idx] += rte_le_to_cpu_64(resp->tx_mcast_pkts); +- stats->q_opackets[idx] += rte_le_to_cpu_64(resp->tx_bcast_pkts); +- stats->q_obytes[idx] = rte_le_to_cpu_64(resp->tx_ucast_bytes); +- stats->q_obytes[idx] += rte_le_to_cpu_64(resp->tx_mcast_bytes); +- stats->q_obytes[idx] += rte_le_to_cpu_64(resp->tx_bcast_bytes); ++ struct bnxt_ring_stats *prev_stats = &bp->prev_tx_ring_stats[idx]; ++ ++ ring_stats->tx_ucast_pkts = rte_le_to_cpu_64(resp->tx_ucast_pkts); ++ bnxt_update_prev_stat(&ring_stats->tx_ucast_pkts, ++ &prev_stats->tx_ucast_pkts); ++ ++ ring_stats->tx_mcast_pkts = rte_le_to_cpu_64(resp->tx_mcast_pkts); ++ bnxt_update_prev_stat(&ring_stats->tx_mcast_pkts, ++ &prev_stats->tx_mcast_pkts); ++ ++ ring_stats->tx_bcast_pkts = rte_le_to_cpu_64(resp->tx_bcast_pkts); ++ bnxt_update_prev_stat(&ring_stats->tx_bcast_pkts, ++ &prev_stats->tx_bcast_pkts); ++ ++ ring_stats->tx_ucast_bytes = rte_le_to_cpu_64(resp->tx_ucast_bytes); ++ bnxt_update_prev_stat(&ring_stats->tx_ucast_bytes, ++ &prev_stats->tx_ucast_bytes); ++ ++ ring_stats->tx_mcast_bytes = rte_le_to_cpu_64(resp->tx_mcast_bytes); ++ bnxt_update_prev_stat(&ring_stats->tx_mcast_bytes, ++ &prev_stats->tx_mcast_bytes); ++ ++ ring_stats->tx_bcast_bytes = rte_le_to_cpu_64(resp->tx_bcast_bytes); ++ bnxt_update_prev_stat(&ring_stats->tx_bcast_bytes, ++ &prev_stats->tx_bcast_bytes); ++ ++ ring_stats->tx_discard_pkts = rte_le_to_cpu_64(resp->tx_discard_pkts); ++ bnxt_update_prev_stat(&ring_stats->tx_discard_pkts, ++ &prev_stats->tx_discard_pkts); + } + + HWRM_UNLOCK(); +@@ -4246,7 +4374,7 @@ int bnxt_hwrm_port_led_qcaps(struct bnxt *bp) + req.port_id = bp->pf->port_id; + rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB); + +- HWRM_CHECK_RESULT(); ++ HWRM_CHECK_RESULT_SILENT(); + + if (resp->num_leds > 0 && resp->num_leds < BNXT_MAX_LED) { + unsigned int i; +@@ -5821,3 +5949,50 @@ int bnxt_hwrm_cfa_pair_free(struct bnxt *bp, struct bnxt_representor *rep_bp) + rep_bp->vf_id); + return rc; + } ++ ++ ++int bnxt_hwrm_poll_ver_get(struct bnxt *bp) ++{ ++ struct hwrm_ver_get_input req = {.req_type = 0 }; ++ struct hwrm_ver_get_output *resp = bp->hwrm_cmd_resp_addr; ++ int rc = 0; ++ ++ bp->max_req_len = HWRM_MAX_REQ_LEN; ++ bp->max_resp_len = BNXT_PAGE_SIZE; ++ bp->hwrm_cmd_timeout = SHORT_HWRM_CMD_TIMEOUT; ++ ++ HWRM_PREP(&req, HWRM_VER_GET, BNXT_USE_CHIMP_MB); ++ req.hwrm_intf_maj = HWRM_VERSION_MAJOR; ++ req.hwrm_intf_min = HWRM_VERSION_MINOR; ++ req.hwrm_intf_upd = HWRM_VERSION_UPDATE; ++ ++ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB); ++ ++ HWRM_CHECK_RESULT_SILENT(); ++ HWRM_UNLOCK(); ++ ++ return rc; ++} ++ ++int ++bnxt_vnic_rss_clear_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic) ++{ ++ struct hwrm_vnic_rss_cfg_output *resp = bp->hwrm_cmd_resp_addr; ++ struct hwrm_vnic_rss_cfg_input req = {0}; ++ int nr_ctxs = vnic->num_lb_ctxts; ++ int i, rc = 0; ++ ++ for (i = 0; i < nr_ctxs; i++) { ++ HWRM_PREP(&req, HWRM_VNIC_RSS_CFG, BNXT_USE_CHIMP_MB); ++ ++ req.rss_ctx_idx = rte_cpu_to_le_16(vnic->fw_grp_ids[i]); ++ req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id); ++ ++ rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB); ++ ++ HWRM_CHECK_RESULT(); ++ HWRM_UNLOCK(); ++ } ++ ++ return rc; ++} +diff --git a/dpdk/drivers/net/bnxt/bnxt_hwrm.h 
b/dpdk/drivers/net/bnxt/bnxt_hwrm.h +index 23ca6ab515..769e3d7e03 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_hwrm.h ++++ b/dpdk/drivers/net/bnxt/bnxt_hwrm.h +@@ -14,7 +14,6 @@ struct bnxt_filter_info; + struct bnxt_cp_ring_info; + struct hwrm_func_qstats_output; + +-#define HWRM_SEQ_ID_INVALID -1U + /* Convert Bit field location to value */ + #define ASYNC_CMPL_EVENT_ID_LINK_STATUS_CHANGE \ + (1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE) +@@ -114,11 +113,36 @@ struct bnxt_pf_resource_info { + uint16_t num_rx_rings; + uint16_t num_cp_rings; + uint16_t num_l2_ctxs; ++ uint16_t num_nq_rings; ++ uint16_t num_vnics; + uint32_t num_hw_ring_grps; + }; + + #define BNXT_CTX_VAL_INVAL 0xFFFF + ++#define BNXT_TUNNELED_OFFLOADS_CAP_VXLAN_EN(bp) \ ++ (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_VXLAN)) ++#define BNXT_TUNNELED_OFFLOADS_CAP_NGE_EN(bp) \ ++ (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_NGE)) ++#define BNXT_TUNNELED_OFFLOADS_CAP_GRE_EN(bp) \ ++ (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_GRE)) ++#define BNXT_TUNNELED_OFFLOADS_CAP_IPINIP_EN(bp) \ ++ (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_IPINIP)) ++ ++/* ++ * If the device supports VXLAN, GRE, IPIP and GENEVE tunnel parsing, then report ++ * RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM, RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM and ++ * RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM in the Rx/Tx offload capabilities of the device. ++ */ ++#define BNXT_TUNNELED_OFFLOADS_CAP_ALL_EN(bp) \ ++ (BNXT_TUNNELED_OFFLOADS_CAP_VXLAN_EN(bp) && \ ++ BNXT_TUNNELED_OFFLOADS_CAP_NGE_EN(bp) && \ ++ BNXT_TUNNELED_OFFLOADS_CAP_GRE_EN(bp) && \ ++ BNXT_TUNNELED_OFFLOADS_CAP_IPINIP_EN(bp)) ++ ++#define BNXT_SIG_MODE_NRZ HWRM_PORT_PHY_QCFG_OUTPUT_SIGNAL_MODE_NRZ ++#define BNXT_SIG_MODE_PAM4 HWRM_PORT_PHY_QCFG_OUTPUT_SIGNAL_MODE_PAM4 ++ + int bnxt_hwrm_cfa_l2_clear_rx_mask(struct bnxt *bp, + struct bnxt_vnic_info *vnic); + int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic, +@@ -142,7 +166,7 @@ int bnxt_hwrm_func_buf_unrgtr(struct bnxt *bp); + int bnxt_hwrm_func_driver_register(struct bnxt *bp); + int bnxt_hwrm_func_qcaps(struct bnxt *bp); + int bnxt_hwrm_func_reset(struct bnxt *bp); +-int bnxt_hwrm_func_driver_unregister(struct bnxt *bp, uint32_t flags); ++int bnxt_hwrm_func_driver_unregister(struct bnxt *bp); + int bnxt_hwrm_func_qstats(struct bnxt *bp, uint16_t fid, + struct rte_eth_stats *stats, + struct hwrm_func_qstats_output *func_qstats); +@@ -166,13 +190,6 @@ int bnxt_hwrm_ring_grp_alloc(struct bnxt *bp, unsigned int idx); + int bnxt_hwrm_ring_grp_free(struct bnxt *bp, unsigned int idx); + + int bnxt_hwrm_stat_clear(struct bnxt *bp, struct bnxt_cp_ring_info *cpr); +-int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp, +- struct bnxt_cp_ring_info *cpr, unsigned int idx); +-int bnxt_hwrm_stat_ctx_free(struct bnxt *bp, +- struct bnxt_cp_ring_info *cpr, unsigned int idx); +-int bnxt_hwrm_ctx_qstats(struct bnxt *bp, uint32_t cid, int idx, +- struct rte_eth_stats *stats, uint8_t rx); +- + int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout); + + int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic); +@@ -294,12 +311,16 @@ int bnxt_hwrm_get_dflt_vnic_svif(struct bnxt *bp, uint16_t fid, + uint16_t *vnic_id, uint16_t *svif); + int bnxt_hwrm_parent_pf_qcfg(struct bnxt *bp); + int bnxt_hwrm_port_phy_qcaps(struct bnxt *bp); +-int bnxt_hwrm_oem_cmd(struct bnxt *bp, uint32_t entry_num); + int 
bnxt_clear_one_vnic_filter(struct bnxt *bp, + struct bnxt_filter_info *filter); +-void bnxt_hwrm_free_vf_info(struct bnxt *bp); ++void bnxt_free_vf_info(struct bnxt *bp); + int bnxt_hwrm_first_vf_id_query(struct bnxt *bp, uint16_t fid, + uint16_t *first_vf_id); + int bnxt_hwrm_cfa_pair_alloc(struct bnxt *bp, struct bnxt_representor *rep); + int bnxt_hwrm_cfa_pair_free(struct bnxt *bp, struct bnxt_representor *rep); ++int bnxt_hwrm_poll_ver_get(struct bnxt *bp); ++int bnxt_hwrm_ring_stats(struct bnxt *bp, uint32_t cid, int idx, ++ struct bnxt_ring_stats *stats, bool rx); ++int bnxt_hwrm_rx_ring_reset(struct bnxt *bp, int queue_index); ++int bnxt_vnic_rss_clear_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); + #endif +diff --git a/dpdk/drivers/net/bnxt/bnxt_irq.c b/dpdk/drivers/net/bnxt/bnxt_irq.c +index 40e1b0c980..1a99508572 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_irq.c ++++ b/dpdk/drivers/net/bnxt/bnxt_irq.c +@@ -21,11 +21,14 @@ void bnxt_int_handler(void *param) + { + struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)param; + struct bnxt *bp = eth_dev->data->dev_private; +- struct bnxt_cp_ring_info *cpr = bp->async_cp_ring; ++ uint32_t cons, raw_cons, cp_ring_size; ++ struct bnxt_cp_ring_info *cpr; + struct cmpl_base *cmp; +- uint32_t raw_cons; +- uint32_t cons; + ++ ++ if (bp == NULL) ++ return; ++ cpr = bp->async_cp_ring; + if (cpr == NULL) + return; + +@@ -42,10 +45,11 @@ void bnxt_int_handler(void *param) + return; + } + ++ cp_ring_size = cpr->cp_ring_struct->ring_size; + cons = RING_CMP(cpr->cp_ring_struct, raw_cons); + cmp = &cpr->cp_desc_ring[cons]; + +- if (!CMP_VALID(cmp, raw_cons, cpr->cp_ring_struct)) ++ if (!bnxt_cpr_cmp_valid(cmp, raw_cons, cp_ring_size)) + break; + + bnxt_event_hwrm_resp_handler(bp, cmp); +diff --git a/dpdk/drivers/net/bnxt/bnxt_reps.c b/dpdk/drivers/net/bnxt/bnxt_reps.c +index 167c46ad41..95d99072cd 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_reps.c ++++ b/dpdk/drivers/net/bnxt/bnxt_reps.c +@@ -35,16 +35,20 @@ static const struct eth_dev_ops bnxt_rep_dev_ops = { + uint16_t + bnxt_vfr_recv(uint16_t port_id, uint16_t queue_id, struct rte_mbuf *mbuf) + { +- struct rte_mbuf **prod_rx_buf; ++ struct bnxt_representor *vfr_bp = NULL; + struct bnxt_rx_ring_info *rep_rxr; +- struct bnxt_rx_queue *rep_rxq; + struct rte_eth_dev *vfr_eth_dev; +- struct bnxt_representor *vfr_bp; ++ struct rte_mbuf **prod_rx_buf; ++ struct bnxt_rx_queue *rep_rxq; + uint16_t mask; + uint8_t que; + + vfr_eth_dev = &rte_eth_devices[port_id]; +- vfr_bp = vfr_eth_dev->data->dev_private; ++ vfr_bp = vfr_eth_dev ? vfr_eth_dev->data->dev_private : NULL; ++ ++ if (unlikely(vfr_bp == NULL)) ++ return 1; ++ + /* If rxq_id happens to be > nr_rings, use ring 0 */ + que = queue_id < vfr_bp->rx_nr_rings ? queue_id : 0; + rep_rxq = vfr_bp->rx_queues[que]; +@@ -104,7 +108,7 @@ bnxt_rep_rx_burst(void *rx_queue, + static uint16_t + bnxt_rep_tx_burst(void *tx_queue, + struct rte_mbuf **tx_pkts, +- __rte_unused uint16_t nb_pkts) ++ uint16_t nb_pkts) + { + struct bnxt_vf_rep_tx_queue *vfr_txq = tx_queue; + struct bnxt_tx_queue *ptxq; +@@ -519,7 +523,10 @@ int bnxt_rep_dev_info_get_op(struct rte_eth_dev *eth_dev, + dev_info->max_mac_addrs = parent_bp->max_l2_ctx; + dev_info->max_hash_mac_addrs = 0; + +- max_rx_rings = BNXT_MAX_VF_REP_RINGS; ++ max_rx_rings = parent_bp->rx_nr_rings ? 
++ RTE_MIN(parent_bp->rx_nr_rings, BNXT_MAX_VF_REP_RINGS) : ++ BNXT_MAX_VF_REP_RINGS; ++ + /* For the sake of symmetry, max_rx_queues = max_tx_queues */ + dev_info->max_rx_queues = max_rx_rings; + dev_info->max_tx_queues = max_rx_rings; +@@ -534,10 +541,8 @@ int bnxt_rep_dev_info_get_op(struct rte_eth_dev *eth_dev, + dev_info->min_rx_bufsize = 1; + dev_info->max_rx_pktlen = BNXT_MAX_PKT_LEN; + +- dev_info->rx_offload_capa = BNXT_DEV_RX_OFFLOAD_SUPPORT; +- if (parent_bp->flags & BNXT_FLAG_PTP_SUPPORTED) +- dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TIMESTAMP; +- dev_info->tx_offload_capa = BNXT_DEV_TX_OFFLOAD_SUPPORT; ++ dev_info->rx_offload_capa = bnxt_get_rx_port_offloads(parent_bp); ++ dev_info->tx_offload_capa = bnxt_get_tx_port_offloads(parent_bp); + dev_info->flow_type_rss_offloads = BNXT_ETH_RSS_SUPPORT; + + dev_info->switch_info.name = eth_dev->device->name; +@@ -548,7 +553,7 @@ int bnxt_rep_dev_info_get_op(struct rte_eth_dev *eth_dev, + return 0; + } + +-int bnxt_rep_dev_configure_op(__rte_unused struct rte_eth_dev *eth_dev) ++int bnxt_rep_dev_configure_op(struct rte_eth_dev *eth_dev) + { + struct bnxt_representor *rep_bp = eth_dev->data->dev_private; + +@@ -599,10 +604,10 @@ int bnxt_rep_rx_queue_setup_op(struct rte_eth_dev *eth_dev, + struct rte_mbuf **buf_ring; + int rc = 0; + +- if (queue_idx >= BNXT_MAX_VF_REP_RINGS) { ++ if (queue_idx >= rep_bp->rx_nr_rings) { + PMD_DRV_LOG(ERR, + "Cannot create Rx ring %d. %d rings available\n", +- queue_idx, BNXT_MAX_VF_REP_RINGS); ++ queue_idx, rep_bp->rx_nr_rings); + return -EINVAL; + } + +@@ -698,10 +703,10 @@ int bnxt_rep_tx_queue_setup_op(struct rte_eth_dev *eth_dev, + struct bnxt_tx_queue *parent_txq, *txq; + struct bnxt_vf_rep_tx_queue *vfr_txq; + +- if (queue_idx >= BNXT_MAX_VF_REP_RINGS) { ++ if (queue_idx >= rep_bp->rx_nr_rings) { + PMD_DRV_LOG(ERR, + "Cannot create Tx rings %d. 
%d rings available\n", +- queue_idx, BNXT_MAX_VF_REP_RINGS); ++ queue_idx, rep_bp->rx_nr_rings); + return -EINVAL; + } + +@@ -773,10 +778,10 @@ int bnxt_rep_stats_get_op(struct rte_eth_dev *eth_dev, + struct rte_eth_stats *stats) + { + struct bnxt_representor *rep_bp = eth_dev->data->dev_private; +- int i; ++ unsigned int i; + + memset(stats, 0, sizeof(*stats)); +- for (i = 0; i < BNXT_MAX_VF_REP_RINGS; i++) { ++ for (i = 0; i < rep_bp->rx_nr_rings; i++) { + stats->obytes += rep_bp->tx_bytes[i]; + stats->opackets += rep_bp->tx_pkts[i]; + stats->ibytes += rep_bp->rx_bytes[i]; +@@ -796,9 +801,9 @@ int bnxt_rep_stats_get_op(struct rte_eth_dev *eth_dev, + int bnxt_rep_stats_reset_op(struct rte_eth_dev *eth_dev) + { + struct bnxt_representor *rep_bp = eth_dev->data->dev_private; +- int i; ++ unsigned int i; + +- for (i = 0; i < BNXT_MAX_VF_REP_RINGS; i++) { ++ for (i = 0; i < rep_bp->rx_nr_rings; i++) { + rep_bp->tx_pkts[i] = 0; + rep_bp->tx_bytes[i] = 0; + rep_bp->rx_pkts[i] = 0; +diff --git a/dpdk/drivers/net/bnxt/bnxt_ring.c b/dpdk/drivers/net/bnxt/bnxt_ring.c +index 94cf7d3de2..53ce63f07b 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_ring.c ++++ b/dpdk/drivers/net/bnxt/bnxt_ring.c +@@ -94,7 +94,7 @@ int bnxt_alloc_ring_grps(struct bnxt *bp) + * tx bd ring - Only non-zero length if tx_ring_info is not NULL + * rx bd ring - Only non-zero length if rx_ring_info is not NULL + */ +-int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx, ++int bnxt_alloc_rings(struct bnxt *bp, unsigned int socket_id, uint16_t qidx, + struct bnxt_tx_queue *txq, + struct bnxt_rx_queue *rxq, + struct bnxt_cp_ring_info *cp_ring_info, +@@ -203,7 +203,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx, + mz = rte_memzone_lookup(mz_name); + if (!mz) { + mz = rte_memzone_reserve_aligned(mz_name, total_alloc_len, +- SOCKET_ID_ANY, ++ socket_id, + RTE_MEMZONE_2MB | + RTE_MEMZONE_SIZE_HINT_ONLY | + RTE_MEMZONE_IOVA_CONTIG, +@@ -422,24 +422,23 @@ int bnxt_alloc_rxtx_nq_ring(struct bnxt *bp) + struct bnxt_cp_ring_info *nqr; + struct bnxt_ring *ring; + int ring_index = BNXT_NUM_ASYNC_CPR(bp); +- unsigned int socket_id; + uint8_t ring_type; + int rc = 0; + + if (!BNXT_HAS_NQ(bp) || bp->rxtx_nq_ring) + return 0; + +- socket_id = rte_lcore_to_socket_id(rte_get_main_lcore()); +- + nqr = rte_zmalloc_socket("nqr", + sizeof(struct bnxt_cp_ring_info), +- RTE_CACHE_LINE_SIZE, socket_id); ++ RTE_CACHE_LINE_SIZE, ++ bp->eth_dev->device->numa_node); + if (nqr == NULL) + return -ENOMEM; + + ring = rte_zmalloc_socket("bnxt_cp_ring_struct", + sizeof(struct bnxt_ring), +- RTE_CACHE_LINE_SIZE, socket_id); ++ RTE_CACHE_LINE_SIZE, ++ bp->eth_dev->device->numa_node); + if (ring == NULL) { + rte_free(nqr); + return -ENOMEM; +@@ -454,7 +453,8 @@ int bnxt_alloc_rxtx_nq_ring(struct bnxt *bp) + ring->fw_ring_id = INVALID_HW_RING_ID; + + nqr->cp_ring_struct = ring; +- rc = bnxt_alloc_rings(bp, 0, NULL, NULL, nqr, NULL, "l2_nqr"); ++ rc = bnxt_alloc_rings(bp, bp->eth_dev->device->numa_node, 0, NULL, ++ NULL, nqr, NULL, "l2_nqr"); + if (rc) { + rte_free(ring); + rte_free(nqr); +@@ -608,6 +608,12 @@ int bnxt_alloc_hwrm_rx_ring(struct bnxt *bp, int queue_index) + if (rc) + goto err_out; + ++ if (BNXT_HAS_RING_GRPS(bp)) { ++ rc = bnxt_hwrm_ring_grp_alloc(bp, queue_index); ++ if (rc) ++ goto err_out; ++ } ++ + if (rxq->rx_started) { + if (bnxt_init_one_rx_ring(rxq)) { + PMD_DRV_LOG(ERR, +@@ -837,22 +843,21 @@ int bnxt_alloc_async_ring_struct(struct bnxt *bp) + { + struct bnxt_cp_ring_info *cpr = NULL; + struct bnxt_ring *ring = NULL; +- unsigned int socket_id; 
+ + if (BNXT_NUM_ASYNC_CPR(bp) == 0) + return 0; + +- socket_id = rte_lcore_to_socket_id(rte_get_main_lcore()); +- + cpr = rte_zmalloc_socket("cpr", + sizeof(struct bnxt_cp_ring_info), +- RTE_CACHE_LINE_SIZE, socket_id); ++ RTE_CACHE_LINE_SIZE, ++ bp->eth_dev->device->numa_node); + if (cpr == NULL) + return -ENOMEM; + + ring = rte_zmalloc_socket("bnxt_cp_ring_struct", + sizeof(struct bnxt_ring), +- RTE_CACHE_LINE_SIZE, socket_id); ++ RTE_CACHE_LINE_SIZE, ++ bp->eth_dev->device->numa_node); + if (ring == NULL) { + rte_free(cpr); + return -ENOMEM; +@@ -864,11 +869,11 @@ int bnxt_alloc_async_ring_struct(struct bnxt *bp) + ring->ring_mask = ring->ring_size - 1; + ring->vmem_size = 0; + ring->vmem = NULL; ++ ring->fw_ring_id = INVALID_HW_RING_ID; + + bp->async_cp_ring = cpr; + cpr->cp_ring_struct = ring; + +- return bnxt_alloc_rings(bp, 0, NULL, NULL, +- bp->async_cp_ring, NULL, +- "def_cp"); ++ return bnxt_alloc_rings(bp, bp->eth_dev->device->numa_node, 0, NULL, ++ NULL, bp->async_cp_ring, NULL, "def_cp"); + } +diff --git a/dpdk/drivers/net/bnxt/bnxt_ring.h b/dpdk/drivers/net/bnxt/bnxt_ring.h +index 0a4685d167..201b3919ed 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_ring.h ++++ b/dpdk/drivers/net/bnxt/bnxt_ring.h +@@ -66,7 +66,7 @@ struct bnxt_rx_ring_info; + struct bnxt_cp_ring_info; + void bnxt_free_ring(struct bnxt_ring *ring); + int bnxt_alloc_ring_grps(struct bnxt *bp); +-int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx, ++int bnxt_alloc_rings(struct bnxt *bp, unsigned int socket_id, uint16_t qidx, + struct bnxt_tx_queue *txq, + struct bnxt_rx_queue *rxq, + struct bnxt_cp_ring_info *cp_ring_info, +diff --git a/dpdk/drivers/net/bnxt/bnxt_rxq.c b/dpdk/drivers/net/bnxt/bnxt_rxq.c +index 8637559370..f597f376e6 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_rxq.c ++++ b/dpdk/drivers/net/bnxt/bnxt_rxq.c +@@ -20,6 +20,32 @@ + * RX Queues + */ + ++uint64_t bnxt_get_rx_port_offloads(struct bnxt *bp) ++{ ++ uint64_t rx_offload_capa; ++ ++ rx_offload_capa = DEV_RX_OFFLOAD_IPV4_CKSUM | ++ DEV_RX_OFFLOAD_UDP_CKSUM | ++ DEV_RX_OFFLOAD_TCP_CKSUM | ++ DEV_RX_OFFLOAD_KEEP_CRC | ++ DEV_RX_OFFLOAD_VLAN_FILTER | ++ DEV_RX_OFFLOAD_VLAN_EXTEND | ++ DEV_RX_OFFLOAD_TCP_LRO | ++ DEV_RX_OFFLOAD_SCATTER | ++ DEV_RX_OFFLOAD_RSS_HASH; ++ ++ if (bp->flags & BNXT_FLAG_PTP_SUPPORTED) ++ rx_offload_capa |= DEV_RX_OFFLOAD_TIMESTAMP; ++ if (bp->vnic_cap_flags & BNXT_VNIC_CAP_VLAN_RX_STRIP) ++ rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_STRIP; ++ ++ if (BNXT_TUNNELED_OFFLOADS_CAP_ALL_EN(bp)) ++ rx_offload_capa |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | ++ DEV_RX_OFFLOAD_OUTER_UDP_CKSUM; ++ ++ return rx_offload_capa; ++} ++ + void bnxt_free_rxq_stats(struct bnxt_rx_queue *rxq) + { + if (rxq && rxq->cp_ring && rxq->cp_ring->hw_stats) +@@ -29,6 +55,7 @@ void bnxt_free_rxq_stats(struct bnxt_rx_queue *rxq) + int bnxt_mq_rx_configure(struct bnxt *bp) + { + struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf; ++ struct rte_eth_rss_conf *rss = &bp->rss_conf; + const struct rte_eth_vmdq_rx_conf *conf = + &dev_conf->rx_adv_conf.vmdq_rx_conf; + unsigned int i, j, nb_q_per_grp = 1, ring_idx = 0; +@@ -40,35 +67,6 @@ int bnxt_mq_rx_configure(struct bnxt *bp) + + bp->nr_vnics = 0; + +- /* Single queue mode */ +- if (bp->rx_cp_nr_rings < 2) { +- vnic = &bp->vnic_info[0]; +- if (!vnic) { +- PMD_DRV_LOG(ERR, "VNIC alloc failed\n"); +- rc = -ENOMEM; +- goto err_out; +- } +- vnic->flags |= BNXT_VNIC_INFO_BCAST; +- bp->nr_vnics++; +- +- rxq = bp->eth_dev->data->rx_queues[0]; +- rxq->vnic = vnic; +- +- vnic->func_default = true; +- vnic->start_grp_id = 
0; +- vnic->end_grp_id = vnic->start_grp_id; +- filter = bnxt_alloc_filter(bp); +- if (!filter) { +- PMD_DRV_LOG(ERR, "L2 filter alloc failed\n"); +- rc = -ENOMEM; +- goto err_out; +- } +- filter->mac_index = 0; +- filter->flags |= HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_OUTERMOST; +- STAILQ_INSERT_TAIL(&vnic->filter, filter, next); +- goto out; +- } +- + /* Multi-queue mode */ + if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_VMDQ_DCB_RSS) { + /* VMDq ONLY, VMDq+RSS, VMDq+DCB, VMDq+DCB+RSS */ +@@ -106,7 +104,6 @@ int bnxt_mq_rx_configure(struct bnxt *bp) + + pools = RTE_MIN(pools, bp->rx_cp_nr_rings); + nb_q_per_grp = bp->rx_cp_nr_rings / pools; +- bp->rx_num_qs_per_vnic = nb_q_per_grp; + PMD_DRV_LOG(DEBUG, "pools = %u nb_q_per_grp = %u\n", + pools, nb_q_per_grp); + start_grp_id = 0; +@@ -164,33 +161,21 @@ int bnxt_mq_rx_configure(struct bnxt *bp) + end_grp_id += nb_q_per_grp; + } + +-out: +- if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) { +- struct rte_eth_rss_conf *rss = &dev_conf->rx_adv_conf.rss_conf; +- +- if (bp->flags & BNXT_FLAG_UPDATE_HASH) +- bp->flags &= ~BNXT_FLAG_UPDATE_HASH; +- +- for (i = 0; i < bp->nr_vnics; i++) { +- uint32_t lvl = ETH_RSS_LEVEL(rss->rss_hf); +- +- vnic = &bp->vnic_info[i]; +- vnic->hash_type = +- bnxt_rte_to_hwrm_hash_types(rss->rss_hf); +- vnic->hash_mode = +- bnxt_rte_to_hwrm_hash_level(bp, +- rss->rss_hf, +- lvl); +- +- /* +- * Use the supplied key if the key length is +- * acceptable and the rss_key is not NULL +- */ +- if (rss->rss_key && +- rss->rss_key_len <= HW_HASH_KEY_SIZE) +- memcpy(vnic->rss_hash_key, +- rss->rss_key, rss->rss_key_len); +- } ++ bp->rx_num_qs_per_vnic = nb_q_per_grp; ++ ++ for (i = 0; i < bp->nr_vnics; i++) { ++ uint32_t lvl = ETH_RSS_LEVEL(rss->rss_hf); ++ ++ vnic = &bp->vnic_info[i]; ++ vnic->hash_type = bnxt_rte_to_hwrm_hash_types(rss->rss_hf); ++ vnic->hash_mode = bnxt_rte_to_hwrm_hash_level(bp, rss->rss_hf, lvl); ++ ++ /* ++ * Use the supplied key if the key length is ++ * acceptable and the rss_key is not NULL ++ */ ++ if (rss->rss_key && rss->rss_key_len <= HW_HASH_KEY_SIZE) ++ memcpy(vnic->rss_hash_key, rss->rss_key, rss->rss_key_len); + } + + return rc; +@@ -364,8 +349,8 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev, + + eth_dev->data->rx_queues[queue_idx] = rxq; + /* Allocate RX ring hardware descriptors */ +- rc = bnxt_alloc_rings(bp, queue_idx, NULL, rxq, rxq->cp_ring, NULL, +- "rxr"); ++ rc = bnxt_alloc_rings(bp, socket_id, queue_idx, NULL, rxq, rxq->cp_ring, ++ NULL, "rxr"); + if (rc) { + PMD_DRV_LOG(ERR, + "ring_dma_zone_reserve for rx_ring failed!\n"); +@@ -473,10 +458,11 @@ int bnxt_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) + if (rc) + return rc; + +- if (BNXT_CHIP_THOR(bp)) { +- /* Reconfigure default receive ring and MRU. */ +- bnxt_hwrm_vnic_cfg(bp, rxq->vnic); +- } ++ if (BNXT_HAS_RING_GRPS(bp)) ++ rxq->vnic->dflt_ring_grp = bp->grp_info[rx_queue_id].fw_grp_id; ++ /* Reconfigure default receive ring and MRU. */ ++ bnxt_hwrm_vnic_cfg(bp, rxq->vnic); ++ + PMD_DRV_LOG(INFO, "Rx queue started %d\n", rx_queue_id); + + if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) { +@@ -575,6 +561,9 @@ int bnxt_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id) + if (active_queue_cnt == 0) { + uint16_t saved_mru = vnic->mru; + ++ /* clear RSS setting on vnic. */ ++ bnxt_vnic_rss_clear_p5(bp, vnic); ++ + vnic->mru = 0; + /* Reconfigure default receive ring and MRU. 
*/ + bnxt_hwrm_vnic_cfg(bp, vnic); +diff --git a/dpdk/drivers/net/bnxt/bnxt_rxq.h b/dpdk/drivers/net/bnxt/bnxt_rxq.h +index c72105cf06..e4d185676f 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_rxq.h ++++ b/dpdk/drivers/net/bnxt/bnxt_rxq.h +@@ -30,6 +30,7 @@ struct bnxt_rx_queue { + uint8_t rx_deferred_start; /* not in global dev start */ + uint8_t rx_started; /* RX queue is started */ + uint8_t drop_en; /* Drop when rx desc not available. */ ++ uint8_t in_reset; /* Rx ring is scheduled for reset */ + + struct bnxt *bp; + int index; +@@ -62,4 +63,5 @@ int bnxt_rx_queue_start(struct rte_eth_dev *dev, + int bnxt_rx_queue_stop(struct rte_eth_dev *dev, + uint16_t rx_queue_id); + void bnxt_rx_queue_release_mbufs(struct bnxt_rx_queue *rxq); ++uint64_t bnxt_get_rx_port_offloads(struct bnxt *bp); + #endif +diff --git a/dpdk/drivers/net/bnxt/bnxt_rxr.c b/dpdk/drivers/net/bnxt/bnxt_rxr.c +index b28b7fb561..f923b35ce2 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_rxr.c ++++ b/dpdk/drivers/net/bnxt/bnxt_rxr.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #include "bnxt.h" + #include "bnxt_reps.h" +@@ -17,9 +18,7 @@ + #include "bnxt_rxr.h" + #include "bnxt_rxq.h" + #include "hsi_struct_def_dpdk.h" +-#ifdef RTE_LIBRTE_IEEE1588 + #include "bnxt_hwrm.h" +-#endif + + #include + #include +@@ -127,6 +126,50 @@ struct rte_mbuf *bnxt_consume_rx_buf(struct bnxt_rx_ring_info *rxr, + return mbuf; + } + ++static void bnxt_rx_ring_reset(void *arg) ++{ ++ struct bnxt *bp = arg; ++ int i, rc = 0; ++ struct bnxt_rx_queue *rxq; ++ ++ ++ for (i = 0; i < (int)bp->rx_nr_rings; i++) { ++ struct bnxt_rx_ring_info *rxr; ++ ++ rxq = bp->rx_queues[i]; ++ if (!rxq || !rxq->in_reset) ++ continue; ++ ++ rxr = rxq->rx_ring; ++ /* Disable and flush TPA before resetting the RX ring */ ++ if (rxr->tpa_info) ++ bnxt_hwrm_vnic_tpa_cfg(bp, rxq->vnic, false); ++ rc = bnxt_hwrm_rx_ring_reset(bp, i); ++ if (rc) { ++ PMD_DRV_LOG(ERR, "Rx ring%d reset failed\n", i); ++ continue; ++ } ++ ++ bnxt_rx_queue_release_mbufs(rxq); ++ rxr->rx_prod = 0; ++ rxr->ag_prod = 0; ++ rxr->rx_next_cons = 0; ++ bnxt_init_one_rx_ring(rxq); ++ bnxt_db_write(&rxr->rx_db, rxr->rx_prod); ++ bnxt_db_write(&rxr->ag_db, rxr->ag_prod); ++ if (rxr->tpa_info) ++ bnxt_hwrm_vnic_tpa_cfg(bp, rxq->vnic, true); ++ ++ rxq->in_reset = 0; ++ } ++} ++ ++static void bnxt_sched_ring_reset(struct bnxt_rx_queue *rxq) ++{ ++ rxq->in_reset = 1; ++ rte_eal_alarm_set(1, bnxt_rx_ring_reset, (void *)rxq->bp); ++} ++ + static void bnxt_tpa_start(struct bnxt_rx_queue *rxq, + struct rx_tpa_start_cmpl *tpa_start, + struct rx_tpa_start_cmpl_hi *tpa_start1) +@@ -141,6 +184,12 @@ static void bnxt_tpa_start(struct bnxt_rx_queue *rxq, + + data_cons = tpa_start->opaque; + tpa_info = &rxr->tpa_info[agg_id]; ++ if (unlikely(data_cons != rxr->rx_next_cons)) { ++ PMD_DRV_LOG(ERR, "TPA cons %x, expected cons %x\n", ++ data_cons, rxr->rx_next_cons); ++ bnxt_sched_ring_reset(rxq); ++ return; ++ } + + mbuf = bnxt_consume_rx_buf(rxr, data_cons); + +@@ -165,8 +214,9 @@ static void bnxt_tpa_start(struct bnxt_rx_queue *rxq, + mbuf->hash.fdir.id = rte_le_to_cpu_16(tpa_start1->cfa_code); + mbuf->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; + } +- if (tpa_start1->flags2 & +- rte_cpu_to_le_32(RX_TPA_START_CMPL_FLAGS2_META_FORMAT_VLAN)) { ++ if ((tpa_start1->flags2 & ++ rte_cpu_to_le_32(RX_TPA_START_CMPL_FLAGS2_META_FORMAT_VLAN)) && ++ BNXT_RX_VLAN_STRIP_EN(rxq->bp)) { + mbuf->vlan_tci = rte_le_to_cpu_32(tpa_start1->metadata); + mbuf->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; + } +@@ -177,6 +227,8 @@ 
static void bnxt_tpa_start(struct bnxt_rx_queue *rxq, + /* recycle next mbuf */ + data_cons = RING_NEXT(rxr->rx_ring_struct, data_cons); + bnxt_reuse_rx_mbuf(rxr, bnxt_consume_rx_buf(rxr, data_cons)); ++ ++ rxr->rx_next_cons = RING_NEXT(rxr->rx_ring_struct, data_cons); + } + + static int bnxt_agg_bufs_valid(struct bnxt_cp_ring_info *cpr, +@@ -191,7 +243,8 @@ static int bnxt_agg_bufs_valid(struct bnxt_cp_ring_info *cpr, + cpr->valid = FLIP_VALID(raw_cp_cons, + cpr->cp_ring_struct->ring_mask, + cpr->valid); +- return CMP_VALID(agg_cmpl, raw_cp_cons, cpr->cp_ring_struct); ++ return bnxt_cpr_cmp_valid(agg_cmpl, raw_cp_cons, ++ cpr->cp_ring_struct->ring_size); + } + + /* TPA consume agg buffer out of order, allocate connected data only */ +@@ -272,6 +325,34 @@ static int bnxt_rx_pages(struct bnxt_rx_queue *rxq, + return 0; + } + ++static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, ++ uint32_t *raw_cons, void *cmp) ++{ ++ struct rx_pkt_cmpl *rxcmp = cmp; ++ uint32_t tmp_raw_cons = *raw_cons; ++ uint8_t cmp_type, agg_bufs = 0; ++ ++ cmp_type = CMP_TYPE(rxcmp); ++ ++ if (cmp_type == CMPL_BASE_TYPE_RX_L2) { ++ agg_bufs = BNXT_RX_L2_AGG_BUFS(rxcmp); ++ } else if (cmp_type == RX_TPA_END_CMPL_TYPE_RX_TPA_END) { ++ struct rx_tpa_end_cmpl *tpa_end = cmp; ++ ++ if (BNXT_CHIP_THOR(bp)) ++ return 0; ++ ++ agg_bufs = BNXT_TPA_END_AGG_BUFS(tpa_end); ++ } ++ ++ if (agg_bufs) { ++ if (!bnxt_agg_bufs_valid(cpr, agg_bufs, tmp_raw_cons)) ++ return -EBUSY; ++ } ++ *raw_cons = tmp_raw_cons; ++ return 0; ++} ++ + static inline struct rte_mbuf *bnxt_tpa_end( + struct bnxt_rx_queue *rxq, + uint32_t *raw_cp_cons, +@@ -286,6 +367,13 @@ static inline struct rte_mbuf *bnxt_tpa_end( + uint8_t payload_offset; + struct bnxt_tpa_info *tpa_info; + ++ if (unlikely(rxq->in_reset)) { ++ PMD_DRV_LOG(ERR, "rxq->in_reset: raw_cp_cons:%d\n", ++ *raw_cp_cons); ++ bnxt_discard_rx(rxq->bp, cpr, raw_cp_cons, tpa_end); ++ return NULL; ++ } ++ + if (BNXT_CHIP_THOR(rxq->bp)) { + struct rx_tpa_v2_end_cmpl *th_tpa_end; + struct rx_tpa_v2_end_cmpl_hi *th_tpa_end1; +@@ -428,8 +516,10 @@ bnxt_init_ol_flags_tables(struct bnxt_rx_queue *rxq) + for (i = 0; i < BNXT_OL_FLAGS_TBL_DIM; i++) { + pt[i] = 0; + +- if (i & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) +- pt[i] |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; ++ if (BNXT_RX_VLAN_STRIP_EN(rxq->bp)) { ++ if (i & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) ++ pt[i] |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; ++ } + + if (i & (RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC << 3)) { + /* Tunnel case. 
*/ +@@ -539,9 +629,11 @@ bnxt_set_ol_flags(struct bnxt_rx_ring_info *rxr, struct rx_pkt_cmpl *rxcmp, + static void + bnxt_get_rx_ts_thor(struct bnxt *bp, uint32_t rx_ts_cmpl) + { +- uint64_t systime_cycles = 0; ++ struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; ++ uint64_t last_hwrm_time; ++ uint64_t pkt_time = 0; + +- if (!BNXT_CHIP_THOR(bp)) ++ if (!BNXT_CHIP_THOR(bp) || !ptp) + return; + + /* On Thor, Rx timestamps are provided directly in the +@@ -552,10 +644,13 @@ bnxt_get_rx_ts_thor(struct bnxt *bp, uint32_t rx_ts_cmpl) + * from the HWRM response with the lower 32 bits in the + * Rx completion to produce the 48 bit timestamp for the Rx packet + */ +- bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_CURRENT_TIME, +- &systime_cycles); +- bp->ptp_cfg->rx_timestamp = (systime_cycles & 0xFFFF00000000); +- bp->ptp_cfg->rx_timestamp |= rx_ts_cmpl; ++ last_hwrm_time = ptp->current_time; ++ pkt_time = (last_hwrm_time & BNXT_PTP_CURRENT_TIME_MASK) | rx_ts_cmpl; ++ if (rx_ts_cmpl < (uint32_t)last_hwrm_time) { ++ /* timer has rolled over */ ++ pkt_time += (1ULL << 32); ++ } ++ ptp->rx_timestamp = pkt_time; + } + #endif + +@@ -664,9 +759,9 @@ void bnxt_set_mark_in_mbuf(struct bnxt *bp, + struct rte_mbuf *mbuf) + { + uint32_t cfa_code = 0; +- uint8_t meta_fmt = 0; +- uint16_t flags2 = 0; +- uint32_t meta = 0; ++ ++ if (unlikely(bp->mark_table == NULL)) ++ return; + + cfa_code = rte_le_to_cpu_16(rxcmp1->cfa_code); + if (!cfa_code) +@@ -675,25 +770,6 @@ void bnxt_set_mark_in_mbuf(struct bnxt *bp, + if (cfa_code && !bp->mark_table[cfa_code].valid) + return; + +- flags2 = rte_le_to_cpu_16(rxcmp1->flags2); +- meta = rte_le_to_cpu_32(rxcmp1->metadata); +- if (meta) { +- meta >>= BNXT_RX_META_CFA_CODE_SHIFT; +- +- /* The flags field holds extra bits of info from [6:4] +- * which indicate if the flow is in TCAM or EM or EEM +- */ +- meta_fmt = (flags2 & BNXT_CFA_META_FMT_MASK) >> +- BNXT_CFA_META_FMT_SHFT; +- +- /* meta_fmt == 4 => 'b100 => 'b10x => EM. +- * meta_fmt == 5 => 'b101 => 'b10x => EM + VLAN +- * meta_fmt == 6 => 'b110 => 'b11x => EEM +- * meta_fmt == 7 => 'b111 => 'b11x => EEM + VLAN. 
+- */ +- meta_fmt >>= BNXT_CFA_META_FMT_EM_EEM_SHFT; +- } +- + mbuf->hash.fdir.hi = bp->mark_table[cfa_code].mark_id; + mbuf->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; + } +@@ -736,7 +812,8 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt, + cp_cons = RING_CMP(cpr->cp_ring_struct, tmp_raw_cons); + rxcmp1 = (struct rx_pkt_cmpl_hi *)&cpr->cp_desc_ring[cp_cons]; + +- if (!CMP_VALID(rxcmp1, tmp_raw_cons, cpr->cp_ring_struct)) ++ if (!bnxt_cpr_cmp_valid(rxcmp1, tmp_raw_cons, ++ cpr->cp_ring_struct->ring_size)) + return -EBUSY; + + cpr->valid = FLIP_VALID(cp_cons, +@@ -761,14 +838,21 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt, + goto next_rx; + } + +- agg_buf = (rxcmp->agg_bufs_v1 & RX_PKT_CMPL_AGG_BUFS_MASK) +- >> RX_PKT_CMPL_AGG_BUFS_SFT; ++ agg_buf = BNXT_RX_L2_AGG_BUFS(rxcmp); + if (agg_buf && !bnxt_agg_bufs_valid(cpr, agg_buf, tmp_raw_cons)) + return -EBUSY; + + prod = rxr->rx_prod; + + cons = rxcmp->opaque; ++ if (unlikely(cons != rxr->rx_next_cons)) { ++ bnxt_discard_rx(bp, cpr, &tmp_raw_cons, rxcmp); ++ PMD_DRV_LOG(ERR, "RX cons %x != expected cons %x\n", ++ cons, rxr->rx_next_cons); ++ bnxt_sched_ring_reset(rxq); ++ rc = -EBUSY; ++ goto next_rx; ++ } + mbuf = bnxt_consume_rx_buf(rxr, cons); + if (mbuf == NULL) + return -EBUSY; +@@ -791,6 +875,8 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt, + } + #endif + ++ bnxt_set_vlan(rxcmp1, mbuf); ++ + if (BNXT_TRUFLOW_EN(bp)) + mark_id = bnxt_ulp_set_mark_in_mbuf(rxq->bp, rxcmp1, mbuf, + &vfr_flag); +@@ -833,6 +919,8 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt, + goto rx; + } + rxr->rx_prod = prod; ++rx: ++ rxr->rx_next_cons = RING_NEXT(rxr->rx_ring_struct, cons); + + if (BNXT_TRUFLOW_EN(bp) && (BNXT_VF_IS_TRUSTED(bp) || BNXT_PF(bp)) && + vfr_flag) { +@@ -850,7 +938,6 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt, + * All MBUFs are allocated with the same size under DPDK, + * no optimization for rx_copy_thresh + */ +-rx: + *rx_pkt = mbuf; + + next_rx: +@@ -907,7 +994,8 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + cons = RING_CMP(cpr->cp_ring_struct, raw_cons); + rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons]; + +- if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct)) ++ if (!bnxt_cpr_cmp_valid(rxcmp, raw_cons, ++ cpr->cp_ring_struct->ring_size)) + break; + cpr->valid = FLIP_VALID(cons, + cpr->cp_ring_struct->ring_mask, +@@ -938,9 +1026,6 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + raw_cons = NEXT_RAW_CMP(raw_cons); + if (nb_rx_pkts == nb_pkts || nb_rep_rx_pkts == nb_pkts || evt) + break; +- /* Post some Rx buf early in case of larger burst processing */ +- if (nb_rx_pkts == BNXT_RX_POST_THRESH) +- bnxt_db_write(&rxr->rx_db, rxr->rx_prod); + } + + cpr->cp_raw_cons = raw_cons; +@@ -965,7 +1050,7 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + + /* Attempt to alloc Rx buf in case of a previous allocation failure. 
*/ + if (alloc_failed) { +- uint16_t cnt; ++ int cnt; + + for (cnt = 0; cnt < nb_rx_pkts + nb_rep_rx_pkts; cnt++) { + struct rte_mbuf **rx_buf; +@@ -1033,6 +1118,9 @@ void bnxt_free_rx_rings(struct bnxt *bp) + rte_free(rxq->cp_ring->cp_ring_struct); + rte_free(rxq->cp_ring); + ++ rte_memzone_free(rxq->mz); ++ rxq->mz = NULL; ++ + rte_free(rxq); + bp->rx_queues[i] = NULL; + } +@@ -1209,5 +1297,8 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq) + } + PMD_DRV_LOG(DEBUG, "TPA alloc Done!\n"); + ++ /* Explicitly reset this driver internal tracker on a ring init */ ++ rxr->rx_next_cons = 0; ++ + return 0; + } +diff --git a/dpdk/drivers/net/bnxt/bnxt_rxr.h b/dpdk/drivers/net/bnxt/bnxt_rxr.h +index 46c34e6e16..0fd5920f0a 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_rxr.h ++++ b/dpdk/drivers/net/bnxt/bnxt_rxr.h +@@ -37,7 +37,9 @@ static inline uint16_t bnxt_tpa_start_agg_id(struct bnxt *bp, + #define BNXT_TPA_END_AGG_ID_TH(cmp) \ + rte_le_to_cpu_16((cmp)->agg_id) + +-#define BNXT_RX_POST_THRESH 32 ++#define BNXT_RX_L2_AGG_BUFS(cmp) \ ++ (((cmp)->agg_bufs_v1 & RX_PKT_CMPL_AGG_BUFS_MASK) >> \ ++ RX_PKT_CMPL_AGG_BUFS_SFT) + + /* Number of descriptors to process per inner loop in vector mode. */ + #define RTE_BNXT_DESCS_PER_LOOP 4U +@@ -56,6 +58,7 @@ struct bnxt_rx_ring_info { + uint16_t rx_prod; + uint16_t ag_prod; + uint16_t rx_cons; /* Needed for representor */ ++ uint16_t rx_next_cons; + struct bnxt_db_info rx_db; + struct bnxt_db_info ag_db; + +@@ -123,3 +126,13 @@ bnxt_cfa_code_dynfield(struct rte_mbuf *mbuf) + #define BNXT_PTYPE_TBL_DIM 128 + extern uint32_t bnxt_ptype_table[BNXT_PTYPE_TBL_DIM]; + #endif ++static inline void bnxt_set_vlan(struct rx_pkt_cmpl_hi *rxcmp1, ++ struct rte_mbuf *mbuf) ++{ ++ uint32_t metadata = rte_le_to_cpu_32(rxcmp1->metadata); ++ ++ mbuf->vlan_tci = metadata & (RX_PKT_CMPL_METADATA_VID_MASK | ++ RX_PKT_CMPL_METADATA_DE | ++ RX_PKT_CMPL_METADATA_PRI_MASK); ++} ++ +diff --git a/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_neon.c +index 54f47a3fe1..858e91bb9d 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_neon.c ++++ b/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_neon.c +@@ -151,9 +151,8 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4], + vst1q_u32((uint32_t *)&mbuf[3]->rx_descriptor_fields1, tmp); + } + +-uint16_t +-bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, +- uint16_t nb_pkts) ++static uint16_t ++recv_burst_vec_neon(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + { + struct bnxt_rx_queue *rxq = rx_queue; + struct bnxt_cp_ring_info *cpr = rxq->cp_ring; +@@ -178,9 +177,6 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + if (rxq->rxrearm_nb >= rxq->rx_free_thresh) + bnxt_rxq_rearm(rxq, rxr); + +- /* Return no more than RTE_BNXT_MAX_RX_BURST per call. */ +- nb_pkts = RTE_MIN(nb_pkts, RTE_BNXT_MAX_RX_BURST); +- + cons = raw_cons & (cp_ring_size - 1); + mbcons = (raw_cons / 2) & (rx_ring_size - 1); + +@@ -229,25 +225,38 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + } + + /* +- * Load the four current descriptors into SSE registers in +- * reverse order to ensure consistent state. ++ * Load the four current descriptors into NEON registers. ++ * IO barriers are used to ensure consistent state. + */ + rxcmp1[3] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 7]); + rte_io_rmb(); ++ /* Reload lower 64b of descriptors to make it ordered after info3_v. 
*/ ++ rxcmp1[3] = vreinterpretq_u32_u64(vld1q_lane_u64 ++ ((void *)&cpr->cp_desc_ring[cons + 7], ++ vreinterpretq_u64_u32(rxcmp1[3]), 0)); + rxcmp[3] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 6]); + + rxcmp1[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 5]); + rte_io_rmb(); ++ rxcmp1[2] = vreinterpretq_u32_u64(vld1q_lane_u64 ++ ((void *)&cpr->cp_desc_ring[cons + 5], ++ vreinterpretq_u64_u32(rxcmp1[2]), 0)); + rxcmp[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 4]); + + t1 = vreinterpretq_u64_u32(vzip2q_u32(rxcmp1[2], rxcmp1[3])); + + rxcmp1[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 3]); + rte_io_rmb(); ++ rxcmp1[1] = vreinterpretq_u32_u64(vld1q_lane_u64 ++ ((void *)&cpr->cp_desc_ring[cons + 3], ++ vreinterpretq_u64_u32(rxcmp1[1]), 0)); + rxcmp[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 2]); + + rxcmp1[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 1]); + rte_io_rmb(); ++ rxcmp1[0] = vreinterpretq_u32_u64(vld1q_lane_u64 ++ ((void *)&cpr->cp_desc_ring[cons + 1], ++ vreinterpretq_u64_u32(rxcmp1[0]), 0)); + rxcmp[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 0]); + + t0 = vreinterpretq_u64_u32(vzip2q_u32(rxcmp1[0], rxcmp1[1])); +@@ -314,6 +323,27 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + return nb_rx_pkts; + } + ++uint16_t ++bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) ++{ ++ uint16_t cnt = 0; ++ ++ while (nb_pkts > RTE_BNXT_MAX_RX_BURST) { ++ uint16_t burst; ++ ++ burst = recv_burst_vec_neon(rx_queue, rx_pkts + cnt, ++ RTE_BNXT_MAX_RX_BURST); ++ ++ cnt += burst; ++ nb_pkts -= burst; ++ ++ if (burst < RTE_BNXT_MAX_RX_BURST) ++ return cnt; ++ } ++ ++ return cnt + recv_burst_vec_neon(rx_queue, rx_pkts + cnt, nb_pkts); ++} ++ + static void + bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq) + { +@@ -330,7 +360,7 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq) + cons = RING_CMPL(ring_mask, raw_cons); + txcmp = (struct tx_cmpl *)&cp_desc_ring[cons]; + +- if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct)) ++ if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1)) + break; + + if (likely(CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2)) +diff --git a/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c +index 621f567890..5974b475bf 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c ++++ b/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c +@@ -143,9 +143,8 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], + _mm_store_si128((void *)&mbuf[3]->rx_descriptor_fields1, t0); + } + +-uint16_t +-bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, +- uint16_t nb_pkts) ++static uint16_t ++recv_burst_vec_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + { + struct bnxt_rx_queue *rxq = rx_queue; + const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer); +@@ -170,9 +169,6 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + if (rxq->rxrearm_nb >= rxq->rx_free_thresh) + bnxt_rxq_rearm(rxq, rxr); + +- /* Return no more than RTE_BNXT_MAX_RX_BURST per call. 
*/ +- nb_pkts = RTE_MIN(nb_pkts, RTE_BNXT_MAX_RX_BURST); +- + cons = raw_cons & (cp_ring_size - 1); + mbcons = (raw_cons / 2) & (rx_ring_size - 1); + +@@ -296,6 +292,27 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + return nb_rx_pkts; + } + ++uint16_t ++bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) ++{ ++ uint16_t cnt = 0; ++ ++ while (nb_pkts > RTE_BNXT_MAX_RX_BURST) { ++ uint16_t burst; ++ ++ burst = recv_burst_vec_sse(rx_queue, rx_pkts + cnt, ++ RTE_BNXT_MAX_RX_BURST); ++ ++ cnt += burst; ++ nb_pkts -= burst; ++ ++ if (burst < RTE_BNXT_MAX_RX_BURST) ++ return cnt; ++ } ++ ++ return cnt + recv_burst_vec_sse(rx_queue, rx_pkts + cnt, nb_pkts); ++} ++ + static void + bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq) + { +@@ -312,7 +329,7 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq) + cons = RING_CMPL(ring_mask, raw_cons); + txcmp = (struct tx_cmpl *)&cp_desc_ring[cons]; + +- if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct)) ++ if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1)) + break; + + if (likely(CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2)) +diff --git a/dpdk/drivers/net/bnxt/bnxt_stats.c b/dpdk/drivers/net/bnxt/bnxt_stats.c +index 3c9715f5fa..41997fcaa5 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_stats.c ++++ b/dpdk/drivers/net/bnxt/bnxt_stats.c +@@ -506,8 +506,47 @@ void bnxt_free_stats(struct bnxt *bp) + } + } + ++static void bnxt_fill_rte_eth_stats(struct rte_eth_stats *stats, ++ struct bnxt_ring_stats *ring_stats, ++ unsigned int i, bool rx) ++{ ++ if (rx) { ++ stats->q_ipackets[i] = ring_stats->rx_ucast_pkts; ++ stats->q_ipackets[i] += ring_stats->rx_mcast_pkts; ++ stats->q_ipackets[i] += ring_stats->rx_bcast_pkts; ++ ++ stats->ipackets += stats->q_ipackets[i]; ++ ++ stats->q_ibytes[i] = ring_stats->rx_ucast_bytes; ++ stats->q_ibytes[i] += ring_stats->rx_mcast_bytes; ++ stats->q_ibytes[i] += ring_stats->rx_bcast_bytes; ++ ++ stats->ibytes += stats->q_ibytes[i]; ++ ++ stats->q_errors[i] = ring_stats->rx_discard_pkts; ++ stats->q_errors[i] += ring_stats->rx_error_pkts; ++ ++ stats->imissed += ring_stats->rx_discard_pkts; ++ stats->ierrors += ring_stats->rx_error_pkts; ++ } else { ++ stats->q_opackets[i] = ring_stats->tx_ucast_pkts; ++ stats->q_opackets[i] += ring_stats->tx_mcast_pkts; ++ stats->q_opackets[i] += ring_stats->tx_bcast_pkts; ++ ++ stats->opackets += stats->q_opackets[i]; ++ ++ stats->q_obytes[i] = ring_stats->tx_ucast_bytes; ++ stats->q_obytes[i] += ring_stats->tx_mcast_bytes; ++ stats->q_obytes[i] += ring_stats->tx_bcast_bytes; ++ ++ stats->obytes += stats->q_obytes[i]; ++ ++ stats->oerrors += ring_stats->tx_discard_pkts; ++ } ++} ++ + int bnxt_stats_get_op(struct rte_eth_dev *eth_dev, +- struct rte_eth_stats *bnxt_stats) ++ struct rte_eth_stats *bnxt_stats) + { + int rc = 0; + unsigned int i; +@@ -527,11 +566,14 @@ int bnxt_stats_get_op(struct rte_eth_dev *eth_dev, + for (i = 0; i < num_q_stats; i++) { + struct bnxt_rx_queue *rxq = bp->rx_queues[i]; + struct bnxt_cp_ring_info *cpr = rxq->cp_ring; ++ struct bnxt_ring_stats ring_stats = {0}; + +- rc = bnxt_hwrm_ctx_qstats(bp, cpr->hw_stats_ctx_id, i, +- bnxt_stats, 1); ++ rc = bnxt_hwrm_ring_stats(bp, cpr->hw_stats_ctx_id, i, ++ &ring_stats, true); + if (unlikely(rc)) + return rc; ++ ++ bnxt_fill_rte_eth_stats(bnxt_stats, &ring_stats, i, true); + bnxt_stats->rx_nombuf += + rte_atomic64_read(&rxq->rx_mbuf_alloc_fail); + } +@@ -542,17 +584,29 @@ int bnxt_stats_get_op(struct rte_eth_dev *eth_dev, + for (i = 0; i < num_q_stats; i++) { + struct bnxt_tx_queue *txq = 
bp->tx_queues[i]; + struct bnxt_cp_ring_info *cpr = txq->cp_ring; ++ struct bnxt_ring_stats ring_stats = {0}; + +- rc = bnxt_hwrm_ctx_qstats(bp, cpr->hw_stats_ctx_id, i, +- bnxt_stats, 0); ++ rc = bnxt_hwrm_ring_stats(bp, cpr->hw_stats_ctx_id, i, ++ &ring_stats, false); + if (unlikely(rc)) + return rc; ++ ++ bnxt_fill_rte_eth_stats(bnxt_stats, &ring_stats, i, false); + } + +- rc = bnxt_hwrm_func_qstats(bp, 0xffff, bnxt_stats, NULL); + return rc; + } + ++static void bnxt_clear_prev_stat(struct bnxt *bp) ++{ ++ /* ++ * Clear the cached values of stats returned by HW in the previous ++ * get operation. ++ */ ++ memset(bp->prev_rx_ring_stats, 0, sizeof(struct bnxt_ring_stats) * bp->rx_cp_nr_rings); ++ memset(bp->prev_tx_ring_stats, 0, sizeof(struct bnxt_ring_stats) * bp->tx_cp_nr_rings); ++} ++ + int bnxt_stats_reset_op(struct rte_eth_dev *eth_dev) + { + struct bnxt *bp = eth_dev->data->dev_private; +@@ -575,9 +629,45 @@ int bnxt_stats_reset_op(struct rte_eth_dev *eth_dev) + rte_atomic64_clear(&rxq->rx_mbuf_alloc_fail); + } + ++ bnxt_clear_prev_stat(bp); ++ + return ret; + } + ++static void bnxt_fill_func_qstats(struct hwrm_func_qstats_output *func_qstats, ++ struct bnxt_ring_stats *ring_stats, ++ bool rx) ++{ ++ if (rx) { ++ func_qstats->rx_ucast_pkts += ring_stats->rx_ucast_pkts; ++ func_qstats->rx_mcast_pkts += ring_stats->rx_mcast_pkts; ++ func_qstats->rx_bcast_pkts += ring_stats->rx_bcast_pkts; ++ ++ func_qstats->rx_ucast_bytes += ring_stats->rx_ucast_bytes; ++ func_qstats->rx_mcast_bytes += ring_stats->rx_mcast_bytes; ++ func_qstats->rx_bcast_bytes += ring_stats->rx_bcast_bytes; ++ ++ func_qstats->rx_discard_pkts += ring_stats->rx_discard_pkts; ++ func_qstats->rx_drop_pkts += ring_stats->rx_error_pkts; ++ ++ func_qstats->rx_agg_pkts += ring_stats->rx_agg_pkts; ++ func_qstats->rx_agg_bytes += ring_stats->rx_agg_bytes; ++ func_qstats->rx_agg_events += ring_stats->rx_agg_events; ++ func_qstats->rx_agg_aborts += ring_stats->rx_agg_aborts; ++ } else { ++ func_qstats->tx_ucast_pkts += ring_stats->tx_ucast_pkts; ++ func_qstats->tx_mcast_pkts += ring_stats->tx_mcast_pkts; ++ func_qstats->tx_bcast_pkts += ring_stats->tx_bcast_pkts; ++ ++ func_qstats->tx_ucast_bytes += ring_stats->tx_ucast_bytes; ++ func_qstats->tx_mcast_bytes += ring_stats->tx_mcast_bytes; ++ func_qstats->tx_bcast_bytes += ring_stats->tx_bcast_bytes; ++ ++ func_qstats->tx_drop_pkts += ring_stats->tx_error_pkts; ++ func_qstats->tx_discard_pkts += ring_stats->tx_discard_pkts; ++ } ++} ++ + int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev, + struct rte_eth_xstat *xstats, unsigned int n) + { +@@ -594,12 +684,48 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev, + if (rc) + return rc; + +- if (xstats == NULL) +- return 0; ++ stat_count = RTE_DIM(bnxt_rx_stats_strings) + ++ RTE_DIM(bnxt_tx_stats_strings) + ++ RTE_DIM(bnxt_func_stats_strings) + ++ RTE_DIM(bnxt_rx_ext_stats_strings) + ++ RTE_DIM(bnxt_tx_ext_stats_strings) + ++ bnxt_flow_stats_cnt(bp); ++ ++ if (n < stat_count || xstats == NULL) ++ return stat_count; ++ ++ for (i = 0; i < bp->rx_cp_nr_rings; i++) { ++ struct bnxt_rx_queue *rxq = bp->rx_queues[i]; ++ struct bnxt_cp_ring_info *cpr = rxq->cp_ring; ++ struct bnxt_ring_stats ring_stats = {0}; ++ ++ if (!rxq->rx_started) ++ continue; ++ ++ rc = bnxt_hwrm_ring_stats(bp, cpr->hw_stats_ctx_id, i, ++ &ring_stats, true); ++ if (unlikely(rc)) ++ return rc; ++ ++ bnxt_fill_func_qstats(&func_qstats, &ring_stats, true); ++ } ++ ++ for (i = 0; i < bp->tx_cp_nr_rings; i++) { ++ struct bnxt_tx_queue *txq = bp->tx_queues[i]; 
++ struct bnxt_cp_ring_info *cpr = txq->cp_ring; ++ struct bnxt_ring_stats ring_stats = {0}; ++ ++ if (!txq->tx_started) ++ continue; ++ ++ rc = bnxt_hwrm_ring_stats(bp, cpr->hw_stats_ctx_id, i, ++ &ring_stats, false); ++ if (unlikely(rc)) ++ return rc; + +- memset(xstats, 0, sizeof(*xstats)); ++ bnxt_fill_func_qstats(&func_qstats, &ring_stats, false); ++ } + +- bnxt_hwrm_func_qstats(bp, 0xffff, NULL, &func_qstats); + bnxt_hwrm_port_qstats(bp); + bnxt_hwrm_ext_port_qstats(bp); + rx_port_stats_ext_cnt = RTE_MIN(RTE_DIM(bnxt_rx_ext_stats_strings), +@@ -609,17 +735,7 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev, + (bp->fw_tx_port_stats_ext_size / + stat_size)); + +- count = RTE_DIM(bnxt_rx_stats_strings) + +- RTE_DIM(bnxt_tx_stats_strings) + +- RTE_DIM(bnxt_func_stats_strings) + +- RTE_DIM(bnxt_rx_ext_stats_strings) + +- RTE_DIM(bnxt_tx_ext_stats_strings) + +- bnxt_flow_stats_cnt(bp); +- +- stat_count = count; +- +- if (n < count) +- return count; ++ memset(xstats, 0, sizeof(*xstats) * n); + + count = 0; + for (i = 0; i < RTE_DIM(bnxt_rx_stats_strings); i++) { +@@ -642,13 +758,11 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev, + + for (i = 0; i < RTE_DIM(bnxt_func_stats_strings); i++) { + xstats[count].id = count; +- xstats[count].value = +- rte_le_to_cpu_64(*(uint64_t *)((char *)&func_qstats + +- bnxt_func_stats_strings[i].offset)); ++ xstats[count].value = *(uint64_t *)((char *)&func_qstats + ++ bnxt_func_stats_strings[i].offset); + count++; + } + +- + for (i = 0; i < rx_port_stats_ext_cnt; i++) { + uint64_t *rx_stats_ext = (uint64_t *)bp->hw_rx_port_stats_ext; + +@@ -726,7 +840,7 @@ int bnxt_flow_stats_cnt(struct bnxt *bp) + + int bnxt_dev_xstats_get_names_op(struct rte_eth_dev *eth_dev, + struct rte_eth_xstat_name *xstats_names, +- __rte_unused unsigned int limit) ++ unsigned int size) + { + struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private; + const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) + +@@ -742,63 +856,62 @@ int bnxt_dev_xstats_get_names_op(struct rte_eth_dev *eth_dev, + if (rc) + return rc; + +- if (xstats_names != NULL) { +- count = 0; ++ if (xstats_names == NULL || size < stat_cnt) ++ return stat_cnt; + +- for (i = 0; i < RTE_DIM(bnxt_rx_stats_strings); i++) { +- strlcpy(xstats_names[count].name, +- bnxt_rx_stats_strings[i].name, +- sizeof(xstats_names[count].name)); +- count++; +- } ++ for (i = 0; i < RTE_DIM(bnxt_rx_stats_strings); i++) { ++ strlcpy(xstats_names[count].name, ++ bnxt_rx_stats_strings[i].name, ++ sizeof(xstats_names[count].name)); ++ count++; ++ } + +- for (i = 0; i < RTE_DIM(bnxt_tx_stats_strings); i++) { +- strlcpy(xstats_names[count].name, +- bnxt_tx_stats_strings[i].name, +- sizeof(xstats_names[count].name)); +- count++; +- } ++ for (i = 0; i < RTE_DIM(bnxt_tx_stats_strings); i++) { ++ strlcpy(xstats_names[count].name, ++ bnxt_tx_stats_strings[i].name, ++ sizeof(xstats_names[count].name)); ++ count++; ++ } + +- for (i = 0; i < RTE_DIM(bnxt_func_stats_strings); i++) { +- strlcpy(xstats_names[count].name, +- bnxt_func_stats_strings[i].name, +- sizeof(xstats_names[count].name)); +- count++; +- } ++ for (i = 0; i < RTE_DIM(bnxt_func_stats_strings); i++) { ++ strlcpy(xstats_names[count].name, ++ bnxt_func_stats_strings[i].name, ++ sizeof(xstats_names[count].name)); ++ count++; ++ } + +- for (i = 0; i < RTE_DIM(bnxt_rx_ext_stats_strings); i++) { +- strlcpy(xstats_names[count].name, +- bnxt_rx_ext_stats_strings[i].name, +- sizeof(xstats_names[count].name)); ++ for (i = 0; i < RTE_DIM(bnxt_rx_ext_stats_strings); i++) { 
++ strlcpy(xstats_names[count].name, ++ bnxt_rx_ext_stats_strings[i].name, ++ sizeof(xstats_names[count].name)); + +- count++; +- } ++ count++; ++ } + +- for (i = 0; i < RTE_DIM(bnxt_tx_ext_stats_strings); i++) { +- strlcpy(xstats_names[count].name, +- bnxt_tx_ext_stats_strings[i].name, +- sizeof(xstats_names[count].name)); ++ for (i = 0; i < RTE_DIM(bnxt_tx_ext_stats_strings); i++) { ++ strlcpy(xstats_names[count].name, ++ bnxt_tx_ext_stats_strings[i].name, ++ sizeof(xstats_names[count].name)); + +- count++; +- } ++ count++; ++ } + +- if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS && +- bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT && +- BNXT_FLOW_XSTATS_EN(bp)) { +- for (i = 0; i < bp->max_l2_ctx; i++) { +- char buf[RTE_ETH_XSTATS_NAME_SIZE]; ++ if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS && ++ bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT && ++ BNXT_FLOW_XSTATS_EN(bp)) { ++ for (i = 0; i < bp->max_l2_ctx; i++) { ++ char buf[RTE_ETH_XSTATS_NAME_SIZE]; + +- sprintf(buf, "flow_%d_bytes", i); +- strlcpy(xstats_names[count].name, buf, +- sizeof(xstats_names[count].name)); +- count++; ++ sprintf(buf, "flow_%d_bytes", i); ++ strlcpy(xstats_names[count].name, buf, ++ sizeof(xstats_names[count].name)); ++ count++; + +- sprintf(buf, "flow_%d_packets", i); +- strlcpy(xstats_names[count].name, buf, +- sizeof(xstats_names[count].name)); ++ sprintf(buf, "flow_%d_packets", i); ++ strlcpy(xstats_names[count].name, buf, ++ sizeof(xstats_names[count].name)); + +- count++; +- } ++ count++; + } + } + +@@ -825,6 +938,8 @@ int bnxt_dev_xstats_reset_op(struct rte_eth_dev *eth_dev) + PMD_DRV_LOG(ERR, "Failed to reset xstats: %s\n", + strerror(-ret)); + ++ bnxt_clear_prev_stat(bp); ++ + return ret; + } + +diff --git a/dpdk/drivers/net/bnxt/bnxt_stats.h b/dpdk/drivers/net/bnxt/bnxt_stats.h +index 3cf2a1b822..591c55c406 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_stats.h ++++ b/dpdk/drivers/net/bnxt/bnxt_stats.h +@@ -12,17 +12,12 @@ void bnxt_free_stats(struct bnxt *bp); + int bnxt_stats_get_op(struct rte_eth_dev *eth_dev, + struct rte_eth_stats *bnxt_stats); + int bnxt_stats_reset_op(struct rte_eth_dev *eth_dev); +-int bnxt_dev_xstats_get_names_op(__rte_unused struct rte_eth_dev *eth_dev, ++int bnxt_dev_xstats_get_names_op(struct rte_eth_dev *eth_dev, + struct rte_eth_xstat_name *xstats_names, + __rte_unused unsigned int limit); + int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev, + struct rte_eth_xstat *xstats, unsigned int n); + int bnxt_dev_xstats_reset_op(struct rte_eth_dev *eth_dev); +-int bnxt_dev_xstats_get_by_id_op(struct rte_eth_dev *dev, const uint64_t *ids, +- uint64_t *values, unsigned int limit); +-int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev, +- struct rte_eth_xstat_name *xstats_names, +- const uint64_t *ids, unsigned int limit); + + struct bnxt_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; +diff --git a/dpdk/drivers/net/bnxt/bnxt_txq.c b/dpdk/drivers/net/bnxt/bnxt_txq.c +index 99a31cef28..d282d077ea 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_txq.c ++++ b/dpdk/drivers/net/bnxt/bnxt_txq.c +@@ -8,6 +8,7 @@ + #include + + #include "bnxt.h" ++#include "bnxt_hwrm.h" + #include "bnxt_ring.h" + #include "bnxt_txq.h" + #include "bnxt_txr.h" +@@ -16,6 +17,35 @@ + * TX Queues + */ + ++uint64_t bnxt_get_tx_port_offloads(struct bnxt *bp) ++{ ++ uint64_t tx_offload_capa; ++ ++ tx_offload_capa = DEV_TX_OFFLOAD_IPV4_CKSUM | ++ DEV_TX_OFFLOAD_UDP_CKSUM | ++ DEV_TX_OFFLOAD_TCP_CKSUM | ++ DEV_TX_OFFLOAD_TCP_TSO | ++ DEV_TX_OFFLOAD_QINQ_INSERT | ++ DEV_TX_OFFLOAD_MULTI_SEGS; ++ ++ if (bp->fw_cap & 
BNXT_FW_CAP_VLAN_TX_INSERT) ++ tx_offload_capa |= DEV_TX_OFFLOAD_VLAN_INSERT; ++ ++ if (BNXT_TUNNELED_OFFLOADS_CAP_ALL_EN(bp)) ++ tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; ++ ++ if (BNXT_TUNNELED_OFFLOADS_CAP_VXLAN_EN(bp)) ++ tx_offload_capa |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO; ++ if (BNXT_TUNNELED_OFFLOADS_CAP_GRE_EN(bp)) ++ tx_offload_capa |= DEV_TX_OFFLOAD_GRE_TNL_TSO; ++ if (BNXT_TUNNELED_OFFLOADS_CAP_NGE_EN(bp)) ++ tx_offload_capa |= DEV_TX_OFFLOAD_GENEVE_TNL_TSO; ++ if (BNXT_TUNNELED_OFFLOADS_CAP_IPINIP_EN(bp)) ++ tx_offload_capa |= DEV_TX_OFFLOAD_IPIP_TNL_TSO; ++ ++ return tx_offload_capa; ++} ++ + void bnxt_free_txq_stats(struct bnxt_tx_queue *txq) + { + if (txq && txq->cp_ring && txq->cp_ring->hw_stats) +@@ -149,8 +179,8 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev, + txq->port_id = eth_dev->data->port_id; + + /* Allocate TX ring hardware descriptors */ +- if (bnxt_alloc_rings(bp, queue_idx, txq, NULL, txq->cp_ring, NULL, +- "txr")) { ++ if (bnxt_alloc_rings(bp, socket_id, queue_idx, txq, NULL, txq->cp_ring, ++ NULL, "txr")) { + PMD_DRV_LOG(ERR, "ring_dma_zone_reserve for tx_ring failed!"); + rc = -ENOMEM; + goto err; +@@ -164,11 +194,6 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev, + + eth_dev->data->tx_queues[queue_idx] = txq; + +- if (txq->tx_deferred_start) +- txq->tx_started = false; +- else +- txq->tx_started = true; +- + return 0; + err: + bnxt_tx_queue_release_op(txq); +diff --git a/dpdk/drivers/net/bnxt/bnxt_txq.h b/dpdk/drivers/net/bnxt/bnxt_txq.h +index 42d37f7c7f..4416c54fca 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_txq.h ++++ b/dpdk/drivers/net/bnxt/bnxt_txq.h +@@ -43,4 +43,5 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev, + uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); ++uint64_t bnxt_get_tx_port_offloads(struct bnxt *bp); + #endif +diff --git a/dpdk/drivers/net/bnxt/bnxt_txr.c b/dpdk/drivers/net/bnxt/bnxt_txr.c +index fb358d6f14..78d7329f2b 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_txr.c ++++ b/dpdk/drivers/net/bnxt/bnxt_txr.c +@@ -37,6 +37,9 @@ void bnxt_free_tx_rings(struct bnxt *bp) + rte_free(txq->cp_ring->cp_ring_struct); + rte_free(txq->cp_ring); + ++ rte_memzone_free(txq->mz); ++ txq->mz = NULL; ++ + rte_free(txq); + bp->tx_queues[i] = NULL; + } +@@ -178,7 +181,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt, + txbd->flags_type |= TX_BD_SHORT_FLAGS_COAL_NOW; + txbd->flags_type |= TX_BD_LONG_FLAGS_NO_CMPL; + txbd->len = tx_pkt->data_len; +- if (tx_pkt->pkt_len >= 2014) ++ if (tx_pkt->pkt_len >= 2048) + txbd->flags_type |= TX_BD_LONG_FLAGS_LHINT_GTE2K; + else + txbd->flags_type |= lhint_arr[tx_pkt->pkt_len >> 9]; +@@ -427,30 +430,26 @@ static void bnxt_tx_cmp(struct bnxt_tx_queue *txq, int nr_pkts) + + static int bnxt_handle_tx_cp(struct bnxt_tx_queue *txq) + { ++ uint32_t nb_tx_pkts = 0, cons, ring_mask, opaque; + struct bnxt_cp_ring_info *cpr = txq->cp_ring; + uint32_t raw_cons = cpr->cp_raw_cons; +- uint32_t cons; +- uint32_t nb_tx_pkts = 0; ++ struct bnxt_ring *cp_ring_struct; + struct tx_cmpl *txcmp; +- struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring; +- struct bnxt_ring *cp_ring_struct = cpr->cp_ring_struct; +- uint32_t ring_mask = cp_ring_struct->ring_mask; +- uint32_t opaque = 0; + + if (bnxt_tx_bds_in_hw(txq) < txq->tx_free_thresh) + return 0; + ++ cp_ring_struct = cpr->cp_ring_struct; ++ ring_mask = cp_ring_struct->ring_mask; ++ + do { + cons = RING_CMPL(ring_mask, raw_cons); + txcmp = (struct tx_cmpl *)&cpr->cp_desc_ring[cons]; +- 
rte_prefetch_non_temporal(&cp_desc_ring[(cons + 2) & +- ring_mask]); + +- if (!CMPL_VALID(txcmp, cpr->valid)) ++ if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1)) + break; +- opaque = rte_cpu_to_le_32(txcmp->opaque); +- NEXT_CMPL(cpr, cons, cpr->valid, 1); +- rte_prefetch0(&cp_desc_ring[cons]); ++ ++ opaque = rte_le_to_cpu_32(txcmp->opaque); + + if (CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2) + nb_tx_pkts += opaque; +@@ -458,9 +457,11 @@ static int bnxt_handle_tx_cp(struct bnxt_tx_queue *txq) + RTE_LOG_DP(ERR, PMD, + "Unhandled CMP type %02x\n", + CMP_TYPE(txcmp)); +- raw_cons = cons; ++ raw_cons = NEXT_RAW_CMP(raw_cons); + } while (nb_tx_pkts < ring_mask); + ++ cpr->valid = !!(raw_cons & cp_ring_struct->ring_size); ++ + if (nb_tx_pkts) { + if (txq->offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE) + bnxt_tx_cmp_fast(txq, nb_tx_pkts); +diff --git a/dpdk/drivers/net/bnxt/bnxt_util.h b/dpdk/drivers/net/bnxt/bnxt_util.h +index a15b3a1a95..68665196c3 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_util.h ++++ b/dpdk/drivers/net/bnxt/bnxt_util.h +@@ -10,6 +10,8 @@ + #define BIT(n) (1UL << (n)) + #endif /* BIT */ + ++#define PCI_SUBSYSTEM_ID_OFFSET 0x2e ++ + int bnxt_check_zero_bytes(const uint8_t *bytes, int len); + void bnxt_eth_hw_addr_random(uint8_t *mac_addr); + +diff --git a/dpdk/drivers/net/bnxt/bnxt_vnic.c b/dpdk/drivers/net/bnxt/bnxt_vnic.c +index 1602fb2b88..fd425e5d83 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_vnic.c ++++ b/dpdk/drivers/net/bnxt/bnxt_vnic.c +@@ -16,7 +16,7 @@ + * VNIC Functions + */ + +-void prandom_bytes(void *dest_ptr, size_t len) ++void bnxt_prandom_bytes(void *dest_ptr, size_t len) + { + char *dest = (char *)dest_ptr; + uint64_t rb; +@@ -98,23 +98,16 @@ void bnxt_free_vnic_attributes(struct bnxt *bp) + + for (i = 0; i < bp->max_vnics; i++) { + vnic = &bp->vnic_info[i]; +- if (vnic->rss_table) { +- /* 'Unreserve' the rss_table */ +- /* N/A */ +- +- vnic->rss_table = NULL; +- } +- +- if (vnic->rss_hash_key) { +- /* 'Unreserve' the rss_hash_key */ +- /* N/A */ +- ++ if (vnic->rss_mz != NULL) { ++ rte_memzone_free(vnic->rss_mz); ++ vnic->rss_mz = NULL; + vnic->rss_hash_key = NULL; ++ vnic->rss_table = NULL; + } + } + } + +-int bnxt_alloc_vnic_attributes(struct bnxt *bp) ++int bnxt_alloc_vnic_attributes(struct bnxt *bp, bool reconfig) + { + struct bnxt_vnic_info *vnic; + struct rte_pci_device *pdev = bp->pdev; +@@ -122,12 +115,10 @@ int bnxt_alloc_vnic_attributes(struct bnxt *bp) + char mz_name[RTE_MEMZONE_NAMESIZE]; + uint32_t entry_length; + size_t rss_table_size; +- uint16_t max_vnics; + int i; + rte_iova_t mz_phys_addr; + +- entry_length = HW_HASH_KEY_SIZE + +- BNXT_MAX_MC_ADDRS * RTE_ETHER_ADDR_LEN; ++ entry_length = HW_HASH_KEY_SIZE; + + if (BNXT_CHIP_THOR(bp)) + rss_table_size = BNXT_RSS_TBL_SIZE_THOR * +@@ -137,42 +128,42 @@ int bnxt_alloc_vnic_attributes(struct bnxt *bp) + + entry_length = RTE_CACHE_LINE_ROUNDUP(entry_length + rss_table_size); + +- max_vnics = bp->max_vnics; +- snprintf(mz_name, RTE_MEMZONE_NAMESIZE, +- "bnxt_" PCI_PRI_FMT "_vnicattr", pdev->addr.domain, +- pdev->addr.bus, pdev->addr.devid, pdev->addr.function); +- mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0; +- mz = rte_memzone_lookup(mz_name); +- if (!mz) { +- mz = rte_memzone_reserve(mz_name, +- entry_length * max_vnics, SOCKET_ID_ANY, +- RTE_MEMZONE_2MB | +- RTE_MEMZONE_SIZE_HINT_ONLY | +- RTE_MEMZONE_IOVA_CONTIG); +- if (!mz) +- return -ENOMEM; +- } +- mz_phys_addr = mz->iova; +- +- for (i = 0; i < max_vnics; i++) { ++ for (i = 0; i < bp->max_vnics; i++) { + vnic = &bp->vnic_info[i]; + ++ snprintf(mz_name, 
RTE_MEMZONE_NAMESIZE, ++ "bnxt_" PCI_PRI_FMT "_vnicattr_%d", pdev->addr.domain, ++ pdev->addr.bus, pdev->addr.devid, pdev->addr.function, i); ++ mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0; ++ mz = rte_memzone_lookup(mz_name); ++ if (mz == NULL) { ++ mz = rte_memzone_reserve(mz_name, ++ entry_length, ++ bp->eth_dev->device->numa_node, ++ RTE_MEMZONE_2MB | ++ RTE_MEMZONE_SIZE_HINT_ONLY | ++ RTE_MEMZONE_IOVA_CONTIG); ++ if (mz == NULL) { ++ PMD_DRV_LOG(ERR, "Cannot allocate bnxt vnic_attributes memory\n"); ++ return -ENOMEM; ++ } ++ } ++ vnic->rss_mz = mz; ++ mz_phys_addr = mz->iova; ++ + /* Allocate rss table and hash key */ +- vnic->rss_table = +- (void *)((char *)mz->addr + (entry_length * i)); ++ vnic->rss_table = (void *)((char *)mz->addr); ++ vnic->rss_table_dma_addr = mz_phys_addr; + memset(vnic->rss_table, -1, entry_length); + +- vnic->rss_table_dma_addr = mz_phys_addr + (entry_length * i); +- vnic->rss_hash_key = (void *)((char *)vnic->rss_table + +- rss_table_size); +- +- vnic->rss_hash_key_dma_addr = vnic->rss_table_dma_addr + +- rss_table_size; +- vnic->mc_list = (void *)((char *)vnic->rss_hash_key + +- HW_HASH_KEY_SIZE); +- vnic->mc_list_dma_addr = vnic->rss_hash_key_dma_addr + +- HW_HASH_KEY_SIZE; +- prandom_bytes(vnic->rss_hash_key, HW_HASH_KEY_SIZE); ++ vnic->rss_hash_key = (void *)((char *)vnic->rss_table + rss_table_size); ++ vnic->rss_hash_key_dma_addr = vnic->rss_table_dma_addr + rss_table_size; ++ if (!reconfig) { ++ bnxt_prandom_bytes(vnic->rss_hash_key, HW_HASH_KEY_SIZE); ++ memcpy(bp->rss_conf.rss_key, vnic->rss_hash_key, HW_HASH_KEY_SIZE); ++ } else { ++ memcpy(vnic->rss_hash_key, bp->rss_conf.rss_key, HW_HASH_KEY_SIZE); ++ } + } + + return 0; +diff --git a/dpdk/drivers/net/bnxt/bnxt_vnic.h b/dpdk/drivers/net/bnxt/bnxt_vnic.h +index 2a6f05d9e4..c07fd52b21 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_vnic.h ++++ b/dpdk/drivers/net/bnxt/bnxt_vnic.h +@@ -28,14 +28,11 @@ struct bnxt_vnic_info { + uint16_t mru; + uint16_t hash_type; + uint8_t hash_mode; ++ const struct rte_memzone *rss_mz; + rte_iova_t rss_table_dma_addr; + uint16_t *rss_table; + rte_iova_t rss_hash_key_dma_addr; + void *rss_hash_key; +- rte_iova_t mc_list_dma_addr; +- char *mc_list; +- uint32_t mc_addr_cnt; +-#define BNXT_MAX_MC_ADDRS 16 + uint32_t flags; + #define BNXT_VNIC_INFO_PROMISC (1 << 0) + #define BNXT_VNIC_INFO_ALLMULTI (1 << 1) +@@ -52,8 +49,6 @@ struct bnxt_vnic_info { + bool vlan_strip; + bool func_default; + bool bd_stall; +- bool roce_dual; +- bool roce_only; + bool rss_dflt_cr; + + STAILQ_HEAD(, bnxt_filter_info) filter; +@@ -66,11 +61,11 @@ int bnxt_free_vnic(struct bnxt *bp, struct bnxt_vnic_info *vnic, + struct bnxt_vnic_info *bnxt_alloc_vnic(struct bnxt *bp); + void bnxt_free_all_vnics(struct bnxt *bp); + void bnxt_free_vnic_attributes(struct bnxt *bp); +-int bnxt_alloc_vnic_attributes(struct bnxt *bp); ++int bnxt_alloc_vnic_attributes(struct bnxt *bp, bool reconfig); + void bnxt_free_vnic_mem(struct bnxt *bp); + int bnxt_alloc_vnic_mem(struct bnxt *bp); + int bnxt_vnic_grp_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic); +-void prandom_bytes(void *dest_ptr, size_t len); ++void bnxt_prandom_bytes(void *dest_ptr, size_t len); + uint16_t bnxt_rte_to_hwrm_hash_types(uint64_t rte_type); + int bnxt_rte_to_hwrm_hash_level(struct bnxt *bp, uint64_t hash_f, uint32_t lvl); + uint64_t bnxt_hwrm_to_rte_rss_level(struct bnxt *bp, uint32_t mode); +diff --git a/dpdk/drivers/net/bnxt/hsi_struct_def_dpdk.h b/dpdk/drivers/net/bnxt/hsi_struct_def_dpdk.h +index 4b8b13e3bb..f905181d3b 100644 +--- 
a/dpdk/drivers/net/bnxt/hsi_struct_def_dpdk.h ++++ b/dpdk/drivers/net/bnxt/hsi_struct_def_dpdk.h +@@ -10718,7 +10718,7 @@ struct hwrm_func_qcaps_input { + uint8_t unused_0[6]; + } __rte_packed; + +-/* hwrm_func_qcaps_output (size:704b/88B) */ ++/* hwrm_func_qcaps_output (size:768b/96B) */ + struct hwrm_func_qcaps_output { + /* The specific error status for the command. */ + uint16_t error_code; +@@ -11082,7 +11082,13 @@ struct hwrm_func_qcaps_output { + * (max_tx_rings) to the function. + */ + uint16_t max_sp_tx_rings; +- uint8_t unused_0[2]; ++ /* ++ * The maximum number of MSI-X vectors that may be allocated across ++ * all VFs for the function. This is valid only on the PF with SR-IOV ++ * enabled. Returns zero if this command is called on a PF with ++ * SR-IOV disabled or on a VF. ++ */ ++ uint16_t max_msix_vfs; + uint32_t flags_ext; + /* + * If 1, the device can be configured to set the ECN bits in the +@@ -11164,6 +11170,70 @@ struct hwrm_func_qcaps_output { + * to the primate processor block. + */ + #define HWRM_FUNC_QCAPS_OUTPUT_MPC_CHNLS_CAP_PRIMATE UINT32_C(0x10) ++ /* ++ * Maximum number of Key Contexts supported per HWRM ++ * function call for allocating Key Contexts. ++ */ ++ uint16_t max_key_ctxs_alloc; ++ uint32_t flags_ext2; ++ /* ++ * When this bit is '1', it indicates that FW will support ++ * timestamping on all RX packets, not just PTP type packets. ++ */ ++ #define HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_RX_ALL_PKTS_TIMESTAMPS_SUPPORTED \ ++ UINT32_C(0x1) ++ /* When this bit is '1', it indicates that HW and FW support QUIC. */ ++ #define HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_QUIC_SUPPORTED \ ++ UINT32_C(0x2) ++ uint16_t tunnel_disable_flag; ++ /* ++ * When this bit is '1', it indicates that the VXLAN parsing ++ * is disabled in hardware ++ */ ++ #define HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_VXLAN \ ++ UINT32_C(0x1) ++ /* ++ * When this bit is '1', it indicates that the NGE parsing ++ * is disabled in hardware ++ */ ++ #define HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_NGE \ ++ UINT32_C(0x2) ++ /* ++ * When this bit is '1', it indicates that the NVGRE parsing ++ * is disabled in hardware ++ */ ++ #define HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_NVGRE \ ++ UINT32_C(0x4) ++ /* ++ * When this bit is '1', it indicates that the L2GRE parsing ++ * is disabled in hardware ++ */ ++ #define HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_L2GRE \ ++ UINT32_C(0x8) ++ /* ++ * When this bit is '1', it indicates that the GRE parsing ++ * is disabled in hardware ++ */ ++ #define HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_GRE \ ++ UINT32_C(0x10) ++ /* ++ * When this bit is '1', it indicates that the IPINIP parsing ++ * is disabled in hardware ++ */ ++ #define HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_IPINIP \ ++ UINT32_C(0x20) ++ /* ++ * When this bit is '1', it indicates that the MPLS parsing ++ * is disabled in hardware ++ */ ++ #define HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_MPLS \ ++ UINT32_C(0x40) ++ /* ++ * When this bit is '1', it indicates that the PPPOE parsing ++ * is disabled in hardware ++ */ ++ #define HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_PPPOE \ ++ UINT32_C(0x80) + uint8_t unused_1; + /* + * This field is used in Output records to indicate that the output +@@ -13591,7 +13661,7 @@ struct hwrm_func_resource_qcaps_output { + #define HWRM_FUNC_RESOURCE_QCAPS_OUTPUT_VF_RESERVATION_STRATEGY_MINIMAL \ + UINT32_C(0x1) + /* +- * The PF driver should not reserve any resources for each VF until the ++ * The PF driver 
should not reserve any resources for each VF until + * the VF interface is brought up. + */ + #define HWRM_FUNC_RESOURCE_QCAPS_OUTPUT_VF_RESERVATION_STRATEGY_MINIMAL_STATIC \ +diff --git a/dpdk/drivers/net/bnxt/meson.build b/dpdk/drivers/net/bnxt/meson.build +index 2896337b5d..a2fd494dac 100644 +--- a/dpdk/drivers/net/bnxt/meson.build ++++ b/dpdk/drivers/net/bnxt/meson.build +@@ -74,6 +74,6 @@ sources = files('bnxt_cpr.c', + + if arch_subdir == 'x86' + sources += files('bnxt_rxtx_vec_sse.c') +-elif arch_subdir == 'arm' and host_machine.cpu_family().startswith('aarch64') ++elif arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') + sources += files('bnxt_rxtx_vec_neon.c') + endif +diff --git a/dpdk/drivers/net/bnxt/tf_core/tf_session.c b/dpdk/drivers/net/bnxt/tf_core/tf_session.c +index c95c4bdbd3..598338d46c 100644 +--- a/dpdk/drivers/net/bnxt/tf_core/tf_session.c ++++ b/dpdk/drivers/net/bnxt/tf_core/tf_session.c +@@ -182,9 +182,12 @@ tf_session_create(struct tf *tfp, + return 0; + + cleanup: +- tfp_free(tfp->session->core_data); +- tfp_free(tfp->session); +- tfp->session = NULL; ++ if (tfp->session) { ++ tfp_free(tfp->session->core_data); ++ tfp_free(tfp->session); ++ tfp->session = NULL; ++ } ++ + return rc; + } + +diff --git a/dpdk/drivers/net/bnxt/tf_core/tfp.c b/dpdk/drivers/net/bnxt/tf_core/tfp.c +index 0f6d63cc00..90b3bb7580 100644 +--- a/dpdk/drivers/net/bnxt/tf_core/tfp.c ++++ b/dpdk/drivers/net/bnxt/tf_core/tfp.c +@@ -88,7 +88,7 @@ tfp_send_msg_tunneled(struct tf *tfp, + } + + /** +- * Allocates zero'ed memory from the heap. ++ * Allocates zeroed memory from the heap. + * + * Returns success or failure code. + */ +diff --git a/dpdk/drivers/net/bnxt/tf_core/tfp.h b/dpdk/drivers/net/bnxt/tf_core/tfp.h +index 551b9c569f..a2ad31494e 100644 +--- a/dpdk/drivers/net/bnxt/tf_core/tfp.h ++++ b/dpdk/drivers/net/bnxt/tf_core/tfp.h +@@ -186,7 +186,7 @@ tfp_msg_hwrm_oem_cmd(struct tf *tfp, + uint32_t max_flows); + + /** +- * Allocates zero'ed memory from the heap. ++ * Allocates zeroed memory from the heap. + * + * NOTE: Also performs virt2phy address conversion by default thus is + * can be expensive to invoke. +diff --git a/dpdk/drivers/net/bnxt/tf_ulp/ulp_utils.c b/dpdk/drivers/net/bnxt/tf_ulp/ulp_utils.c +index a13a3bbf65..ac9ce97ff2 100644 +--- a/dpdk/drivers/net/bnxt/tf_ulp/ulp_utils.c ++++ b/dpdk/drivers/net/bnxt/tf_ulp/ulp_utils.c +@@ -175,7 +175,6 @@ ulp_bs_push_msb(uint8_t *bs, uint16_t pos, uint8_t len, uint8_t *val) + { + int i; + int cnt = (len + 7) / 8; +- int tlen = len; + + /* Handle any remainder bits */ + int tmp = len % 8; +@@ -186,12 +185,10 @@ ulp_bs_push_msb(uint8_t *bs, uint16_t pos, uint8_t len, uint8_t *val) + ulp_bs_put_msb(bs, pos, tmp, val[0]); + + pos += tmp; +- tlen -= tmp; + + for (i = 1; i < cnt; i++) { + ulp_bs_put_msb(bs, pos, 8, val[i]); + pos += 8; +- tlen -= 8; + } + + return len; +diff --git a/dpdk/drivers/net/bonding/eth_bond_8023ad_private.h b/dpdk/drivers/net/bonding/eth_bond_8023ad_private.h +index 9b5738afee..a5e1fffea1 100644 +--- a/dpdk/drivers/net/bonding/eth_bond_8023ad_private.h ++++ b/dpdk/drivers/net/bonding/eth_bond_8023ad_private.h +@@ -20,7 +20,7 @@ + /** Maximum number of LACP packets from one slave queued in TX ring. */ + #define BOND_MODE_8023AX_SLAVE_TX_PKTS 1 + /** +- * Timeouts deffinitions (5.4.4 in 802.1AX documentation). ++ * Timeouts definitions (5.4.4 in 802.1AX documentation). 
+ */ + #define BOND_8023AD_FAST_PERIODIC_MS 900 + #define BOND_8023AD_SLOW_PERIODIC_MS 29000 +diff --git a/dpdk/drivers/net/bonding/eth_bond_private.h b/dpdk/drivers/net/bonding/eth_bond_private.h +index 8f198bd50e..d7a7646194 100644 +--- a/dpdk/drivers/net/bonding/eth_bond_private.h ++++ b/dpdk/drivers/net/bonding/eth_bond_private.h +@@ -139,7 +139,7 @@ struct bond_dev_private { + + uint16_t slave_count; /**< Number of bonded slaves */ + struct bond_slave_details slaves[RTE_MAX_ETHPORTS]; +- /**< Arary of bonded slaves details */ ++ /**< Array of bonded slaves details */ + + struct mode8023ad_private mode4; + uint16_t tlb_slaves_order[RTE_MAX_ETHPORTS]; +@@ -212,7 +212,7 @@ int + valid_bonded_port_id(uint16_t port_id); + + int +-valid_slave_port_id(uint16_t port_id, uint8_t mode); ++valid_slave_port_id(struct bond_dev_private *internals, uint16_t port_id); + + void + deactivate_slave(struct rte_eth_dev *eth_dev, uint16_t port_id); +@@ -240,7 +240,7 @@ slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev, + uint16_t slave_port_id); + + int +-bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode); ++bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, uint8_t mode); + + int + slave_configure(struct rte_eth_dev *bonded_eth_dev, +diff --git a/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c b/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c +index 5fe004e551..4ead74412b 100644 +--- a/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c ++++ b/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c +@@ -243,7 +243,7 @@ record_default(struct port *port) + { + /* Record default parameters for partner. Partner admin parameters + * are not implemented so set them to arbitrary default (last known) and +- * mark actor that parner is in defaulted state. */ ++ * mark actor that partner is in defaulted state. */ + port->partner_state = STATE_LACP_ACTIVE; + ACTOR_STATE_SET(port, DEFAULTED); + } +@@ -300,7 +300,7 @@ rx_machine(struct bond_dev_private *internals, uint16_t slave_id, + MODE4_DEBUG("LACP -> CURRENT\n"); + BOND_PRINT_LACP(lacp); + /* Update selected flag. If partner parameters are defaulted assume they +- * are match. If not defaulted compare LACP actor with ports parner ++ * are match. If not defaulted compare LACP actor with ports partner + * params. */ + if (!ACTOR_STATE(port, DEFAULTED) && + (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION) +@@ -399,16 +399,16 @@ periodic_machine(struct bond_dev_private *internals, uint16_t slave_id) + PARTNER_STATE(port, LACP_ACTIVE); + + uint8_t is_partner_fast, was_partner_fast; +- /* No periodic is on BEGIN, LACP DISABLE or when both sides are pasive */ ++ /* No periodic is on BEGIN, LACP DISABLE or when both sides are passive */ + if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) { + timer_cancel(&port->periodic_timer); + timer_force_expired(&port->tx_machine_timer); + SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT); + + MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n", +- SM_FLAG(port, BEGIN) ? "begind " : "", ++ SM_FLAG(port, BEGIN) ? "begin " : "", + SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ", +- active ? "LACP active " : "LACP pasive "); ++ active ? "LACP active " : "LACP passive "); + return; + } + +@@ -495,10 +495,10 @@ mux_machine(struct bond_dev_private *internals, uint16_t slave_id) + if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) && + !PARTNER_STATE(port, SYNCHRONIZATION)) { + /* If in COLLECTING or DISTRIBUTING state and partner becomes out of +- * sync transit to ATACHED state. 
*/ ++ * sync transit to ATTACHED state. */ + ACTOR_STATE_CLR(port, DISTRIBUTING); + ACTOR_STATE_CLR(port, COLLECTING); +- /* Clear actor sync to activate transit ATACHED in condition bellow */ ++ /* Clear actor sync to activate transit ATTACHED in condition bellow */ + ACTOR_STATE_CLR(port, SYNCHRONIZATION); + MODE4_DEBUG("Out of sync -> ATTACHED\n"); + } +@@ -696,7 +696,7 @@ selection_logic(struct bond_dev_private *internals, uint16_t slave_id) + /* Search for aggregator suitable for this port */ + for (i = 0; i < slaves_count; ++i) { + agg = &bond_mode_8023ad_ports[slaves[i]]; +- /* Skip ports that are not aggreagators */ ++ /* Skip ports that are not aggregators */ + if (agg->aggregator_port_id != slaves[i]) + continue; + +@@ -804,25 +804,61 @@ rx_machine_update(struct bond_dev_private *internals, uint16_t slave_id, + struct rte_mbuf *lacp_pkt) { + struct lacpdu_header *lacp; + struct lacpdu_actor_partner_params *partner; ++ struct port *port, *agg; + + if (lacp_pkt != NULL) { + lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *); + RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP); + + partner = &lacp->lacpdu.partner; ++ port = &bond_mode_8023ad_ports[slave_id]; ++ agg = &bond_mode_8023ad_ports[port->aggregator_port_id]; ++ + if (rte_is_zero_ether_addr(&partner->port_params.system) || + rte_is_same_ether_addr(&partner->port_params.system, +- &internals->mode4.mac_addr)) { ++ &agg->actor.system)) { + /* This LACP frame is sending to the bonding port + * so pass it to rx_machine. + */ + rx_machine(internals, slave_id, &lacp->lacpdu); ++ } else { ++ char preferred_system_name[RTE_ETHER_ADDR_FMT_SIZE]; ++ char self_system_name[RTE_ETHER_ADDR_FMT_SIZE]; ++ ++ rte_ether_format_addr(preferred_system_name, ++ RTE_ETHER_ADDR_FMT_SIZE, &partner->port_params.system); ++ rte_ether_format_addr(self_system_name, ++ RTE_ETHER_ADDR_FMT_SIZE, &agg->actor.system); ++ MODE4_DEBUG("preferred partner system %s " ++ "is not equal with self system: %s\n", ++ preferred_system_name, self_system_name); + } + rte_pktmbuf_free(lacp_pkt); + } else + rx_machine(internals, slave_id, NULL); + } + ++static void ++bond_mode_8023ad_dedicated_rxq_process(struct bond_dev_private *internals, ++ uint16_t slave_id) ++{ ++#define DEDICATED_QUEUE_BURST_SIZE 32 ++ struct rte_mbuf *lacp_pkt[DEDICATED_QUEUE_BURST_SIZE]; ++ uint16_t rx_count = rte_eth_rx_burst(slave_id, ++ internals->mode4.dedicated_queues.rx_qid, ++ lacp_pkt, DEDICATED_QUEUE_BURST_SIZE); ++ ++ if (rx_count) { ++ uint16_t i; ++ ++ for (i = 0; i < rx_count; i++) ++ bond_mode_8023ad_handle_slow_pkt(internals, slave_id, ++ lacp_pkt[i]); ++ } else { ++ rx_machine_update(internals, slave_id, NULL); ++ } ++} ++ + static void + bond_mode_8023ad_periodic_cb(void *arg) + { +@@ -885,7 +921,7 @@ bond_mode_8023ad_periodic_cb(void *arg) + + SM_FLAG_SET(port, BEGIN); + +- /* LACP is disabled on half duples or link is down */ ++ /* LACP is disabled on half duplex or link is down */ + if (SM_FLAG(port, LACP_ENABLED)) { + /* If port was enabled set it to BEGIN state */ + SM_FLAG_CLR(port, LACP_ENABLED); +@@ -911,15 +947,8 @@ bond_mode_8023ad_periodic_cb(void *arg) + + rx_machine_update(internals, slave_id, lacp_pkt); + } else { +- uint16_t rx_count = rte_eth_rx_burst(slave_id, +- internals->mode4.dedicated_queues.rx_qid, +- &lacp_pkt, 1); +- +- if (rx_count == 1) +- bond_mode_8023ad_handle_slow_pkt(internals, +- slave_id, lacp_pkt); +- else +- rx_machine_update(internals, slave_id, NULL); ++ bond_mode_8023ad_dedicated_rxq_process(internals, ++ slave_id); + } + + 
periodic_machine(internals, slave_id); +@@ -1040,7 +1069,7 @@ bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, + port->partner_state = STATE_LACP_ACTIVE | STATE_AGGREGATION; + port->sm_flags = SM_FLAGS_BEGIN; + +- /* use this port as agregator */ ++ /* use this port as aggregator */ + port->aggregator_port_id = slave_id; + + if (bond_mode_8023ad_register_lacp_mac(slave_id) < 0) { +diff --git a/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.h b/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.h +index 11a71a55e5..7eb392f8c8 100644 +--- a/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.h ++++ b/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.h +@@ -68,7 +68,7 @@ struct port_params { + struct rte_ether_addr system; + /**< System ID - Slave MAC address, same as bonding MAC address */ + uint16_t key; +- /**< Speed information (implementation dependednt) and duplex. */ ++ /**< Speed information (implementation dependent) and duplex. */ + uint16_t port_priority; + /**< Priority of this (unused in current implementation) */ + uint16_t port_number; +@@ -317,7 +317,7 @@ rte_eth_bond_8023ad_dedicated_queues_disable(uint16_t port_id); + * @param port_id Bonding device id + * + * @return +- * agregator mode on success, negative value otherwise ++ * aggregator mode on success, negative value otherwise + */ + int + rte_eth_bond_8023ad_agg_selection_get(uint16_t port_id); +diff --git a/dpdk/drivers/net/bonding/rte_eth_bond_alb.h b/dpdk/drivers/net/bonding/rte_eth_bond_alb.h +index 386e70c594..4e9aeda9bc 100644 +--- a/dpdk/drivers/net/bonding/rte_eth_bond_alb.h ++++ b/dpdk/drivers/net/bonding/rte_eth_bond_alb.h +@@ -96,7 +96,7 @@ bond_mode_alb_arp_xmit(struct rte_ether_hdr *eth_h, uint16_t offset, + * @param internals Bonding data. + * + * @return +- * Index of slawe on which packet should be sent. ++ * Index of slave on which packet should be sent. 
+ */ + uint16_t + bond_mode_alb_arp_upd(struct client_data *client_info, +diff --git a/dpdk/drivers/net/bonding/rte_eth_bond_api.c b/dpdk/drivers/net/bonding/rte_eth_bond_api.c +index 55c8e3167c..2f8d003cb0 100644 +--- a/dpdk/drivers/net/bonding/rte_eth_bond_api.c ++++ b/dpdk/drivers/net/bonding/rte_eth_bond_api.c +@@ -56,19 +56,25 @@ check_for_master_bonded_ethdev(const struct rte_eth_dev *eth_dev) + } + + int +-valid_slave_port_id(uint16_t port_id, uint8_t mode) ++valid_slave_port_id(struct bond_dev_private *internals, uint16_t slave_port_id) + { +- RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1); ++ RTE_ETH_VALID_PORTID_OR_ERR_RET(slave_port_id, -1); + +- /* Verify that port_id refers to a non bonded port */ +- if (check_for_bonded_ethdev(&rte_eth_devices[port_id]) == 0 && +- mode == BONDING_MODE_8023AD) { ++ /* Verify that slave_port_id refers to a non bonded port */ ++ if (check_for_bonded_ethdev(&rte_eth_devices[slave_port_id]) == 0 && ++ internals->mode == BONDING_MODE_8023AD) { + RTE_BOND_LOG(ERR, "Cannot add slave to bonded device in 802.3ad" + " mode as slave is also a bonded device, only " + "physical devices can be support in this mode."); + return -1; + } + ++ if (internals->port_id == slave_port_id) { ++ RTE_BOND_LOG(ERR, ++ "Cannot add the bonded device itself as its slave."); ++ return -1; ++ } ++ + return 0; + } + +@@ -284,6 +290,7 @@ eth_bond_slave_inherit_dev_info_rx_first(struct bond_dev_private *internals, + struct rte_eth_rxconf *rxconf_i = &internals->default_rxconf; + + internals->reta_size = di->reta_size; ++ internals->rss_key_len = di->hash_key_size; + + /* Inherit Rx offload capabilities from the first slave device */ + internals->rx_offload_capa = di->rx_offload_capa; +@@ -368,7 +375,7 @@ eth_bond_slave_inherit_dev_info_rx_next(struct bond_dev_private *internals, + * value. Thus, the new internal value of default Rx queue offloads + * has to be masked by rx_queue_offload_capa to make sure that only + * commonly supported offloads are preserved from both the previous +- * value and the value being inhereted from the new slave device. ++ * value and the value being inherited from the new slave device. + */ + rxconf_i->offloads = (rxconf_i->offloads | rxconf->offloads) & + internals->rx_queue_offload_capa; +@@ -379,6 +386,11 @@ eth_bond_slave_inherit_dev_info_rx_next(struct bond_dev_private *internals, + */ + if (internals->reta_size > di->reta_size) + internals->reta_size = di->reta_size; ++ if (internals->rss_key_len > di->hash_key_size) { ++ RTE_BOND_LOG(WARNING, "slave has different rss key size, " ++ "configuring rss may fail"); ++ internals->rss_key_len = di->hash_key_size; ++ } + + if (!internals->max_rx_pktlen && + di->max_rx_pktlen < internals->candidate_max_rx_pktlen) +@@ -401,7 +413,7 @@ eth_bond_slave_inherit_dev_info_tx_next(struct bond_dev_private *internals, + * value. Thus, the new internal value of default Tx queue offloads + * has to be masked by tx_queue_offload_capa to make sure that only + * commonly supported offloads are preserved from both the previous +- * value and the value being inhereted from the new slave device. ++ * value and the value being inherited from the new slave device. 
+ */ + txconf_i->offloads = (txconf_i->offloads | txconf->offloads) & + internals->tx_queue_offload_capa; +@@ -456,7 +468,7 @@ __eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id) + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + +- if (valid_slave_port_id(slave_port_id, internals->mode) != 0) ++ if (valid_slave_port_id(internals, slave_port_id) != 0) + return -1; + + slave_eth_dev = &rte_eth_devices[slave_port_id]; +@@ -605,13 +617,15 @@ rte_eth_bond_slave_add(uint16_t bonded_port_id, uint16_t slave_port_id) + + int retval; + +- /* Verify that port id's are valid bonded and slave ports */ + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + ++ if (valid_slave_port_id(internals, slave_port_id) != 0) ++ return -1; ++ + rte_spinlock_lock(&internals->lock); + + retval = __eth_bond_slave_add_lock_free(bonded_port_id, slave_port_id); +@@ -635,7 +649,7 @@ __eth_bond_slave_remove_lock_free(uint16_t bonded_port_id, + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + +- if (valid_slave_port_id(slave_port_id, internals->mode) < 0) ++ if (valid_slave_port_id(internals, slave_port_id) < 0) + return -1; + + /* first remove from active slave list */ +@@ -654,7 +668,7 @@ __eth_bond_slave_remove_lock_free(uint16_t bonded_port_id, + } + + if (slave_idx < 0) { +- RTE_BOND_LOG(ERR, "Couldn't find slave in port list, slave count %d", ++ RTE_BOND_LOG(ERR, "Couldn't find slave in port list, slave count %u", + internals->slave_count); + return -1; + } +@@ -783,7 +797,7 @@ rte_eth_bond_primary_set(uint16_t bonded_port_id, uint16_t slave_port_id) + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + +- if (valid_slave_port_id(slave_port_id, internals->mode) != 0) ++ if (valid_slave_port_id(internals, slave_port_id) != 0) + return -1; + + internals->user_defined_primary_port = 1; +diff --git a/dpdk/drivers/net/bonding/rte_eth_bond_args.c b/dpdk/drivers/net/bonding/rte_eth_bond_args.c +index 8c5f90dc63..764b1b8c8e 100644 +--- a/dpdk/drivers/net/bonding/rte_eth_bond_args.c ++++ b/dpdk/drivers/net/bonding/rte_eth_bond_args.c +@@ -200,20 +200,20 @@ int + bond_ethdev_parse_socket_id_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) + { +- int socket_id; ++ long socket_id; + char *endptr; + + if (value == NULL || extra_args == NULL) + return -1; + + errno = 0; +- socket_id = (uint8_t)strtol(value, &endptr, 10); ++ socket_id = strtol(value, &endptr, 10); + if (*endptr != 0 || errno != 0) + return -1; + + /* validate socket id value */ +- if (socket_id >= 0) { +- *(uint8_t *)extra_args = (uint8_t)socket_id; ++ if (socket_id >= 0 && socket_id < RTE_MAX_NUMA_NODES) { ++ *(int *)extra_args = (int)socket_id; + return 0; + } + return -1; +diff --git a/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c b/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c +index 057b1ada54..4fa523201f 100644 +--- a/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c ++++ b/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c +@@ -1322,7 +1322,7 @@ bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs, + + /* Increment reference count on mbufs */ + for (i = 0; i < nb_pkts; i++) +- rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1); ++ rte_pktmbuf_refcnt_update(bufs[i], num_of_slaves - 1); + + /* Transmit burst on each active slave */ + for (i = 0; i < num_of_slaves; i++) { +@@ 
-1558,7 +1558,7 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) + } + + int +-bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode) ++bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, uint8_t mode) + { + struct bond_dev_private *internals; + +@@ -1705,28 +1705,38 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, + + /* If RSS is enabled for bonding, try to enable it for slaves */ + if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) { +- if (internals->rss_key_len != 0) { +- slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = ++ /* rss_key won't be empty if RSS is configured in bonded dev */ ++ slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = + internals->rss_key_len; +- slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = ++ slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = + internals->rss_key; +- } else { +- slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL; +- } + + slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = + bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; + slave_eth_dev->data->dev_conf.rxmode.mq_mode = + bonded_eth_dev->data->dev_conf.rxmode.mq_mode; ++ } else { ++ slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0; ++ slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL; ++ slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0; ++ slave_eth_dev->data->dev_conf.rxmode.mq_mode = ++ bonded_eth_dev->data->dev_conf.rxmode.mq_mode; + } + +- if (bonded_eth_dev->data->dev_conf.rxmode.offloads & +- DEV_RX_OFFLOAD_VLAN_FILTER) +- slave_eth_dev->data->dev_conf.rxmode.offloads |= +- DEV_RX_OFFLOAD_VLAN_FILTER; +- else +- slave_eth_dev->data->dev_conf.rxmode.offloads &= +- ~DEV_RX_OFFLOAD_VLAN_FILTER; ++ slave_eth_dev->data->dev_conf.txmode.offloads |= ++ bonded_eth_dev->data->dev_conf.txmode.offloads; ++ ++ slave_eth_dev->data->dev_conf.txmode.offloads &= ++ (bonded_eth_dev->data->dev_conf.txmode.offloads | ++ ~internals->tx_offload_capa); ++ ++ slave_eth_dev->data->dev_conf.rxmode.offloads |= ++ bonded_eth_dev->data->dev_conf.rxmode.offloads; ++ ++ slave_eth_dev->data->dev_conf.rxmode.offloads &= ++ (bonded_eth_dev->data->dev_conf.rxmode.offloads | ++ ~internals->rx_offload_capa); ++ + + nb_rx_queues = bonded_eth_dev->data->nb_rx_queues; + nb_tx_queues = bonded_eth_dev->data->nb_tx_queues; +@@ -1794,12 +1804,13 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, + != 0) + return errval; + +- if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev, +- slave_eth_dev->data->port_id) != 0) { ++ errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev, ++ slave_eth_dev->data->port_id); ++ if (errval != 0) { + RTE_BOND_LOG(ERR, +- "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", +- slave_eth_dev->data->port_id, q_id, errval); +- return -1; ++ "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)", ++ slave_eth_dev->data->port_id, errval); ++ return errval; + } + + if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL) +@@ -1807,8 +1818,14 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, + internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id], + &flow_error); + +- bond_ethdev_8023ad_flow_set(bonded_eth_dev, ++ errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev, + slave_eth_dev->data->port_id); ++ if (errval != 0) { ++ RTE_BOND_LOG(ERR, ++ "bond_ethdev_8023ad_flow_set: port=%d, err (%d)", ++ slave_eth_dev->data->port_id, errval); ++ return errval; ++ } + } + + /* Start device 
*/ +@@ -2089,18 +2106,20 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev) + internals->link_status_polling_enabled = 0; + for (i = 0; i < internals->slave_count; i++) { + uint16_t slave_id = internals->slaves[i].port_id; ++ ++ internals->slaves[i].last_link_status = 0; ++ ret = rte_eth_dev_stop(slave_id); ++ if (ret != 0) { ++ RTE_BOND_LOG(ERR, "Failed to stop device on port %u", ++ slave_id); ++ return ret; ++ } ++ ++ /* active slaves need to be deactivated. */ + if (find_slave_by_id(internals->active_slaves, + internals->active_slave_count, slave_id) != +- internals->active_slave_count) { +- internals->slaves[i].last_link_status = 0; +- ret = rte_eth_dev_stop(slave_id); +- if (ret != 0) { +- RTE_BOND_LOG(ERR, "Failed to stop device on port %u", +- slave_id); +- return ret; +- } ++ internals->active_slave_count) + deactivate_slave(eth_dev, slave_id); +- } + } + + return 0; +@@ -2125,6 +2144,7 @@ bond_ethdev_close(struct rte_eth_dev *dev) + RTE_BOND_LOG(ERR, "Failed to stop device on port %u", + port_id); + skipped++; ++ continue; + } + + if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) { +@@ -2145,6 +2165,9 @@ bond_ethdev_close(struct rte_eth_dev *dev) + */ + rte_mempool_free(internals->mode6.mempool); + ++ if (internals->kvlist != NULL) ++ rte_kvargs_free(internals->kvlist); ++ + return 0; + } + +@@ -2234,6 +2257,7 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads; + + dev_info->reta_size = internals->reta_size; ++ dev_info->hash_key_size = internals->rss_key_len; + + return 0; + } +@@ -2673,6 +2697,39 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) + return ret; + } + ++static int ++bond_ethdev_promiscuous_update(struct rte_eth_dev *dev) ++{ ++ struct bond_dev_private *internals = dev->data->dev_private; ++ uint16_t port_id = internals->current_primary_port; ++ ++ switch (internals->mode) { ++ case BONDING_MODE_ROUND_ROBIN: ++ case BONDING_MODE_BALANCE: ++ case BONDING_MODE_BROADCAST: ++ case BONDING_MODE_8023AD: ++ /* As promiscuous mode is propagated to all slaves for these ++ * mode, no need to update for bonding device. ++ */ ++ break; ++ case BONDING_MODE_ACTIVE_BACKUP: ++ case BONDING_MODE_TLB: ++ case BONDING_MODE_ALB: ++ default: ++ /* As promiscuous mode is propagated only to primary slave ++ * for these mode. When active/standby switchover, promiscuous ++ * mode should be set to new primary slave according to bonding ++ * device. ++ */ ++ if (rte_eth_promiscuous_get(internals->port_id) == 1) ++ rte_eth_promiscuous_enable(port_id); ++ else ++ rte_eth_promiscuous_disable(port_id); ++ } ++ ++ return 0; ++} ++ + static int + bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev) + { +@@ -2786,6 +2843,39 @@ bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev) + return ret; + } + ++static int ++bond_ethdev_allmulticast_update(struct rte_eth_dev *dev) ++{ ++ struct bond_dev_private *internals = dev->data->dev_private; ++ uint16_t port_id = internals->current_primary_port; ++ ++ switch (internals->mode) { ++ case BONDING_MODE_ROUND_ROBIN: ++ case BONDING_MODE_BALANCE: ++ case BONDING_MODE_BROADCAST: ++ case BONDING_MODE_8023AD: ++ /* As allmulticast mode is propagated to all slaves for these ++ * mode, no need to update for bonding device. ++ */ ++ break; ++ case BONDING_MODE_ACTIVE_BACKUP: ++ case BONDING_MODE_TLB: ++ case BONDING_MODE_ALB: ++ default: ++ /* As allmulticast mode is propagated only to primary slave ++ * for these mode. 
When active/standby switchover, allmulticast ++ * mode should be set to new primary slave according to bonding ++ * device. ++ */ ++ if (rte_eth_allmulticast_get(internals->port_id) == 1) ++ rte_eth_allmulticast_enable(port_id); ++ else ++ rte_eth_allmulticast_disable(port_id); ++ } ++ ++ return 0; ++} ++ + static void + bond_ethdev_delayed_lsc_propagation(void *arg) + { +@@ -2875,6 +2965,8 @@ bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type, + lsc_flag = 1; + + mac_address_slaves_update(bonded_eth_dev); ++ bond_ethdev_promiscuous_update(bonded_eth_dev); ++ bond_ethdev_allmulticast_update(bonded_eth_dev); + } + + activate_slave(bonded_eth_dev, port_id); +@@ -2904,6 +2996,8 @@ bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type, + else + internals->current_primary_port = internals->primary_port; + mac_address_slaves_update(bonded_eth_dev); ++ bond_ethdev_promiscuous_update(bonded_eth_dev); ++ bond_ethdev_allmulticast_update(bonded_eth_dev); + } + } + +@@ -3023,13 +3117,15 @@ bond_ethdev_rss_hash_update(struct rte_eth_dev *dev, + if (bond_rss_conf.rss_hf != 0) + dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf; + +- if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len < +- sizeof(internals->rss_key)) { +- if (bond_rss_conf.rss_key_len == 0) +- bond_rss_conf.rss_key_len = 40; +- internals->rss_key_len = bond_rss_conf.rss_key_len; ++ if (bond_rss_conf.rss_key) { ++ if (bond_rss_conf.rss_key_len < internals->rss_key_len) ++ return -EINVAL; ++ else if (bond_rss_conf.rss_key_len > internals->rss_key_len) ++ RTE_BOND_LOG(WARNING, "rss_key will be truncated"); ++ + memcpy(internals->rss_key, bond_rss_conf.rss_key, + internals->rss_key_len); ++ bond_rss_conf.rss_key_len = internals->rss_key_len; + } + + for (i = 0; i < internals->slave_count; i++) { +@@ -3283,7 +3379,7 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode) + /* Set mode 4 default configuration */ + bond_mode_8023ad_setup(eth_dev, NULL); + if (bond_ethdev_mode_set(eth_dev, mode)) { +- RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d", ++ RTE_BOND_LOG(ERR, "Failed to set bonded device %u mode to %u", + eth_dev->data->port_id, mode); + goto err; + } +@@ -3325,8 +3421,9 @@ bond_probe(struct rte_vdev_device *dev) + const char *name; + struct bond_dev_private *internals; + struct rte_kvargs *kvlist; +- uint8_t bonding_mode, socket_id/*, agg_mode*/; +- int arg_count, port_id; ++ uint8_t bonding_mode; ++ int arg_count, port_id; ++ int socket_id; + uint8_t agg_mode; + struct rte_eth_dev *eth_dev; + +@@ -3472,6 +3569,7 @@ bond_ethdev_configure(struct rte_eth_dev *dev) + const char *name = dev->device->name; + struct bond_dev_private *internals = dev->data->dev_private; + struct rte_kvargs *kvlist = internals->kvlist; ++ uint64_t offloads; + int arg_count; + uint16_t port_id = dev - rte_eth_devices; + uint8_t agg_mode; +@@ -3487,20 +3585,40 @@ bond_ethdev_configure(struct rte_eth_dev *dev) + + /* + * If RSS is enabled, fill table with default values and +- * set key to the the value specified in port RSS configuration. ++ * set key to the value specified in port RSS configuration. 
+ * Fall back to default RSS key if the key is not specified + */ + if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) { +- if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) { +- internals->rss_key_len = +- dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len; +- memcpy(internals->rss_key, +- dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key, +- internals->rss_key_len); +- } else { ++ struct rte_eth_rss_conf *rss_conf = ++ &dev->data->dev_conf.rx_adv_conf.rss_conf; ++ ++ if (internals->rss_key_len == 0) { + internals->rss_key_len = sizeof(default_rss_key); +- memcpy(internals->rss_key, default_rss_key, ++ } ++ ++ if (rss_conf->rss_key != NULL) { ++ if (internals->rss_key_len > rss_conf->rss_key_len) { ++ RTE_BOND_LOG(ERR, "Invalid rss key length(%u)", ++ rss_conf->rss_key_len); ++ return -EINVAL; ++ } ++ ++ memcpy(internals->rss_key, rss_conf->rss_key, + internals->rss_key_len); ++ } else { ++ if (internals->rss_key_len > sizeof(default_rss_key)) { ++ /* ++ * If the rss_key includes standard_rss_key and ++ * extended_hash_key, the rss key length will be ++ * larger than default rss key length, so it should ++ * re-calculate the hash key. ++ */ ++ for (i = 0; i < internals->rss_key_len; i++) ++ internals->rss_key[i] = (uint8_t)rte_rand(); ++ } else { ++ memcpy(internals->rss_key, default_rss_key, ++ internals->rss_key_len); ++ } + } + + for (i = 0; i < RTE_DIM(internals->reta_conf); i++) { +@@ -3512,6 +3630,16 @@ bond_ethdev_configure(struct rte_eth_dev *dev) + } + } + ++ offloads = dev->data->dev_conf.txmode.offloads; ++ if ((offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE) && ++ (internals->mode == BONDING_MODE_8023AD || ++ internals->mode == BONDING_MODE_BROADCAST)) { ++ RTE_BOND_LOG(WARNING, ++ "bond mode broadcast & 8023AD don't support MBUF_FAST_FREE offload, force disable it."); ++ offloads &= ~DEV_TX_OFFLOAD_MBUF_FAST_FREE; ++ dev->data->dev_conf.txmode.offloads = offloads; ++ } ++ + /* set the max_rx_pktlen */ + internals->max_rx_pktlen = internals->candidate_max_rx_pktlen; + +diff --git a/dpdk/drivers/net/cxgbe/base/adapter.h b/dpdk/drivers/net/cxgbe/base/adapter.h +index 6ff009a5f6..5de41110eb 100644 +--- a/dpdk/drivers/net/cxgbe/base/adapter.h ++++ b/dpdk/drivers/net/cxgbe/base/adapter.h +@@ -297,8 +297,6 @@ struct sge { + u32 fl_starve_thres; /* Free List starvation threshold */ + }; + +-#define T4_OS_NEEDS_MBOX_LOCKING 1 +- + /* + * OS Lock/List primitives for those interfaces in the Common Code which + * need this. 
+diff --git a/dpdk/drivers/net/cxgbe/base/common.h b/dpdk/drivers/net/cxgbe/base/common.h +index 8fe8e2a36b..1a12fbc081 100644 +--- a/dpdk/drivers/net/cxgbe/base/common.h ++++ b/dpdk/drivers/net/cxgbe/base/common.h +@@ -12,10 +12,6 @@ + #include "t4_chip_type.h" + #include "t4fw_interface.h" + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + #define CXGBE_PAGE_SIZE RTE_PGSIZE_4K + + #define T4_MEMORY_WRITE 0 +@@ -201,15 +197,15 @@ struct rss_params { + unsigned int mode; /* RSS mode */ + union { + struct { +- uint synmapen:1; /* SYN Map Enable */ +- uint syn4tupenipv6:1; /* en 4-tuple IPv6 SYNs hash */ +- uint syn2tupenipv6:1; /* en 2-tuple IPv6 SYNs hash */ +- uint syn4tupenipv4:1; /* en 4-tuple IPv4 SYNs hash */ +- uint syn2tupenipv4:1; /* en 2-tuple IPv4 SYNs hash */ +- uint ofdmapen:1; /* Offload Map Enable */ +- uint tnlmapen:1; /* Tunnel Map Enable */ +- uint tnlalllookup:1; /* Tunnel All Lookup */ +- uint hashtoeplitz:1; /* use Toeplitz hash */ ++ unsigned int synmapen:1; /* SYN Map Enable */ ++ unsigned int syn4tupenipv6:1; /* en 4-tuple IPv6 SYNs hash */ ++ unsigned int syn2tupenipv6:1; /* en 2-tuple IPv6 SYNs hash */ ++ unsigned int syn4tupenipv4:1; /* en 4-tuple IPv4 SYNs hash */ ++ unsigned int syn2tupenipv4:1; /* en 2-tuple IPv4 SYNs hash */ ++ unsigned int ofdmapen:1; /* Offload Map Enable */ ++ unsigned int tnlmapen:1; /* Tunnel Map Enable */ ++ unsigned int tnlalllookup:1; /* Tunnel All Lookup */ ++ unsigned int hashtoeplitz:1; /* use Toeplitz hash */ + } basicvirtual; + } u; + }; +diff --git a/dpdk/drivers/net/cxgbe/base/t4_hw.c b/dpdk/drivers/net/cxgbe/base/t4_hw.c +index 9217956b42..aa839cc1d3 100644 +--- a/dpdk/drivers/net/cxgbe/base/t4_hw.c ++++ b/dpdk/drivers/net/cxgbe/base/t4_hw.c +@@ -264,17 +264,6 @@ static void fw_asrt(struct adapter *adap, u32 mbox_addr) + + #define X_CIM_PF_NOACCESS 0xeeeeeeee + +-/* +- * If the Host OS Driver needs locking arround accesses to the mailbox, this +- * can be turned on via the T4_OS_NEEDS_MBOX_LOCKING CPP define ... +- */ +-/* makes single-statement usage a bit cleaner ... */ +-#ifdef T4_OS_NEEDS_MBOX_LOCKING +-#define T4_OS_MBOX_LOCKING(x) x +-#else +-#define T4_OS_MBOX_LOCKING(x) do {} while (0) +-#endif +- + /** + * t4_wr_mbox_meat_timeout - send a command to FW through the given mailbox + * @adap: the adapter +@@ -315,28 +304,17 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, + 1, 1, 3, 5, 10, 10, 20, 50, 100 + }; + +- u32 v; +- u64 res; +- int i, ms; +- unsigned int delay_idx; +- __be64 *temp = (__be64 *)malloc(size * sizeof(char)); +- __be64 *p = temp; + u32 data_reg = PF_REG(mbox, A_CIM_PF_MAILBOX_DATA); + u32 ctl_reg = PF_REG(mbox, A_CIM_PF_MAILBOX_CTRL); +- u32 ctl; +- struct mbox_entry entry; +- u32 pcie_fw = 0; +- +- if (!temp) +- return -ENOMEM; ++ struct mbox_entry *entry; ++ u32 v, ctl, pcie_fw = 0; ++ unsigned int delay_idx; ++ const __be64 *p; ++ int i, ms, ret; ++ u64 res; + +- if ((size & 15) || size > MBOX_LEN) { +- free(temp); ++ if ((size & 15) != 0 || size > MBOX_LEN) + return -EINVAL; +- } +- +- memset(p, 0, size); +- memcpy(p, (const __be64 *)cmd, size); + + /* + * If we have a negative timeout, that implies that we can't sleep. +@@ -346,14 +324,17 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, + timeout = -timeout; + } + +-#ifdef T4_OS_NEEDS_MBOX_LOCKING ++ entry = t4_os_alloc(sizeof(*entry)); ++ if (entry == NULL) ++ return -ENOMEM; ++ + /* + * Queue ourselves onto the mailbox access list. When our entry is at + * the front of the list, we have rights to access the mailbox. 
So we + * wait [for a while] till we're at the front [or bail out with an + * EBUSY] ... + */ +- t4_os_atomic_add_tail(&entry, &adap->mbox_list, &adap->mbox_lock); ++ t4_os_atomic_add_tail(entry, &adap->mbox_list, &adap->mbox_lock); + + delay_idx = 0; + ms = delay[0]; +@@ -368,18 +349,18 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, + */ + pcie_fw = t4_read_reg(adap, A_PCIE_FW); + if (i > 4 * timeout || (pcie_fw & F_PCIE_FW_ERR)) { +- t4_os_atomic_list_del(&entry, &adap->mbox_list, ++ t4_os_atomic_list_del(entry, &adap->mbox_list, + &adap->mbox_lock); + t4_report_fw_error(adap); +- free(temp); +- return (pcie_fw & F_PCIE_FW_ERR) ? -ENXIO : -EBUSY; ++ ret = ((pcie_fw & F_PCIE_FW_ERR) != 0) ? -ENXIO : -EBUSY; ++ goto out_free; + } + + /* + * If we're at the head, break out and start the mailbox + * protocol. + */ +- if (t4_os_list_first_entry(&adap->mbox_list) == &entry) ++ if (t4_os_list_first_entry(&adap->mbox_list) == entry) + break; + + /* +@@ -394,7 +375,6 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, + rte_delay_ms(ms); + } + } +-#endif /* T4_OS_NEEDS_MBOX_LOCKING */ + + /* + * Attempt to gain access to the mailbox. +@@ -411,12 +391,11 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, + * mailbox atomic access list and report the error to our caller. + */ + if (v != X_MBOWNER_PL) { +- T4_OS_MBOX_LOCKING(t4_os_atomic_list_del(&entry, +- &adap->mbox_list, +- &adap->mbox_lock)); ++ t4_os_atomic_list_del(entry, &adap->mbox_list, ++ &adap->mbox_lock); + t4_report_fw_error(adap); +- free(temp); +- return (v == X_MBOWNER_FW ? -EBUSY : -ETIMEDOUT); ++ ret = (v == X_MBOWNER_FW) ? -EBUSY : -ETIMEDOUT; ++ goto out_free; + } + + /* +@@ -442,7 +421,7 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, + /* + * Copy in the new mailbox command and send it on its way ... + */ +- for (i = 0; i < size; i += 8, p++) ++ for (i = 0, p = cmd; i < size; i += 8, p++) + t4_write_reg64(adap, data_reg + i, be64_to_cpu(*p)); + + CXGBE_DEBUG_MBOX(adap, "%s: mbox %u: %016llx %016llx %016llx %016llx " +@@ -513,11 +492,10 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, + get_mbox_rpl(adap, rpl, size / 8, data_reg); + } + t4_write_reg(adap, ctl_reg, V_MBOWNER(X_MBOWNER_NONE)); +- T4_OS_MBOX_LOCKING( +- t4_os_atomic_list_del(&entry, &adap->mbox_list, +- &adap->mbox_lock)); +- free(temp); +- return -G_FW_CMD_RETVAL((int)res); ++ t4_os_atomic_list_del(entry, &adap->mbox_list, ++ &adap->mbox_lock); ++ ret = -G_FW_CMD_RETVAL((int)res); ++ goto out_free; + } + } + +@@ -528,12 +506,13 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, + */ + dev_err(adap, "command %#x in mailbox %d timed out\n", + *(const u8 *)cmd, mbox); +- T4_OS_MBOX_LOCKING(t4_os_atomic_list_del(&entry, +- &adap->mbox_list, +- &adap->mbox_lock)); ++ t4_os_atomic_list_del(entry, &adap->mbox_list, &adap->mbox_lock); + t4_report_fw_error(adap); +- free(temp); +- return (pcie_fw & F_PCIE_FW_ERR) ? -ENXIO : -ETIMEDOUT; ++ ret = ((pcie_fw & F_PCIE_FW_ERR) != 0) ? 
-ENXIO : -ETIMEDOUT; ++ ++out_free: ++ t4_os_free(entry); ++ return ret; + } + + int t4_wr_mbox_meat(struct adapter *adap, int mbox, const void *cmd, int size, +diff --git a/dpdk/drivers/net/cxgbe/base/t4vf_hw.c b/dpdk/drivers/net/cxgbe/base/t4vf_hw.c +index 649bacfb25..7e323d9b66 100644 +--- a/dpdk/drivers/net/cxgbe/base/t4vf_hw.c ++++ b/dpdk/drivers/net/cxgbe/base/t4vf_hw.c +@@ -83,7 +83,7 @@ int t4vf_wr_mbox_core(struct adapter *adapter, + + u32 mbox_ctl = T4VF_CIM_BASE_ADDR + A_CIM_VF_EXT_MAILBOX_CTRL; + __be64 cmd_rpl[MBOX_LEN / 8]; +- struct mbox_entry entry; ++ struct mbox_entry *entry; + unsigned int delay_idx; + u32 v, mbox_data; + const __be64 *p; +@@ -106,13 +106,17 @@ int t4vf_wr_mbox_core(struct adapter *adapter, + size > NUM_CIM_VF_MAILBOX_DATA_INSTANCES * 4) + return -EINVAL; + ++ entry = t4_os_alloc(sizeof(*entry)); ++ if (entry == NULL) ++ return -ENOMEM; ++ + /* + * Queue ourselves onto the mailbox access list. When our entry is at + * the front of the list, we have rights to access the mailbox. So we + * wait [for a while] till we're at the front [or bail out with an + * EBUSY] ... + */ +- t4_os_atomic_add_tail(&entry, &adapter->mbox_list, &adapter->mbox_lock); ++ t4_os_atomic_add_tail(entry, &adapter->mbox_list, &adapter->mbox_lock); + + delay_idx = 0; + ms = delay[0]; +@@ -125,17 +129,17 @@ int t4vf_wr_mbox_core(struct adapter *adapter, + * contend on access to the mailbox ... + */ + if (i > (2 * FW_CMD_MAX_TIMEOUT)) { +- t4_os_atomic_list_del(&entry, &adapter->mbox_list, ++ t4_os_atomic_list_del(entry, &adapter->mbox_list, + &adapter->mbox_lock); + ret = -EBUSY; +- return ret; ++ goto out_free; + } + + /* + * If we're at the head, break out and start the mailbox + * protocol. + */ +- if (t4_os_list_first_entry(&adapter->mbox_list) == &entry) ++ if (t4_os_list_first_entry(&adapter->mbox_list) == entry) + break; + + /* +@@ -160,10 +164,10 @@ int t4vf_wr_mbox_core(struct adapter *adapter, + v = G_MBOWNER(t4_read_reg(adapter, mbox_ctl)); + + if (v != X_MBOWNER_PL) { +- t4_os_atomic_list_del(&entry, &adapter->mbox_list, ++ t4_os_atomic_list_del(entry, &adapter->mbox_list, + &adapter->mbox_lock); + ret = (v == X_MBOWNER_FW) ? 
-EBUSY : -ETIMEDOUT; +- return ret; ++ goto out_free; + } + + /* +@@ -224,7 +228,7 @@ int t4vf_wr_mbox_core(struct adapter *adapter, + get_mbox_rpl(adapter, cmd_rpl, size / 8, mbox_data); + t4_write_reg(adapter, mbox_ctl, + V_MBOWNER(X_MBOWNER_NONE)); +- t4_os_atomic_list_del(&entry, &adapter->mbox_list, ++ t4_os_atomic_list_del(entry, &adapter->mbox_list, + &adapter->mbox_lock); + + /* return value in high-order host-endian word */ +@@ -236,7 +240,8 @@ int t4vf_wr_mbox_core(struct adapter *adapter, + & F_FW_CMD_REQUEST) == 0); + memcpy(rpl, cmd_rpl, size); + } +- return -((int)G_FW_CMD_RETVAL(v)); ++ ret = -((int)G_FW_CMD_RETVAL(v)); ++ goto out_free; + } + } + +@@ -246,8 +251,11 @@ int t4vf_wr_mbox_core(struct adapter *adapter, + dev_err(adapter, "command %#x timed out\n", + *(const u8 *)cmd); + dev_err(adapter, " Control = %#x\n", t4_read_reg(adapter, mbox_ctl)); +- t4_os_atomic_list_del(&entry, &adapter->mbox_list, &adapter->mbox_lock); ++ t4_os_atomic_list_del(entry, &adapter->mbox_list, &adapter->mbox_lock); + ret = -ETIMEDOUT; ++ ++out_free: ++ t4_os_free(entry); + return ret; + } + +diff --git a/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c b/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c +index 480d6f58a8..378f285cc9 100644 +--- a/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c ++++ b/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c +@@ -665,8 +665,7 @@ int cxgbe_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, + } + + rxq->rspq.size = temp_nb_desc; +- if ((&rxq->fl) != NULL) +- rxq->fl.size = temp_nb_desc; ++ rxq->fl.size = temp_nb_desc; + + /* Set to jumbo mode if necessary */ + if (pkt_len > CXGBE_ETH_MAX_LEN) +diff --git a/dpdk/drivers/net/cxgbe/cxgbe_flow.c b/dpdk/drivers/net/cxgbe/cxgbe_flow.c +index 520a5a5c9a..aef657dc74 100644 +--- a/dpdk/drivers/net/cxgbe/cxgbe_flow.c ++++ b/dpdk/drivers/net/cxgbe/cxgbe_flow.c +@@ -1378,7 +1378,7 @@ cxgbe_flow_validate(struct rte_eth_dev *dev, + } + + /* +- * @ret : > 0 filter destroyed succsesfully ++ * @ret : > 0 filter destroyed successfully + * < 0 error destroying filter + * == 1 filter not active / not found + */ +diff --git a/dpdk/drivers/net/cxgbe/cxgbevf_main.c b/dpdk/drivers/net/cxgbe/cxgbevf_main.c +index 9ee060504f..7e6d3c4049 100644 +--- a/dpdk/drivers/net/cxgbe/cxgbevf_main.c ++++ b/dpdk/drivers/net/cxgbe/cxgbevf_main.c +@@ -44,7 +44,7 @@ static void size_nports_qsets(struct adapter *adapter) + */ + pmask_nports = hweight32(adapter->params.vfres.pmask); + if (pmask_nports < adapter->params.nports) { +- dev_warn(adapter->pdev_dev, "only using %d of %d provissioned" ++ dev_warn(adapter->pdev_dev, "only using %d of %d provisioned" + " virtual interfaces; limited by Port Access Rights" + " mask %#x\n", pmask_nports, adapter->params.nports, + adapter->params.vfres.pmask); +diff --git a/dpdk/drivers/net/cxgbe/sge.c b/dpdk/drivers/net/cxgbe/sge.c +index 8c7dbe3a1d..0998864269 100644 +--- a/dpdk/drivers/net/cxgbe/sge.c ++++ b/dpdk/drivers/net/cxgbe/sge.c +@@ -212,7 +212,7 @@ static inline unsigned int fl_cap(const struct sge_fl *fl) + * @fl: the Free List + * + * Tests specified Free List to see whether the number of buffers +- * available to the hardware has falled below our "starvation" ++ * available to the hardware has fallen below our "starvation" + * threshold. + */ + static inline bool fl_starving(const struct adapter *adapter, +@@ -682,7 +682,7 @@ static void write_sgl(struct rte_mbuf *mbuf, struct sge_txq *q, + * @q: the Tx queue + * @n: number of new descriptors to give to HW + * +- * Ring the doorbel for a Tx queue. ++ * Ring the doorbell for a Tx queue. 
+ */ + static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q) + { +@@ -793,9 +793,9 @@ static inline void txq_advance(struct sge_txq *q, unsigned int n) + + #define MAX_COALESCE_LEN 64000 + +-static inline int wraps_around(struct sge_txq *q, int ndesc) ++static inline bool wraps_around(struct sge_txq *q, int ndesc) + { +- return (q->pidx + ndesc) > q->size ? 1 : 0; ++ return (q->pidx + ndesc) > q->size ? true : false; + } + + static void tx_timer_cb(void *data) +@@ -846,7 +846,6 @@ static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap, + + /* fill the pkts WR header */ + wr = (void *)&q->desc[q->pidx]; +- wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR)); + vmwr = (void *)&q->desc[q->pidx]; + + wr_mid = V_FW_WR_LEN16(DIV_ROUND_UP(q->coalesce.flits, 2)); +@@ -856,8 +855,11 @@ static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap, + wr->npkt = q->coalesce.idx; + wr->r3 = 0; + if (is_pf4(adap)) { +- wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR)); + wr->type = q->coalesce.type; ++ if (likely(wr->type != 0)) ++ wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR)); ++ else ++ wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); + } else { + wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS_VM_WR)); + vmwr->r4 = 0; +@@ -881,7 +883,7 @@ static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap, + } + + /** +- * should_tx_packet_coalesce - decides wether to coalesce an mbuf or not ++ * should_tx_packet_coalesce - decides whether to coalesce an mbuf or not + * @txq: tx queue where the mbuf is sent + * @mbuf: mbuf to be sent + * @nflits: return value for number of flits needed +@@ -936,13 +938,16 @@ static inline int should_tx_packet_coalesce(struct sge_eth_txq *txq, + ndesc = DIV_ROUND_UP(q->coalesce.flits + flits, 8); + credits = txq_avail(q) - ndesc; + ++ if (unlikely(wraps_around(q, ndesc))) ++ return 0; ++ + /* If we are wrapping or this is last mbuf then, send the + * already coalesced mbufs and let the non-coalesce pass + * handle the mbuf. 
+ */ +- if (unlikely(credits < 0 || wraps_around(q, ndesc))) { ++ if (unlikely(credits < 0)) { + ship_tx_pkt_coalesce_wr(adap, txq); +- return 0; ++ return -EBUSY; + } + + /* If the max coalesce len or the max WR len is reached +@@ -966,8 +971,12 @@ static inline int should_tx_packet_coalesce(struct sge_eth_txq *txq, + ndesc = flits_to_desc(q->coalesce.flits + flits); + credits = txq_avail(q) - ndesc; + +- if (unlikely(credits < 0 || wraps_around(q, ndesc))) ++ if (unlikely(wraps_around(q, ndesc))) + return 0; ++ ++ if (unlikely(credits < 0)) ++ return -EBUSY; ++ + q->coalesce.flits += wr_size / sizeof(__be64); + q->coalesce.type = type; + q->coalesce.ptr = (unsigned char *)&q->desc[q->pidx] + +@@ -1110,7 +1119,7 @@ int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf, + unsigned int flits, ndesc, cflits; + int l3hdr_len, l4hdr_len, eth_xtra_len; + int len, last_desc; +- int credits; ++ int should_coal, credits; + u32 wr_mid; + u64 cntrl, *end; + bool v6; +@@ -1141,9 +1150,9 @@ int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf, + /* align the end of coalesce WR to a 512 byte boundary */ + txq->q.coalesce.max = (8 - (txq->q.pidx & 7)) * 8; + +- if (!((m->ol_flags & PKT_TX_TCP_SEG) || +- m->pkt_len > RTE_ETHER_MAX_LEN)) { +- if (should_tx_packet_coalesce(txq, mbuf, &cflits, adap)) { ++ if ((m->ol_flags & PKT_TX_TCP_SEG) == 0) { ++ should_coal = should_tx_packet_coalesce(txq, mbuf, &cflits, adap); ++ if (should_coal > 0) { + if (unlikely(map_mbuf(mbuf, addr) < 0)) { + dev_warn(adap, "%s: mapping err for coalesce\n", + __func__); +@@ -1152,8 +1161,8 @@ int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf, + } + return tx_do_packet_coalesce(txq, mbuf, cflits, adap, + pi, addr, nb_pkts); +- } else { +- return -EBUSY; ++ } else if (should_coal < 0) { ++ return should_coal; + } + } + +@@ -1200,8 +1209,7 @@ int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf, + end = (u64 *)vmwr + flits; + } + +- len = 0; +- len += sizeof(*cpl); ++ len = sizeof(*cpl); + + /* Coalescing skipped and we send through normal path */ + if (!(m->ol_flags & PKT_TX_TCP_SEG)) { +@@ -1849,7 +1857,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, + * for its status page) along with the associated software + * descriptor ring. The free list size needs to be a multiple + * of the Egress Queue Unit and at least 2 Egress Units larger +- * than the SGE's Egress Congrestion Threshold ++ * than the SGE's Egress Congestion Threshold + * (fl_starve_thres - 1). 
+ */ + if (fl->size < s->fl_starve_thres - 1 + 2 * 8) +@@ -1913,7 +1921,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, + iq->stat = (void *)&iq->desc[iq->size * 8]; + iq->eth_dev = eth_dev; + iq->handler = hnd; +- iq->port_id = pi->pidx; ++ iq->port_id = eth_dev->data->port_id; + iq->mb_pool = mp; + + /* set offset to -1 to distinguish ingress queues without FL */ +diff --git a/dpdk/drivers/net/dpaa/dpaa_ethdev.c b/dpdk/drivers/net/dpaa/dpaa_ethdev.c +index 0c87c136d7..a792c55b85 100644 +--- a/dpdk/drivers/net/dpaa/dpaa_ethdev.c ++++ b/dpdk/drivers/net/dpaa/dpaa_ethdev.c +@@ -49,6 +49,9 @@ + #include + #include + ++#define CHECK_INTERVAL 100 /* 100ms */ ++#define MAX_REPEAT_TIME 90 /* 9s (90 * 100ms) in total */ ++ + /* Supported Rx offloads */ + static uint64_t dev_rx_offloads_sup = + DEV_RX_OFFLOAD_JUMBO_FRAME | +@@ -535,9 +538,11 @@ dpaa_fw_version_get(struct rte_eth_dev *dev __rte_unused, + + ret = snprintf(fw_version, fw_size, "SVR:%x-fman-v%x", + svr_ver, fman_ip_rev); +- ret += 1; /* add the size of '\0' */ ++ if (ret < 0) ++ return -EINVAL; + +- if (fw_size < (uint32_t)ret) ++ ret += 1; /* add the size of '\0' */ ++ if (fw_size < (size_t)ret) + return ret; + else + return 0; +@@ -669,23 +674,30 @@ dpaa_dev_tx_burst_mode_get(struct rte_eth_dev *dev, + } + + static int dpaa_eth_link_update(struct rte_eth_dev *dev, +- int wait_to_complete __rte_unused) ++ int wait_to_complete) + { + struct dpaa_if *dpaa_intf = dev->data->dev_private; + struct rte_eth_link *link = &dev->data->dev_link; + struct fman_if *fif = dev->process_private; + struct __fman_if *__fif = container_of(fif, struct __fman_if, __if); + int ret, ioctl_version; ++ uint8_t count; + + PMD_INIT_FUNC_TRACE(); + + ioctl_version = dpaa_get_ioctl_version_number(); + +- + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) { +- ret = dpaa_get_link_status(__fif->node_name, link); +- if (ret) +- return ret; ++ for (count = 0; count <= MAX_REPEAT_TIME; count++) { ++ ret = dpaa_get_link_status(__fif->node_name, link); ++ if (ret) ++ return ret; ++ if (link->link_status == ETH_LINK_DOWN && ++ wait_to_complete) ++ rte_delay_ms(CHECK_INTERVAL); ++ else ++ break; ++ } + } else { + link->link_status = dpaa_intf->valid; + } +@@ -1036,7 +1048,7 @@ int dpaa_eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + QM_FQCTRL_CTXASTASHING | + QM_FQCTRL_PREFERINCACHE; + opts.fqd.context_a.stashing.exclusive = 0; +- /* In muticore scenario stashing becomes a bottleneck on LS1046. ++ /* In multicore scenario stashing becomes a bottleneck on LS1046. 
+ * So do not enable stashing in this case + */ + if (dpaa_svr_family != SVR_LS1046A_FAMILY) +@@ -1195,23 +1207,17 @@ int + dpaa_eth_eventq_detach(const struct rte_eth_dev *dev, + int eth_rx_queue_id) + { +- struct qm_mcc_initfq opts; ++ struct qm_mcc_initfq opts = {0}; + int ret; + u32 flags = 0; + struct dpaa_if *dpaa_intf = dev->data->dev_private; + struct qman_fq *rxq = &dpaa_intf->rx_queues[eth_rx_queue_id]; + +- dpaa_poll_queue_default_config(&opts); +- +- if (dpaa_intf->cgr_rx) { +- opts.we_mask |= QM_INITFQ_WE_CGID; +- opts.fqd.cgid = dpaa_intf->cgr_rx[eth_rx_queue_id].cgrid; +- opts.fqd.fq_ctrl |= QM_FQCTRL_CGE; +- } +- ++ qman_retire_fq(rxq, NULL); ++ qman_oos_fq(rxq); + ret = qman_init_fq(rxq, flags, &opts); + if (ret) { +- DPAA_PMD_ERR("init rx fqid %d failed with ret: %d", ++ DPAA_PMD_ERR("detach rx fqid %d failed with ret: %d", + rxq->fqid, ret); + } + +@@ -1867,7 +1873,7 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev) + + dpaa_intf->name = dpaa_device->name; + +- /* save fman_if & cfg in the interface struture */ ++ /* save fman_if & cfg in the interface structure */ + eth_dev->process_private = fman_intf; + dpaa_intf->ifid = dev_id; + dpaa_intf->cfg = cfg; +@@ -2176,7 +2182,7 @@ rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv, + if (dpaa_svr_family == SVR_LS1043A_FAMILY) + dpaa_push_mode_max_queue = 0; + +- /* if push mode queues to be enabled. Currenly we are allowing ++ /* if push mode queues to be enabled. Currently we are allowing + * only one queue per thread. + */ + if (getenv("DPAA_PUSH_QUEUES_NUMBER")) { +@@ -2219,8 +2225,6 @@ rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv, + if (dpaa_drv->drv_flags & RTE_DPAA_DRV_INTR_LSC) + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; + +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; +- + /* Invoke PMD device initialization function */ + diag = dpaa_dev_init(eth_dev); + if (diag == 0) { +diff --git a/dpdk/drivers/net/dpaa/dpaa_flow.c b/dpdk/drivers/net/dpaa/dpaa_flow.c +index a0087df670..c5b5ec8695 100644 +--- a/dpdk/drivers/net/dpaa/dpaa_flow.c ++++ b/dpdk/drivers/net/dpaa/dpaa_flow.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright 2017-2019 NXP ++ * Copyright 2017-2019,2021 NXP + */ + + /* System headers */ +@@ -999,6 +999,9 @@ static int dpaa_port_vsp_configure(struct dpaa_if *dpaa_intf, + buf_prefix_cont.pass_time_stamp = true; + buf_prefix_cont.pass_hash_result = false; + buf_prefix_cont.pass_all_other_pcdinfo = false; ++ buf_prefix_cont.manip_ext_space = ++ RTE_PKTMBUF_HEADROOM - DPAA_MBUF_HW_ANNOTATION; ++ + ret = fm_vsp_config_buffer_prefix_content(dpaa_intf->vsp_handle[vsp_id], + &buf_prefix_cont); + if (ret != E_OK) { +diff --git a/dpdk/drivers/net/dpaa/dpaa_rxtx.c b/dpdk/drivers/net/dpaa/dpaa_rxtx.c +index e2459d9b99..bf57eddca3 100644 +--- a/dpdk/drivers/net/dpaa/dpaa_rxtx.c ++++ b/dpdk/drivers/net/dpaa/dpaa_rxtx.c +@@ -600,8 +600,8 @@ void dpaa_rx_cb_prepare(struct qm_dqrr_entry *dq, void **bufs) + void *ptr = rte_dpaa_mem_ptov(qm_fd_addr(&dq->fd)); + + /* In case of LS1046, annotation stashing is disabled due to L2 cache +- * being bottleneck in case of multicore scanario for this platform. +- * So we prefetch the annoation beforehand, so that it is available ++ * being bottleneck in case of multicore scenario for this platform. ++ * So we prefetch the annotation beforehand, so that it is available + * in cache when accessed. 
+ */ + rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF)); +diff --git a/dpdk/drivers/net/dpaa/fmlib/fm_ext.h b/dpdk/drivers/net/dpaa/fmlib/fm_ext.h +index 27c9fb471e..8e7153bdaf 100644 +--- a/dpdk/drivers/net/dpaa/fmlib/fm_ext.h ++++ b/dpdk/drivers/net/dpaa/fmlib/fm_ext.h +@@ -176,7 +176,7 @@ typedef struct t_fm_prs_result { + #define FM_FD_ERR_PRS_HDR_ERR 0x00000020 + /**< Header error was identified during parsing */ + #define FM_FD_ERR_BLOCK_LIMIT_EXCEEDED 0x00000008 +- /**< Frame parsed beyind 256 first bytes */ ++ /**< Frame parsed beyond 256 first bytes */ + + #define FM_FD_TX_STATUS_ERR_MASK (FM_FD_ERR_UNSUPPORTED_FORMAT | \ + FM_FD_ERR_LENGTH | \ +diff --git a/dpdk/drivers/net/dpaa/fmlib/fm_pcd_ext.h b/dpdk/drivers/net/dpaa/fmlib/fm_pcd_ext.h +index 8be3885fbc..3802b42916 100644 +--- a/dpdk/drivers/net/dpaa/fmlib/fm_pcd_ext.h ++++ b/dpdk/drivers/net/dpaa/fmlib/fm_pcd_ext.h +@@ -276,7 +276,7 @@ typedef struct ioc_fm_pcd_counters_params_t { + } ioc_fm_pcd_counters_params_t; + + /* +- * @Description structure for FM exception definitios ++ * @Description structure for FM exception definitions + */ + typedef struct ioc_fm_pcd_exception_params_t { + ioc_fm_pcd_exceptions exception; /**< The requested exception */ +@@ -883,7 +883,7 @@ typedef enum ioc_fm_pcd_manip_hdr_rmv_specific_l2 { + e_IOC_FM_PCD_MANIP_HDR_RMV_ETHERNET, /**< Ethernet/802.3 MAC */ + e_IOC_FM_PCD_MANIP_HDR_RMV_STACKED_QTAGS, /**< stacked QTags */ + e_IOC_FM_PCD_MANIP_HDR_RMV_ETHERNET_AND_MPLS, +- /**< MPLS and Ethernet/802.3 MAC header unitl the header ++ /**< MPLS and Ethernet/802.3 MAC header until the header + * which follows the MPLS header + */ + e_IOC_FM_PCD_MANIP_HDR_RMV_MPLS +@@ -3293,7 +3293,7 @@ typedef struct ioc_fm_pcd_cc_tbl_get_stats_t { + /* + * @Function fm_pcd_net_env_characteristics_delete + * +- * @Description Deletes a set of Network Environment Charecteristics. ++ * @Description Deletes a set of Network Environment Characteristics. + * + * @Param[in] ioc_fm_obj_t The id of a Network Environment object. + * +@@ -3493,7 +3493,7 @@ typedef struct ioc_fm_pcd_cc_tbl_get_stats_t { + * @Return 0 on success; Error code otherwise. + * + * @Cautions Allowed only following fm_pcd_match_table_set() not only of +- * the relevnt node but also the node that points to this node. ++ * the relevant node but also the node that points to this node. 
+ */ + #define FM_PCD_IOC_MATCH_TABLE_MODIFY_KEY_AND_NEXT_ENGINE \ + _IOW(FM_IOC_TYPE_BASE, FM_PCD_IOC_NUM(35), \ +diff --git a/dpdk/drivers/net/dpaa/fmlib/fm_port_ext.h b/dpdk/drivers/net/dpaa/fmlib/fm_port_ext.h +index 6f5479fbe1..bb2e00222e 100644 +--- a/dpdk/drivers/net/dpaa/fmlib/fm_port_ext.h ++++ b/dpdk/drivers/net/dpaa/fmlib/fm_port_ext.h +@@ -498,7 +498,7 @@ typedef struct ioc_fm_port_pcd_prs_params_t { + /**< Number of bytes from beginning of packet to start parsing + */ + ioc_net_header_type first_prs_hdr; +- /**< The type of the first header axpected at 'parsing_offset' ++ /**< The type of the first header expected at 'parsing_offset' + */ + bool include_in_prs_statistics; + /**< TRUE to include this port in the parser statistics */ +@@ -524,7 +524,7 @@ typedef struct ioc_fm_port_pcd_prs_params_t { + } ioc_fm_port_pcd_prs_params_t; + + /* +- * @Description A structure for defining coarse alassification parameters ++ * @Description A structure for defining coarse classification parameters + * (Must match t_fm_portPcdCcParams defined in fm_port_ext.h) + */ + typedef struct ioc_fm_port_pcd_cc_params_t { +@@ -602,7 +602,7 @@ typedef struct ioc_fm_pcd_prs_start_t { + /**< Number of bytes from beginning of packet to start parsing + */ + ioc_net_header_type first_prs_hdr; +- /**< The type of the first header axpected at 'parsing_offset' ++ /**< The type of the first header expected at 'parsing_offset' + */ + } ioc_fm_pcd_prs_start_t; + +@@ -1356,7 +1356,7 @@ typedef uint32_t fm_port_frame_err_select_t; + #define FM_PORT_FRM_ERR_PRS_HDR_ERR FM_FD_ERR_PRS_HDR_ERR + /**< Header error was identified during parsing */ + #define FM_PORT_FRM_ERR_BLOCK_LIMIT_EXCEEDED FM_FD_ERR_BLOCK_LIMIT_EXCEEDED +- /**< Frame parsed beyind 256 first bytes */ ++ /**< Frame parsed beyond 256 first bytes */ + #define FM_PORT_FRM_ERR_PROCESS_TIMEOUT 0x00000001 + /**< FPM Frame Processing Timeout Exceeded */ + /* @} */ +@@ -1390,7 +1390,7 @@ typedef void (t_fm_port_exception_callback) (t_handle h_app, + * @Param[in] length length of received data + * @Param[in] status receive status and errors + * @Param[in] position position of buffer in frame +- * @Param[in] h_buf_context A handle of the user acossiated with this buffer ++ * @Param[in] h_buf_context A handle of the user associated with this buffer + * + * @Retval e_RX_STORE_RESPONSE_CONTINUE + * order the driver to continue Rx operation for all ready data. +@@ -1414,7 +1414,7 @@ typedef e_rx_store_response(t_fm_port_im_rx_store_callback) (t_handle h_app, + * @Param[in] p_data A pointer to data received + * @Param[in] status transmit status and errors + * @Param[in] last_buffer is last buffer in frame +- * @Param[in] h_buf_context A handle of the user acossiated with this buffer ++ * @Param[in] h_buf_context A handle of the user associated with this buffer + */ + typedef void (t_fm_port_im_tx_conf_callback) (t_handle h_app, + uint8_t *p_data, +@@ -2585,7 +2585,7 @@ typedef struct t_fm_port_congestion_grps { + bool pfc_prio_enable[FM_NUM_CONG_GRPS][FM_MAX_PFC_PRIO]; + /**< a matrix that represents the map between the CG ids + * defined in 'congestion_grps_to_consider' to the +- * priorties mapping array. ++ * priorities mapping array. 
+ */ + } t_fm_port_congestion_grps; + +diff --git a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c +index 6f38da3cce..2b43cfce4e 100644 +--- a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c ++++ b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c +@@ -31,6 +31,8 @@ + + #define DRIVER_LOOPBACK_MODE "drv_loopback" + #define DRIVER_NO_PREFETCH_MODE "drv_no_prefetch" ++#define CHECK_INTERVAL 100 /* 100ms */ ++#define MAX_REPEAT_TIME 90 /* 9s (90 * 100ms) in total */ + + /* Supported Rx offloads */ + static uint64_t dev_rx_offloads_sup = +@@ -141,7 +143,7 @@ dpaa2_vlan_offload_set(struct rte_eth_dev *dev, int mask) + PMD_INIT_FUNC_TRACE(); + + if (mask & ETH_VLAN_FILTER_MASK) { +- /* VLAN Filter not avaialble */ ++ /* VLAN Filter not available */ + if (!priv->max_vlan_filters) { + DPAA2_PMD_INFO("VLAN filter not available"); + return -ENOTSUP; +@@ -223,9 +225,11 @@ dpaa2_fw_version_get(struct rte_eth_dev *dev, + mc_ver_info.major, + mc_ver_info.minor, + mc_ver_info.revision); ++ if (ret < 0) ++ return -EINVAL; + + ret += 1; /* add the size of '\0' */ +- if (fw_size < (uint32_t)ret) ++ if (fw_size < (size_t)ret) + return ret; + else + return 0; +@@ -886,7 +890,7 @@ dpaa2_dev_tx_queue_setup(struct rte_eth_dev *dev, + cong_notif_cfg.units = DPNI_CONGESTION_UNIT_FRAMES; + cong_notif_cfg.threshold_entry = nb_tx_desc; + /* Notify that the queue is not congested when the data in +- * the queue is below this thershold. ++ * the queue is below this threshold. + */ + cong_notif_cfg.threshold_exit = nb_tx_desc - 24; + cong_notif_cfg.message_ctx = 0; +@@ -1035,7 +1039,7 @@ dpaa2_supported_ptypes_get(struct rte_eth_dev *dev) + * Dpaa2 link Interrupt handler + * + * @param param +- * The address of parameter (struct rte_eth_dev *) regsitered before. ++ * The address of parameter (struct rte_eth_dev *) registered before. 
+ * + * @return + * void +@@ -1209,7 +1213,12 @@ dpaa2_dev_stop(struct rte_eth_dev *dev) + struct fsl_mc_io *dpni = (struct fsl_mc_io *)dev->process_private; + int ret; + struct rte_eth_link link; +- struct rte_intr_handle *intr_handle = dev->intr_handle; ++ struct rte_device *rdev = dev->device; ++ struct rte_intr_handle *intr_handle; ++ struct rte_dpaa2_device *dpaa2_dev; ++ ++ dpaa2_dev = container_of(rdev, struct rte_dpaa2_device, device); ++ intr_handle = &dpaa2_dev->intr_handle; + + PMD_INIT_FUNC_TRACE(); + +@@ -1805,23 +1814,32 @@ dpaa2_dev_stats_reset(struct rte_eth_dev *dev) + /* return 0 means link status changed, -1 means not changed */ + static int + dpaa2_dev_link_update(struct rte_eth_dev *dev, +- int wait_to_complete __rte_unused) ++ int wait_to_complete) + { + int ret; + struct dpaa2_dev_priv *priv = dev->data->dev_private; + struct fsl_mc_io *dpni = (struct fsl_mc_io *)dev->process_private; + struct rte_eth_link link; + struct dpni_link_state state = {0}; ++ uint8_t count; + + if (dpni == NULL) { + DPAA2_PMD_ERR("dpni is NULL"); + return 0; + } + +- ret = dpni_get_link_state(dpni, CMD_PRI_LOW, priv->token, &state); +- if (ret < 0) { +- DPAA2_PMD_DEBUG("error: dpni_get_link_state %d", ret); +- return -1; ++ for (count = 0; count <= MAX_REPEAT_TIME; count++) { ++ ret = dpni_get_link_state(dpni, CMD_PRI_LOW, priv->token, ++ &state); ++ if (ret < 0) { ++ DPAA2_PMD_DEBUG("error: dpni_get_link_state %d", ret); ++ return -1; ++ } ++ if (state.up == ETH_LINK_DOWN && ++ wait_to_complete) ++ rte_delay_ms(CHECK_INTERVAL); ++ else ++ break; + } + + memset(&link, 0, sizeof(struct rte_eth_link)); +@@ -2196,7 +2214,7 @@ int dpaa2_eth_eventq_attach(const struct rte_eth_dev *dev, + ocfg.oa = 1; + /* Late arrival window size disabled */ + ocfg.olws = 0; +- /* ORL resource exhaustaion advance NESN disabled */ ++ /* ORL resource exhaustion advance NESN disabled */ + ocfg.oeane = 0; + /* Loose ordering enabled */ + ocfg.oloe = 1; +@@ -2673,13 +2691,13 @@ dpaa2_dev_init(struct rte_eth_dev *eth_dev) + } + eth_dev->tx_pkt_burst = dpaa2_dev_tx; + +- /*Init fields w.r.t. classficaition*/ ++ /* Init fields w.r.t. 
classification */ + memset(&priv->extract.qos_key_extract, 0, + sizeof(struct dpaa2_key_extract)); + priv->extract.qos_extract_param = (size_t)rte_malloc(NULL, 256, 64); + if (!priv->extract.qos_extract_param) { + DPAA2_PMD_ERR(" Error(%d) in allocation resources for flow " +- " classificaiton ", ret); ++ " classification ", ret); + goto init_err; + } + priv->extract.qos_key_extract.key_info.ipv4_src_offset = +@@ -2697,7 +2715,7 @@ dpaa2_dev_init(struct rte_eth_dev *eth_dev) + priv->extract.tc_extract_param[i] = + (size_t)rte_malloc(NULL, 256, 64); + if (!priv->extract.tc_extract_param[i]) { +- DPAA2_PMD_ERR(" Error(%d) in allocation resources for flow classificaiton", ++ DPAA2_PMD_ERR(" Error(%d) in allocation resources for flow classification", + ret); + goto init_err; + } +diff --git a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.h b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.h +index cacb11bd3e..203d03d272 100644 +--- a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.h ++++ b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.h +@@ -59,6 +59,8 @@ + + /* Disable RX tail drop, default is enable */ + #define DPAA2_RX_TAILDROP_OFF 0x04 ++/* Tx confirmation enabled */ ++#define DPAA2_TX_CONF_ENABLE 0x06 + + #define DPAA2_RSS_OFFLOAD_ALL ( \ + ETH_RSS_L2_PAYLOAD | \ +@@ -107,7 +109,7 @@ extern int dpaa2_timestamp_dynfield_offset; + + #define DPAA2_FLOW_MAX_KEY_SIZE 16 + +-/*Externaly defined*/ ++/* Externally defined */ + extern const struct rte_flow_ops dpaa2_flow_ops; + extern enum rte_filter_type dpaa2_filter_type; + +diff --git a/dpdk/drivers/net/dpaa2/dpaa2_flow.c b/dpdk/drivers/net/dpaa2/dpaa2_flow.c +index 29f1f2e654..379dab0939 100644 +--- a/dpdk/drivers/net/dpaa2/dpaa2_flow.c ++++ b/dpdk/drivers/net/dpaa2/dpaa2_flow.c +@@ -1444,7 +1444,7 @@ dpaa2_configure_flow_generic_ip( + flow, pattern, &local_cfg, + device_configured, group); + if (ret) { +- DPAA2_PMD_ERR("IP discrimation failed!"); ++ DPAA2_PMD_ERR("IP discrimination failed!"); + return -1; + } + +@@ -3285,7 +3285,7 @@ dpaa2_flow_verify_action( + (actions[j].conf); + if (rss_conf->queue_num > priv->dist_queues) { + DPAA2_PMD_ERR( +- "RSS number exceeds the distrbution size"); ++ "RSS number exceeds the distribution size"); + return -ENOTSUP; + } + for (i = 0; i < (int)rss_conf->queue_num; i++) { +@@ -3512,7 +3512,7 @@ dpaa2_generic_flow_set(struct rte_flow *flow, + qos_cfg.keep_entries = true; + qos_cfg.key_cfg_iova = + (size_t)priv->extract.qos_extract_param; +- /* QoS table is effecitive for multiple TCs.*/ ++ /* QoS table is effective for multiple TCs. */ + if (priv->num_rx_tc > 1) { + ret = dpni_set_qos_table(dpni, CMD_PRI_LOW, + priv->token, &qos_cfg); +@@ -3571,7 +3571,7 @@ dpaa2_generic_flow_set(struct rte_flow *flow, + 0, 0); + if (ret < 0) { + DPAA2_PMD_ERR( +- "Error in addnig entry to QoS table(%d)", ret); ++ "Error in adding entry to QoS table(%d)", ret); + return ret; + } + } +diff --git a/dpdk/drivers/net/dpaa2/dpaa2_mux.c b/dpdk/drivers/net/dpaa2/dpaa2_mux.c +index f8366e839e..5fb14e268d 100644 +--- a/dpdk/drivers/net/dpaa2/dpaa2_mux.c ++++ b/dpdk/drivers/net/dpaa2/dpaa2_mux.c +@@ -91,7 +91,7 @@ rte_pmd_dpaa2_mux_flow_create(uint32_t dpdmux_id, + mask_iova = (void *)((size_t)key_iova + DIST_PARAM_IOVA_SIZE); + + /* Currently taking only IP protocol as an extract type. +- * This can be exended to other fields using pattern->type. ++ * This can be extended to other fields using pattern->type. 
+ */ + memset(&kg_cfg, 0, sizeof(struct dpkg_profile_cfg)); + +diff --git a/dpdk/drivers/net/dpaa2/dpaa2_ptp.c b/dpdk/drivers/net/dpaa2/dpaa2_ptp.c +index 899dd5d442..26a037acd4 100644 +--- a/dpdk/drivers/net/dpaa2/dpaa2_ptp.c ++++ b/dpdk/drivers/net/dpaa2/dpaa2_ptp.c +@@ -111,10 +111,12 @@ int dpaa2_timesync_read_tx_timestamp(struct rte_eth_dev *dev, + { + struct dpaa2_dev_priv *priv = dev->data->dev_private; + +- if (priv->next_tx_conf_queue) +- dpaa2_dev_tx_conf(priv->next_tx_conf_queue); +- else ++ if (priv->next_tx_conf_queue) { ++ while (!priv->tx_timestamp) ++ dpaa2_dev_tx_conf(priv->next_tx_conf_queue); ++ } else { + return -1; ++ } + *timestamp = rte_ns_to_timespec(priv->tx_timestamp); + + return 0; +diff --git a/dpdk/drivers/net/dpaa2/dpaa2_rxtx.c b/dpdk/drivers/net/dpaa2/dpaa2_rxtx.c +index 9cca6d16c3..23e193f86c 100644 +--- a/dpdk/drivers/net/dpaa2/dpaa2_rxtx.c ++++ b/dpdk/drivers/net/dpaa2/dpaa2_rxtx.c +@@ -139,8 +139,10 @@ dpaa2_dev_rx_parse_slow(struct rte_mbuf *mbuf, + annotation->word3, annotation->word4); + + #if defined(RTE_LIBRTE_IEEE1588) +- if (BIT_ISSET_AT_POS(annotation->word1, DPAA2_ETH_FAS_PTP)) ++ if (BIT_ISSET_AT_POS(annotation->word1, DPAA2_ETH_FAS_PTP)) { + mbuf->ol_flags |= PKT_RX_IEEE1588_PTP; ++ mbuf->ol_flags |= PKT_RX_IEEE1588_TMST; ++ } + #endif + + if (BIT_ISSET_AT_POS(annotation->word3, L2_VLAN_1_PRESENT)) { +@@ -585,7 +587,7 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + rte_prefetch0((void *)(size_t)(dq_storage + 1)); + + /* Prepare next pull descriptor. This will give space for the +- * prefething done on DQRR entries ++ * prefetching done on DQRR entries + */ + q_storage->toggle ^= 1; + dq_storage1 = q_storage->dq_storage[q_storage->toggle]; +@@ -640,7 +642,10 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + else + bufs[num_rx] = eth_fd_to_mbuf(fd, eth_data->port_id); + #if defined(RTE_LIBRTE_IEEE1588) +- priv->rx_timestamp = *dpaa2_timestamp_dynfield(bufs[num_rx]); ++ if (bufs[num_rx]->ol_flags & PKT_RX_IEEE1588_TMST) { ++ priv->rx_timestamp = ++ *dpaa2_timestamp_dynfield(bufs[num_rx]); ++ } + #endif + + if (eth_data->dev_conf.rxmode.offloads & +@@ -767,6 +772,9 @@ dpaa2_dev_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + const struct qbman_fd *fd; + struct qbman_pull_desc pulldesc; + struct rte_eth_dev_data *eth_data = dpaa2_q->eth_data; ++#if defined(RTE_LIBRTE_IEEE1588) ++ struct dpaa2_dev_priv *priv = eth_data->dev_private; ++#endif + + if (unlikely(!DPAA2_PER_LCORE_DPIO)) { + ret = dpaa2_affine_qbman_swp(); +@@ -853,6 +861,13 @@ dpaa2_dev_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + bufs[num_rx] = eth_fd_to_mbuf(fd, + eth_data->port_id); + ++#if defined(RTE_LIBRTE_IEEE1588) ++ if (bufs[num_rx]->ol_flags & PKT_RX_IEEE1588_TMST) { ++ priv->rx_timestamp = ++ *dpaa2_timestamp_dynfield(bufs[num_rx]); ++ } ++#endif ++ + if (eth_data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_STRIP) { + rte_vlan_strip(bufs[num_rx]); +@@ -888,6 +903,8 @@ uint16_t dpaa2_dev_tx_conf(void *queue) + struct rte_eth_dev_data *eth_data = dpaa2_q->eth_data; + struct dpaa2_dev_priv *priv = eth_data->dev_private; + struct dpaa2_annot_hdr *annotation; ++ void *v_addr; ++ struct rte_mbuf *mbuf; + #endif + + if (unlikely(!DPAA2_PER_LCORE_DPIO)) { +@@ -972,10 +989,16 @@ uint16_t dpaa2_dev_tx_conf(void *queue) + num_tx_conf++; + num_pulled++; + #if defined(RTE_LIBRTE_IEEE1588) +- annotation = (struct dpaa2_annot_hdr *)((size_t) +- DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd)) + +- 
DPAA2_FD_PTA_SIZE); +- priv->tx_timestamp = annotation->word2; ++ v_addr = DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd)); ++ mbuf = DPAA2_INLINE_MBUF_FROM_BUF(v_addr, ++ rte_dpaa2_bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size); ++ ++ if (mbuf->ol_flags & PKT_TX_IEEE1588_TMST) { ++ annotation = (struct dpaa2_annot_hdr *)((size_t) ++ DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd)) + ++ DPAA2_FD_PTA_SIZE); ++ priv->tx_timestamp = annotation->word2; ++ } + #endif + } while (pending); + +@@ -1050,8 +1073,11 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + * corresponding to last packet transmitted for reading + * the timestamp + */ +- priv->next_tx_conf_queue = dpaa2_q->tx_conf_queue; +- dpaa2_dev_tx_conf(dpaa2_q->tx_conf_queue); ++ if ((*bufs)->ol_flags & PKT_TX_IEEE1588_TMST) { ++ priv->next_tx_conf_queue = dpaa2_q->tx_conf_queue; ++ dpaa2_dev_tx_conf(dpaa2_q->tx_conf_queue); ++ priv->tx_timestamp = 0; ++ } + #endif + + /*Prepare enqueue descriptor*/ +@@ -1339,7 +1365,7 @@ dpaa2_dev_tx_ordered(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + if (*dpaa2_seqn(*bufs)) { + /* Use only queue 0 for Tx in case of atomic/ + * ordered packets as packets can get unordered +- * when being tranmitted out from the interface ++ * when being transmitted out from the interface + */ + dpaa2_set_enqueue_descriptor(order_sendq, + (*bufs), +@@ -1566,7 +1592,7 @@ dpaa2_dev_loopback_rx(void *queue, + rte_prefetch0((void *)(size_t)(dq_storage + 1)); + + /* Prepare next pull descriptor. This will give space for the +- * prefething done on DQRR entries ++ * prefetching done on DQRR entries + */ + q_storage->toggle ^= 1; + dq_storage1 = q_storage->dq_storage[q_storage->toggle]; +diff --git a/dpdk/drivers/net/dpaa2/dpaa2_sparser.h b/dpdk/drivers/net/dpaa2/dpaa2_sparser.h +index 365b8062a9..ed0897928b 100644 +--- a/dpdk/drivers/net/dpaa2/dpaa2_sparser.h ++++ b/dpdk/drivers/net/dpaa2/dpaa2_sparser.h +@@ -13,10 +13,6 @@ + #ifndef _DPAA2_SPARSER_H + #define _DPAA2_SPARSER_H + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + #define WRIOP_SS_INITIALIZER(priv) \ + do { \ + /* Base offset of parse profile memory in WRIOP */ \ +diff --git a/dpdk/drivers/net/dpaa2/mc/fsl_dpni.h b/dpdk/drivers/net/dpaa2/mc/fsl_dpni.h +index 598911ddd1..e211a3e6d3 100644 +--- a/dpdk/drivers/net/dpaa2/mc/fsl_dpni.h ++++ b/dpdk/drivers/net/dpaa2/mc/fsl_dpni.h +@@ -88,7 +88,7 @@ struct fsl_mc_io; + */ + #define DPNI_OPT_OPR_PER_TC 0x000080 + /** +- * All Tx traffic classes will use a single sender (ignore num_queueus for tx) ++ * All Tx traffic classes will use a single sender (ignore num_queues for tx) + */ + #define DPNI_OPT_SINGLE_SENDER 0x000100 + /** +@@ -579,7 +579,7 @@ int dpni_get_tx_data_offset(struct fsl_mc_io *mc_io, + * @page_3.ceetm_reject_bytes: Cumulative count of the number of bytes in all + * frames whose enqueue was rejected + * @page_3.ceetm_reject_frames: Cumulative count of all frame enqueues rejected +- * @page_4: congestion point drops for seleted TC ++ * @page_4: congestion point drops for selected TC + * @page_4.cgr_reject_frames: number of rejected frames due to congestion point + * @page_4.cgr_reject_bytes: number of rejected bytes due to congestion point + * @page_5: policer statistics per TC +@@ -1166,7 +1166,7 @@ int dpni_get_tx_confirmation_mode(struct fsl_mc_io *mc_io, + * dpkg_prepare_key_cfg() + * @discard_on_miss: Set to '1' to discard frames in case of no match (miss); + * '0' to use the 'default_tc' in such cases +- * @keep_entries: if set to one will not delele existing table entries. 
This ++ * @keep_entries: if set to one will not delete existing table entries. This + * option will work properly only for dpni objects created with + * DPNI_OPT_HAS_KEY_MASKING option. All previous QoS entries must + * be compatible with new key composition rule. +@@ -1519,7 +1519,7 @@ int dpni_load_sw_sequence(struct fsl_mc_io *mc_io, + struct dpni_load_ss_cfg *cfg); + + /** +- * dpni_eanble_sw_sequence() - Enables a software sequence in the parser ++ * dpni_enable_sw_sequence() - Enables a software sequence in the parser + * profile + * corresponding to the ingress or egress of the DPNI. + * @mc_io: Pointer to MC portal's I/O object +diff --git a/dpdk/drivers/net/e1000/base/e1000_i210.c b/dpdk/drivers/net/e1000/base/e1000_i210.c +index 3c349d33ff..52800376e4 100644 +--- a/dpdk/drivers/net/e1000/base/e1000_i210.c ++++ b/dpdk/drivers/net/e1000/base/e1000_i210.c +@@ -310,6 +310,8 @@ STATIC s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, + } + + for (i = 0; i < words; i++) { ++ ret_val = -E1000_ERR_NVM; ++ + eewr = ((offset + i) << E1000_NVM_RW_ADDR_SHIFT) | + (data[i] << E1000_NVM_RW_REG_DATA) | + E1000_NVM_RW_REG_START; +diff --git a/dpdk/drivers/net/e1000/e1000_ethdev.h b/dpdk/drivers/net/e1000/e1000_ethdev.h +index 3b4d9c3ee6..2681ca3100 100644 +--- a/dpdk/drivers/net/e1000/e1000_ethdev.h ++++ b/dpdk/drivers/net/e1000/e1000_ethdev.h +@@ -102,7 +102,7 @@ + * Maximum number of Ring Descriptors. + * + * Since RDLEN/TDLEN should be multiple of 128 bytes, the number of ring +- * desscriptors should meet the following condition: ++ * descriptors should meet the following condition: + * (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0 + */ + #define E1000_MIN_RING_DESC 32 +@@ -251,7 +251,7 @@ struct igb_rte_flow_rss_conf { + }; + + /* +- * Structure to store filters'info. ++ * Structure to store filters' info. 
+ */ + struct e1000_filter_info { + uint8_t ethertype_mask; /* Bit mask for every used ethertype filter */ +diff --git a/dpdk/drivers/net/e1000/e1000_logs.c b/dpdk/drivers/net/e1000/e1000_logs.c +index 231f5c03ef..d9b8a4672f 100644 +--- a/dpdk/drivers/net/e1000/e1000_logs.c ++++ b/dpdk/drivers/net/e1000/e1000_logs.c +@@ -4,53 +4,14 @@ + + #include "e1000_logs.h" + +-/* declared as extern in e1000_logs.h */ +-int e1000_logtype_init; +-int e1000_logtype_driver; +- +-#ifdef RTE_LIBRTE_E1000_DEBUG_RX +-int e1000_logtype_rx; +-#endif +-#ifdef RTE_LIBRTE_E1000_DEBUG_TX +-int e1000_logtype_tx; +-#endif +-#ifdef RTE_LIBRTE_E1000_DEBUG_TX_FREE +-int e1000_logtype_tx_free; +-#endif +- +-/* avoids double registering of logs if EM and IGB drivers are in use */ +-static int e1000_log_initialized; +- +-void +-e1000_igb_init_log(void) +-{ +- if (e1000_log_initialized) +- return; +- +- e1000_logtype_init = rte_log_register("pmd.net.e1000.init"); +- if (e1000_logtype_init >= 0) +- rte_log_set_level(e1000_logtype_init, RTE_LOG_NOTICE); +- e1000_logtype_driver = rte_log_register("pmd.net.e1000.driver"); +- if (e1000_logtype_driver >= 0) +- rte_log_set_level(e1000_logtype_driver, RTE_LOG_NOTICE); +- ++RTE_LOG_REGISTER(e1000_logtype_init, pmd.net.e1000.init, NOTICE) ++RTE_LOG_REGISTER(e1000_logtype_driver, pmd.net.e1000.driver, NOTICE) + #ifdef RTE_LIBRTE_E1000_DEBUG_RX +- e1000_logtype_rx = rte_log_register("pmd.net.e1000.rx"); +- if (e1000_logtype_rx >= 0) +- rte_log_set_level(e1000_logtype_rx, RTE_LOG_DEBUG); ++RTE_LOG_REGISTER(e1000_logtype_rx, pmd.net.e1000.rx, DEBUG) + #endif +- + #ifdef RTE_LIBRTE_E1000_DEBUG_TX +- e1000_logtype_tx = rte_log_register("pmd.net.e1000.tx"); +- if (e1000_logtype_tx >= 0) +- rte_log_set_level(e1000_logtype_tx, RTE_LOG_DEBUG); ++RTE_LOG_REGISTER(e1000_logtype_tx, pmd.net.e1000.tx, DEBUG) + #endif +- + #ifdef RTE_LIBRTE_E1000_DEBUG_TX_FREE +- e1000_logtype_tx_free = rte_log_register("pmd.net.e1000.tx_free"); +- if (e1000_logtype_tx_free >= 0) +- rte_log_set_level(e1000_logtype_tx_free, RTE_LOG_DEBUG); ++RTE_LOG_REGISTER(e1000_logtype_tx_free, pmd.net.e1000.tx_free, DEBUG) + #endif +- +- e1000_log_initialized = 1; +-} +diff --git a/dpdk/drivers/net/e1000/em_ethdev.c b/dpdk/drivers/net/e1000/em_ethdev.c +index 2036c6e917..ef482bdde4 100644 +--- a/dpdk/drivers/net/e1000/em_ethdev.c ++++ b/dpdk/drivers/net/e1000/em_ethdev.c +@@ -265,7 +265,6 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) + } + + rte_eth_copy_pci_info(eth_dev, pci_dev); +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + + hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; + hw->device_id = pci_dev->id.device_id; +@@ -975,8 +974,7 @@ eth_em_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats) + + /* Rx Errors */ + rte_stats->imissed = stats->mpc; +- rte_stats->ierrors = stats->crcerrs + +- stats->rlec + stats->ruc + stats->roc + ++ rte_stats->ierrors = stats->crcerrs + stats->rlec + + stats->rxerrc + stats->algnerrc + stats->cexterr; + + /* Tx Errors */ +@@ -1066,8 +1064,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + + /* + * Starting with 631xESB hw supports 2 TX/RX queues per port. +- * Unfortunatelly, all these nics have just one TX context. +- * So we have few choises for TX: ++ * Unfortunately, all these nics have just one TX context. ++ * So we have few choices for TX: + * - Use just one TX queue. + * - Allow cksum offload only for one TX queue. + * - Don't allow TX cksum offload at all. 
+@@ -1076,7 +1074,7 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + * (Multiple Receive Queues are mutually exclusive with UDP + * fragmentation and are not supported when a legacy receive + * descriptor format is used). +- * Which means separate RX routinies - as legacy nics (82540, 82545) ++ * Which means separate RX routines - as legacy nics (82540, 82545) + * don't support extended RXD. + * To avoid it we support just one RX queue for now (no RSS). + */ +@@ -1564,7 +1562,7 @@ eth_em_interrupt_get_status(struct rte_eth_dev *dev) + } + + /* +- * It executes link_update after knowing an interrupt is prsent. ++ * It executes link_update after knowing an interrupt is present. + * + * @param dev + * Pointer to struct rte_eth_dev. +@@ -1622,7 +1620,7 @@ eth_em_interrupt_action(struct rte_eth_dev *dev, + * @param handle + * Pointer to interrupt handle. + * @param param +- * The address of parameter (struct rte_eth_dev *) regsitered before. ++ * The address of parameter (struct rte_eth_dev *) registered before. + * + * @return + * void +@@ -1805,11 +1803,15 @@ eth_em_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) + if (mtu < RTE_ETHER_MIN_MTU || frame_size > dev_info.max_rx_pktlen) + return -EINVAL; + +- /* refuse mtu that requires the support of scattered packets when this +- * feature has not been enabled before. */ +- if (!dev->data->scattered_rx && +- frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM) ++ /* ++ * If device is started, refuse mtu that requires the support of ++ * scattered packets when this feature has not been enabled before. ++ */ ++ if (dev->data->dev_started && !dev->data->scattered_rx && ++ frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM) { ++ PMD_INIT_LOG(ERR, "Stop port first."); + return -EINVAL; ++ } + + hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + rctl = E1000_READ_REG(hw, E1000_RCTL); +@@ -1846,9 +1848,3 @@ eth_em_set_mc_addr_list(struct rte_eth_dev *dev, + RTE_PMD_REGISTER_PCI(net_e1000_em, rte_em_pmd); + RTE_PMD_REGISTER_PCI_TABLE(net_e1000_em, pci_id_em_map); + RTE_PMD_REGISTER_KMOD_DEP(net_e1000_em, "* igb_uio | uio_pci_generic | vfio-pci"); +- +-/* see e1000_logs.c */ +-RTE_INIT(igb_init_log) +-{ +- e1000_igb_init_log(); +-} +diff --git a/dpdk/drivers/net/e1000/em_rxtx.c b/dpdk/drivers/net/e1000/em_rxtx.c +index 19e3bffd46..b86cc43e2c 100644 +--- a/dpdk/drivers/net/e1000/em_rxtx.c ++++ b/dpdk/drivers/net/e1000/em_rxtx.c +@@ -104,6 +104,7 @@ struct em_rx_queue { + uint8_t hthresh; /**< Host threshold register. */ + uint8_t wthresh; /**< Write-back threshold register. */ + uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */ ++ const struct rte_memzone *mz; + }; + + /** +@@ -141,7 +142,7 @@ union em_vlan_macip { + struct em_ctx_info { + uint64_t flags; /**< ol_flags related to context build. */ + uint32_t cmp_mask; /**< compare mask */ +- union em_vlan_macip hdrlen; /**< L2 and L3 header lenghts */ ++ union em_vlan_macip hdrlen; /**< L2 and L3 header lengths */ + }; + + /** +@@ -173,6 +174,7 @@ struct em_tx_queue { + struct em_ctx_info ctx_cache; + /**< Hardware context history.*/ + uint64_t offloads; /**< offloads of DEV_TX_OFFLOAD_* */ ++ const struct rte_memzone *mz; + }; + + #if 1 +@@ -828,7 +830,7 @@ eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + * register. 
+ * Update the RDT with the value of the last processed RX descriptor + * minus 1, to guarantee that the RDT register is never equal to the +- * RDH register, which creates a "full" ring situtation from the ++ * RDH register, which creates a "full" ring situation from the + * hardware point of view... + */ + nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold); +@@ -1073,7 +1075,7 @@ eth_em_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + * register. + * Update the RDT with the value of the last processed RX descriptor + * minus 1, to guarantee that the RDT register is never equal to the +- * RDH register, which creates a "full" ring situtation from the ++ * RDH register, which creates a "full" ring situation from the + * hardware point of view... + */ + nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold); +@@ -1116,6 +1118,7 @@ em_tx_queue_release(struct em_tx_queue *txq) + if (txq != NULL) { + em_tx_queue_release_mbufs(txq); + rte_free(txq->sw_ring); ++ rte_memzone_free(txq->mz); + rte_free(txq); + } + } +@@ -1286,6 +1289,7 @@ eth_em_tx_queue_setup(struct rte_eth_dev *dev, + RTE_CACHE_LINE_SIZE)) == NULL) + return -ENOMEM; + ++ txq->mz = tz; + /* Allocate software ring */ + if ((txq->sw_ring = rte_zmalloc("txq->sw_ring", + sizeof(txq->sw_ring[0]) * nb_desc, +@@ -1338,6 +1342,7 @@ em_rx_queue_release(struct em_rx_queue *rxq) + if (rxq != NULL) { + em_rx_queue_release_mbufs(rxq); + rte_free(rxq->sw_ring); ++ rte_memzone_free(rxq->mz); + rte_free(rxq); + } + } +@@ -1452,6 +1457,7 @@ eth_em_rx_queue_setup(struct rte_eth_dev *dev, + RTE_CACHE_LINE_SIZE)) == NULL) + return -ENOMEM; + ++ rxq->mz = rz; + /* Allocate software ring. */ + if ((rxq->sw_ring = rte_zmalloc("rxq->sw_ring", + sizeof (rxq->sw_ring[0]) * nb_desc, +@@ -1611,14 +1617,12 @@ em_dev_free_queues(struct rte_eth_dev *dev) + for (i = 0; i < dev->data->nb_rx_queues; i++) { + eth_em_rx_queue_release(dev->data->rx_queues[i]); + dev->data->rx_queues[i] = NULL; +- rte_eth_dma_zone_free(dev, "rx_ring", i); + } + dev->data->nb_rx_queues = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + eth_em_tx_queue_release(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; +- rte_eth_dma_zone_free(dev, "tx_ring", i); + } + dev->data->nb_tx_queues = 0; + } +diff --git a/dpdk/drivers/net/e1000/igb_ethdev.c b/dpdk/drivers/net/e1000/igb_ethdev.c +index 5bcc67d75f..5a4f3fda66 100644 +--- a/dpdk/drivers/net/e1000/igb_ethdev.c ++++ b/dpdk/drivers/net/e1000/igb_ethdev.c +@@ -745,7 +745,6 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev) + } + + rte_eth_copy_pci_info(eth_dev, pci_dev); +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + + hw->hw_addr= (void *)pci_dev->mem_resource[0].addr; + +@@ -940,7 +939,6 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev) + + pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + rte_eth_copy_pci_info(eth_dev, pci_dev); +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + + hw->device_id = pci_dev->id.device_id; + hw->vendor_id = pci_dev->id.vendor_id; +@@ -1160,7 +1158,7 @@ eth_igb_configure(struct rte_eth_dev *dev) + if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) + dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; + +- /* multipe queue mode checking */ ++ /* multiple queue mode checking */ + ret = igb_check_mq_mode(dev); + if (ret != 0) { + PMD_DRV_LOG(ERR, "igb_check_mq_mode fails with %d.", +@@ -1277,7 +1275,7 @@ eth_igb_start(struct rte_eth_dev *dev) + } + } + +- /* confiugre msix for rx interrupt */ ++ /* configure MSI-X for Rx interrupt */ + 
eth_igb_configure_msix_intr(dev); + + /* Configure for OS presence */ +@@ -1827,8 +1825,7 @@ eth_igb_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats) + + /* Rx Errors */ + rte_stats->imissed = stats->mpc; +- rte_stats->ierrors = stats->crcerrs + +- stats->rlec + stats->ruc + stats->roc + ++ rte_stats->ierrors = stats->crcerrs + stats->rlec + + stats->rxerrc + stats->algnerrc + stats->cexterr; + + /* Tx Errors */ +@@ -2162,9 +2159,11 @@ eth_igb_fw_version_get(struct rte_eth_dev *dev, char *fw_version, + } + break; + } ++ if (ret < 0) ++ return -EINVAL; + + ret += 1; /* add the size of '\0' */ +- if (fw_size < (u32)ret) ++ if (fw_size < (size_t)ret) + return ret; + else + return 0; +@@ -2689,8 +2688,7 @@ igb_vlan_hw_extend_disable(struct rte_eth_dev *dev) + /* Update maximum packet length */ + if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) + E1000_WRITE_REG(hw, E1000_RLPML, +- dev->data->dev_conf.rxmode.max_rx_pkt_len + +- VLAN_TAG_SIZE); ++ dev->data->dev_conf.rxmode.max_rx_pkt_len); + } + + static void +@@ -2709,7 +2707,7 @@ igb_vlan_hw_extend_enable(struct rte_eth_dev *dev) + if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) + E1000_WRITE_REG(hw, E1000_RLPML, + dev->data->dev_conf.rxmode.max_rx_pkt_len + +- 2 * VLAN_TAG_SIZE); ++ VLAN_TAG_SIZE); + } + + static int +@@ -2839,7 +2837,7 @@ eth_igb_interrupt_get_status(struct rte_eth_dev *dev) + } + + /* +- * It executes link_update after knowing an interrupt is prsent. ++ * It executes link_update after knowing an interrupt is present. + * + * @param dev + * Pointer to struct rte_eth_dev. +@@ -2909,7 +2907,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev, + * @param handle + * Pointer to interrupt handle. + * @param param +- * The address of parameter (struct rte_eth_dev *) regsitered before. ++ * The address of parameter (struct rte_eth_dev *) registered before. + * + * @return + * void +@@ -3809,7 +3807,7 @@ igb_inject_2uple_filter(struct rte_eth_dev *dev, + * + * @param + * dev: Pointer to struct rte_eth_dev. +- * ntuple_filter: ponter to the filter that will be added. ++ * ntuple_filter: pointer to the filter that will be added. + * + * @return + * - On success, zero. +@@ -3890,7 +3888,7 @@ igb_delete_2tuple_filter(struct rte_eth_dev *dev, + * + * @param + * dev: Pointer to struct rte_eth_dev. +- * ntuple_filter: ponter to the filter that will be removed. ++ * ntuple_filter: pointer to the filter that will be removed. + * + * @return + * - On success, zero. +@@ -4248,7 +4246,7 @@ igb_inject_5tuple_filter_82576(struct rte_eth_dev *dev, + * + * @param + * dev: Pointer to struct rte_eth_dev. +- * ntuple_filter: ponter to the filter that will be added. ++ * ntuple_filter: pointer to the filter that will be added. + * + * @return + * - On success, zero. +@@ -4335,7 +4333,7 @@ igb_delete_5tuple_filter_82576(struct rte_eth_dev *dev, + * + * @param + * dev: Pointer to struct rte_eth_dev. +- * ntuple_filter: ponter to the filter that will be removed. ++ * ntuple_filter: pointer to the filter that will be removed. + * + * @return + * - On success, zero. +@@ -4394,11 +4392,15 @@ eth_igb_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) + frame_size > dev_info.max_rx_pktlen) + return -EINVAL; + +- /* refuse mtu that requires the support of scattered packets when this +- * feature has not been enabled before. 
*/ +- if (!dev->data->scattered_rx && +- frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM) ++ /* ++ * If device is started, refuse mtu that requires the support of ++ * scattered packets when this feature has not been enabled before. ++ */ ++ if (dev->data->dev_started && !dev->data->scattered_rx && ++ frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM) { ++ PMD_INIT_LOG(ERR, "Stop port first."); + return -EINVAL; ++ } + + rctl = E1000_READ_REG(hw, E1000_RCTL); + +@@ -4884,7 +4886,7 @@ igb_timesync_disable(struct rte_eth_dev *dev) + /* Disable L2 filtering of IEEE1588/802.1AS Ethernet frame types. */ + E1000_WRITE_REG(hw, E1000_ETQF(E1000_ETQF_FILTER_1588), 0); + +- /* Stop incrementating the System Time registers. */ ++ /* Stop incrementing the System Time registers. */ + E1000_WRITE_REG(hw, E1000_TIMINCA, 0); + + return 0; +@@ -5132,9 +5134,6 @@ eth_igb_get_module_eeprom(struct rte_eth_dev *dev, + u16 first_word, last_word; + int i = 0; + +- if (info->length == 0) +- return -EINVAL; +- + first_word = info->offset >> 1; + last_word = (info->offset + info->length - 1) >> 1; + +@@ -5434,9 +5433,3 @@ RTE_PMD_REGISTER_KMOD_DEP(net_e1000_igb, "* igb_uio | uio_pci_generic | vfio-pci + RTE_PMD_REGISTER_PCI(net_e1000_igb_vf, rte_igbvf_pmd); + RTE_PMD_REGISTER_PCI_TABLE(net_e1000_igb_vf, pci_id_igbvf_map); + RTE_PMD_REGISTER_KMOD_DEP(net_e1000_igb_vf, "* igb_uio | vfio-pci"); +- +-/* see e1000_logs.c */ +-RTE_INIT(e1000_init_log) +-{ +- e1000_igb_init_log(); +-} +diff --git a/dpdk/drivers/net/e1000/igb_flow.c b/dpdk/drivers/net/e1000/igb_flow.c +index ccb184df95..52cc3c81b3 100644 +--- a/dpdk/drivers/net/e1000/igb_flow.c ++++ b/dpdk/drivers/net/e1000/igb_flow.c +@@ -57,7 +57,7 @@ struct igb_flex_filter_list igb_filter_flex_list; + struct igb_rss_filter_list igb_filter_rss_list; + + /** +- * Please aware there's an asumption for all the parsers. ++ * Please be aware there's an assumption for all the parsers. + * rte_flow_item is using big endian, rte_flow_attr and + * rte_flow_action are using CPU order. + * Because the pattern is used to describe the packets, +@@ -350,7 +350,7 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr, + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, +- item, "Not supported action."); ++ act, "Not supported action."); + return -rte_errno; + } + filter->queue = +@@ -1608,7 +1608,7 @@ igb_flow_create(struct rte_eth_dev *dev, + + /** + * Check if the flow rule is supported by igb. +- * It only checkes the format. Don't guarantee the rule can be programmed into ++ * It only checks the format. Don't guarantee the rule can be programmed into + * the HW. Because there can be no enough room for the rule. 
+ */ + static int +diff --git a/dpdk/drivers/net/e1000/igb_pf.c b/dpdk/drivers/net/e1000/igb_pf.c +index 9d74c08abe..b621a12254 100644 +--- a/dpdk/drivers/net/e1000/igb_pf.c ++++ b/dpdk/drivers/net/e1000/igb_pf.c +@@ -155,7 +155,7 @@ int igb_pf_host_configure(struct rte_eth_dev *eth_dev) + else + E1000_WRITE_REG(hw, E1000_DTXSWC, E1000_DTXSWC_VMDQ_LOOPBACK_EN); + +- /* clear VMDq map to perment rar 0 */ ++ /* clear VMDq map to permanent rar 0 */ + rah = E1000_READ_REG(hw, E1000_RAH(0)); + rah &= ~ (0xFF << E1000_RAH_POOLSEL_SHIFT); + E1000_WRITE_REG(hw, E1000_RAH(0), rah); +diff --git a/dpdk/drivers/net/e1000/igb_rxtx.c b/dpdk/drivers/net/e1000/igb_rxtx.c +index dd520cd82c..2c8b1a0c8c 100644 +--- a/dpdk/drivers/net/e1000/igb_rxtx.c ++++ b/dpdk/drivers/net/e1000/igb_rxtx.c +@@ -112,6 +112,7 @@ struct igb_rx_queue { + uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */ + uint32_t flags; /**< RX flags. */ + uint64_t offloads; /**< offloads of DEV_RX_OFFLOAD_* */ ++ const struct rte_memzone *mz; + }; + + /** +@@ -150,7 +151,7 @@ union igb_tx_offload { + (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK) + + /** +- * Strucutre to check if new context need be built ++ * Structure to check if new context need be built + */ + struct igb_advctx_info { + uint64_t flags; /**< ol_flags related to context build. */ +@@ -186,6 +187,7 @@ struct igb_tx_queue { + struct igb_advctx_info ctx_cache[IGB_CTX_NUM]; + /**< Hardware context history.*/ + uint64_t offloads; /**< offloads of DEV_TX_OFFLOAD_* */ ++ const struct rte_memzone *mz; + }; + + #if 1 +@@ -966,7 +968,7 @@ eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + * register. + * Update the RDT with the value of the last processed RX descriptor + * minus 1, to guarantee that the RDT register is never equal to the +- * RDH register, which creates a "full" ring situtation from the ++ * RDH register, which creates a "full" ring situation from the + * hardware point of view... + */ + nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold); +@@ -1228,7 +1230,7 @@ eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + * register. + * Update the RDT with the value of the last processed RX descriptor + * minus 1, to guarantee that the RDT register is never equal to the +- * RDH register, which creates a "full" ring situtation from the ++ * RDH register, which creates a "full" ring situation from the + * hardware point of view... + */ + nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold); +@@ -1251,7 +1253,7 @@ eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + * Maximum number of Ring Descriptors. + * + * Since RDLEN/TDLEN should be multiple of 128bytes, the number of ring +- * desscriptors should meet the following condition: ++ * descriptors should meet the following condition: + * (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0 + */ + +@@ -1276,6 +1278,7 @@ igb_tx_queue_release(struct igb_tx_queue *txq) + if (txq != NULL) { + igb_tx_queue_release_mbufs(txq); + rte_free(txq->sw_ring); ++ rte_memzone_free(txq->mz); + rte_free(txq); + } + } +@@ -1348,7 +1351,7 @@ igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt) + sw_ring[tx_id].last_id = tx_id; + } + +- /* Move to next segemnt. */ ++ /* Move to next segment. */ + tx_id = sw_ring[tx_id].next_id; + + } while (tx_id != tx_next); +@@ -1381,7 +1384,7 @@ igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt) + + /* Walk the list and find the next mbuf, if any. */ + do { +- /* Move to next segemnt. 
*/ ++ /* Move to next segment. */ + tx_id = sw_ring[tx_id].next_id; + + if (sw_ring[tx_id].mbuf) +@@ -1545,6 +1548,7 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev, + return -ENOMEM; + } + ++ txq->mz = tz; + txq->nb_tx_desc = nb_desc; + txq->pthresh = tx_conf->tx_thresh.pthresh; + txq->hthresh = tx_conf->tx_thresh.hthresh; +@@ -1601,6 +1605,7 @@ igb_rx_queue_release(struct igb_rx_queue *rxq) + if (rxq != NULL) { + igb_rx_queue_release_mbufs(rxq); + rte_free(rxq->sw_ring); ++ rte_memzone_free(rxq->mz); + rte_free(rxq); + } + } +@@ -1746,6 +1751,8 @@ eth_igb_rx_queue_setup(struct rte_eth_dev *dev, + igb_rx_queue_release(rxq); + return -ENOMEM; + } ++ ++ rxq->mz = rz; + rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx)); + rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx)); + rxq->rx_ring_phys_addr = rz->iova; +@@ -1885,14 +1892,12 @@ igb_dev_free_queues(struct rte_eth_dev *dev) + for (i = 0; i < dev->data->nb_rx_queues; i++) { + eth_igb_rx_queue_release(dev->data->rx_queues[i]); + dev->data->rx_queues[i] = NULL; +- rte_eth_dma_zone_free(dev, "rx_ring", i); + } + dev->data->nb_rx_queues = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + eth_igb_tx_queue_release(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; +- rte_eth_dma_zone_free(dev, "tx_ring", i); + } + dev->data->nb_tx_queues = 0; + } +@@ -2160,7 +2165,7 @@ igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev) + + igb_rss_disable(dev); + +- /* RCTL: eanble VLAN filter */ ++ /* RCTL: enable VLAN filter */ + rctl = E1000_READ_REG(hw, E1000_RCTL); + rctl |= E1000_RCTL_VFE; + E1000_WRITE_REG(hw, E1000_RCTL, rctl); +@@ -2343,15 +2348,18 @@ eth_igb_rx_init(struct rte_eth_dev *dev) + * Configure support of jumbo frames, if any. + */ + if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { ++ uint32_t max_len = dev->data->dev_conf.rxmode.max_rx_pkt_len; ++ + rctl |= E1000_RCTL_LPE; + + /* + * Set maximum packet length by default, and might be updated + * together with enabling/disabling dual VLAN. + */ +- E1000_WRITE_REG(hw, E1000_RLPML, +- dev->data->dev_conf.rxmode.max_rx_pkt_len + +- VLAN_TAG_SIZE); ++ if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_EXTEND) ++ max_len += VLAN_TAG_SIZE; ++ ++ E1000_WRITE_REG(hw, E1000_RLPML, max_len); + } else + rctl &= ~E1000_RCTL_LPE; + +diff --git a/dpdk/drivers/net/ena/base/ena_com.c b/dpdk/drivers/net/ena/base/ena_com.c +index aae68721fb..00dea4bfc7 100644 +--- a/dpdk/drivers/net/ena/base/ena_com.c ++++ b/dpdk/drivers/net/ena/base/ena_com.c +@@ -587,7 +587,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c + return ret; + } + +-/** ++/* + * Set the LLQ configurations of the firmware + * + * The driver provides only the enabled feature values to the device, +@@ -1078,7 +1078,7 @@ static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev) + /* The key buffer is stored in the device in an array of + * uint32 elements. 
+ */ +- hash_key->keys_num = ENA_ADMIN_RSS_KEY_PARTS; ++ hash_key->key_parts = ENA_ADMIN_RSS_KEY_PARTS; + } + + static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) +@@ -1151,7 +1151,7 @@ static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, + int ret; + + ret = ena_com_get_feature(ena_dev, &get_resp, +- ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, 0); ++ ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG, 0); + if (unlikely(ret)) + return ret; + +@@ -1366,7 +1366,7 @@ int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, + ena_trc_err("Failed to submit command [%ld]\n", + PTR_ERR(comp_ctx)); + +- return PTR_ERR(comp_ctx); ++ return (int)PTR_ERR(comp_ctx); + } + + ret = ena_com_wait_and_process_admin_cq(comp_ctx, admin_queue); +@@ -1586,7 +1586,7 @@ int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag) + int ena_com_get_dma_width(struct ena_com_dev *ena_dev) + { + u32 caps = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF); +- int width; ++ u32 width; + + if (unlikely(caps == ENA_MMIO_READ_TIMEOUT)) { + ena_trc_err("Reg read timeout occurred\n"); +@@ -1655,6 +1655,22 @@ int ena_com_validate_version(struct ena_com_dev *ena_dev) + return 0; + } + ++static void ++ena_com_free_ena_admin_queue_comp_ctx(struct ena_com_dev *ena_dev, ++ struct ena_com_admin_queue *admin_queue) ++ ++{ ++ if (!admin_queue->comp_ctx) ++ return; ++ ++ ENA_WAIT_EVENTS_DESTROY(admin_queue); ++ ENA_MEM_FREE(ena_dev->dmadev, ++ admin_queue->comp_ctx, ++ (admin_queue->q_depth * sizeof(struct ena_comp_ctx))); ++ ++ admin_queue->comp_ctx = NULL; ++} ++ + void ena_com_admin_destroy(struct ena_com_dev *ena_dev) + { + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; +@@ -1663,14 +1679,8 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev) + struct ena_com_aenq *aenq = &ena_dev->aenq; + u16 size; + +- if (admin_queue->comp_ctx) { +- ENA_WAIT_EVENT_DESTROY(admin_queue->comp_ctx->wait_event); +- ENA_MEM_FREE(ena_dev->dmadev, +- admin_queue->comp_ctx, +- (admin_queue->q_depth * sizeof(struct ena_comp_ctx))); +- } ++ ena_com_free_ena_admin_queue_comp_ctx(ena_dev, admin_queue); + +- admin_queue->comp_ctx = NULL; + size = ADMIN_SQ_SIZE(admin_queue->q_depth); + if (sq->entries) + ENA_MEM_FREE_COHERENT(ena_dev->dmadev, size, sq->entries, +@@ -1961,6 +1971,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + + memcpy(&get_feat_ctx->dev_attr, &get_resp.u.dev_attr, + sizeof(get_resp.u.dev_attr)); ++ + ena_dev->supported_features = get_resp.u.dev_attr.supported_features; + + if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { +@@ -2028,7 +2039,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + return rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, +- ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, 0); ++ ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG, 0); + if (!rc) + memcpy(&get_feat_ctx->ind_table, &get_resp.u.ind_table, + sizeof(get_resp.u.ind_table)); +@@ -2090,9 +2101,9 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) + timestamp = (u64)aenq_common->timestamp_low | + ((u64)aenq_common->timestamp_high << 32); + ENA_TOUCH(timestamp); /* In case debug is disabled */ +- ena_trc_dbg("AENQ! Group[%x] Syndrom[%x] timestamp: [%" ENA_PRIu64 "s]\n", ++ ena_trc_dbg("AENQ! 
Group[%x] Syndrome[%x] timestamp: [%" ENA_PRIu64 "s]\n", + aenq_common->group, +- aenq_common->syndrom, ++ aenq_common->syndrome, + timestamp); + + /* Handle specific event*/ +@@ -2263,7 +2274,7 @@ int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu) + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = 0; + cmd.feat_common.feature_id = ENA_ADMIN_MTU; +- cmd.u.mtu.mtu = mtu; ++ cmd.u.mtu.mtu = (u32)mtu; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, +@@ -2394,7 +2405,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + } + memcpy(hash_key->key, key, key_len); + rss->hash_init_val = init_val; +- hash_key->keys_num = key_len / sizeof(u32); ++ hash_key->key_parts = key_len / sizeof(hash_key->key[0]); + } + break; + case ENA_ADMIN_CRC32: +@@ -2449,7 +2460,8 @@ int ena_com_get_hash_key(struct ena_com_dev *ena_dev, u8 *key) + ena_dev->rss.hash_key; + + if (key) +- memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2); ++ memcpy(key, hash_key->key, ++ (size_t)(hash_key->key_parts) * sizeof(hash_key->key[0])); + + return 0; + } +@@ -2644,9 +2656,9 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, +- ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG)) { ++ ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG)) { + ena_trc_dbg("Feature %d isn't supported\n", +- ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); ++ ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG); + return ENA_COM_UNSUPPORTED; + } + +@@ -2661,7 +2673,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; +- cmd.feat_common.feature_id = ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG; ++ cmd.feat_common.feature_id = ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG; + cmd.u.ind_table.size = rss->tbl_log_size; + cmd.u.ind_table.inline_index = 0xFFFFFFFF; + +@@ -2673,7 +2685,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) + return ret; + } + +- cmd.control_buffer.length = (1ULL << rss->tbl_log_size) * ++ cmd.control_buffer.length = (u32)(1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + ret = ena_com_execute_admin_command(admin_queue, +@@ -2695,11 +2707,11 @@ int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl) + u32 tbl_size; + int i, rc; + +- tbl_size = (1ULL << rss->tbl_log_size) * ++ tbl_size = (u32)(1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, +- ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, ++ ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG, + rss->rss_ind_tbl_dma_addr, + tbl_size, 0); + if (unlikely(rc)) +diff --git a/dpdk/drivers/net/ena/base/ena_defs/ena_admin_defs.h b/dpdk/drivers/net/ena/base/ena_defs/ena_admin_defs.h +index 30e5eead71..40c2db717c 100644 +--- a/dpdk/drivers/net/ena/base/ena_defs/ena_admin_defs.h ++++ b/dpdk/drivers/net/ena/base/ena_defs/ena_admin_defs.h +@@ -2,13 +2,9 @@ + * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. + * All rights reserved. 
+ */ +- + #ifndef _ENA_ADMIN_H_ + #define _ENA_ADMIN_H_ + +-#define ENA_ADMIN_EXTRA_PROPERTIES_STRING_LEN 32 +-#define ENA_ADMIN_EXTRA_PROPERTIES_COUNT 32 +- + #define ENA_ADMIN_RSS_KEY_PARTS 10 + + enum ena_admin_aq_opcode { +@@ -33,6 +29,7 @@ enum ena_admin_aq_completion_status { + ENA_ADMIN_RESOURCE_BUSY = 7, + }; + ++/* subcommands for the set/get feature admin commands */ + enum ena_admin_aq_feature_id { + ENA_ADMIN_DEVICE_ATTRIBUTES = 1, + ENA_ADMIN_MAX_QUEUES_NUM = 2, +@@ -43,7 +40,7 @@ enum ena_admin_aq_feature_id { + ENA_ADMIN_MAX_QUEUES_EXT = 7, + ENA_ADMIN_RSS_HASH_FUNCTION = 10, + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, +- ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12, ++ ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG = 12, + ENA_ADMIN_MTU = 14, + ENA_ADMIN_RSS_HASH_INPUT = 18, + ENA_ADMIN_INTERRUPT_MODERATION = 20, +@@ -175,7 +172,7 @@ struct ena_admin_acq_common_desc { + uint16_t extended_status; + + /* indicates to the driver which AQ entry has been consumed by the +- * device and could be reused ++ * device and could be reused + */ + uint16_t sq_head_indx; + }; +@@ -220,8 +217,8 @@ struct ena_admin_aq_create_sq_cmd { + */ + uint8_t sq_caps_3; + +- /* associated completion queue id. This CQ must be created prior to +- * SQ creation ++ /* associated completion queue id. This CQ must be created prior to SQ ++ * creation + */ + uint16_t cq_idx; + +@@ -360,7 +357,7 @@ struct ena_admin_aq_get_stats_cmd { + uint16_t queue_idx; + + /* device id, value 0xFFFF means mine. only privileged device can get +- * stats of other device ++ * stats of other device + */ + uint16_t device_id; + }; +@@ -442,8 +439,8 @@ struct ena_admin_get_set_feature_common_desc { + uint8_t feature_id; + + /* The driver specifies the max feature version it supports and the +- * device responds with the currently supported feature version. The +- * field is zero based ++ * device responds with the currently supported feature version. The ++ * field is zero based + */ + uint8_t feature_version; + +@@ -455,7 +452,9 @@ struct ena_admin_device_attr_feature_desc { + + uint32_t device_version; + +- /* bitmap of ena_admin_aq_feature_id */ ++ /* bitmap of ena_admin_aq_feature_id, which represents supported ++ * subcommands for the set/get feature admin commands. ++ */ + uint32_t supported_features; + + uint32_t reserved3; +@@ -541,32 +540,30 @@ struct ena_admin_feature_llq_desc { + + uint32_t max_llq_depth; + +- /* specify the header locations the device supports. bitfield of +- * enum ena_admin_llq_header_location. ++ /* specify the header locations the device supports. bitfield of enum ++ * ena_admin_llq_header_location. + */ + uint16_t header_location_ctrl_supported; + + /* the header location the driver selected to use. */ + uint16_t header_location_ctrl_enabled; + +- /* if inline header is specified - this is the size of descriptor +- * list entry. If header in a separate ring is specified - this is +- * the size of header ring entry. bitfield of enum +- * ena_admin_llq_ring_entry_size. specify the entry sizes the device +- * supports ++ /* if inline header is specified - this is the size of descriptor list ++ * entry. If header in a separate ring is specified - this is the size ++ * of header ring entry. bitfield of enum ena_admin_llq_ring_entry_size. ++ * specify the entry sizes the device supports + */ + uint16_t entry_size_ctrl_supported; + + /* the entry size the driver selected to use. */ + uint16_t entry_size_ctrl_enabled; + +- /* valid only if inline header is specified. 
First entry associated +- * with the packet includes descriptors and header. Rest of the +- * entries occupied by descriptors. This parameter defines the max +- * number of descriptors precedding the header in the first entry. +- * The field is bitfield of enum +- * ena_admin_llq_num_descs_before_header and specify the values the +- * device supports ++ /* valid only if inline header is specified. First entry associated with ++ * the packet includes descriptors and header. Rest of the entries ++ * occupied by descriptors. This parameter defines the max number of ++ * descriptors precedding the header in the first entry. The field is ++ * bitfield of enum ena_admin_llq_num_descs_before_header and specify ++ * the values the device supports + */ + uint16_t desc_num_before_header_supported; + +@@ -574,7 +571,7 @@ struct ena_admin_feature_llq_desc { + uint16_t desc_num_before_header_enabled; + + /* valid only if inline was chosen. bitfield of enum +- * ena_admin_llq_stride_ctrl ++ * ena_admin_llq_stride_ctrl + */ + uint16_t descriptors_stride_ctrl_supported; + +@@ -584,8 +581,8 @@ struct ena_admin_feature_llq_desc { + /* reserved */ + uint32_t reserved1; + +- /* accelerated low latency queues requirement. Driver needs to +- * support those requirements in order to use accelerated LLQ ++ /* accelerated low latency queues requirement. driver needs to ++ * support those requirements in order to use accelerated llq + */ + struct ena_admin_accel_mode_req accel_mode; + }; +@@ -609,8 +606,8 @@ struct ena_admin_queue_ext_feature_fields { + + uint32_t max_tx_header_size; + +- /* Maximum Descriptors number, including meta descriptor, allowed for +- * a single Tx packet ++ /* Maximum Descriptors number, including meta descriptor, allowed for a ++ * single Tx packet + */ + uint16_t max_per_packet_tx_descs; + +@@ -633,8 +630,8 @@ struct ena_admin_queue_feature_desc { + + uint32_t max_header_size; + +- /* Maximum Descriptors number, including meta descriptor, allowed for +- * a single Tx packet ++ /* Maximum Descriptors number, including meta descriptor, allowed for a ++ * single Tx packet + */ + uint16_t max_packet_tx_descs; + +@@ -730,7 +727,7 @@ enum ena_admin_hash_functions { + }; + + struct ena_admin_feature_rss_flow_hash_control { +- uint32_t keys_num; ++ uint32_t key_parts; + + uint32_t reserved; + +@@ -872,7 +869,7 @@ struct ena_admin_host_info { + /* 0 : mutable_rss_table_size + * 1 : rx_offset + * 2 : interrupt_moderation +- * 3 : map_rx_buf_bidirectional ++ * 3 : rx_buf_mirroring + * 4 : rss_configurable_function_key + * 31:5 : reserved + */ +@@ -956,7 +953,7 @@ struct ena_admin_queue_ext_feature_desc { + struct ena_admin_queue_ext_feature_fields max_queue_ext; + + uint32_t raw[10]; +- } ; ++ }; + }; + + struct ena_admin_get_feat_resp { +@@ -1039,7 +1036,7 @@ struct ena_admin_set_feat_resp { + struct ena_admin_aenq_common_desc { + uint16_t group; + +- uint16_t syndrom; ++ uint16_t syndrome; + + /* 0 : phase + * 7:1 : reserved - MBZ +@@ -1063,7 +1060,7 @@ enum ena_admin_aenq_group { + ENA_ADMIN_AENQ_GROUPS_NUM = 5, + }; + +-enum ena_admin_aenq_notification_syndrom { ++enum ena_admin_aenq_notification_syndrome { + ENA_ADMIN_SUSPEND = 0, + ENA_ADMIN_RESUME = 1, + ENA_ADMIN_UPDATE_HINTS = 2, +@@ -1197,8 +1194,8 @@ struct ena_admin_ena_mmio_req_read_less_resp { + #define ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK BIT(1) + #define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_SHIFT 2 + #define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK BIT(2) +-#define ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_SHIFT 3 
+-#define ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_MASK BIT(3) ++#define ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_SHIFT 3 ++#define ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK BIT(3) + #define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_SHIFT 4 + #define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK BIT(4) + +@@ -1652,14 +1649,14 @@ static inline void set_ena_admin_host_info_interrupt_moderation(struct ena_admin + p->driver_supported_features |= (val << ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_SHIFT) & ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK; + } + +-static inline uint32_t get_ena_admin_host_info_map_rx_buf_bidirectional(const struct ena_admin_host_info *p) ++static inline uint32_t get_ena_admin_host_info_rx_buf_mirroring(const struct ena_admin_host_info *p) + { +- return (p->driver_supported_features & ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_MASK) >> ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_SHIFT; ++ return (p->driver_supported_features & ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK) >> ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_SHIFT; + } + +-static inline void set_ena_admin_host_info_map_rx_buf_bidirectional(struct ena_admin_host_info *p, uint32_t val) ++static inline void set_ena_admin_host_info_rx_buf_mirroring(struct ena_admin_host_info *p, uint32_t val) + { +- p->driver_supported_features |= (val << ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_SHIFT) & ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_MASK; ++ p->driver_supported_features |= (val << ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_SHIFT) & ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK; + } + + static inline uint32_t get_ena_admin_host_info_rss_configurable_function_key(const struct ena_admin_host_info *p) +diff --git a/dpdk/drivers/net/ena/base/ena_eth_com.c b/dpdk/drivers/net/ena/base/ena_eth_com.c +index 5583a310a1..042dc1c20e 100644 +--- a/dpdk/drivers/net/ena/base/ena_eth_com.c ++++ b/dpdk/drivers/net/ena/base/ena_eth_com.c +@@ -323,16 +323,18 @@ static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, + + *have_meta = true; + return ena_com_create_meta(io_sq, ena_meta); +- } else if (ena_com_meta_desc_changed(io_sq, ena_tx_ctx)) { ++ } ++ ++ if (ena_com_meta_desc_changed(io_sq, ena_tx_ctx)) { + *have_meta = true; + /* Cache the meta desc */ + memcpy(&io_sq->cached_tx_meta, ena_meta, + sizeof(struct ena_com_tx_meta)); + return ena_com_create_meta(io_sq, ena_meta); +- } else { +- *have_meta = false; +- return ENA_COM_OK; + } ++ ++ *have_meta = false; ++ return ENA_COM_OK; + } + + static void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, +@@ -604,9 +606,9 @@ int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, + desc->length = ena_buf->len; + + desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK | +- ENA_ETH_IO_RX_DESC_LAST_MASK | +- (io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK) | +- ENA_ETH_IO_RX_DESC_COMP_REQ_MASK; ++ ENA_ETH_IO_RX_DESC_LAST_MASK | ++ ENA_ETH_IO_RX_DESC_COMP_REQ_MASK | ++ (io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK); + + desc->req_id = req_id; + +diff --git a/dpdk/drivers/net/ena/base/ena_plat_dpdk.h b/dpdk/drivers/net/ena/base/ena_plat_dpdk.h +index a1d749f83f..bd8ebad9a7 100644 +--- a/dpdk/drivers/net/ena/base/ena_plat_dpdk.h ++++ b/dpdk/drivers/net/ena/base/ena_plat_dpdk.h +@@ -25,6 +25,7 @@ + #include + + #include ++#include + + typedef uint64_t u64; + typedef uint32_t u32; +@@ -62,7 +63,11 @@ typedef uint64_t dma_addr_t; + #define ENA_UDELAY(x) rte_delay_us_block(x) + + #define ENA_TOUCH(x) ((void)(x)) +-#define memcpy_toio memcpy ++/* Avoid nested declaration 
on arm64, as it may define rte_memcpy as memcpy. */ ++#if defined(RTE_ARCH_X86) ++#undef memcpy ++#define memcpy rte_memcpy ++#endif + #define wmb rte_wmb + #define rmb rte_rmb + #define mb rte_mb +@@ -289,7 +294,7 @@ extern rte_atomic32_t ena_alloc_cnt; + #define ENA_TIME_EXPIRE(timeout) (timeout < rte_get_timer_cycles()) + #define ENA_GET_SYSTEM_TIMEOUT(timeout_us) \ + (timeout_us * rte_get_timer_hz() / 1000000 + rte_get_timer_cycles()) +-#define ENA_WAIT_EVENT_DESTROY(waitqueue) ((void)(waitqueue)) ++#define ENA_WAIT_EVENTS_DESTROY(admin_queue) ((void)(admin_queue)) + + #ifndef READ_ONCE + #define READ_ONCE(var) (*((volatile typeof(var) *)(&(var)))) +diff --git a/dpdk/drivers/net/ena/ena_ethdev.c b/dpdk/drivers/net/ena/ena_ethdev.c +index 8baec80040..64e38e49fa 100644 +--- a/dpdk/drivers/net/ena/ena_ethdev.c ++++ b/dpdk/drivers/net/ena/ena_ethdev.c +@@ -51,10 +51,7 @@ + + #define ENA_MIN_RING_DESC 128 + +-enum ethtool_stringset { +- ETH_SS_TEST = 0, +- ETH_SS_STATS, +-}; ++#define ENA_PTYPE_HAS_HASH (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP) + + struct ena_stats { + char name[ETH_GSTRING_LEN]; +@@ -153,6 +150,23 @@ static const struct ena_stats ena_stats_rx_strings[] = { + #define ENA_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) + ++/** HW specific offloads capabilities. */ ++/* IPv4 checksum offload. */ ++#define ENA_L3_IPV4_CSUM 0x0001 ++/* TCP/UDP checksum offload for IPv4 packets. */ ++#define ENA_L4_IPV4_CSUM 0x0002 ++/* TCP/UDP checksum offload for IPv4 packets with pseudo header checksum. */ ++#define ENA_L4_IPV4_CSUM_PARTIAL 0x0004 ++/* TCP/UDP checksum offload for IPv6 packets. */ ++#define ENA_L4_IPV6_CSUM 0x0008 ++/* TCP/UDP checksum offload for IPv6 packets with pseudo header checksum. */ ++#define ENA_L4_IPV6_CSUM_PARTIAL 0x0010 ++/* TSO support for IPv4 packets. */ ++#define ENA_IPV4_TSO 0x0020 ++ ++/* Device supports setting RSS hash. 
*/ ++#define ENA_RX_RSS_HASH 0x0040 ++ + static const struct rte_pci_id pci_id_ena_map[] = { + { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, + { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF_RSERV0) }, +@@ -218,6 +232,10 @@ static int ena_queue_start(struct ena_ring *ring); + static int ena_queue_start_all(struct rte_eth_dev *dev, + enum ena_ring_type ring_type); + static void ena_stats_restart(struct rte_eth_dev *dev); ++static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter); ++static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter); ++static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter); ++static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter); + static int ena_infos_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); + static int ena_rss_reta_update(struct rte_eth_dev *dev, +@@ -285,6 +303,15 @@ void ena_rss_key_fill(void *key, size_t size) + rte_memcpy(key, default_key, size); + } + ++static inline void ena_trigger_reset(struct ena_adapter *adapter, ++ enum ena_regs_reset_reason_types reason) ++{ ++ if (likely(!adapter->trigger_reset)) { ++ adapter->reset_reason = reason; ++ adapter->trigger_reset = true; ++ } ++} ++ + static inline void ena_rx_mbuf_prepare(struct rte_mbuf *mbuf, + struct ena_com_rx_ctx *ena_rx_ctx) + { +@@ -310,10 +337,21 @@ static inline void ena_rx_mbuf_prepare(struct rte_mbuf *mbuf, + ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN; + else + if (unlikely(ena_rx_ctx->l4_csum_err)) +- ol_flags |= PKT_RX_L4_CKSUM_BAD; ++ /* ++ * For the L4 Rx checksum offload the HW may indicate ++ * bad checksum although it's valid. Because of that, ++ * we're setting the UNKNOWN flag to let the app ++ * re-verify the checksum. ++ */ ++ ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN; + else + ol_flags |= PKT_RX_L4_CKSUM_GOOD; + ++ if (likely((packet_type & ENA_PTYPE_HAS_HASH) && !ena_rx_ctx->frag)) { ++ ol_flags |= PKT_RX_RSS_HASH; ++ mbuf->hash.rss = ena_rx_ctx->hash; ++ } ++ + mbuf->ol_flags = ol_flags; + mbuf->packet_type = packet_type; + } +@@ -342,6 +380,8 @@ static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, + + if (mbuf->ol_flags & PKT_TX_IPV6) { + ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; ++ /* For the IPv6 packets, DF always needs to be true. 
*/ ++ ena_tx_ctx->df = 1; + } else { + ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; + +@@ -349,7 +389,7 @@ static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, + if (mbuf->packet_type & + (RTE_PTYPE_L4_NONFRAG + | RTE_PTYPE_INNER_L4_NONFRAG)) +- ena_tx_ctx->df = true; ++ ena_tx_ctx->df = 1; + } + + /* check if L4 checksum is needed */ +@@ -397,8 +437,7 @@ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) + + /* Trigger device reset */ + ++tx_ring->tx_stats.bad_req_id; +- tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; +- tx_ring->adapter->trigger_reset = true; ++ ena_trigger_reset(tx_ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID); + return -EFAULT; + } + +@@ -767,8 +806,10 @@ static void ena_tx_queue_release_bufs(struct ena_ring *ring) + for (i = 0; i < ring->ring_size; ++i) { + struct ena_tx_buffer *tx_buf = &ring->tx_buffer_info[i]; + +- if (tx_buf->mbuf) ++ if (tx_buf->mbuf) { + rte_pktmbuf_free(tx_buf->mbuf); ++ tx_buf->mbuf = NULL; ++ } + } + } + +@@ -1273,9 +1314,6 @@ static int ena_tx_queue_setup(struct rte_eth_dev *dev, + return -EINVAL; + } + +- if (nb_desc == RTE_ETH_DEV_FALLBACK_TX_RINGSIZE) +- nb_desc = adapter->max_tx_ring_size; +- + txq->port_id = dev->data->port_id; + txq->next_to_clean = 0; + txq->next_to_use = 0; +@@ -1347,9 +1385,6 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev, + return ENA_COM_FAULT; + } + +- if (nb_desc == RTE_ETH_DEV_FALLBACK_RX_RINGSIZE) +- nb_desc = adapter->max_rx_ring_size; +- + if (!rte_is_power_of_2(nb_desc)) { + PMD_DRV_LOG(ERR, + "Unsupported size of RX queue: %d is not a power of 2.\n", +@@ -1457,7 +1492,7 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) + "bad ring state\n"); + + /* get resources for incoming packets */ +- rc = rte_mempool_get_bulk(rxq->mb_pool, (void **)mbufs, count); ++ rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, count); + if (unlikely(rc < 0)) { + rte_atomic64_inc(&rxq->adapter->drv_stats->rx_nombuf); + ++rxq->rx_stats.mbuf_alloc_fail; +@@ -1486,12 +1521,11 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) + if (unlikely(i < count)) { + PMD_DRV_LOG(WARNING, "refilled rx qid %d with only %d " + "buffers (from %d)\n", rxq->id, i, count); +- rte_mempool_put_bulk(rxq->mb_pool, (void **)(&mbufs[i]), +- count - i); ++ rte_pktmbuf_free_bulk(&mbufs[i], count - i); + ++rxq->rx_stats.refill_partial; + } + +- /* When we submitted free recources to device... */ ++ /* When we submitted free resources to device... */ + if (likely(i > 0)) { + /* ...let HW know that it can fill buffers with data. 
*/ + ena_com_write_sq_doorbell(rxq->ena_com_io_sq); +@@ -1612,8 +1646,7 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter) + if (unlikely((rte_get_timer_cycles() - adapter->timestamp_wd) >= + adapter->keep_alive_timeout)) { + PMD_DRV_LOG(ERR, "Keep alive timeout\n"); +- adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO; +- adapter->trigger_reset = true; ++ ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); + ++adapter->dev_stats.wd_expired; + } + } +@@ -1623,8 +1656,7 @@ static void check_for_admin_com_state(struct ena_adapter *adapter) + { + if (unlikely(!ena_com_get_admin_running_state(&adapter->ena_dev))) { + PMD_DRV_LOG(ERR, "ENA admin queue is not in running state!\n"); +- adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO; +- adapter->trigger_reset = true; ++ ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); + } + } + +@@ -1634,6 +1666,9 @@ static void ena_timer_wd_callback(__rte_unused struct rte_timer *timer, + struct ena_adapter *adapter = arg; + struct rte_eth_dev *dev = adapter->rte_dev; + ++ if (unlikely(adapter->trigger_reset)) ++ return; ++ + check_for_missing_keep_alive(adapter); + check_for_admin_com_state(adapter); + +@@ -1683,6 +1718,13 @@ ena_set_queues_placement_policy(struct ena_adapter *adapter, + return 0; + } + ++ if (adapter->dev_mem_base == NULL) { ++ PMD_DRV_LOG(ERR, ++ "LLQ is advertised as supported, but device doesn't expose mem bar\n"); ++ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; ++ return 0; ++ } ++ + rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); + if (unlikely(rc)) { + PMD_INIT_LOG(WARNING, "Failed to config dev mode. " +@@ -1695,13 +1737,6 @@ ena_set_queues_placement_policy(struct ena_adapter *adapter, + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + return 0; + +- if (!adapter->dev_mem_base) { +- PMD_DRV_LOG(ERR, "Unable to access LLQ bar resource. 
" +- "Fallback to host mode policy.\n."); +- ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; +- return 0; +- } +- + ena_dev->mem_bar = adapter->dev_mem_base; + + return 0; +@@ -1744,6 +1779,50 @@ static uint32_t ena_calc_max_io_queue_num(struct ena_com_dev *ena_dev, + return max_num_io_queues; + } + ++static void ++ena_set_offloads(struct ena_offloads *offloads, ++ struct ena_admin_feature_offload_desc *offload_desc) ++{ ++ if (offload_desc->tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) ++ offloads->tx_offloads |= ENA_IPV4_TSO; ++ ++ /* Tx IPv4 checksum offloads */ ++ if (offload_desc->tx & ++ ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) ++ offloads->tx_offloads |= ENA_L3_IPV4_CSUM; ++ if (offload_desc->tx & ++ ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK) ++ offloads->tx_offloads |= ENA_L4_IPV4_CSUM; ++ if (offload_desc->tx & ++ ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) ++ offloads->tx_offloads |= ENA_L4_IPV4_CSUM_PARTIAL; ++ ++ /* Tx IPv6 checksum offloads */ ++ if (offload_desc->tx & ++ ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK) ++ offloads->tx_offloads |= ENA_L4_IPV6_CSUM; ++ if (offload_desc->tx & ++ ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) ++ offloads->tx_offloads |= ENA_L4_IPV6_CSUM_PARTIAL; ++ ++ /* Rx IPv4 checksum offloads */ ++ if (offload_desc->rx_supported & ++ ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK) ++ offloads->rx_offloads |= ENA_L3_IPV4_CSUM; ++ if (offload_desc->rx_supported & ++ ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) ++ offloads->rx_offloads |= ENA_L4_IPV4_CSUM; ++ ++ /* Rx IPv6 checksum offloads */ ++ if (offload_desc->rx_supported & ++ ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) ++ offloads->rx_offloads |= ENA_L4_IPV6_CSUM; ++ ++ if (offload_desc->rx_supported & ++ ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK) ++ offloads->rx_offloads |= ENA_RX_RSS_HASH; ++} ++ + static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) + { + struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; +@@ -1866,14 +1945,7 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) + /* Set max MTU for this device */ + adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; + +- /* set device support for offloads */ +- adapter->offloads.tso4_supported = (get_feat_ctx.offload.tx & +- ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0; +- adapter->offloads.tx_csum_supported = (get_feat_ctx.offload.tx & +- ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) != 0; +- adapter->offloads.rx_csum_supported = +- (get_feat_ctx.offload.rx_supported & +- ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) != 0; ++ ena_set_offloads(&adapter->offloads, &get_feat_ctx.offload); + + /* Copy MAC address and point DPDK to it */ + eth_dev->data->mac_addrs = (struct rte_ether_addr *)adapter->mac_addr; +@@ -1959,8 +2031,15 @@ static int ena_dev_configure(struct rte_eth_dev *dev) + + adapter->state = ENA_ADAPTER_STATE_CONFIG; + +- adapter->tx_selected_offloads = dev->data->dev_conf.txmode.offloads; +- adapter->rx_selected_offloads = dev->data->dev_conf.rxmode.offloads; ++ if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) ++ dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; ++ dev->data->dev_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS; ++ ++ /* Scattered Rx cannot be turned off in the HW, so this capability must ++ * be forced. 
++ */ ++ dev->data->scattered_rx = 1; ++ + return 0; + } + +@@ -1993,12 +2072,65 @@ static void ena_init_rings(struct ena_adapter *adapter, + } + } + ++static uint64_t ena_get_rx_port_offloads(struct ena_adapter *adapter) ++{ ++ uint64_t port_offloads = 0; ++ ++ if (adapter->offloads.rx_offloads & ENA_L3_IPV4_CSUM) ++ port_offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM; ++ ++ if (adapter->offloads.rx_offloads & ++ (ENA_L4_IPV4_CSUM | ENA_L4_IPV6_CSUM)) ++ port_offloads |= ++ DEV_RX_OFFLOAD_UDP_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM; ++ ++ if (adapter->offloads.rx_offloads & ENA_RX_RSS_HASH) ++ port_offloads |= DEV_RX_OFFLOAD_RSS_HASH; ++ ++ port_offloads |= DEV_RX_OFFLOAD_SCATTER; ++ ++ return port_offloads; ++} ++ ++static uint64_t ena_get_tx_port_offloads(struct ena_adapter *adapter) ++{ ++ uint64_t port_offloads = 0; ++ ++ if (adapter->offloads.tx_offloads & ENA_IPV4_TSO) ++ port_offloads |= DEV_TX_OFFLOAD_TCP_TSO; ++ ++ if (adapter->offloads.tx_offloads & ENA_L3_IPV4_CSUM) ++ port_offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM; ++ if (adapter->offloads.tx_offloads & ++ (ENA_L4_IPV4_CSUM_PARTIAL | ENA_L4_IPV4_CSUM | ++ ENA_L4_IPV6_CSUM | ENA_L4_IPV6_CSUM_PARTIAL)) ++ port_offloads |= ++ DEV_TX_OFFLOAD_UDP_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM; ++ ++ port_offloads |= DEV_TX_OFFLOAD_MULTI_SEGS; ++ ++ return port_offloads; ++} ++ ++static uint64_t ena_get_rx_queue_offloads(struct ena_adapter *adapter) ++{ ++ RTE_SET_USED(adapter); ++ ++ return 0; ++} ++ ++static uint64_t ena_get_tx_queue_offloads(struct ena_adapter *adapter) ++{ ++ RTE_SET_USED(adapter); ++ ++ return 0; ++} ++ + static int ena_infos_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info) + { + struct ena_adapter *adapter; + struct ena_com_dev *ena_dev; +- uint64_t rx_feat = 0, tx_feat = 0; + + ena_assert_msg(dev->data != NULL, "Uninitialized device\n"); + ena_assert_msg(dev->data->dev_private != NULL, "Uninitialized device\n"); +@@ -2017,27 +2149,11 @@ static int ena_infos_get(struct rte_eth_dev *dev, + ETH_LINK_SPEED_50G | + ETH_LINK_SPEED_100G; + +- /* Set Tx & Rx features available for device */ +- if (adapter->offloads.tso4_supported) +- tx_feat |= DEV_TX_OFFLOAD_TCP_TSO; +- +- if (adapter->offloads.tx_csum_supported) +- tx_feat |= DEV_TX_OFFLOAD_IPV4_CKSUM | +- DEV_TX_OFFLOAD_UDP_CKSUM | +- DEV_TX_OFFLOAD_TCP_CKSUM; +- +- if (adapter->offloads.rx_csum_supported) +- rx_feat |= DEV_RX_OFFLOAD_IPV4_CKSUM | +- DEV_RX_OFFLOAD_UDP_CKSUM | +- DEV_RX_OFFLOAD_TCP_CKSUM; +- +- rx_feat |= DEV_RX_OFFLOAD_JUMBO_FRAME; +- + /* Inform framework about available features */ +- dev_info->rx_offload_capa = rx_feat; +- dev_info->rx_queue_offload_capa = rx_feat; +- dev_info->tx_offload_capa = tx_feat; +- dev_info->tx_queue_offload_capa = tx_feat; ++ dev_info->rx_offload_capa = ena_get_rx_port_offloads(adapter); ++ dev_info->tx_offload_capa = ena_get_tx_port_offloads(adapter); ++ dev_info->rx_queue_offload_capa = ena_get_rx_queue_offloads(adapter); ++ dev_info->tx_queue_offload_capa = ena_get_tx_queue_offloads(adapter); + + dev_info->flow_type_rss_offloads = ETH_RSS_IP | ETH_RSS_TCP | + ETH_RSS_UDP; +@@ -2050,9 +2166,6 @@ static int ena_infos_get(struct rte_eth_dev *dev, + dev_info->max_tx_queues = adapter->max_num_io_queues; + dev_info->reta_size = ENA_RX_RSS_TABLE_SIZE; + +- adapter->tx_supported_offloads = tx_feat; +- adapter->rx_supported_offloads = rx_feat; +- + dev_info->rx_desc_lim.nb_max = adapter->max_rx_ring_size; + dev_info->rx_desc_lim.nb_min = ENA_MIN_RING_DESC; + dev_info->rx_desc_lim.nb_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, +@@ -2067,6 +2180,9 
@@ static int ena_infos_get(struct rte_eth_dev *dev, + dev_info->tx_desc_lim.nb_mtu_seg_max = RTE_MIN(ENA_PKT_MAX_BUFS, + adapter->max_tx_sgl_size); + ++ dev_info->default_rxportconf.ring_size = ENA_DEFAULT_RING_SIZE; ++ dev_info->default_txportconf.ring_size = ENA_DEFAULT_RING_SIZE; ++ + return 0; + } + +@@ -2209,14 +2325,13 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + PMD_DRV_LOG(ERR, "ena_com_rx_pkt error %d\n", rc); + if (rc == ENA_COM_NO_SPACE) { + ++rx_ring->rx_stats.bad_desc_num; +- rx_ring->adapter->reset_reason = +- ENA_REGS_RESET_TOO_MANY_RX_DESCS; ++ ena_trigger_reset(rx_ring->adapter, ++ ENA_REGS_RESET_TOO_MANY_RX_DESCS); + } else { + ++rx_ring->rx_stats.bad_req_id; +- rx_ring->adapter->reset_reason = +- ENA_REGS_RESET_INV_RX_REQ_ID; ++ ena_trigger_reset(rx_ring->adapter, ++ ENA_REGS_RESET_INV_RX_REQ_ID); + } +- rx_ring->adapter->trigger_reset = true; + return 0; + } + +@@ -2244,8 +2359,6 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + ++rx_ring->rx_stats.bad_csum; + } + +- mbuf->hash.rss = ena_rx_ctx.hash; +- + rx_pkts[completed] = mbuf; + rx_ring->rx_stats.bytes += mbuf->pkt_len; + } +@@ -2275,45 +2388,60 @@ eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint32_t i; + struct rte_mbuf *m; + struct ena_ring *tx_ring = (struct ena_ring *)(tx_queue); ++ struct ena_adapter *adapter = tx_ring->adapter; + struct rte_ipv4_hdr *ip_hdr; + uint64_t ol_flags; ++ uint64_t l4_csum_flag; ++ uint64_t dev_offload_capa; + uint16_t frag_field; ++ bool need_pseudo_csum; + ++ dev_offload_capa = adapter->offloads.tx_offloads; + for (i = 0; i != nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + +- if (!(ol_flags & PKT_TX_IPV4)) ++ /* Check if any offload flag was set */ ++ if (ol_flags == 0) + continue; + +- /* If there was not L2 header length specified, assume it is +- * length of the ethernet header. +- */ +- if (unlikely(m->l2_len == 0)) +- m->l2_len = sizeof(struct rte_ether_hdr); +- +- ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, +- m->l2_len); +- frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); +- +- if ((frag_field & RTE_IPV4_HDR_DF_FLAG) != 0) { +- m->packet_type |= RTE_PTYPE_L4_NONFRAG; +- +- /* If IPv4 header has DF flag enabled and TSO support is +- * disabled, partial chcecksum should not be calculated. +- */ +- if (!tx_ring->adapter->offloads.tso4_supported) +- continue; +- } +- +- if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) != 0 || +- (ol_flags & PKT_TX_L4_MASK) == +- PKT_TX_SCTP_CKSUM) { ++ l4_csum_flag = ol_flags & PKT_TX_L4_MASK; ++ /* SCTP checksum offload is not supported by the ENA. 
*/ ++ if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) || ++ l4_csum_flag == PKT_TX_SCTP_CKSUM) { ++ PMD_TX_LOG(DEBUG, ++ "mbuf[%" PRIu32 "] has unsupported offloads flags set: 0x%" PRIu64 "\n", ++ i, ol_flags); + rte_errno = ENOTSUP; + return i; + } + + #ifdef RTE_LIBRTE_ETHDEV_DEBUG ++ /* Check if requested offload is also enabled for the queue */ ++ if ((ol_flags & PKT_TX_IP_CKSUM && ++ !(tx_ring->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)) || ++ (l4_csum_flag == PKT_TX_TCP_CKSUM && ++ !(tx_ring->offloads & DEV_TX_OFFLOAD_TCP_CKSUM)) || ++ (l4_csum_flag == PKT_TX_UDP_CKSUM && ++ !(tx_ring->offloads & DEV_TX_OFFLOAD_UDP_CKSUM))) { ++ PMD_TX_LOG(DEBUG, ++ "mbuf[%" PRIu32 "]: requested offloads: %" PRIu16 " are not enabled for the queue[%u]\n", ++ i, m->nb_segs, tx_ring->id); ++ rte_errno = EINVAL; ++ return i; ++ } ++ ++ /* The caller is obligated to set l2 and l3 len if any cksum ++ * offload is enabled. ++ */ ++ if (unlikely(ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK) && ++ (m->l2_len == 0 || m->l3_len == 0))) { ++ PMD_TX_LOG(DEBUG, ++ "mbuf[%" PRIu32 "]: l2_len or l3_len values are 0 while the offload was requested\n", ++ i); ++ rte_errno = EINVAL; ++ return i; ++ } + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = -ret; +@@ -2321,16 +2449,76 @@ eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + } + #endif + +- /* In case we are supposed to TSO and have DF not set (DF=0) +- * hardware must be provided with partial checksum, otherwise +- * it will take care of necessary calculations. ++ /* Verify HW support for requested offloads and determine if ++ * pseudo header checksum is needed. + */ ++ need_pseudo_csum = false; ++ if (ol_flags & PKT_TX_IPV4) { ++ if (ol_flags & PKT_TX_IP_CKSUM && ++ !(dev_offload_capa & ENA_L3_IPV4_CSUM)) { ++ rte_errno = ENOTSUP; ++ return i; ++ } + +- ret = rte_net_intel_cksum_flags_prepare(m, +- ol_flags & ~PKT_TX_TCP_SEG); +- if (ret != 0) { +- rte_errno = -ret; +- return i; ++ if (ol_flags & PKT_TX_TCP_SEG && ++ !(dev_offload_capa & ENA_IPV4_TSO)) { ++ rte_errno = ENOTSUP; ++ return i; ++ } ++ ++ /* Check HW capabilities and if pseudo csum is needed ++ * for L4 offloads. ++ */ ++ if (l4_csum_flag != PKT_TX_L4_NO_CKSUM && ++ !(dev_offload_capa & ENA_L4_IPV4_CSUM)) { ++ if (dev_offload_capa & ++ ENA_L4_IPV4_CSUM_PARTIAL) { ++ need_pseudo_csum = true; ++ } else { ++ rte_errno = ENOTSUP; ++ return i; ++ } ++ } ++ ++ /* Parse the DF flag */ ++ ip_hdr = rte_pktmbuf_mtod_offset(m, ++ struct rte_ipv4_hdr *, m->l2_len); ++ frag_field = rte_be_to_cpu_16(ip_hdr->fragment_offset); ++ if (frag_field & RTE_IPV4_HDR_DF_FLAG) { ++ m->packet_type |= RTE_PTYPE_L4_NONFRAG; ++ } else if (ol_flags & PKT_TX_TCP_SEG) { ++ /* In case we are supposed to TSO and have DF ++ * not set (DF=0) hardware must be provided with ++ * partial checksum. ++ */ ++ need_pseudo_csum = true; ++ } ++ } else if (ol_flags & PKT_TX_IPV6) { ++ /* There is no support for IPv6 TSO as for now. 
*/ ++ if (ol_flags & PKT_TX_TCP_SEG) { ++ rte_errno = ENOTSUP; ++ return i; ++ } ++ ++ /* Check HW capabilities and if pseudo csum is needed */ ++ if (l4_csum_flag != PKT_TX_L4_NO_CKSUM && ++ !(dev_offload_capa & ENA_L4_IPV6_CSUM)) { ++ if (dev_offload_capa & ++ ENA_L4_IPV6_CSUM_PARTIAL) { ++ need_pseudo_csum = true; ++ } else { ++ rte_errno = ENOTSUP; ++ return i; ++ } ++ } ++ } ++ ++ if (need_pseudo_csum) { ++ ret = rte_net_intel_cksum_flags_prepare(m, ol_flags); ++ if (ret != 0) { ++ rte_errno = -ret; ++ return i; ++ } + } + } + +@@ -2530,7 +2718,10 @@ static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) + rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, + &nb_hw_desc); + if (unlikely(rc)) { ++ PMD_DRV_LOG(ERR, "Failed to prepare Tx buffers, rc: %d\n", rc); + ++tx_ring->tx_stats.prepare_ctx_err; ++ ena_trigger_reset(tx_ring->adapter, ++ ENA_REGS_RESET_DRIVER_INVALID_STATE); + return rc; + } + +@@ -2864,7 +3055,7 @@ static int ena_process_bool_devarg(const char *key, + } + + /* Now, assign it to the proper adapter field. */ +- if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR)) ++ if (strcmp(key, ENA_DEVARG_LARGE_LLQ_HDR) == 0) + adapter->use_large_llq_hdr = bool_value; + + return 0; +@@ -2875,6 +3066,7 @@ static int ena_parse_devargs(struct ena_adapter *adapter, + { + static const char * const allowed_args[] = { + ENA_DEVARG_LARGE_LLQ_HDR, ++ NULL, + }; + struct rte_kvargs *kvlist; + int rc; +@@ -2972,7 +3164,7 @@ static void ena_notification(void *data, + aenq_e->aenq_common_desc.group, + ENA_ADMIN_NOTIFICATION); + +- switch (aenq_e->aenq_common_desc.syndrom) { ++ switch (aenq_e->aenq_common_desc.syndrome) { + case ENA_ADMIN_UPDATE_HINTS: + hints = (struct ena_admin_ena_hw_hints *) + (&aenq_e->inline_data_w4); +@@ -2980,7 +3172,7 @@ static void ena_notification(void *data, + break; + default: + PMD_DRV_LOG(ERR, "Invalid aenq notification link state %d\n", +- aenq_e->aenq_common_desc.syndrom); ++ aenq_e->aenq_common_desc.syndrome); + } + } + +diff --git a/dpdk/drivers/net/ena/ena_ethdev.h b/dpdk/drivers/net/ena/ena_ethdev.h +index ae235897ee..7bb3ff2b9e 100644 +--- a/dpdk/drivers/net/ena/ena_ethdev.h ++++ b/dpdk/drivers/net/ena/ena_ethdev.h +@@ -32,7 +32,7 @@ + + /* While processing submitted and completed descriptors (rx and tx path + * respectively) in a loop it is desired to: +- * - perform batch submissions while populating sumbissmion queue ++ * - perform batch submissions while populating submission queue + * - avoid blocking transmission of other packets during cleanup phase + * Hence the utilization ratio of 1/8 of a queue size or max value if the size + * of the ring is very big - like 8k Rx rings. 
+@@ -202,9 +202,8 @@ struct ena_stats_eni { + }; + + struct ena_offloads { +- bool tso4_supported; +- bool tx_csum_supported; +- bool rx_csum_supported; ++ uint32_t tx_offloads; ++ uint32_t rx_offloads; + }; + + /* board specific private data structure */ +@@ -247,11 +246,6 @@ struct ena_adapter { + struct ena_driver_stats *drv_stats; + enum ena_adapter_state state; + +- uint64_t tx_supported_offloads; +- uint64_t tx_selected_offloads; +- uint64_t rx_supported_offloads; +- uint64_t rx_selected_offloads; +- + bool link_status; + + enum ena_regs_reset_reason_types reset_reason; +diff --git a/dpdk/drivers/net/ena/ena_platform.h b/dpdk/drivers/net/ena/ena_platform.h +index d3e40e0e9e..748928b2d9 100644 +--- a/dpdk/drivers/net/ena/ena_platform.h ++++ b/dpdk/drivers/net/ena/ena_platform.h +@@ -6,18 +6,6 @@ + #ifndef __ENA_PLATFORM_H__ + #define __ENA_PLATFORM_H__ + +-#define swap16_to_le(x) (x) +- +-#define swap32_to_le(x) (x) +- +-#define swap64_to_le(x) (x) +- +-#define swap16_from_le(x) (x) +- +-#define swap32_from_le(x) (x) +- +-#define swap64_from_le(x) (x) +- + #define ena_assert_msg(cond, msg) \ + do { \ + if (unlikely(!(cond))) { \ +diff --git a/dpdk/drivers/net/enetc/enetc_ethdev.c b/dpdk/drivers/net/enetc/enetc_ethdev.c +index 4d2c9c0474..19769a78ff 100644 +--- a/dpdk/drivers/net/enetc/enetc_ethdev.c ++++ b/dpdk/drivers/net/enetc/enetc_ethdev.c +@@ -885,8 +885,6 @@ enetc_dev_init(struct rte_eth_dev *eth_dev) + eth_dev->rx_pkt_burst = &enetc_recv_pkts; + eth_dev->tx_pkt_burst = &enetc_xmit_pkts; + +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; +- + /* Retrieving and storing the HW base address of device */ + hw->hw.reg = (void *)pci_dev->mem_resource[0].addr; + hw->device_id = pci_dev->id.device_id; +diff --git a/dpdk/drivers/net/enic/base/vnic_dev.c b/dpdk/drivers/net/enic/base/vnic_dev.c +index aaca07ca67..8a82b7f491 100644 +--- a/dpdk/drivers/net/enic/base/vnic_dev.c ++++ b/dpdk/drivers/net/enic/base/vnic_dev.c +@@ -594,6 +594,9 @@ static int vnic_dev_flowman_enable(struct vnic_dev *vdev, uint32_t *mode, + uint64_t ops; + static uint32_t instance; + ++ /* Advanced filtering is a prerequisite */ ++ if (!vnic_dev_capable_adv_filters(vdev)) ++ return 0; + /* flowman devcmd available? */ + if (!vnic_dev_capable(vdev, CMD_FLOW_MANAGER_OP)) + return 0; +@@ -644,8 +647,8 @@ static int vnic_dev_flowman_enable(struct vnic_dev *vdev, uint32_t *mode, + return 1; + } + +-/* Determine the "best" filtering mode VIC is capaible of. Returns one of 4 +- * value or 0 on error: ++/* Determine the "best" filtering mode VIC is capable of. Returns one of 4 ++ * value or 0 if filtering is unavailble: + * FILTER_FLOWMAN- flowman api capable + * FILTER_DPDK_1- advanced filters availabile + * FILTER_USNIC_IP_FLAG - advanced filters but with the restriction that +@@ -680,6 +683,14 @@ int vnic_dev_capable_filter_mode(struct vnic_dev *vdev, uint32_t *mode, + args[0] = CMD_ADD_FILTER; + args[1] = 0; + err = vnic_dev_cmd_args(vdev, CMD_CAPABILITY, args, 2, 1000); ++ /* ++ * ERR_EPERM may be returned if, for example, vNIC is ++ * on a VF. 
It simply means no filtering is available ++ */ ++ if (err == -ERR_EPERM) { ++ *mode = 0; ++ return 0; ++ } + if (err) + return err; + max_level = args[1]; +@@ -1318,5 +1329,5 @@ int vnic_dev_capable_geneve(struct vnic_dev *vdev) + int ret; + + ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait); +- return ret == 0 && (a1 & FEATURE_GENEVE_OPTIONS); ++ return ret == 0 && !!(a1 & FEATURE_GENEVE_OPTIONS); + } +diff --git a/dpdk/drivers/net/enic/base/vnic_enet.h b/dpdk/drivers/net/enic/base/vnic_enet.h +index 7687951c90..2a97a33044 100644 +--- a/dpdk/drivers/net/enic/base/vnic_enet.h ++++ b/dpdk/drivers/net/enic/base/vnic_enet.h +@@ -55,6 +55,7 @@ struct vnic_enet_config { + #define VENETF_NICSWITCH 0x80000 /* NICSWITCH enabled */ + #define VENETF_RSSHASH_UDPIPV4 0x100000 /* Hash on UDP + IPv4 fields */ + #define VENETF_RSSHASH_UDPIPV6 0x200000 /* Hash on UDP + IPv6 fields */ ++#define VENETF_GENEVE 0x400000 /* GENEVE offload */ + + #define VENET_INTR_TYPE_MIN 0 /* Timer specs min interrupt spacing */ + #define VENET_INTR_TYPE_IDLE 1 /* Timer specs idle time before irq */ +diff --git a/dpdk/drivers/net/enic/enic.h b/dpdk/drivers/net/enic/enic.h +index 079f194275..67d872e4b1 100644 +--- a/dpdk/drivers/net/enic/enic.h ++++ b/dpdk/drivers/net/enic/enic.h +@@ -137,15 +137,15 @@ struct enic { + uint8_t adv_filters; + uint32_t flow_filter_mode; + uint8_t filter_actions; /* HW supported actions */ ++ bool geneve; + bool vxlan; + bool disable_overlay; /* devargs disable_overlay=1 */ + uint8_t enable_avx2_rx; /* devargs enable-avx2-rx=1 */ +- uint8_t geneve_opt_avail; /* Geneve with options offload available */ +- uint8_t geneve_opt_enabled; /* Geneve with options offload enabled */ + uint8_t geneve_opt_request; /* devargs geneve-opt=1 */ + bool nic_cfg_chk; /* NIC_CFG_CHK available */ + bool udp_rss_weak; /* Bodega style UDP RSS */ + uint8_t ig_vlan_rewrite_mode; /* devargs ig-vlan-rewrite */ ++ uint16_t geneve_port; /* current geneve port pushed to NIC */ + uint16_t vxlan_port; /* current vxlan port pushed to NIC */ + int use_simple_tx_handler; + int use_noscatter_vec_rx_handler; +diff --git a/dpdk/drivers/net/enic/enic_ethdev.c b/dpdk/drivers/net/enic/enic_ethdev.c +index d041a6bee9..4ba76daf09 100644 +--- a/dpdk/drivers/net/enic/enic_ethdev.c ++++ b/dpdk/drivers/net/enic/enic_ethdev.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -66,7 +67,6 @@ static const struct vic_speed_capa { + + #define ENIC_DEVARG_DISABLE_OVERLAY "disable-overlay" + #define ENIC_DEVARG_ENABLE_AVX2_RX "enable-avx2-rx" +-#define ENIC_DEVARG_GENEVE_OPT "geneve-opt" + #define ENIC_DEVARG_IG_VLAN_REWRITE "ig-vlan-rewrite" + #define ENIC_DEVARG_REPRESENTOR "representor" + +@@ -83,12 +83,6 @@ enicpmd_dev_filter_ctrl(struct rte_eth_dev *dev, + + ENICPMD_FUNC_TRACE(); + +- /* +- * Currently, when Geneve with options offload is enabled, host +- * cannot insert match-action rules. 
+- */ +- if (enic->geneve_opt_enabled) +- return -ENOTSUP; + switch (filter_type) { + case RTE_ETH_FILTER_GENERIC: + if (filter_op != RTE_ETH_FILTER_GET) +@@ -983,26 +977,32 @@ static int enicpmd_dev_rx_queue_intr_disable(struct rte_eth_dev *eth_dev, + static int udp_tunnel_common_check(struct enic *enic, + struct rte_eth_udp_tunnel *tnl) + { +- if (tnl->prot_type != RTE_TUNNEL_TYPE_VXLAN) ++ if (tnl->prot_type != RTE_TUNNEL_TYPE_VXLAN && ++ tnl->prot_type != RTE_TUNNEL_TYPE_GENEVE) + return -ENOTSUP; + if (!enic->overlay_offload) { +- ENICPMD_LOG(DEBUG, " vxlan (overlay offload) is not " +- "supported\n"); ++ ENICPMD_LOG(DEBUG, " overlay offload is not supported\n"); + return -ENOTSUP; + } + return 0; + } + +-static int update_vxlan_port(struct enic *enic, uint16_t port) ++static int update_tunnel_port(struct enic *enic, uint16_t port, bool vxlan) + { +- if (vnic_dev_overlay_offload_cfg(enic->vdev, +- OVERLAY_CFG_VXLAN_PORT_UPDATE, +- port)) { +- ENICPMD_LOG(DEBUG, " failed to update vxlan port\n"); ++ uint8_t cfg; ++ ++ cfg = vxlan ? OVERLAY_CFG_VXLAN_PORT_UPDATE : ++ OVERLAY_CFG_GENEVE_PORT_UPDATE; ++ if (vnic_dev_overlay_offload_cfg(enic->vdev, cfg, port)) { ++ ENICPMD_LOG(DEBUG, " failed to update tunnel port\n"); + return -EINVAL; + } +- ENICPMD_LOG(DEBUG, " updated vxlan port to %u\n", port); +- enic->vxlan_port = port; ++ ENICPMD_LOG(DEBUG, " updated %s port to %u\n", ++ vxlan ? "vxlan" : "geneve", port); ++ if (vxlan) ++ enic->vxlan_port = port; ++ else ++ enic->geneve_port = port; + return 0; + } + +@@ -1010,34 +1010,48 @@ static int enicpmd_dev_udp_tunnel_port_add(struct rte_eth_dev *eth_dev, + struct rte_eth_udp_tunnel *tnl) + { + struct enic *enic = pmd_priv(eth_dev); ++ uint16_t port; ++ bool vxlan; + int ret; + + ENICPMD_FUNC_TRACE(); + ret = udp_tunnel_common_check(enic, tnl); + if (ret) + return ret; ++ vxlan = (tnl->prot_type == RTE_TUNNEL_TYPE_VXLAN); ++ if (vxlan) ++ port = enic->vxlan_port; ++ else ++ port = enic->geneve_port; + /* +- * The NIC has 1 configurable VXLAN port number. "Adding" a new port +- * number replaces it. ++ * The NIC has 1 configurable port number per tunnel type. ++ * "Adding" a new port number replaces it. + */ +- if (tnl->udp_port == enic->vxlan_port || tnl->udp_port == 0) { ++ if (tnl->udp_port == port || tnl->udp_port == 0) { + ENICPMD_LOG(DEBUG, " %u is already configured or invalid\n", + tnl->udp_port); + return -EINVAL; + } +- return update_vxlan_port(enic, tnl->udp_port); ++ return update_tunnel_port(enic, tnl->udp_port, vxlan); + } + + static int enicpmd_dev_udp_tunnel_port_del(struct rte_eth_dev *eth_dev, + struct rte_eth_udp_tunnel *tnl) + { + struct enic *enic = pmd_priv(eth_dev); ++ uint16_t port; ++ bool vxlan; + int ret; + + ENICPMD_FUNC_TRACE(); + ret = udp_tunnel_common_check(enic, tnl); + if (ret) + return ret; ++ vxlan = (tnl->prot_type == RTE_TUNNEL_TYPE_VXLAN); ++ if (vxlan) ++ port = enic->vxlan_port; ++ else ++ port = enic->geneve_port; + /* + * Clear the previously set port number and restore the + * hardware default port number. Some drivers disable VXLAN +@@ -1045,12 +1059,13 @@ static int enicpmd_dev_udp_tunnel_port_del(struct rte_eth_dev *eth_dev, + * enic does not do that as VXLAN is part of overlay offload, + * which is tied to inner RSS and TSO. 
+ */ +- if (tnl->udp_port != enic->vxlan_port) { +- ENICPMD_LOG(DEBUG, " %u is not a configured vxlan port\n", ++ if (tnl->udp_port != port) { ++ ENICPMD_LOG(DEBUG, " %u is not a configured tunnel port\n", + tnl->udp_port); + return -EINVAL; + } +- return update_vxlan_port(enic, RTE_VXLAN_DEFAULT_PORT); ++ port = vxlan ? RTE_VXLAN_DEFAULT_PORT : RTE_GENEVE_DEFAULT_PORT; ++ return update_tunnel_port(enic, port, vxlan); + } + + static int enicpmd_dev_fw_version_get(struct rte_eth_dev *eth_dev, +@@ -1061,16 +1076,21 @@ static int enicpmd_dev_fw_version_get(struct rte_eth_dev *eth_dev, + int ret; + + ENICPMD_FUNC_TRACE(); +- if (fw_version == NULL || fw_size <= 0) +- return -EINVAL; ++ + enic = pmd_priv(eth_dev); + ret = vnic_dev_fw_info(enic->vdev, &info); + if (ret) + return ret; +- snprintf(fw_version, fw_size, "%s %s", ++ ret = snprintf(fw_version, fw_size, "%s %s", + info->fw_version, info->fw_build); +- fw_version[fw_size - 1] = '\0'; +- return 0; ++ if (ret < 0) ++ return -EINVAL; ++ ++ ret += 1; /* add the size of '\0' */ ++ if (fw_size < (size_t)ret) ++ return ret; ++ else ++ return 0; + } + + static const struct eth_dev_ops enicpmd_eth_dev_ops = { +@@ -1149,8 +1169,6 @@ static int enic_parse_zero_one(const char *key, + enic->disable_overlay = b; + if (strcmp(key, ENIC_DEVARG_ENABLE_AVX2_RX) == 0) + enic->enable_avx2_rx = b; +- if (strcmp(key, ENIC_DEVARG_GENEVE_OPT) == 0) +- enic->geneve_opt_request = b; + return 0; + } + +@@ -1192,7 +1210,6 @@ static int enic_check_devargs(struct rte_eth_dev *dev) + static const char *const valid_keys[] = { + ENIC_DEVARG_DISABLE_OVERLAY, + ENIC_DEVARG_ENABLE_AVX2_RX, +- ENIC_DEVARG_GENEVE_OPT, + ENIC_DEVARG_IG_VLAN_REWRITE, + ENIC_DEVARG_REPRESENTOR, + NULL}; +@@ -1203,7 +1220,6 @@ static int enic_check_devargs(struct rte_eth_dev *dev) + + enic->disable_overlay = false; + enic->enable_avx2_rx = false; +- enic->geneve_opt_request = false; + enic->ig_vlan_rewrite_mode = IG_VLAN_REWRITE_MODE_PASS_THRU; + if (!dev->device->devargs) + return 0; +@@ -1214,8 +1230,6 @@ static int enic_check_devargs(struct rte_eth_dev *dev) + enic_parse_zero_one, enic) < 0 || + rte_kvargs_process(kvlist, ENIC_DEVARG_ENABLE_AVX2_RX, + enic_parse_zero_one, enic) < 0 || +- rte_kvargs_process(kvlist, ENIC_DEVARG_GENEVE_OPT, +- enic_parse_zero_one, enic) < 0 || + rte_kvargs_process(kvlist, ENIC_DEVARG_IG_VLAN_REWRITE, + enic_parse_ig_vlan_rewrite, enic) < 0) { + rte_kvargs_free(kvlist); +@@ -1252,7 +1266,6 @@ static int eth_enic_dev_init(struct rte_eth_dev *eth_dev, + + pdev = RTE_ETH_DEV_TO_PCI(eth_dev); + rte_eth_copy_pci_info(eth_dev, pdev); +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + enic->pdev = pdev; + addr = &pdev->addr; + +@@ -1384,5 +1397,4 @@ RTE_PMD_REGISTER_KMOD_DEP(net_enic, "* igb_uio | uio_pci_generic | vfio-pci"); + RTE_PMD_REGISTER_PARAM_STRING(net_enic, + ENIC_DEVARG_DISABLE_OVERLAY "=0|1 " + ENIC_DEVARG_ENABLE_AVX2_RX "=0|1 " +- ENIC_DEVARG_GENEVE_OPT "=0|1 " + ENIC_DEVARG_IG_VLAN_REWRITE "=trunk|untag|priority|pass"); +diff --git a/dpdk/drivers/net/enic/enic_flow.c b/dpdk/drivers/net/enic/enic_flow.c +index 92b1c9eda6..9520e157a3 100644 +--- a/dpdk/drivers/net/enic/enic_flow.c ++++ b/dpdk/drivers/net/enic/enic_flow.c +@@ -405,7 +405,7 @@ enic_copy_item_ipv4_v1(struct copy_item_args *arg) + return ENOTSUP; + } + +- /* check that the suppied mask exactly matches capabilty */ ++ /* check that the supplied mask exactly matches capability */ + if (!mask_exact_match((const uint8_t *)&supported_mask, + (const uint8_t *)item->mask, 
sizeof(*mask))) { + ENICPMD_LOG(ERR, "IPv4 exact match mask"); +@@ -443,7 +443,7 @@ enic_copy_item_udp_v1(struct copy_item_args *arg) + return ENOTSUP; + } + +- /* check that the suppied mask exactly matches capabilty */ ++ /* check that the supplied mask exactly matches capability */ + if (!mask_exact_match((const uint8_t *)&supported_mask, + (const uint8_t *)item->mask, sizeof(*mask))) { + ENICPMD_LOG(ERR, "UDP exact match mask"); +@@ -482,7 +482,7 @@ enic_copy_item_tcp_v1(struct copy_item_args *arg) + return ENOTSUP; + } + +- /* check that the suppied mask exactly matches capabilty */ ++ /* check that the supplied mask exactly matches capability */ + if (!mask_exact_match((const uint8_t *)&supported_mask, + (const uint8_t *)item->mask, sizeof(*mask))) { + ENICPMD_LOG(ERR, "TCP exact match mask"); +@@ -1044,14 +1044,14 @@ fixup_l5_layer(struct enic *enic, struct filter_generic_1 *gp, + } + + /** +- * Build the intenal enic filter structure from the provided pattern. The ++ * Build the internal enic filter structure from the provided pattern. The + * pattern is validated as the items are copied. + * + * @param pattern[in] + * @param items_info[in] + * Info about this NICs item support, like valid previous items. + * @param enic_filter[out] +- * NIC specfilc filters derived from the pattern. ++ * NIC specific filters derived from the pattern. + * @param error[out] + */ + static int +@@ -1123,12 +1123,12 @@ enic_copy_filter(const struct rte_flow_item pattern[], + } + + /** +- * Build the intenal version 1 NIC action structure from the provided pattern. ++ * Build the internal version 1 NIC action structure from the provided pattern. + * The pattern is validated as the items are copied. + * + * @param actions[in] + * @param enic_action[out] +- * NIC specfilc actions derived from the actions. ++ * NIC specific actions derived from the actions. + * @param error[out] + */ + static int +@@ -1170,12 +1170,12 @@ enic_copy_action_v1(__rte_unused struct enic *enic, + } + + /** +- * Build the intenal version 2 NIC action structure from the provided pattern. ++ * Build the internal version 2 NIC action structure from the provided pattern. + * The pattern is validated as the items are copied. + * + * @param actions[in] + * @param enic_action[out] +- * NIC specfilc actions derived from the actions. ++ * NIC specific actions derived from the actions. + * @param error[out] + */ + static int +diff --git a/dpdk/drivers/net/enic/enic_fm_flow.c b/dpdk/drivers/net/enic/enic_fm_flow.c +index 86b91ed8b1..a083ec1ed8 100644 +--- a/dpdk/drivers/net/enic/enic_fm_flow.c ++++ b/dpdk/drivers/net/enic/enic_fm_flow.c +@@ -1670,7 +1670,7 @@ enic_fm_dump_tcam_actions(const struct fm_action *fm_action) + /* Remove trailing comma */ + if (buf[0]) + *(bp - 1) = '\0'; +- ENICPMD_LOG(DEBUG, " Acions: %s", buf); ++ ENICPMD_LOG(DEBUG, " Actions: %s", buf); + } + + static int +@@ -2188,7 +2188,7 @@ enic_action_handle_get(struct enic_flowman *fm, struct fm_action *action_in, + if (ret < 0 && ret != -ENOENT) + return rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, +- NULL, "enic: rte_hash_lookup(aciton)"); ++ NULL, "enic: rte_hash_lookup(action)"); + + if (ret == -ENOENT) { + /* Allocate a new action on the NIC. 
*/ +@@ -2196,11 +2196,11 @@ enic_action_handle_get(struct enic_flowman *fm, struct fm_action *action_in, + memcpy(fma, action_in, sizeof(*fma)); + + ah = calloc(1, sizeof(*ah)); +- memcpy(&ah->key, action_in, sizeof(struct fm_action)); + if (ah == NULL) + return rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "enic: calloc(fm-action)"); ++ memcpy(&ah->key, action_in, sizeof(struct fm_action)); + args[0] = FM_ACTION_ALLOC; + args[1] = fm->cmd.pa; + ret = flowman_cmd(fm, args, 2); +@@ -2259,7 +2259,7 @@ __enic_fm_flow_add_entry(struct enic_flowman *fm, + + ENICPMD_FUNC_TRACE(); + +- /* Get or create an aciton handle. */ ++ /* Get or create an action handle. */ + ret = enic_action_handle_get(fm, action_in, error, &ah); + if (ret) + return ret; +@@ -2886,7 +2886,7 @@ enic_fm_init(struct enic *enic) + rc = enic_fm_init_actions(fm); + if (rc) { + ENICPMD_LOG(ERR, "cannot create action hash, error:%d", rc); +- goto error_tables; ++ goto error_counters; + } + /* + * One default exact match table for each direction. We hold onto +@@ -2895,7 +2895,7 @@ enic_fm_init(struct enic *enic) + rc = enic_fet_alloc(fm, 1, NULL, 128, &fm->default_ig_fet); + if (rc) { + ENICPMD_LOG(ERR, "cannot alloc default IG exact match table"); +- goto error_counters; ++ goto error_actions; + } + fm->default_ig_fet->ref = 1; + rc = enic_fet_alloc(fm, 0, NULL, 128, &fm->default_eg_fet); +@@ -2910,6 +2910,8 @@ enic_fm_init(struct enic *enic) + + error_ig_fet: + enic_fet_free(fm, fm->default_ig_fet); ++error_actions: ++ rte_hash_free(fm->action_hash); + error_counters: + enic_fm_free_all_counters(fm); + error_tables: +diff --git a/dpdk/drivers/net/enic/enic_main.c b/dpdk/drivers/net/enic/enic_main.c +index d0d41035fd..498476fce5 100644 +--- a/dpdk/drivers/net/enic/enic_main.c ++++ b/dpdk/drivers/net/enic/enic_main.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #include "enic_compat.h" + #include "enic.h" +@@ -1122,7 +1123,7 @@ int enic_disable(struct enic *enic) + } + + /* If we were using interrupts, set the interrupt vector to -1 +- * to disable interrupts. We are not disabling link notifcations, ++ * to disable interrupts. We are not disabling link notifications, + * though, as we want the polling of link status to continue working. + */ + if (enic->rte_dev->data->dev_conf.intr_conf.lsc) +@@ -1704,6 +1705,85 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu) + return rc; + } + ++static void ++enic_disable_overlay_offload(struct enic *enic) ++{ ++ /* ++ * Disabling fails if the feature is provisioned but ++ * not enabled. So ignore result and do not log error. ++ */ ++ if (enic->vxlan) { ++ vnic_dev_overlay_offload_ctrl(enic->vdev, ++ OVERLAY_FEATURE_VXLAN, OVERLAY_OFFLOAD_DISABLE); ++ } ++ if (enic->geneve) { ++ vnic_dev_overlay_offload_ctrl(enic->vdev, ++ OVERLAY_FEATURE_GENEVE, OVERLAY_OFFLOAD_DISABLE); ++ } ++} ++ ++static int ++enic_enable_overlay_offload(struct enic *enic) ++{ ++ if (enic->vxlan && vnic_dev_overlay_offload_ctrl(enic->vdev, ++ OVERLAY_FEATURE_VXLAN, OVERLAY_OFFLOAD_ENABLE) != 0) { ++ dev_err(NULL, "failed to enable VXLAN offload\n"); ++ return -EINVAL; ++ } ++ if (enic->geneve && vnic_dev_overlay_offload_ctrl(enic->vdev, ++ OVERLAY_FEATURE_GENEVE, OVERLAY_OFFLOAD_ENABLE) != 0) { ++ dev_err(NULL, "failed to enable Geneve offload\n"); ++ return -EINVAL; ++ } ++ enic->tx_offload_capa |= ++ DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | ++ (enic->geneve ? DEV_TX_OFFLOAD_GENEVE_TNL_TSO : 0) | ++ (enic->vxlan ? 
DEV_TX_OFFLOAD_VXLAN_TNL_TSO : 0); ++ enic->tx_offload_mask |= ++ PKT_TX_OUTER_IPV6 | ++ PKT_TX_OUTER_IPV4 | ++ PKT_TX_OUTER_IP_CKSUM | ++ PKT_TX_TUNNEL_MASK; ++ enic->overlay_offload = true; ++ ++ if (enic->vxlan && enic->geneve) ++ dev_info(NULL, "Overlay offload is enabled (VxLAN, Geneve)\n"); ++ else if (enic->vxlan) ++ dev_info(NULL, "Overlay offload is enabled (VxLAN)\n"); ++ else ++ dev_info(NULL, "Overlay offload is enabled (Geneve)\n"); ++ ++ return 0; ++} ++ ++static int ++enic_reset_overlay_port(struct enic *enic) ++{ ++ if (enic->vxlan) { ++ enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT; ++ /* ++ * Reset the vxlan port to the default, as the NIC firmware ++ * does not reset it automatically and keeps the old setting. ++ */ ++ if (vnic_dev_overlay_offload_cfg(enic->vdev, ++ OVERLAY_CFG_VXLAN_PORT_UPDATE, ++ RTE_VXLAN_DEFAULT_PORT)) { ++ dev_err(enic, "failed to update vxlan port\n"); ++ return -EINVAL; ++ } ++ } ++ if (enic->geneve) { ++ enic->geneve_port = RTE_GENEVE_DEFAULT_PORT; ++ if (vnic_dev_overlay_offload_cfg(enic->vdev, ++ OVERLAY_CFG_GENEVE_PORT_UPDATE, ++ RTE_GENEVE_DEFAULT_PORT)) { ++ dev_err(enic, "failed to update vxlan port\n"); ++ return -EINVAL; ++ } ++ } ++ return 0; ++} ++ + static int enic_dev_init(struct enic *enic) + { + int err; +@@ -1773,85 +1853,32 @@ static int enic_dev_init(struct enic *enic) + /* set up link status checking */ + vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */ + ++ enic->overlay_offload = false; + /* +- * When Geneve with options offload is available, always disable it +- * first as it can interfere with user flow rules. ++ * First, explicitly disable overlay offload as the setting is ++ * sticky, and resetting vNIC may not disable it. + */ +- if (enic->geneve_opt_avail) { +- /* +- * Disabling fails if the feature is provisioned but +- * not enabled. So ignore result and do not log error. +- */ +- vnic_dev_overlay_offload_ctrl(enic->vdev, +- OVERLAY_FEATURE_GENEVE, +- OVERLAY_OFFLOAD_DISABLE); +- } +- enic->overlay_offload = false; +- if (enic->disable_overlay && enic->vxlan) { +- /* +- * Explicitly disable overlay offload as the setting is +- * sticky, and resetting vNIC does not disable it. +- */ +- if (vnic_dev_overlay_offload_ctrl(enic->vdev, +- OVERLAY_FEATURE_VXLAN, +- OVERLAY_OFFLOAD_DISABLE)) { +- dev_err(enic, "failed to disable overlay offload\n"); +- } else { +- dev_info(enic, "Overlay offload is disabled\n"); +- } +- } +- if (!enic->disable_overlay && enic->vxlan && +- /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. 
*/ +- vnic_dev_overlay_offload_ctrl(enic->vdev, +- OVERLAY_FEATURE_VXLAN, +- OVERLAY_OFFLOAD_ENABLE) == 0) { +- enic->tx_offload_capa |= +- DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | +- DEV_TX_OFFLOAD_GENEVE_TNL_TSO | +- DEV_TX_OFFLOAD_VXLAN_TNL_TSO; +- enic->tx_offload_mask |= +- PKT_TX_OUTER_IPV6 | +- PKT_TX_OUTER_IPV4 | +- PKT_TX_OUTER_IP_CKSUM | +- PKT_TX_TUNNEL_MASK; +- enic->overlay_offload = true; +- dev_info(enic, "Overlay offload is enabled\n"); +- } +- /* Geneve with options offload requires overlay offload */ +- if (enic->overlay_offload && enic->geneve_opt_avail && +- enic->geneve_opt_request) { +- if (vnic_dev_overlay_offload_ctrl(enic->vdev, +- OVERLAY_FEATURE_GENEVE, +- OVERLAY_OFFLOAD_ENABLE)) { +- dev_err(enic, "failed to enable geneve+option\n"); +- } else { +- enic->geneve_opt_enabled = 1; +- dev_info(enic, "Geneve with options is enabled\n"); ++ enic_disable_overlay_offload(enic); ++ /* Then, enable overlay offload according to vNIC flags */ ++ if (!enic->disable_overlay && (enic->vxlan || enic->geneve)) { ++ err = enic_enable_overlay_offload(enic); ++ if (err) { ++ dev_info(NULL, "failed to enable overlay offload\n"); ++ return err; + } + } + /* +- * Reset the vxlan port if HW vxlan parsing is available. It ++ * Reset the vxlan/geneve port if HW parsing is available. It + * is always enabled regardless of overlay offload + * enable/disable. + */ +- if (enic->vxlan) { +- enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT; +- /* +- * Reset the vxlan port to the default, as the NIC firmware +- * does not reset it automatically and keeps the old setting. +- */ +- if (vnic_dev_overlay_offload_cfg(enic->vdev, +- OVERLAY_CFG_VXLAN_PORT_UPDATE, +- RTE_VXLAN_DEFAULT_PORT)) { +- dev_err(enic, "failed to update vxlan port\n"); +- return -EINVAL; +- } +- } ++ err = enic_reset_overlay_port(enic); ++ if (err) ++ return err; + + if (enic_fm_init(enic)) + dev_warning(enic, "Init of flowman failed.\n"); + return 0; +- + } + + static void lock_devcmd(void *priv) +diff --git a/dpdk/drivers/net/enic/enic_res.c b/dpdk/drivers/net/enic/enic_res.c +index 20888eb257..d079e2f0e7 100644 +--- a/dpdk/drivers/net/enic/enic_res.c ++++ b/dpdk/drivers/net/enic/enic_res.c +@@ -178,10 +178,9 @@ int enic_get_vnic_config(struct enic *enic) + + enic->vxlan = ENIC_SETTING(enic, VXLAN) && + vnic_dev_capable_vxlan(enic->vdev); +- if (vnic_dev_capable_geneve(enic->vdev)) { +- dev_info(NULL, "Geneve with options offload available\n"); +- enic->geneve_opt_avail = 1; +- } ++ enic->geneve = ENIC_SETTING(enic, GENEVE) && ++ vnic_dev_capable_geneve(enic->vdev); ++ + /* + * Default hardware capabilities. enic_dev_init() may add additional + * flags if it enables overlay offloads. +diff --git a/dpdk/drivers/net/enic/enic_rxtx.c b/dpdk/drivers/net/enic/enic_rxtx.c +index 6a8718c086..0942d761a8 100644 +--- a/dpdk/drivers/net/enic/enic_rxtx.c ++++ b/dpdk/drivers/net/enic/enic_rxtx.c +@@ -624,7 +624,7 @@ static void enqueue_simple_pkts(struct rte_mbuf **pkts, + * The app should not send oversized + * packets. tx_pkt_prepare includes a check as + * well. But some apps ignore the device max size and +- * tx_pkt_prepare. Oversized packets cause WQ errrors ++ * tx_pkt_prepare. Oversized packets cause WQ errors + * and the NIC ends up disabling the whole WQ. So + * truncate packets.. 
+ */ +diff --git a/dpdk/drivers/net/enic/enic_vf_representor.c b/dpdk/drivers/net/enic/enic_vf_representor.c +index c2c03c0281..984a754e35 100644 +--- a/dpdk/drivers/net/enic/enic_vf_representor.c ++++ b/dpdk/drivers/net/enic/enic_vf_representor.c +@@ -672,8 +672,7 @@ int enic_vf_representor_init(struct rte_eth_dev *eth_dev, void *init_params) + + eth_dev->device->driver = pf->rte_dev->device->driver; + eth_dev->dev_ops = &enic_vf_representor_dev_ops; +- eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR | +- RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; ++ eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR; + eth_dev->data->representor_id = vf->vf_id; + eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr_vf", + sizeof(struct rte_ether_addr) * +diff --git a/dpdk/drivers/net/failsafe/failsafe.c b/dpdk/drivers/net/failsafe/failsafe.c +index ba81c82174..0b9b537984 100644 +--- a/dpdk/drivers/net/failsafe/failsafe.c ++++ b/dpdk/drivers/net/failsafe/failsafe.c +@@ -329,8 +329,7 @@ rte_pmd_failsafe_probe(struct rte_vdev_device *vdev) + INFO("Initializing " FAILSAFE_DRIVER_NAME " for %s", + name); + +- if (rte_eal_process_type() == RTE_PROC_SECONDARY && +- strlen(rte_vdev_device_args(vdev)) == 0) { ++ if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + ERROR("Failed to probe %s", name); +diff --git a/dpdk/drivers/net/failsafe/failsafe_ops.c b/dpdk/drivers/net/failsafe/failsafe_ops.c +index 492047f587..c9c38258a9 100644 +--- a/dpdk/drivers/net/failsafe/failsafe_ops.c ++++ b/dpdk/drivers/net/failsafe/failsafe_ops.c +@@ -1095,6 +1095,8 @@ static void + fs_dev_merge_info(struct rte_eth_dev_info *info, + const struct rte_eth_dev_info *sinfo) + { ++ info->min_mtu = RTE_MAX(info->min_mtu, sinfo->min_mtu); ++ info->max_mtu = RTE_MIN(info->max_mtu, sinfo->max_mtu); + info->max_rx_pktlen = RTE_MIN(info->max_rx_pktlen, sinfo->max_rx_pktlen); + info->max_rx_queues = RTE_MIN(info->max_rx_queues, sinfo->max_rx_queues); + info->max_tx_queues = RTE_MIN(info->max_tx_queues, sinfo->max_tx_queues); +@@ -1163,6 +1165,8 @@ fs_dev_infos_get(struct rte_eth_dev *dev, + int ret; + + /* Use maximum upper bounds by default */ ++ infos->min_mtu = RTE_ETHER_MIN_MTU; ++ infos->max_mtu = UINT16_MAX; + infos->max_rx_pktlen = UINT32_MAX; + infos->max_rx_queues = RTE_MAX_QUEUES_PER_PORT; + infos->max_tx_queues = RTE_MAX_QUEUES_PER_PORT; +@@ -1192,7 +1196,8 @@ fs_dev_infos_get(struct rte_eth_dev *dev, + DEV_RX_OFFLOAD_JUMBO_FRAME | + DEV_RX_OFFLOAD_SCATTER | + DEV_RX_OFFLOAD_TIMESTAMP | +- DEV_RX_OFFLOAD_SECURITY; ++ DEV_RX_OFFLOAD_SECURITY | ++ DEV_RX_OFFLOAD_RSS_HASH; + + infos->rx_queue_offload_capa = + DEV_RX_OFFLOAD_VLAN_STRIP | +@@ -1209,7 +1214,8 @@ fs_dev_infos_get(struct rte_eth_dev *dev, + DEV_RX_OFFLOAD_JUMBO_FRAME | + DEV_RX_OFFLOAD_SCATTER | + DEV_RX_OFFLOAD_TIMESTAMP | +- DEV_RX_OFFLOAD_SECURITY; ++ DEV_RX_OFFLOAD_SECURITY | ++ DEV_RX_OFFLOAD_RSS_HASH; + + infos->tx_offload_capa = + DEV_TX_OFFLOAD_MULTI_SEGS | +diff --git a/dpdk/drivers/net/fm10k/fm10k.h b/dpdk/drivers/net/fm10k/fm10k.h +index 916b856acc..a8a3e375fb 100644 +--- a/dpdk/drivers/net/fm10k/fm10k.h ++++ b/dpdk/drivers/net/fm10k/fm10k.h +@@ -44,7 +44,7 @@ + #define FM10K_TX_MAX_MTU_SEG UINT8_MAX + + /* +- * byte aligment for HW RX data buffer ++ * byte alignment for HW RX data buffer + * Datasheet requires RX buffer addresses shall either be 512-byte aligned or + * be 8-byte aligned but without crossing host memory pages (4KB alignment + * boundaries). Satisfy first option. 
+diff --git a/dpdk/drivers/net/fm10k/fm10k_ethdev.c b/dpdk/drivers/net/fm10k/fm10k_ethdev.c +index c187088a33..d76c1c3b12 100644 +--- a/dpdk/drivers/net/fm10k/fm10k_ethdev.c ++++ b/dpdk/drivers/net/fm10k/fm10k_ethdev.c +@@ -254,7 +254,7 @@ rx_queue_clean(struct fm10k_rx_queue *q) + for (i = 0; i < q->nb_fake_desc; ++i) + q->hw_ring[q->nb_desc + i] = zero; + +- /* vPMD driver has a different way of releasing mbufs. */ ++ /* vPMD has a different way of releasing mbufs. */ + if (q->rx_using_sse) { + fm10k_rx_queue_release_mbufs_vec(q); + return; +@@ -289,7 +289,7 @@ rx_queue_free(struct fm10k_rx_queue *q) + } + + /* +- * disable RX queue, wait unitl HW finished necessary flush operation ++ * disable RX queue, wait until HW finished necessary flush operation + */ + static inline int + rx_queue_disable(struct fm10k_hw *hw, uint16_t qnum) +@@ -378,7 +378,7 @@ tx_queue_free(struct fm10k_tx_queue *q) + } + + /* +- * disable TX queue, wait unitl HW finished necessary flush operation ++ * disable TX queue, wait until HW finished necessary flush operation + */ + static inline int + tx_queue_disable(struct fm10k_hw *hw, uint16_t qnum) +@@ -452,7 +452,7 @@ fm10k_dev_configure(struct rte_eth_dev *dev) + if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) + dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; + +- /* multipe queue mode checking */ ++ /* multiple queue mode checking */ + ret = fm10k_check_mq_mode(dev); + if (ret != 0) { + PMD_DRV_LOG(ERR, "fm10k_check_mq_mode fails with %d.", +@@ -2552,7 +2552,7 @@ fm10k_dev_handle_fault(struct fm10k_hw *hw, uint32_t eicr) + * @param handle + * Pointer to interrupt handle. + * @param param +- * The address of parameter (struct rte_eth_dev *) regsitered before. ++ * The address of parameter (struct rte_eth_dev *) registered before. + * + * @return + * void +@@ -2675,7 +2675,7 @@ fm10k_dev_interrupt_handler_pf(void *param) + * @param handle + * Pointer to interrupt handle. + * @param param +- * The address of parameter (struct rte_eth_dev *) regsitered before. ++ * The address of parameter (struct rte_eth_dev *) registered before. + * + * @return + * void +@@ -3033,7 +3033,7 @@ fm10k_params_init(struct rte_eth_dev *dev) + struct fm10k_dev_info *info = + FM10K_DEV_PRIVATE_TO_INFO(dev->data->dev_private); + +- /* Inialize bus info. Normally we would call fm10k_get_bus_info(), but ++ /* Initialize bus info. Normally we would call fm10k_get_bus_info(), but + * there is no way to get link status without reading BAR4. Until this + * works, assume we have maximum bandwidth. + * @todo - fix bus info +diff --git a/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c b/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c +index 6fcc939ad9..80cab1f6b2 100644 +--- a/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c ++++ b/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c +@@ -211,7 +211,7 @@ fm10k_rx_vec_condition_check(struct rte_eth_dev *dev) + struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf; + + #ifndef RTE_FM10K_RX_OLFLAGS_ENABLE +- /* whithout rx ol_flags, no VP flag report */ ++ /* without rx ol_flags, no VP flag report */ + if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_EXTEND) + return -1; + #endif +@@ -238,7 +238,7 @@ fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq) + struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ + + mb_def.nb_segs = 1; +- /* data_off will be ajusted after new mbuf allocated for 512-byte ++ /* data_off will be adjusted after new mbuf allocated for 512-byte + * alignment. 
+ */ + mb_def.data_off = RTE_PKTMBUF_HEADROOM; +@@ -409,7 +409,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + if (!(rxdp->d.staterr & FM10K_RXD_STATUS_DD)) + return 0; + +- /* Vecotr RX will process 4 packets at a time, strip the unaligned ++ /* Vector RX will process 4 packets at a time, strip the unaligned + * tails in case it's not multiple of 4. + */ + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_FM10K_DESCS_PER_LOOP); +@@ -472,7 +472,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + mbp1 = _mm_loadu_si128((__m128i *)&mbufp[pos]); + + /* Read desc statuses backwards to avoid race condition */ +- /* A.1 load 4 pkts desc */ ++ /* A.1 load desc[3] */ + descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); + +@@ -480,13 +480,13 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); + + #if defined(RTE_ARCH_X86_64) +- /* B.1 load 2 64 bit mbuf poitns */ ++ /* B.1 load 2 64 bit mbuf points */ + mbp2 = _mm_loadu_si128((__m128i *)&mbufp[pos+2]); + #endif + ++ /* A.1 load desc[2-0] */ + descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); +- /* B.1 load 2 mbuf point */ + descs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); + descs0[0] = _mm_loadu_si128((__m128i *)(rxdp)); +@@ -544,7 +544,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + /* and with mask to extract bits, flipping 1-0 */ + __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); + /* the staterr values are not in order, as the count +- * count of dd bits doesn't care. However, for end of ++ * of dd bits doesn't care. However, for end of + * packet tracking, we do care, so shuffle. This also + * compresses the 32-bit values to 8-bit + */ +@@ -572,7 +572,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + + fm10k_desc_to_pktype_v(descs0, &rx_pkts[pos]); + +- /* C.4 calc avaialbe number of desc */ ++ /* C.4 calc available number of desc */ + var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); + nb_pkts_recd += var; + if (likely(var != RTE_FM10K_DESCS_PER_LOOP)) +diff --git a/dpdk/drivers/net/hinic/base/hinic_compat.h b/dpdk/drivers/net/hinic/base/hinic_compat.h +index 6dd210ec06..aea332046e 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_compat.h ++++ b/dpdk/drivers/net/hinic/base/hinic_compat.h +@@ -171,6 +171,7 @@ static inline u32 readl(const volatile void *addr) + #else + #define CLOCK_TYPE CLOCK_MONOTONIC + #endif ++#define HINIC_MUTEX_TIMEOUT 10 + + static inline unsigned long clock_gettime_ms(void) + { +@@ -225,24 +226,14 @@ static inline int hinic_mutex_destroy(pthread_mutex_t *pthreadmutex) + static inline int hinic_mutex_lock(pthread_mutex_t *pthreadmutex) + { + int err; ++ struct timespec tout; + +- err = pthread_mutex_lock(pthreadmutex); +- if (!err) { +- return err; +- } else if (err == EOWNERDEAD) { +- PMD_DRV_LOG(ERR, "Mutex lock failed. (ErrorNo=%d)", errno); +-#if defined(__GLIBC__) +-#if __GLIBC_PREREQ(2, 12) +- (void)pthread_mutex_consistent(pthreadmutex); +-#else +- (void)pthread_mutex_consistent_np(pthreadmutex); +-#endif +-#else +- (void)pthread_mutex_consistent(pthreadmutex); +-#endif +- } else { +- PMD_DRV_LOG(ERR, "Mutex lock failed. (ErrorNo=%d)", errno); +- } ++ (void)clock_gettime(CLOCK_TYPE, &tout); ++ ++ tout.tv_sec += HINIC_MUTEX_TIMEOUT; ++ err = pthread_mutex_timedlock(pthreadmutex, &tout); ++ if (err) ++ PMD_DRV_LOG(ERR, "Mutex lock failed. 
(ErrorNo=%d)", err); + + return err; + } +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.h b/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.h +index 0d5e380123..58a1fbda71 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.h ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_cmdq.h +@@ -9,7 +9,7 @@ + + #define HINIC_SCMD_DATA_LEN 16 + +-/* pmd driver uses 64, kernel l2nic use 4096 */ ++/* PMD uses 64, kernel l2nic use 4096 */ + #define HINIC_CMDQ_DEPTH 64 + + #define HINIC_CMDQ_BUF_SIZE 2048U +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.c b/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.c +index 41c99bdbf8..4c2242ddaf 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.c ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_hwdev.c +@@ -826,7 +826,7 @@ static int set_vf_dma_attr_entry(struct hinic_hwdev *hwdev, u8 entry_idx, + } + + /** +- * dma_attr_table_init - initialize the the default dma attributes ++ * dma_attr_table_init - initialize the default dma attributes + * @hwdev: the pointer to the private hardware device object + */ + static int dma_attr_table_init(struct hinic_hwdev *hwdev) +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.c b/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.c +index 9b399502de..6ecca407a1 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.c ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_mgmt.c +@@ -133,16 +133,12 @@ static void prepare_header(struct hinic_msg_pf_to_mgmt *pf_to_mgmt, + static void prepare_mgmt_cmd(u8 *mgmt_cmd, u64 *header, void *msg, + int msg_len) + { +- u32 cmd_buf_max = MAX_PF_MGMT_BUF_SIZE; +- + memset(mgmt_cmd, 0, MGMT_MSG_RSVD_FOR_DEV); + + mgmt_cmd += MGMT_MSG_RSVD_FOR_DEV; +- cmd_buf_max -= MGMT_MSG_RSVD_FOR_DEV; + memcpy(mgmt_cmd, header, sizeof(*header)); + + mgmt_cmd += sizeof(*header); +- cmd_buf_max -= sizeof(*header); + memcpy(mgmt_cmd, msg, msg_len); + } + +@@ -615,7 +611,6 @@ static int recv_mgmt_msg_handler(struct hinic_msg_pf_to_mgmt *pf_to_mgmt, + void *msg_body = header + sizeof(msg_header); + u8 *dest_msg; + u8 seq_id, seq_len; +- u32 msg_buf_max = MAX_PF_MGMT_BUF_SIZE; + u8 front_id; + u16 msg_id; + +@@ -635,7 +630,6 @@ static int recv_mgmt_msg_handler(struct hinic_msg_pf_to_mgmt *pf_to_mgmt, + } + + dest_msg = (u8 *)recv_msg->msg + seq_id * HINIC_MSG_SEG_LEN; +- msg_buf_max -= seq_id * HINIC_MSG_SEG_LEN; + memcpy(dest_msg, msg_body, seq_len); + + if (!HINIC_MSG_HEADER_GET(msg_header, LAST)) +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_niccfg.h b/dpdk/drivers/net/hinic/base/hinic_pmd_niccfg.h +index 04cd374ca6..0d0a6700d4 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_niccfg.h ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_niccfg.h +@@ -116,15 +116,6 @@ enum hinic_link_mode { + #define HINIC_DEFAULT_RX_MODE (HINIC_RX_MODE_UC | HINIC_RX_MODE_MC | \ + HINIC_RX_MODE_BC) + +-#define HINIC_MAX_MTU_SIZE (9600) +-#define HINIC_MIN_MTU_SIZE (256) +- +-/* MIN_MTU + ETH_HLEN + CRC (256+14+4) */ +-#define HINIC_MIN_FRAME_SIZE 274 +- +-/* MAX_MTU + ETH_HLEN + CRC + VLAN(9600+14+4+4) */ +-#define HINIC_MAX_JUMBO_FRAME_SIZE (9622) +- + #define HINIC_PORT_DISABLE 0x0 + #define HINIC_PORT_ENABLE 0x3 + +diff --git a/dpdk/drivers/net/hinic/base/hinic_pmd_nicio.c b/dpdk/drivers/net/hinic/base/hinic_pmd_nicio.c +index 162308b02f..ad5db9f1de 100644 +--- a/dpdk/drivers/net/hinic/base/hinic_pmd_nicio.c ++++ b/dpdk/drivers/net/hinic/base/hinic_pmd_nicio.c +@@ -230,8 +230,8 @@ static void hinic_rq_prepare_ctxt(struct hinic_rq *rq, + wq_block_pfn_hi = upper_32_bits(wq_block_pfn); + wq_block_pfn_lo = 
lower_32_bits(wq_block_pfn); + +- /* must config as ceq enable but do not generate ceq */ +- rq_ctxt->ceq_attr = RQ_CTXT_CEQ_ATTR_SET(1, EN) | ++ /* config as ceq disable, but must set msix state disable */ ++ rq_ctxt->ceq_attr = RQ_CTXT_CEQ_ATTR_SET(0, EN) | + RQ_CTXT_CEQ_ATTR_SET(1, OWNER); + + rq_ctxt->pi_intr_attr = RQ_CTXT_PI_SET(pi_start, IDX) | +diff --git a/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c b/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c +index 5a2c171099..191ec8b3a6 100644 +--- a/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c ++++ b/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c +@@ -69,15 +69,6 @@ + + #define HINIC_VLAN_FILTER_EN (1U << 0) + +-#define HINIC_MTU_TO_PKTLEN(mtu) \ +- ((mtu) + ETH_HLEN + ETH_CRC_LEN) +- +-#define HINIC_PKTLEN_TO_MTU(pktlen) \ +- ((pktlen) - (ETH_HLEN + ETH_CRC_LEN)) +- +-/* The max frame size with default MTU */ +-#define HINIC_ETH_MAX_LEN (RTE_ETHER_MTU + ETH_HLEN + ETH_CRC_LEN) +- + /* lro numer limit for one packet */ + #define HINIC_LRO_WQE_NUM_DEFAULT 8 + +@@ -264,7 +255,7 @@ static int hinic_vlan_offload_set(struct rte_eth_dev *dev, int mask); + * Interrupt handler triggered by NIC for handling + * specific event. + * +- * @param: The address of parameter (struct rte_eth_dev *) regsitered before. ++ * @param: The address of parameter (struct rte_eth_dev *) registered before. + */ + static void hinic_dev_interrupt_handler(void *param) + { +@@ -345,7 +336,7 @@ static int hinic_dev_configure(struct rte_eth_dev *dev) + return err; + } + +- /* init vlan offoad */ ++ /* init VLAN offload */ + err = hinic_vlan_offload_set(dev, + ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK); + if (err) { +@@ -1617,6 +1608,9 @@ static int hinic_vlan_filter_set(struct rte_eth_dev *dev, + if (vlan_id > RTE_ETHER_MAX_VLAN_ID) + return -EINVAL; + ++ if (vlan_id == 0) ++ return 0; ++ + func_id = hinic_global_func_id(nic_dev->hwdev); + + if (enable) { +@@ -3085,6 +3079,10 @@ static const struct eth_dev_ops hinic_pmd_vf_ops = { + .filter_ctrl = hinic_dev_filter_ctrl, + }; + ++static const struct eth_dev_ops hinic_dev_sec_ops = { ++ .dev_infos_get = hinic_dev_infos_get, ++}; ++ + static int hinic_func_init(struct rte_eth_dev *eth_dev) + { + struct rte_pci_device *pci_dev; +@@ -3099,6 +3097,7 @@ static int hinic_func_init(struct rte_eth_dev *eth_dev) + + /* EAL is SECONDARY and eth_dev is already created */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { ++ eth_dev->dev_ops = &hinic_dev_sec_ops; + PMD_DRV_LOG(INFO, "Initialize %s in secondary process", + eth_dev->data->name); + +diff --git a/dpdk/drivers/net/hinic/hinic_pmd_ethdev.h b/dpdk/drivers/net/hinic/hinic_pmd_ethdev.h +index c7338d83be..b4e34f0350 100644 +--- a/dpdk/drivers/net/hinic/hinic_pmd_ethdev.h ++++ b/dpdk/drivers/net/hinic/hinic_pmd_ethdev.h +@@ -32,6 +32,23 @@ + #define HINIC_UINT32_BIT_SIZE (CHAR_BIT * sizeof(uint32_t)) + #define HINIC_VFTA_SIZE (4096 / HINIC_UINT32_BIT_SIZE) + ++#define HINIC_MAX_MTU_SIZE 9600 ++#define HINIC_MIN_MTU_SIZE 256 ++ ++#define HINIC_VLAN_TAG_SIZE 4 ++#define HINIC_ETH_OVERHEAD \ ++ (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + HINIC_VLAN_TAG_SIZE * 2) ++ ++#define HINIC_MIN_FRAME_SIZE (HINIC_MIN_MTU_SIZE + HINIC_ETH_OVERHEAD) ++#define HINIC_MAX_JUMBO_FRAME_SIZE (HINIC_MAX_MTU_SIZE + HINIC_ETH_OVERHEAD) ++ ++#define HINIC_MTU_TO_PKTLEN(mtu) ((mtu) + HINIC_ETH_OVERHEAD) ++ ++#define HINIC_PKTLEN_TO_MTU(pktlen) ((pktlen) - HINIC_ETH_OVERHEAD) ++ ++/* The max frame size with default MTU */ ++#define HINIC_ETH_MAX_LEN (RTE_ETHER_MTU + HINIC_ETH_OVERHEAD) ++ + enum hinic_dev_status { + 
HINIC_DEV_INIT, + HINIC_DEV_CLOSE, +@@ -154,7 +171,7 @@ struct tag_tcam_key_mem { + /* + * tunnel packet, mask must be 0xff, spec value is 1; + * normal packet, mask must be 0, spec value is 0; +- * if tunnal packet, ucode use ++ * if tunnel packet, ucode use + * sip/dip/protocol/src_port/dst_dport from inner packet + */ + u32 tunnel_flag:8; +diff --git a/dpdk/drivers/net/hinic/hinic_pmd_flow.c b/dpdk/drivers/net/hinic/hinic_pmd_flow.c +index d71a42afbd..2cf24ebcf6 100644 +--- a/dpdk/drivers/net/hinic/hinic_pmd_flow.c ++++ b/dpdk/drivers/net/hinic/hinic_pmd_flow.c +@@ -734,7 +734,7 @@ static int hinic_check_ntuple_item_ele(const struct rte_flow_item *item, + * END + * other members in mask and spec should set to 0x00. + * item->last should be NULL. +- * Please aware there's an asumption for all the parsers. ++ * Please be aware there's an assumption for all the parsers. + * rte_flow_item is using big endian, rte_flow_attr and + * rte_flow_action are using CPU order. + * Because the pattern is used to describe the packets, +@@ -1630,7 +1630,7 @@ static int hinic_parse_fdir_filter(struct rte_eth_dev *dev, + + /** + * Check if the flow rule is supported by nic. +- * It only checkes the format. Don't guarantee the rule can be programmed into ++ * It only checks the format. Don't guarantee the rule can be programmed into + * the HW. Because there can be no enough room for the rule. + */ + static int hinic_flow_validate(struct rte_eth_dev *dev, +diff --git a/dpdk/drivers/net/hinic/hinic_pmd_tx.c b/dpdk/drivers/net/hinic/hinic_pmd_tx.c +index 669f82389c..bc972de301 100644 +--- a/dpdk/drivers/net/hinic/hinic_pmd_tx.c ++++ b/dpdk/drivers/net/hinic/hinic_pmd_tx.c +@@ -1144,7 +1144,7 @@ u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts) + mbuf_pkt = *tx_pkts++; + queue_info = 0; + +- /* 1. parse sge and tx offlod info from mbuf */ ++ /* 1. parse sge and tx offload info from mbuf */ + if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt, + &sqe_info, &off_info))) { + txq->txq_stats.off_errs++; +diff --git a/dpdk/drivers/net/hns3/hns3_cmd.c b/dpdk/drivers/net/hns3/hns3_cmd.c +index 76d16a5a92..dc65ef8c26 100644 +--- a/dpdk/drivers/net/hns3/hns3_cmd.c ++++ b/dpdk/drivers/net/hns3/hns3_cmd.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. 
+ */ + + #include +@@ -10,10 +10,6 @@ + #include "hns3_intr.h" + #include "hns3_logs.h" + +-#define hns3_is_csq(ring) ((ring)->flag & HNS3_TYPE_CSQ) +- +-#define cmq_ring_to_dev(ring) (&(ring)->dev->pdev->dev) +- + static int + hns3_ring_space(struct hns3_cmq_ring *ring) + { +@@ -48,10 +44,12 @@ static int + hns3_allocate_dma_mem(struct hns3_hw *hw, struct hns3_cmq_ring *ring, + uint64_t size, uint32_t alignment) + { ++ static uint64_t hns3_dma_memzone_id; + const struct rte_memzone *mz = NULL; + char z_name[RTE_MEMZONE_NAMESIZE]; + +- snprintf(z_name, sizeof(z_name), "hns3_dma_%" PRIu64, rte_rand()); ++ snprintf(z_name, sizeof(z_name), "hns3_dma_%" PRIu64, ++ __atomic_fetch_add(&hns3_dma_memzone_id, 1, __ATOMIC_RELAXED)); + mz = rte_memzone_reserve_bounded(z_name, size, SOCKET_ID_ANY, + RTE_MEMZONE_IOVA_CONTIG, alignment, + RTE_PGSIZE_2M); +@@ -195,12 +193,14 @@ hns3_cmd_csq_clean(struct hns3_hw *hw) + { + struct hns3_cmq_ring *csq = &hw->cmq.csq; + uint32_t head; ++ uint32_t addr; + int clean; + + head = hns3_read_dev(hw, HNS3_CMDQ_TX_HEAD_REG); +- if (!is_valid_csq_clean_head(csq, head)) { +- hns3_err(hw, "wrong cmd head (%u, %u-%u)", head, +- csq->next_to_use, csq->next_to_clean); ++ addr = hns3_read_dev(hw, HNS3_CMDQ_TX_ADDR_L_REG); ++ if (!is_valid_csq_clean_head(csq, head) || addr == 0) { ++ hns3_err(hw, "wrong cmd addr(%0x) head (%u, %u-%u)", addr, head, ++ csq->next_to_use, csq->next_to_clean); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_atomic16_set(&hw->reset.disable_cmd, 1); + hns3_schedule_delayed_reset(HNS3_DEV_HW_TO_ADAPTER(hw)); +@@ -424,8 +424,14 @@ static void hns3_parse_capability(struct hns3_hw *hw, + hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_PTP_B, 1); + if (hns3_get_bit(caps, HNS3_CAPS_TX_PUSH_B)) + hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_TX_PUSH_B, 1); ++ /* ++ * Currently, the query of link status and link info on copper ports ++ * are not supported. So it is necessary for driver to set the copper ++ * capability bit to zero when the firmware supports the configuration ++ * of the PHY. ++ */ + if (hns3_get_bit(caps, HNS3_CAPS_PHY_IMP_B)) +- hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_COPPER_B, 1); ++ hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_COPPER_B, 0); + if (hns3_get_bit(caps, HNS3_CAPS_TQP_TXRX_INDEP_B)) + hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_INDEP_TXRX_B, 1); + if (hns3_get_bit(caps, HNS3_CAPS_STASH_B)) +@@ -442,7 +448,7 @@ hns3_build_api_caps(void) + return rte_cpu_to_le_32(api_caps); + } + +-static enum hns3_cmd_status ++static int + hns3_cmd_query_firmware_version_and_capability(struct hns3_hw *hw) + { + struct hns3_query_version_cmd *resp; +diff --git a/dpdk/drivers/net/hns3/hns3_cmd.h b/dpdk/drivers/net/hns3/hns3_cmd.h +index 20c373590f..344474db6f 100644 +--- a/dpdk/drivers/net/hns3/hns3_cmd.h ++++ b/dpdk/drivers/net/hns3/hns3_cmd.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. 
+ */ + + #ifndef _HNS3_CMD_H_ +@@ -55,13 +55,6 @@ enum hns3_cmd_return_status { + HNS3_CMD_INVALID = 11, + }; + +-enum hns3_cmd_status { +- HNS3_STATUS_SUCCESS = 0, +- HNS3_ERR_CSQ_FULL = -1, +- HNS3_ERR_CSQ_TIMEOUT = -2, +- HNS3_ERR_CSQ_ERROR = -3, +-}; +- + struct hns3_misc_vector { + uint8_t *addr; + int vector_irq; +@@ -71,7 +64,7 @@ struct hns3_cmq { + struct hns3_cmq_ring csq; + struct hns3_cmq_ring crq; + uint16_t tx_timeout; +- enum hns3_cmd_status last_status; ++ enum hns3_cmd_return_status last_status; + }; + + enum hns3_opcode_type { +@@ -432,8 +425,6 @@ struct hns3_umv_spc_alc_cmd { + #define HNS3_CFG_RD_LEN_BYTES 16 + #define HNS3_CFG_RD_LEN_UNIT 4 + +-#define HNS3_CFG_VMDQ_S 0 +-#define HNS3_CFG_VMDQ_M GENMASK(7, 0) + #define HNS3_CFG_TC_NUM_S 8 + #define HNS3_CFG_TC_NUM_M GENMASK(15, 8) + #define HNS3_CFG_TQP_DESC_N_S 16 +@@ -566,7 +557,6 @@ struct hns3_cfg_gro_status_cmd { + + #define HNS3_RSS_HASH_KEY_OFFSET_B 4 + +-#define HNS3_RSS_CFG_TBL_SIZE 16 + #define HNS3_RSS_HASH_KEY_NUM 16 + /* Configure the algorithm mode and Hash Key, opcode:0x0D01 */ + struct hns3_rss_generic_config_cmd { +@@ -821,7 +811,8 @@ enum hns3_mac_vlan_add_resp_code { + HNS3_ADD_MC_OVERFLOW, /* ADD failed for MC overflow */ + }; + +-#define HNS3_MC_MAC_VLAN_ADD_DESC_NUM 3 ++#define HNS3_MC_MAC_VLAN_OPS_DESC_NUM 3 ++#define HNS3_UC_MAC_VLAN_OPS_DESC_NUM 1 + + #define HNS3_MAC_VLAN_BIT0_EN_B 0 + #define HNS3_MAC_VLAN_BIT1_EN_B 1 +@@ -856,10 +847,16 @@ struct hns3_reset_tqp_queue_cmd { + + #define HNS3_CFG_RESET_MAC_B 3 + #define HNS3_CFG_RESET_FUNC_B 7 ++#define HNS3_CFG_RESET_RCB_B 1 + struct hns3_reset_cmd { + uint8_t mac_func_reset; + uint8_t fun_reset_vfid; +- uint8_t rsv[22]; ++ uint8_t fun_reset_rcb; ++ uint8_t rsv1; ++ uint16_t fun_reset_rcb_vqid_start; ++ uint16_t fun_reset_rcb_vqid_num; ++ uint8_t fun_reset_rcb_return_status; ++ uint8_t rsv2[15]; + }; + + #define HNS3_QUERY_DEV_SPECS_BD_NUM 4 +diff --git a/dpdk/drivers/net/hns3/hns3_dcb.c b/dpdk/drivers/net/hns3/hns3_dcb.c +index ab77acd948..a5cfccee28 100644 +--- a/dpdk/drivers/net/hns3/hns3_dcb.c ++++ b/dpdk/drivers/net/hns3/hns3_dcb.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. 
+ */ + + #include +@@ -25,7 +25,7 @@ + * IR(Mbps) = ------------------------- * CLOCK(1000Mbps) + * Tick * (2 ^ IR_s) + * +- * @return: 0: calculate sucessful, negative: fail ++ * @return: 0: calculate successful, negative: fail + */ + static int + hns3_shaper_para_calc(struct hns3_hw *hw, uint32_t ir, uint8_t shaper_level, +@@ -36,8 +36,8 @@ hns3_shaper_para_calc(struct hns3_hw *hw, uint32_t ir, uint8_t shaper_level, + #define DIVISOR_IR_B_126 (126 * DIVISOR_CLK) + + const uint16_t tick_array[HNS3_SHAPER_LVL_CNT] = { +- 6 * 256, /* Prioriy level */ +- 6 * 32, /* Prioriy group level */ ++ 6 * 256, /* Priority level */ ++ 6 * 32, /* Priority group level */ + 6 * 8, /* Port level */ + 6 * 256 /* Qset level */ + }; +@@ -685,6 +685,18 @@ hns3_queue_to_tc_mapping(struct hns3_hw *hw, uint16_t nb_rx_q, uint16_t nb_tx_q) + { + int ret; + ++ if (nb_rx_q < hw->num_tc) { ++ hns3_err(hw, "number of Rx queues(%u) is less than number of TC(%u).", ++ nb_rx_q, hw->num_tc); ++ return -EINVAL; ++ } ++ ++ if (nb_tx_q < hw->num_tc) { ++ hns3_err(hw, "number of Tx queues(%u) is less than number of TC(%u).", ++ nb_tx_q, hw->num_tc); ++ return -EINVAL; ++ } ++ + ret = hns3_set_rss_size(hw, nb_rx_q); + if (ret) + return ret; +@@ -1196,7 +1208,7 @@ hns3_qs_bp_cfg(struct hns3_hw *hw, uint8_t tc, uint8_t grp_id, uint32_t bit_map) + static void + hns3_get_rx_tx_en_status(struct hns3_hw *hw, bool *tx_en, bool *rx_en) + { +- switch (hw->current_mode) { ++ switch (hw->requested_fc_mode) { + case HNS3_FC_NONE: + *tx_en = false; + *rx_en = false; +@@ -1373,7 +1385,7 @@ hns3_dcb_cfg_validate(struct hns3_adapter *hns, uint8_t *tc, bool *changed) + * We ensure that dcb information can be reconfigured + * after the hns3_priority_flow_ctrl_set function called. + */ +- if (hw->current_mode != HNS3_FC_FULL) ++ if (hw->requested_fc_mode != HNS3_FC_FULL) + *changed = true; + pfc_en = RTE_LEN2MASK((uint8_t)dcb_rx_conf->nb_tcs, uint8_t); + if (hw->dcb_info.pfc_en != pfc_en) +@@ -1487,9 +1499,10 @@ hns3_dcb_hw_configure(struct hns3_adapter *hns) + struct hns3_pf *pf = &hns->pf; + struct hns3_hw *hw = &hns->hw; + enum hns3_fc_status fc_status = hw->current_fc_status; +- enum hns3_fc_mode current_mode = hw->current_mode; ++ enum hns3_fc_mode requested_fc_mode = hw->requested_fc_mode; + uint8_t hw_pfc_map = hw->dcb_info.hw_pfc_map; +- int ret, status; ++ uint8_t pfc_en = hw->dcb_info.pfc_en; ++ int ret; + + if (pf->tx_sch_mode != HNS3_FLAG_TC_BASE_SCH_MODE && + pf->tx_sch_mode != HNS3_FLAG_VNET_BASE_SCH_MODE) +@@ -1497,7 +1510,7 @@ hns3_dcb_hw_configure(struct hns3_adapter *hns) + + ret = hns3_dcb_schd_setup_hw(hw); + if (ret) { +- hns3_err(hw, "dcb schdule configure failed! ret = %d", ret); ++ hns3_err(hw, "dcb schedule configure failed! ret = %d", ret); + return ret; + } + +@@ -1514,10 +1527,10 @@ hns3_dcb_hw_configure(struct hns3_adapter *hns) + + ret = hns3_buffer_alloc(hw); + if (ret) +- return ret; ++ goto buffer_alloc_fail; + + hw->current_fc_status = HNS3_FC_STATUS_PFC; +- hw->current_mode = HNS3_FC_FULL; ++ hw->requested_fc_mode = HNS3_FC_FULL; + ret = hns3_dcb_pause_setup_hw(hw); + if (ret) { + hns3_err(hw, "setup pfc failed! 
ret = %d", ret); +@@ -1538,12 +1551,12 @@ hns3_dcb_hw_configure(struct hns3_adapter *hns) + return 0; + + pfc_setup_fail: +- hw->current_mode = current_mode; ++ hw->requested_fc_mode = requested_fc_mode; + hw->current_fc_status = fc_status; ++ ++buffer_alloc_fail: ++ hw->dcb_info.pfc_en = pfc_en; + hw->dcb_info.hw_pfc_map = hw_pfc_map; +- status = hns3_buffer_alloc(hw); +- if (status) +- hns3_err(hw, "recover packet buffer fail! status = %d", status); + + return ret; + } +@@ -1562,7 +1575,7 @@ hns3_dcb_configure(struct hns3_adapter *hns) + int ret; + + hns3_dcb_cfg_validate(hns, &num_tc, &map_changed); +- if (map_changed || rte_atomic16_read(&hw->reset.resetting)) { ++ if (map_changed) { + ret = hns3_dcb_info_update(hns, num_tc); + if (ret) { + hns3_err(hw, "dcb info update failed: %d", ret); +@@ -1616,8 +1629,7 @@ hns3_dcb_init(struct hns3_hw *hw) + * will be changed. + */ + if (hw->adapter_state == HNS3_NIC_UNINITIALIZED) { +- hw->requested_mode = HNS3_FC_NONE; +- hw->current_mode = hw->requested_mode; ++ hw->requested_fc_mode = HNS3_FC_NONE; + pf->pause_time = HNS3_DEFAULT_PAUSE_TRANS_TIME; + hw->current_fc_status = HNS3_FC_STATUS_NONE; + +@@ -1659,14 +1671,18 @@ hns3_dcb_init(struct hns3_hw *hw) + return 0; + } + +-static int ++int + hns3_update_queue_map_configure(struct hns3_adapter *hns) + { + struct hns3_hw *hw = &hns->hw; ++ enum rte_eth_rx_mq_mode mq_mode = hw->data->dev_conf.rxmode.mq_mode; + uint16_t nb_rx_q = hw->data->nb_rx_queues; + uint16_t nb_tx_q = hw->data->nb_tx_queues; + int ret; + ++ if ((uint32_t)mq_mode & ETH_MQ_RX_DCB_FLAG) ++ return 0; ++ + ret = hns3_dcb_update_tc_queue_mapping(hw, nb_rx_q, nb_tx_q); + if (ret) { + hns3_err(hw, "failed to update tc queue mapping, ret = %d.", +@@ -1680,37 +1696,35 @@ hns3_update_queue_map_configure(struct hns3_adapter *hns) + return ret; + } + +-int +-hns3_dcb_cfg_update(struct hns3_adapter *hns) ++static void ++hns3_get_fc_mode(struct hns3_hw *hw, enum rte_eth_fc_mode mode) + { +- struct hns3_hw *hw = &hns->hw; +- enum rte_eth_rx_mq_mode mq_mode = hw->data->dev_conf.rxmode.mq_mode; +- int ret; +- +- if ((uint32_t)mq_mode & ETH_MQ_RX_DCB_FLAG) { +- ret = hns3_dcb_configure(hns); +- if (ret) +- hns3_err(hw, "Failed to config dcb: %d", ret); +- } else { +- /* +- * Update queue map without PFC configuration, +- * due to queues reconfigured by user. +- */ +- ret = hns3_update_queue_map_configure(hns); +- if (ret) +- hns3_err(hw, +- "Failed to update queue mapping configure: %d", +- ret); ++ switch (mode) { ++ case RTE_FC_NONE: ++ hw->requested_fc_mode = HNS3_FC_NONE; ++ break; ++ case RTE_FC_RX_PAUSE: ++ hw->requested_fc_mode = HNS3_FC_RX_PAUSE; ++ break; ++ case RTE_FC_TX_PAUSE: ++ hw->requested_fc_mode = HNS3_FC_TX_PAUSE; ++ break; ++ case RTE_FC_FULL: ++ hw->requested_fc_mode = HNS3_FC_FULL; ++ break; ++ default: ++ hw->requested_fc_mode = HNS3_FC_NONE; ++ hns3_warn(hw, "fc_mode(%u) exceeds member scope and is " ++ "configured to RTE_FC_NONE", mode); ++ break; + } +- +- return ret; + } + + /* + * hns3_dcb_pfc_enable - Enable priority flow control + * @dev: pointer to ethernet device + * +- * Configures the pfc settings for one porority. ++ * Configures the pfc settings for one priority. 
+ */ + int + hns3_dcb_pfc_enable(struct rte_eth_dev *dev, struct rte_eth_pfc_conf *pfc_conf) +@@ -1718,15 +1732,15 @@ hns3_dcb_pfc_enable(struct rte_eth_dev *dev, struct rte_eth_pfc_conf *pfc_conf) + struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct hns3_pf *pf = HNS3_DEV_PRIVATE_TO_PF(dev->data->dev_private); + enum hns3_fc_status fc_status = hw->current_fc_status; +- enum hns3_fc_mode current_mode = hw->current_mode; ++ enum hns3_fc_mode old_fc_mode = hw->requested_fc_mode; + uint8_t hw_pfc_map = hw->dcb_info.hw_pfc_map; + uint8_t pfc_en = hw->dcb_info.pfc_en; + uint8_t priority = pfc_conf->priority; + uint16_t pause_time = pf->pause_time; +- int ret, status; ++ int ret; + + pf->pause_time = pfc_conf->fc.pause_time; +- hw->current_mode = hw->requested_mode; ++ hns3_get_fc_mode(hw, pfc_conf->fc.mode); + hw->current_fc_status = HNS3_FC_STATUS_PFC; + hw->dcb_info.pfc_en |= BIT(priority); + hw->dcb_info.hw_pfc_map = +@@ -1737,7 +1751,7 @@ hns3_dcb_pfc_enable(struct rte_eth_dev *dev, struct rte_eth_pfc_conf *pfc_conf) + + /* + * The flow control mode of all UPs will be changed based on +- * current_mode coming from user. ++ * requested_fc_mode coming from user. + */ + ret = hns3_dcb_pause_setup_hw(hw); + if (ret) { +@@ -1748,14 +1762,11 @@ hns3_dcb_pfc_enable(struct rte_eth_dev *dev, struct rte_eth_pfc_conf *pfc_conf) + return 0; + + pfc_setup_fail: +- hw->current_mode = current_mode; ++ hw->requested_fc_mode = old_fc_mode; + hw->current_fc_status = fc_status; + pf->pause_time = pause_time; + hw->dcb_info.pfc_en = pfc_en; + hw->dcb_info.hw_pfc_map = hw_pfc_map; +- status = hns3_buffer_alloc(hw); +- if (status) +- hns3_err(hw, "recover packet buffer fail: %d", status); + + return ret; + } +@@ -1771,19 +1782,19 @@ hns3_fc_enable(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) + { + struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct hns3_pf *pf = HNS3_DEV_PRIVATE_TO_PF(dev->data->dev_private); ++ enum hns3_fc_mode old_fc_mode = hw->requested_fc_mode; + enum hns3_fc_status fc_status = hw->current_fc_status; +- enum hns3_fc_mode current_mode = hw->current_mode; + uint16_t pause_time = pf->pause_time; + int ret; + + pf->pause_time = fc_conf->pause_time; +- hw->current_mode = hw->requested_mode; ++ hns3_get_fc_mode(hw, fc_conf->mode); + + /* + * In fact, current_fc_status is HNS3_FC_STATUS_NONE when mode + * of flow control is configured to be HNS3_FC_NONE. + */ +- if (hw->current_mode == HNS3_FC_NONE) ++ if (hw->requested_fc_mode == HNS3_FC_NONE) + hw->current_fc_status = HNS3_FC_STATUS_NONE; + else + hw->current_fc_status = HNS3_FC_STATUS_MAC_PAUSE; +@@ -1797,7 +1808,7 @@ hns3_fc_enable(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) + return 0; + + setup_fc_fail: +- hw->current_mode = current_mode; ++ hw->requested_fc_mode = old_fc_mode; + hw->current_fc_status = fc_status; + pf->pause_time = pause_time; + +diff --git a/dpdk/drivers/net/hns3/hns3_dcb.h b/dpdk/drivers/net/hns3/hns3_dcb.h +index fee23d9b4c..0d167e75dc 100644 +--- a/dpdk/drivers/net/hns3/hns3_dcb.h ++++ b/dpdk/drivers/net/hns3/hns3_dcb.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. 
+ */ + + #ifndef _HNS3_DCB_H_ +@@ -207,7 +207,7 @@ int hns3_dcb_pfc_enable(struct rte_eth_dev *dev, + int hns3_queue_to_tc_mapping(struct hns3_hw *hw, uint16_t nb_rx_q, + uint16_t nb_tx_q); + +-int hns3_dcb_cfg_update(struct hns3_adapter *hns); ++int hns3_update_queue_map_configure(struct hns3_adapter *hns); + int hns3_dcb_port_shaper_cfg(struct hns3_hw *hw); + + #endif /* _HNS3_DCB_H_ */ +diff --git a/dpdk/drivers/net/hns3/hns3_ethdev.c b/dpdk/drivers/net/hns3/hns3_ethdev.c +index ba7d6e38a2..693b256ffb 100644 +--- a/dpdk/drivers/net/hns3/hns3_ethdev.c ++++ b/dpdk/drivers/net/hns3/hns3_ethdev.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #include +@@ -101,6 +101,7 @@ static int hns3_remove_mc_addr(struct hns3_hw *hw, + struct rte_ether_addr *mac_addr); + static int hns3_restore_fec(struct hns3_hw *hw); + static int hns3_query_dev_fec_info(struct hns3_hw *hw); ++static int hns3_do_stop(struct hns3_adapter *hns); + + static void + hns3_pf_disable_irq0(struct hns3_hw *hw) +@@ -238,6 +239,7 @@ hns3_interrupt_handler(void *param) + hns3_pf_disable_irq0(hw); + + event_cause = hns3_check_event_cause(hns, &clearval); ++ hns3_clear_event_cause(hw, event_cause, clearval); + /* vector 0 interrupt is shared with reset and mailbox source events. */ + if (event_cause == HNS3_VECTOR0_EVENT_ERR) { + hns3_warn(hw, "Received err interrupt"); +@@ -252,7 +254,6 @@ hns3_interrupt_handler(void *param) + else + hns3_err(hw, "Received unknown event"); + +- hns3_clear_event_cause(hw, event_cause, clearval); + /* Enable interrupt if it is not cause by reset */ + hns3_pf_enable_irq0(hw); + } +@@ -376,7 +377,7 @@ hns3_vlan_filter_configure(struct hns3_adapter *hns, uint16_t vlan_id, int on) + * When port base vlan enabled, we use port base vlan as the vlan + * filter condition. In this case, we don't update vlan filter table + * when user add new vlan or remove exist vlan, just update the +- * vlan list. The vlan id in vlan list will be writen in vlan filter ++ * vlan list. The vlan id in vlan list will be written in vlan filter + * table until port base vlan disabled + */ + if (hw->port_base_vlan_cfg.state == HNS3_PORT_BASE_VLAN_DISABLE) { +@@ -497,7 +498,7 @@ hns3_set_vlan_rx_offload_cfg(struct hns3_adapter *hns, + hns3_set_bit(req->vport_vlan_cfg, HNS3_SHOW_TAG2_EN_B, + vcfg->vlan2_vlan_prionly ? 1 : 0); + +- /* firmwall will ignore this configuration for PCI_REVISION_ID_HIP08 */ ++ /* firmware will ignore this configuration for PCI_REVISION_ID_HIP08 */ + hns3_set_bit(req->vport_vlan_cfg, HNS3_DISCARD_TAG1_EN_B, + vcfg->strip_tag1_discard_en ? 1 : 0); + hns3_set_bit(req->vport_vlan_cfg, HNS3_DISCARD_TAG2_EN_B, +@@ -557,7 +558,8 @@ hns3_en_hw_strip_rxvtag(struct hns3_adapter *hns, bool enable) + + ret = hns3_set_vlan_rx_offload_cfg(hns, &rxvlan_cfg); + if (ret) { +- hns3_err(hw, "enable strip rx vtag failed, ret =%d", ret); ++ hns3_err(hw, "%s strip rx vtag failed, ret = %d.", ++ enable ? "enable" : "disable", ret); + return ret; + } + +@@ -706,7 +708,7 @@ hns3_set_vlan_tx_offload_cfg(struct hns3_adapter *hns, + vcfg->insert_tag2_en ? 1 : 0); + hns3_set_bit(req->vport_vlan_cfg, HNS3_CFG_NIC_ROCE_SEL_B, 0); + +- /* firmwall will ignore this configuration for PCI_REVISION_ID_HIP08 */ ++ /* firmware will ignore this configuration for PCI_REVISION_ID_HIP08 */ + hns3_set_bit(req->vport_vlan_cfg, HNS3_TAG_SHIFT_MODE_EN_B, + vcfg->tag_shift_mode_en ? 
1 : 0); + +@@ -894,7 +896,7 @@ hns3_vlan_pvid_configure(struct hns3_adapter *hns, uint16_t pvid, int on) + { + struct hns3_hw *hw = &hns->hw; + uint16_t port_base_vlan_state; +- int ret; ++ int ret, err; + + if (on == 0 && pvid != hw->port_base_vlan_cfg.pvid) { + if (hw->port_base_vlan_cfg.pvid != HNS3_INVALID_PVID) +@@ -917,7 +919,7 @@ hns3_vlan_pvid_configure(struct hns3_adapter *hns, uint16_t pvid, int on) + if (ret) { + hns3_err(hw, "failed to config rx vlan strip for pvid, " + "ret = %d", ret); +- return ret; ++ goto pvid_vlan_strip_fail; + } + + if (pvid == HNS3_INVALID_PVID) +@@ -926,13 +928,27 @@ hns3_vlan_pvid_configure(struct hns3_adapter *hns, uint16_t pvid, int on) + if (ret) { + hns3_err(hw, "failed to update vlan filter entries, ret = %d", + ret); +- return ret; ++ goto vlan_filter_set_fail; + } + + out: + hw->port_base_vlan_cfg.state = port_base_vlan_state; + hw->port_base_vlan_cfg.pvid = on ? pvid : HNS3_INVALID_PVID; + return ret; ++ ++vlan_filter_set_fail: ++ err = hns3_en_pvid_strip(hns, hw->port_base_vlan_cfg.state == ++ HNS3_PORT_BASE_VLAN_ENABLE); ++ if (err) ++ hns3_err(hw, "fail to rollback pvid strip, ret = %d", err); ++ ++pvid_vlan_strip_fail: ++ err = hns3_vlan_txvlan_cfg(hns, hw->port_base_vlan_cfg.state, ++ hw->port_base_vlan_cfg.pvid); ++ if (err) ++ hns3_err(hw, "fail to rollback txvlan status, ret = %d", err); ++ ++ return ret; + } + + static int +@@ -968,7 +984,7 @@ hns3_vlan_pvid_set(struct rte_eth_dev *dev, uint16_t pvid, int on) + return ret; + /* + * Only in HNS3_SW_SHIFT_AND_MODE the PVID related operation in Tx/Rx +- * need be processed by PMD driver. ++ * need be processed by PMD. + */ + if (pvid_en_state_change && + hw->vlan_mode == HNS3_SW_SHIFT_AND_DISCARD_MODE) +@@ -1328,28 +1344,31 @@ hns3_get_mac_vlan_cmd_status(struct hns3_hw *hw, uint16_t cmdq_resp, + static int + hns3_lookup_mac_vlan_tbl(struct hns3_hw *hw, + struct hns3_mac_vlan_tbl_entry_cmd *req, +- struct hns3_cmd_desc *desc, bool is_mc) ++ struct hns3_cmd_desc *desc, uint8_t desc_num) + { + uint8_t resp_code; + uint16_t retval; + int ret; ++ int i; + +- hns3_cmd_setup_basic_desc(&desc[0], HNS3_OPC_MAC_VLAN_ADD, true); +- if (is_mc) { +- desc[0].flag |= rte_cpu_to_le_16(HNS3_CMD_FLAG_NEXT); +- memcpy(desc[0].data, req, +- sizeof(struct hns3_mac_vlan_tbl_entry_cmd)); +- hns3_cmd_setup_basic_desc(&desc[1], HNS3_OPC_MAC_VLAN_ADD, +- true); +- desc[1].flag |= rte_cpu_to_le_16(HNS3_CMD_FLAG_NEXT); +- hns3_cmd_setup_basic_desc(&desc[2], HNS3_OPC_MAC_VLAN_ADD, ++ if (desc_num == HNS3_MC_MAC_VLAN_OPS_DESC_NUM) { ++ for (i = 0; i < desc_num - 1; i++) { ++ hns3_cmd_setup_basic_desc(&desc[i], ++ HNS3_OPC_MAC_VLAN_ADD, true); ++ desc[i].flag |= rte_cpu_to_le_16(HNS3_CMD_FLAG_NEXT); ++ if (i == 0) ++ memcpy(desc[i].data, req, ++ sizeof(struct hns3_mac_vlan_tbl_entry_cmd)); ++ } ++ hns3_cmd_setup_basic_desc(&desc[i], HNS3_OPC_MAC_VLAN_ADD, + true); +- ret = hns3_cmd_send(hw, desc, HNS3_MC_MAC_VLAN_ADD_DESC_NUM); + } else { ++ hns3_cmd_setup_basic_desc(&desc[0], HNS3_OPC_MAC_VLAN_ADD, ++ true); + memcpy(desc[0].data, req, + sizeof(struct hns3_mac_vlan_tbl_entry_cmd)); +- ret = hns3_cmd_send(hw, desc, 1); + } ++ ret = hns3_cmd_send(hw, desc, desc_num); + if (ret) { + hns3_err(hw, "lookup mac addr failed for cmd_send, ret =%d.", + ret); +@@ -1365,38 +1384,40 @@ hns3_lookup_mac_vlan_tbl(struct hns3_hw *hw, + static int + hns3_add_mac_vlan_tbl(struct hns3_hw *hw, + struct hns3_mac_vlan_tbl_entry_cmd *req, +- struct hns3_cmd_desc *mc_desc) ++ struct hns3_cmd_desc *desc, uint8_t desc_num) + { + uint8_t 
resp_code; + uint16_t retval; + int cfg_status; + int ret; ++ int i; + +- if (mc_desc == NULL) { +- struct hns3_cmd_desc desc; +- +- hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_MAC_VLAN_ADD, false); +- memcpy(desc.data, req, ++ if (desc_num == HNS3_UC_MAC_VLAN_OPS_DESC_NUM) { ++ hns3_cmd_setup_basic_desc(desc, HNS3_OPC_MAC_VLAN_ADD, false); ++ memcpy(desc->data, req, + sizeof(struct hns3_mac_vlan_tbl_entry_cmd)); +- ret = hns3_cmd_send(hw, &desc, 1); +- resp_code = (rte_le_to_cpu_32(desc.data[0]) >> 8) & 0xff; +- retval = rte_le_to_cpu_16(desc.retval); ++ ret = hns3_cmd_send(hw, desc, desc_num); ++ resp_code = (rte_le_to_cpu_32(desc->data[0]) >> 8) & 0xff; ++ retval = rte_le_to_cpu_16(desc->retval); + + cfg_status = hns3_get_mac_vlan_cmd_status(hw, retval, resp_code, + HNS3_MAC_VLAN_ADD); + } else { +- hns3_cmd_reuse_desc(&mc_desc[0], false); +- mc_desc[0].flag |= rte_cpu_to_le_16(HNS3_CMD_FLAG_NEXT); +- hns3_cmd_reuse_desc(&mc_desc[1], false); +- mc_desc[1].flag |= rte_cpu_to_le_16(HNS3_CMD_FLAG_NEXT); +- hns3_cmd_reuse_desc(&mc_desc[2], false); +- mc_desc[2].flag &= rte_cpu_to_le_16(~HNS3_CMD_FLAG_NEXT); +- memcpy(mc_desc[0].data, req, ++ for (i = 0; i < desc_num; i++) { ++ hns3_cmd_reuse_desc(&desc[i], false); ++ if (i == desc_num - 1) ++ desc[i].flag &= ++ rte_cpu_to_le_16(~HNS3_CMD_FLAG_NEXT); ++ else ++ desc[i].flag |= ++ rte_cpu_to_le_16(HNS3_CMD_FLAG_NEXT); ++ } ++ memcpy(desc[0].data, req, + sizeof(struct hns3_mac_vlan_tbl_entry_cmd)); +- mc_desc[0].retval = 0; +- ret = hns3_cmd_send(hw, mc_desc, HNS3_MC_MAC_VLAN_ADD_DESC_NUM); +- resp_code = (rte_le_to_cpu_32(mc_desc[0].data[0]) >> 8) & 0xff; +- retval = rte_le_to_cpu_16(mc_desc[0].retval); ++ desc[0].retval = 0; ++ ret = hns3_cmd_send(hw, desc, desc_num); ++ resp_code = (rte_le_to_cpu_32(desc[0].data[0]) >> 8) & 0xff; ++ retval = rte_le_to_cpu_16(desc[0].retval); + + cfg_status = hns3_get_mac_vlan_cmd_status(hw, retval, resp_code, + HNS3_MAC_VLAN_ADD); +@@ -1441,7 +1462,7 @@ hns3_add_uc_addr_common(struct hns3_hw *hw, struct rte_ether_addr *mac_addr) + struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + struct hns3_mac_vlan_tbl_entry_cmd req; + struct hns3_pf *pf = &hns->pf; +- struct hns3_cmd_desc desc[3]; ++ struct hns3_cmd_desc desc; + char mac_str[RTE_ETHER_ADDR_FMT_SIZE]; + uint16_t egress_port = 0; + uint8_t vf_id; +@@ -1475,10 +1496,12 @@ hns3_add_uc_addr_common(struct hns3_hw *hw, struct rte_ether_addr *mac_addr) + * it if the entry is inexistent. Repeated unicast entry + * is not allowed in the mac vlan table. 
+ */ +- ret = hns3_lookup_mac_vlan_tbl(hw, &req, desc, false); ++ ret = hns3_lookup_mac_vlan_tbl(hw, &req, &desc, ++ HNS3_UC_MAC_VLAN_OPS_DESC_NUM); + if (ret == -ENOENT) { + if (!hns3_is_umv_space_full(hw)) { +- ret = hns3_add_mac_vlan_tbl(hw, &req, NULL); ++ ret = hns3_add_mac_vlan_tbl(hw, &req, &desc, ++ HNS3_UC_MAC_VLAN_OPS_DESC_NUM); + if (!ret) + hns3_update_umv_space(hw, false); + return ret; +@@ -1552,7 +1575,7 @@ hns3_remove_mc_addr_common(struct hns3_hw *hw, struct rte_ether_addr *mac_addr) + + static int + hns3_add_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr, +- uint32_t idx, __rte_unused uint32_t pool) ++ __rte_unused uint32_t idx, __rte_unused uint32_t pool) + { + struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); + char mac_str[RTE_ETHER_ADDR_FMT_SIZE]; +@@ -1583,8 +1606,6 @@ hns3_add_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr, + return ret; + } + +- if (idx == 0) +- hw->mac.default_addr_setted = true; + rte_spinlock_unlock(&hw->lock); + + return ret; +@@ -1649,30 +1670,19 @@ hns3_set_default_mac_addr(struct rte_eth_dev *dev, + struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_ether_addr *oaddr; + char mac_str[RTE_ETHER_ADDR_FMT_SIZE]; +- bool default_addr_setted; +- bool rm_succes = false; + int ret, ret_val; + +- /* +- * It has been guaranteed that input parameter named mac_addr is valid +- * address in the rte layer of DPDK framework. +- */ ++ rte_spinlock_lock(&hw->lock); + oaddr = (struct rte_ether_addr *)hw->mac.mac_addr; +- default_addr_setted = hw->mac.default_addr_setted; +- if (default_addr_setted && !!rte_is_same_ether_addr(mac_addr, oaddr)) +- return 0; ++ ret = hns3_remove_uc_addr_common(hw, oaddr); ++ if (ret) { ++ rte_ether_format_addr(mac_str, RTE_ETHER_ADDR_FMT_SIZE, ++ oaddr); ++ hns3_warn(hw, "Remove old uc mac address(%s) fail: %d", ++ mac_str, ret); + +- rte_spinlock_lock(&hw->lock); +- if (default_addr_setted) { +- ret = hns3_remove_uc_addr_common(hw, oaddr); +- if (ret) { +- rte_ether_format_addr(mac_str, RTE_ETHER_ADDR_FMT_SIZE, +- oaddr); +- hns3_warn(hw, "Remove old uc mac address(%s) fail: %d", +- mac_str, ret); +- rm_succes = false; +- } else +- rm_succes = true; ++ rte_spinlock_unlock(&hw->lock); ++ return ret; + } + + ret = hns3_add_uc_addr_common(hw, mac_addr); +@@ -1691,7 +1701,6 @@ hns3_set_default_mac_addr(struct rte_eth_dev *dev, + + rte_ether_addr_copy(mac_addr, + (struct rte_ether_addr *)hw->mac.mac_addr); +- hw->mac.default_addr_setted = true; + rte_spinlock_unlock(&hw->lock); + + return 0; +@@ -1707,16 +1716,11 @@ hns3_set_default_mac_addr(struct rte_eth_dev *dev, + } + + err_add_uc_addr: +- if (rm_succes) { +- ret_val = hns3_add_uc_addr_common(hw, oaddr); +- if (ret_val) { +- rte_ether_format_addr(mac_str, RTE_ETHER_ADDR_FMT_SIZE, +- oaddr); +- hns3_warn(hw, +- "Failed to restore old uc mac addr(%s): %d", ++ ret_val = hns3_add_uc_addr_common(hw, oaddr); ++ if (ret_val) { ++ rte_ether_format_addr(mac_str, RTE_ETHER_ADDR_FMT_SIZE, oaddr); ++ hns3_warn(hw, "Failed to restore old uc mac addr(%s): %d", + mac_str, ret_val); +- hw->mac.default_addr_setted = false; +- } + } + rte_spinlock_unlock(&hw->lock); + +@@ -1787,8 +1791,8 @@ hns3_update_desc_vfid(struct hns3_cmd_desc *desc, uint8_t vfid, bool clr) + static int + hns3_add_mc_addr(struct hns3_hw *hw, struct rte_ether_addr *mac_addr) + { ++ struct hns3_cmd_desc desc[HNS3_MC_MAC_VLAN_OPS_DESC_NUM]; + struct hns3_mac_vlan_tbl_entry_cmd req; +- struct hns3_cmd_desc desc[3]; + char 
mac_str[RTE_ETHER_ADDR_FMT_SIZE]; + uint8_t vf_id; + int ret; +@@ -1805,7 +1809,8 @@ hns3_add_mc_addr(struct hns3_hw *hw, struct rte_ether_addr *mac_addr) + memset(&req, 0, sizeof(req)); + hns3_set_bit(req.entry_type, HNS3_MAC_VLAN_BIT0_EN_B, 0); + hns3_prepare_mac_addr(&req, mac_addr->addr_bytes, true); +- ret = hns3_lookup_mac_vlan_tbl(hw, &req, desc, true); ++ ret = hns3_lookup_mac_vlan_tbl(hw, &req, desc, ++ HNS3_MC_MAC_VLAN_OPS_DESC_NUM); + if (ret) { + /* This mac addr do not exist, add new entry for it */ + memset(desc[0].data, 0, sizeof(desc[0].data)); +@@ -1819,7 +1824,8 @@ hns3_add_mc_addr(struct hns3_hw *hw, struct rte_ether_addr *mac_addr) + */ + vf_id = HNS3_PF_FUNC_ID; + hns3_update_desc_vfid(desc, vf_id, false); +- ret = hns3_add_mac_vlan_tbl(hw, &req, desc); ++ ret = hns3_add_mac_vlan_tbl(hw, &req, desc, ++ HNS3_MC_MAC_VLAN_OPS_DESC_NUM); + if (ret) { + if (ret == -ENOSPC) + hns3_err(hw, "mc mac vlan table is full"); +@@ -1852,7 +1858,8 @@ hns3_remove_mc_addr(struct hns3_hw *hw, struct rte_ether_addr *mac_addr) + memset(&req, 0, sizeof(req)); + hns3_set_bit(req.entry_type, HNS3_MAC_VLAN_BIT0_EN_B, 0); + hns3_prepare_mac_addr(&req, mac_addr->addr_bytes, true); +- ret = hns3_lookup_mac_vlan_tbl(hw, &req, desc, true); ++ ret = hns3_lookup_mac_vlan_tbl(hw, &req, desc, ++ HNS3_MC_MAC_VLAN_OPS_DESC_NUM); + if (ret == 0) { + /* + * This mac addr exist, remove this handle's VFID for it. +@@ -2123,24 +2130,17 @@ hns3_check_mq_mode(struct rte_eth_dev *dev) + int max_tc = 0; + int i; + +- dcb_rx_conf = &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf; +- dcb_tx_conf = &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf; +- +- if (rx_mq_mode == ETH_MQ_RX_VMDQ_DCB_RSS) { +- hns3_err(hw, "ETH_MQ_RX_VMDQ_DCB_RSS is not supported. " +- "rx_mq_mode = %d", rx_mq_mode); +- return -EINVAL; +- } +- +- if (rx_mq_mode == ETH_MQ_RX_VMDQ_DCB || +- tx_mq_mode == ETH_MQ_TX_VMDQ_DCB) { +- hns3_err(hw, "ETH_MQ_RX_VMDQ_DCB and ETH_MQ_TX_VMDQ_DCB " +- "is not supported. 
rx_mq_mode = %d, tx_mq_mode = %d", ++ if ((rx_mq_mode & ETH_MQ_RX_VMDQ_FLAG) || ++ (tx_mq_mode == ETH_MQ_TX_VMDQ_DCB || ++ tx_mq_mode == ETH_MQ_TX_VMDQ_ONLY)) { ++ hns3_err(hw, "VMDQ is not supported, rx_mq_mode = %d, tx_mq_mode = %d.", + rx_mq_mode, tx_mq_mode); +- return -EINVAL; ++ return -EOPNOTSUPP; + } + +- if (rx_mq_mode == ETH_MQ_RX_DCB_RSS) { ++ dcb_rx_conf = &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf; ++ dcb_tx_conf = &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf; ++ if (rx_mq_mode & ETH_MQ_RX_DCB_FLAG) { + if (dcb_rx_conf->nb_tcs > pf->tc_max) { + hns3_err(hw, "nb_tcs(%u) > max_tc(%u) driver supported.", + dcb_rx_conf->nb_tcs, pf->tc_max); +@@ -2183,25 +2183,6 @@ hns3_check_mq_mode(struct rte_eth_dev *dev) + return 0; + } + +-static int +-hns3_check_dcb_cfg(struct rte_eth_dev *dev) +-{ +- struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); +- +- if (!hns3_dev_dcb_supported(hw)) { +- hns3_err(hw, "this port does not support dcb configurations."); +- return -EOPNOTSUPP; +- } +- +- if (hw->current_fc_status == HNS3_FC_STATUS_MAC_PAUSE) { +- hns3_err(hw, "MAC pause enabled, cannot config dcb info."); +- return -EOPNOTSUPP; +- } +- +- /* Check multiple queue mode */ +- return hns3_check_mq_mode(dev); +-} +- + static int + hns3_bind_ring_with_vector(struct hns3_hw *hw, uint16_t vector_id, bool en, + enum hns3_ring_type queue_type, uint16_t queue_id) +@@ -2209,11 +2190,11 @@ hns3_bind_ring_with_vector(struct hns3_hw *hw, uint16_t vector_id, bool en, + struct hns3_cmd_desc desc; + struct hns3_ctrl_vector_chain_cmd *req = + (struct hns3_ctrl_vector_chain_cmd *)desc.data; +- enum hns3_cmd_status status; + enum hns3_opcode_type op; + uint16_t tqp_type_and_id = 0; + uint16_t type; + uint16_t gl; ++ int ret; + + op = en ? HNS3_OPC_ADD_RING_TO_VECTOR : HNS3_OPC_DEL_RING_TO_VECTOR; + hns3_cmd_setup_basic_desc(&desc, op, false); +@@ -2236,11 +2217,11 @@ hns3_bind_ring_with_vector(struct hns3_hw *hw, uint16_t vector_id, bool en, + gl); + req->tqp_type_and_id[0] = rte_cpu_to_le_16(tqp_type_and_id); + req->int_cause_num = 1; +- status = hns3_cmd_send(hw, &desc, 1); +- if (status) { +- hns3_err(hw, "%s TQP %u fail, vector_id is %u, status is %d.", +- en ? "Map" : "Unmap", queue_id, vector_id, status); +- return status; ++ ret = hns3_cmd_send(hw, &desc, 1); ++ if (ret) { ++ hns3_err(hw, "%s TQP %u fail, vector_id = %u, ret = %d.", ++ en ? "Map" : "Unmap", queue_id, vector_id, ret); ++ return ret; + } + + return 0; +@@ -2302,6 +2283,65 @@ hns3_init_ring_with_vector(struct hns3_hw *hw) + return 0; + } + ++static int ++hns3_refresh_mtu(struct rte_eth_dev *dev, struct rte_eth_conf *conf) ++{ ++ struct hns3_adapter *hns = dev->data->dev_private; ++ struct hns3_hw *hw = &hns->hw; ++ uint32_t max_rx_pkt_len; ++ uint16_t mtu; ++ int ret; ++ ++ if (!(conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)) ++ return 0; ++ ++ /* ++ * If jumbo frames are enabled, MTU needs to be refreshed ++ * according to the maximum RX packet length. 
++ */ ++ max_rx_pkt_len = conf->rxmode.max_rx_pkt_len; ++ if (max_rx_pkt_len > HNS3_MAX_FRAME_LEN || ++ max_rx_pkt_len <= HNS3_DEFAULT_FRAME_LEN) { ++ hns3_err(hw, "maximum Rx packet length must be greater than %u " ++ "and no more than %u when jumbo frame enabled.", ++ (uint16_t)HNS3_DEFAULT_FRAME_LEN, ++ (uint16_t)HNS3_MAX_FRAME_LEN); ++ return -EINVAL; ++ } ++ ++ mtu = (uint16_t)HNS3_PKTLEN_TO_MTU(max_rx_pkt_len); ++ ret = hns3_dev_mtu_set(dev, mtu); ++ if (ret) ++ return ret; ++ dev->data->mtu = mtu; ++ ++ return 0; ++} ++ ++static int ++hns3_setup_dcb(struct rte_eth_dev *dev) ++{ ++ struct hns3_adapter *hns = dev->data->dev_private; ++ struct hns3_hw *hw = &hns->hw; ++ int ret; ++ ++ if (!hns3_dev_dcb_supported(hw)) { ++ hns3_err(hw, "this port does not support dcb configurations."); ++ return -EOPNOTSUPP; ++ } ++ ++ if (hw->current_fc_status == HNS3_FC_STATUS_MAC_PAUSE) { ++ hns3_err(hw, "MAC pause enabled, cannot config dcb info."); ++ return -EOPNOTSUPP; ++ } ++ ++ ret = hns3_dcb_configure(hns); ++ if (ret) ++ hns3_err(hw, "failed to config dcb: %d", ret); ++ ++ return ret; ++} ++ + static int + hns3_dev_configure(struct rte_eth_dev *dev) + { +@@ -2313,8 +2353,6 @@ hns3_dev_configure(struct rte_eth_dev *dev) + uint16_t nb_rx_q = dev->data->nb_rx_queues; + uint16_t nb_tx_q = dev->data->nb_tx_queues; + struct rte_eth_rss_conf rss_conf; +- uint32_t max_rx_pkt_len; +- uint16_t mtu; + bool gro_en; + int ret; + +@@ -2330,13 +2368,11 @@ hns3_dev_configure(struct rte_eth_dev *dev) + * work as usual. But these fake queues are imperceptible, and can not + * be used by upper applications. + */ +- if (!hns3_dev_indep_txrx_supported(hw)) { +- ret = hns3_set_fake_rx_or_tx_queues(dev, nb_rx_q, nb_tx_q); +- if (ret) { +- hns3_err(hw, "fail to set Rx/Tx fake queues, ret = %d.", +- ret); +- return ret; +- } ++ ret = hns3_set_fake_rx_or_tx_queues(dev, nb_rx_q, nb_tx_q); ++ if (ret) { ++ hns3_err(hw, "fail to set Rx/Tx fake queues, ret = %d.", ret); ++ hw->cfg_max_queues = 0; ++ return ret; + } + + hw->adapter_state = HNS3_NIC_CONFIGURING; +@@ -2345,18 +2381,19 @@ hns3_dev_configure(struct rte_eth_dev *dev) + ret = -EINVAL; + goto cfg_err; + } ++ ret = hns3_check_mq_mode(dev); ++ if (ret) ++ goto cfg_err; + + if ((uint32_t)mq_mode & ETH_MQ_RX_DCB_FLAG) { +- ret = hns3_check_dcb_cfg(dev); ++ ret = hns3_setup_dcb(dev); + if (ret) + goto cfg_err; + } + +- /* When RSS is not configured, redirect the packet queue 0 */ + if ((uint32_t)mq_mode & ETH_MQ_RX_RSS_FLAG) { + conf->rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; + rss_conf = conf->rx_adv_conf.rss_conf; +- hw->rss_dis_flag = false; + if (rss_conf.rss_key == NULL) { + rss_conf.rss_key = rss_cfg->key; + rss_conf.rss_key_len = HNS3_RSS_KEY_SIZE; +@@ -2367,28 +2404,9 @@ hns3_dev_configure(struct rte_eth_dev *dev) + goto cfg_err; + } + +- /* +- * If jumbo frames are enabled, MTU needs to be refreshed +- * according to the maximum RX packet length. 
+- */ +- if (conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { +- max_rx_pkt_len = conf->rxmode.max_rx_pkt_len; +- if (max_rx_pkt_len > HNS3_MAX_FRAME_LEN || +- max_rx_pkt_len <= HNS3_DEFAULT_FRAME_LEN) { +- hns3_err(hw, "maximum Rx packet length must be greater " +- "than %u and less than %u when jumbo frame enabled.", +- (uint16_t)HNS3_DEFAULT_FRAME_LEN, +- (uint16_t)HNS3_MAX_FRAME_LEN); +- ret = -EINVAL; +- goto cfg_err; +- } +- +- mtu = (uint16_t)HNS3_PKTLEN_TO_MTU(max_rx_pkt_len); +- ret = hns3_dev_mtu_set(dev, mtu); +- if (ret) +- goto cfg_err; +- dev->data->mtu = mtu; +- } ++ ret = hns3_refresh_mtu(dev, conf); ++ if (ret) ++ goto cfg_err; + + ret = hns3_dev_configure_vlan(dev); + if (ret) +@@ -2411,6 +2429,7 @@ hns3_dev_configure(struct rte_eth_dev *dev) + return 0; + + cfg_err: ++ hw->cfg_max_queues = 0; + (void)hns3_set_fake_rx_or_tx_queues(dev, 0, 0); + hw->adapter_state = HNS3_NIC_INITIALIZED; + +@@ -2435,17 +2454,30 @@ hns3_set_mac_mtu(struct hns3_hw *hw, uint16_t new_mps) + static int + hns3_config_mtu(struct hns3_hw *hw, uint16_t mps) + { ++ struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); ++ int err; + int ret; + + ret = hns3_set_mac_mtu(hw, mps); + if (ret) { +- hns3_err(hw, "Failed to set mtu, ret = %d", ret); ++ hns3_err(hw, "failed to set mtu, ret = %d", ret); + return ret; + } + + ret = hns3_buffer_alloc(hw); +- if (ret) +- hns3_err(hw, "Failed to allocate buffer, ret = %d", ret); ++ if (ret) { ++ hns3_err(hw, "failed to allocate buffer, ret = %d", ret); ++ goto rollback; ++ } ++ ++ hns->pf.mps = mps; ++ ++ return 0; ++ ++rollback: ++ err = hns3_set_mac_mtu(hw, hns->pf.mps); ++ if (err) ++ hns3_err(hw, "fail to rollback MTU, err = %d", err); + + return ret; + } +@@ -2480,7 +2512,7 @@ hns3_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) + dev->data->port_id, mtu, ret); + return ret; + } +- hns->pf.mps = (uint16_t)frame_size; ++ + if (is_jumbo_frame) + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_JUMBO_FRAME; +@@ -2573,8 +2605,6 @@ hns3_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) + .offloads = 0, + }; + +- info->vmdq_queue_num = 0; +- + info->reta_size = hw->rss_ind_tbl_size; + info->hash_key_size = HNS3_RSS_KEY_SIZE; + info->flow_type_rss_offloads = HNS3_ETH_RSS_SUPPORT; +@@ -2607,28 +2637,33 @@ hns3_fw_version_get(struct rte_eth_dev *eth_dev, char *fw_version, + HNS3_FW_VERSION_BYTE1_S), + hns3_get_field(version, HNS3_FW_VERSION_BYTE0_M, + HNS3_FW_VERSION_BYTE0_S)); ++ if (ret < 0) ++ return -EINVAL; ++ + ret += 1; /* add the size of '\0' */ +- if (fw_size < (uint32_t)ret) ++ if (fw_size < (size_t)ret) + return ret; + else + return 0; + } + + static int +-hns3_dev_link_update(struct rte_eth_dev *eth_dev, +- __rte_unused int wait_to_complete) ++hns3_update_port_link_info(struct rte_eth_dev *eth_dev) + { +- struct hns3_adapter *hns = eth_dev->data->dev_private; +- struct hns3_hw *hw = &hns->hw; +- struct hns3_mac *mac = &hw->mac; +- struct rte_eth_link new_link; ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + +- if (!hns3_is_reset_pending(hns)) { +- hns3_update_link_status(hw); +- hns3_update_link_info(eth_dev); +- } ++ (void)hns3_update_link_status(hw); ++ ++ return hns3_update_link_info(eth_dev); ++} ++ ++static void ++hns3_setup_linkstatus(struct rte_eth_dev *eth_dev, ++ struct rte_eth_link *new_link) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); ++ struct hns3_mac *mac = &hw->mac; + +- memset(&new_link, 0, sizeof(new_link)); + switch (mac->link_speed) 
{ + case ETH_SPEED_NUM_10M: + case ETH_SPEED_NUM_100M: +@@ -2639,18 +2674,52 @@ hns3_dev_link_update(struct rte_eth_dev *eth_dev, + case ETH_SPEED_NUM_50G: + case ETH_SPEED_NUM_100G: + case ETH_SPEED_NUM_200G: +- new_link.link_speed = mac->link_speed; ++ if (mac->link_status) ++ new_link->link_speed = mac->link_speed; + break; + default: +- new_link.link_speed = ETH_SPEED_NUM_100M; ++ if (mac->link_status) ++ new_link->link_speed = ETH_SPEED_NUM_UNKNOWN; + break; + } + +- new_link.link_duplex = mac->link_duplex; +- new_link.link_status = mac->link_status ? ETH_LINK_UP : ETH_LINK_DOWN; +- new_link.link_autoneg = ++ if (!mac->link_status) ++ new_link->link_speed = ETH_SPEED_NUM_NONE; ++ ++ new_link->link_duplex = mac->link_duplex; ++ new_link->link_status = mac->link_status ? ETH_LINK_UP : ETH_LINK_DOWN; ++ new_link->link_autoneg = + !(eth_dev->data->dev_conf.link_speeds & ETH_LINK_SPEED_FIXED); ++} ++ ++static int ++hns3_dev_link_update(struct rte_eth_dev *eth_dev, ++ __rte_unused int wait_to_complete) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); ++ struct hns3_mac *mac = &hw->mac; ++ struct rte_eth_link new_link; ++ int ret; ++ ++ /* When port is stopped, report link down. */ ++ if (eth_dev->data->dev_started == 0) { ++ new_link.link_autoneg = mac->link_autoneg; ++ new_link.link_duplex = mac->link_duplex; ++ new_link.link_speed = ETH_SPEED_NUM_NONE; ++ new_link.link_status = ETH_LINK_DOWN; ++ goto out; ++ } + ++ ret = hns3_update_port_link_info(eth_dev); ++ if (ret) { ++ mac->link_status = ETH_LINK_DOWN; ++ hns3_err(hw, "failed to get port link info, ret = %d.", ret); ++ } ++ ++ memset(&new_link, 0, sizeof(new_link)); ++ hns3_setup_linkstatus(eth_dev, &new_link); ++ ++out: + return rte_eth_linkstatus_set(eth_dev, &new_link); + } + +@@ -2803,8 +2872,6 @@ hns3_parse_cfg(struct hns3_cfg *cfg, struct hns3_cmd_desc *desc) + req = (struct hns3_cfg_param_cmd *)desc[0].data; + + /* get the configuration */ +- cfg->vmdq_vport_num = hns3_get_field(rte_le_to_cpu_32(req->param[0]), +- HNS3_CFG_VMDQ_M, HNS3_CFG_VMDQ_S); + cfg->tc_num = hns3_get_field(rte_le_to_cpu_32(req->param[0]), + HNS3_CFG_TC_NUM_M, HNS3_CFG_TC_NUM_S); + cfg->tqp_desc_num = hns3_get_field(rte_le_to_cpu_32(req->param[0]), +@@ -2858,7 +2925,7 @@ hns3_parse_cfg(struct hns3_cfg *cfg, struct hns3_cmd_desc *desc) + * Field ext_rss_size_max obtained from firmware will be more flexible + * for future changes and expansions, which is an exponent of 2, instead + * of reading out directly. If this field is not zero, hns3 PF PMD +- * driver uses it as rss_size_max under one TC. Device, whose revision ++ * uses it as rss_size_max under one TC. Device, whose revision + * id is greater than or equal to PCI_REVISION_ID_HIP09_A, obtains the + * maximum number of queues supported under a TC through this field. 
+ */ +@@ -3033,6 +3100,10 @@ hns3_get_capability(struct hns3_hw *hw) + } + hw->revision = revision; + ++ ret = hns3_query_mac_stats_reg_num(hw); ++ if (ret) ++ return ret; ++ + if (revision < PCI_REVISION_ID_HIP09_A) { + hns3_set_default_dev_specifications(hw); + hw->intr.mapping_mode = HNS3_INTR_MAPPING_VEC_RSV_ONE; +@@ -3042,6 +3113,7 @@ hns3_get_capability(struct hns3_hw *hw) + hw->min_tx_pkt_len = HNS3_HIP08_MIN_TX_PKT_LEN; + pf->tqp_config_mode = HNS3_FIXED_MAX_TQP_NUM_MODE; + hw->rss_info.ipv6_sctp_offload_supported = false; ++ hw->udp_cksum_mode = HNS3_SPECIAL_PORT_SW_CKSUM_MODE; + return 0; + } + +@@ -3060,10 +3132,39 @@ hns3_get_capability(struct hns3_hw *hw) + hw->min_tx_pkt_len = HNS3_HIP09_MIN_TX_PKT_LEN; + pf->tqp_config_mode = HNS3_FLEX_MAX_TQP_NUM_MODE; + hw->rss_info.ipv6_sctp_offload_supported = true; ++ hw->udp_cksum_mode = HNS3_SPECIAL_PORT_HW_CKSUM_MODE; + + return 0; + } + ++static int ++hns3_check_media_type(struct hns3_hw *hw, uint8_t media_type) ++{ ++ int ret; ++ ++ switch (media_type) { ++ case HNS3_MEDIA_TYPE_COPPER: ++ if (!hns3_dev_copper_supported(hw)) { ++ PMD_INIT_LOG(ERR, ++ "Media type is copper, not supported."); ++ ret = -EOPNOTSUPP; ++ } else { ++ ret = 0; ++ } ++ break; ++ case HNS3_MEDIA_TYPE_FIBER: ++ case HNS3_MEDIA_TYPE_BACKPLANE: ++ ret = 0; ++ break; ++ default: ++ PMD_INIT_LOG(ERR, "Unknown media type = %u!", media_type); ++ ret = -EINVAL; ++ break; ++ } ++ ++ return ret; ++} ++ + static int + hns3_get_board_configuration(struct hns3_hw *hw) + { +@@ -3078,18 +3179,14 @@ hns3_get_board_configuration(struct hns3_hw *hw) + return ret; + } + +- if (cfg.media_type == HNS3_MEDIA_TYPE_COPPER && +- !hns3_dev_copper_supported(hw)) { +- PMD_INIT_LOG(ERR, "media type is copper, not supported."); +- return -EOPNOTSUPP; +- } ++ ret = hns3_check_media_type(hw, cfg.media_type); ++ if (ret) ++ return ret; + + hw->mac.media_type = cfg.media_type; + hw->rss_size_max = cfg.rss_size_max; +- hw->rss_dis_flag = false; + memcpy(hw->mac.mac_addr, cfg.mac_addr, RTE_ETHER_ADDR_LEN); + hw->mac.phy_addr = cfg.phy_addr; +- hw->mac.default_addr_setted = false; + hw->num_tx_desc = cfg.tqp_desc_num; + hw->num_rx_desc = cfg.tqp_desc_num; + hw->dcb_info.num_pg = 1; +@@ -3654,7 +3751,7 @@ hns3_only_alloc_priv_buff(struct hns3_hw *hw, + * hns3_rx_buffer_calc: calculate the rx private buffer size for all TCs + * @hw: pointer to struct hns3_hw + * @buf_alloc: pointer to buffer calculation data +- * @return: 0: calculate sucessful, negative: fail ++ * @return: 0: calculate successful, negative: fail + */ + static int + hns3_rx_buffer_calc(struct hns3_hw *hw, struct hns3_pkt_buf_alloc *buf_alloc) +@@ -3690,8 +3787,8 @@ hns3_rx_buffer_calc(struct hns3_hw *hw, struct hns3_pkt_buf_alloc *buf_alloc) + * For different application scenes, the enabled port number, TC number + * and no_drop TC number are different. In order to obtain the better + * performance, software could allocate the buffer size and configure +- * the waterline by tring to decrease the private buffer size according +- * to the order, namely, waterline of valided tc, pfc disabled tc, pfc ++ * the waterline by trying to decrease the private buffer size according ++ * to the order, namely, waterline of valid tc, pfc disabled tc, pfc + * enabled tc. 
+ */ + if (hns3_rx_buf_calc_all(hw, false, buf_alloc)) +@@ -4414,14 +4511,11 @@ hns3_update_link_info(struct rte_eth_dev *eth_dev) + { + struct hns3_adapter *hns = eth_dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; +- int ret = 0; + + if (hw->mac.media_type == HNS3_MEDIA_TYPE_COPPER) + return 0; +- else if (hw->mac.media_type == HNS3_MEDIA_TYPE_FIBER) +- ret = hns3_update_fiber_link_info(hw); + +- return ret; ++ return hns3_update_fiber_link_info(hw); + } + + static int +@@ -4524,6 +4618,10 @@ hns3_init_hardware(struct hns3_adapter *hns) + struct hns3_hw *hw = &hns->hw; + int ret; + ++ /* ++ * All queue-related HW operations must be performed after the TCAM ++ * table is configured. ++ */ + ret = hns3_map_tqp(hw); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to map tqp: %d", ret); +@@ -4639,7 +4737,7 @@ hns3_config_all_msix_error(struct hns3_hw *hw, bool enable) + * and belong to a different type from the MSI-x errors processed + * by the network driver. + * +- * Network driver should open the new error report on initialition ++ * Network driver should open the new error report on initialization. + */ + val = hns3_read_dev(hw, HNS3_VECTOR0_OTER_EN_REG); + hns3_set_bit(val, HNS3_VECTOR0_ALL_MSIX_ERR_B, enable ? 1 : 0); +@@ -4773,6 +4871,7 @@ hns3_uninit_pf(struct rte_eth_dev *eth_dev) + hns3_rss_uninit(hns); + (void)hns3_config_gro(hw, false); + hns3_promisc_uninit(hw); ++ hns3_flow_uninit(eth_dev); + hns3_fdir_filter_uninit(hns); + (void)hns3_firmware_compat_config(hw, false); + hns3_uninit_umv_space(hw); +@@ -4793,9 +4892,12 @@ hns3_do_start(struct hns3_adapter *hns, bool reset_queue) + struct hns3_hw *hw = &hns->hw; + int ret; + +- ret = hns3_dcb_cfg_update(hns); +- if (ret) ++ ret = hns3_update_queue_map_configure(hns); ++ if (ret) { ++ hns3_err(hw, "failed to update queue mapping configuration, ret = %d", ++ ret); + return ret; ++ } + + ret = hns3_init_queues(hns, reset_queue); + if (ret) { +@@ -4943,11 +5045,8 @@ hns3_dev_start(struct rte_eth_dev *dev) + return ret; + } + ret = hns3_map_rx_interrupt(dev); +- if (ret) { +- hw->adapter_state = HNS3_NIC_CONFIGURED; +- rte_spinlock_unlock(&hw->lock); +- return ret; +- } ++ if (ret) ++ goto map_rx_inter_err; + + /* + * There are three register used to control the status of a TQP +@@ -4961,19 +5060,12 @@ hns3_dev_start(struct rte_eth_dev *dev) + * status of queue in the dpdk framework. + */ + ret = hns3_start_all_txqs(dev); +- if (ret) { +- hw->adapter_state = HNS3_NIC_CONFIGURED; +- rte_spinlock_unlock(&hw->lock); +- return ret; +- } ++ if (ret) ++ goto map_rx_inter_err; + + ret = hns3_start_all_rxqs(dev); +- if (ret) { +- hns3_stop_all_txqs(dev); +- hw->adapter_state = HNS3_NIC_CONFIGURED; +- rte_spinlock_unlock(&hw->lock); +- return ret; +- } ++ if (ret) ++ goto start_all_rxqs_fail; + + hw->adapter_state = HNS3_NIC_STARTED; + rte_spinlock_unlock(&hw->lock); +@@ -4996,6 +5088,15 @@ hns3_dev_start(struct rte_eth_dev *dev) + + hns3_info(hw, "hns3 dev start successful!"); + return 0; ++ ++start_all_rxqs_fail: ++ hns3_stop_all_txqs(dev); ++map_rx_inter_err: ++ (void)hns3_do_stop(hns); ++ hw->adapter_state = HNS3_NIC_CONFIGURED; ++ rte_spinlock_unlock(&hw->lock); ++ ++ return ret; + } + + static int +@@ -5004,6 +5105,17 @@ hns3_do_stop(struct hns3_adapter *hns) + struct hns3_hw *hw = &hns->hw; + int ret; + ++ /* ++ * The "hns3_do_stop" function will also be called by .stop_service to ++ * prepare reset. At the time of global or IMP reset, the command cannot ++ * be sent to stop the tx/rx queues. 
The mbuf in Tx/Rx queues may be ++ * accessed during the reset process. So the mbuf can not be released ++ * during reset and is required to be released after the reset is ++ * completed. ++ */ ++ if (rte_atomic16_read(&hw->reset.resetting) == 0) ++ hns3_dev_release_mbufs(hns); ++ + ret = hns3_cfg_mac_mode(hw, false); + if (ret) + return ret; +@@ -5018,7 +5130,7 @@ hns3_do_stop(struct hns3_adapter *hns) + return ret; + } + } +- hw->mac.default_addr_setted = false; ++ + return 0; + } + +@@ -5073,14 +5185,13 @@ hns3_dev_stop(struct rte_eth_dev *dev) + /* Disable datapath on secondary process. */ + hns3_mp_req_stop_rxtx(dev); + /* Prevent crashes when queues are still in use. */ +- rte_delay_ms(hw->tqps_num); ++ rte_delay_ms(hw->cfg_max_queues); + + rte_spinlock_lock(&hw->lock); + if (rte_atomic16_read(&hw->reset.resetting) == 0) { + hns3_stop_tqps(hw); + hns3_do_stop(hns); + hns3_unmap_rx_interrupt(dev); +- hns3_dev_release_mbufs(hns); + hw->adapter_state = HNS3_NIC_CONFIGURED; + } + hns3_rx_scattered_reset(dev); +@@ -5098,8 +5209,8 @@ hns3_dev_close(struct rte_eth_dev *eth_dev) + int ret = 0; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { +- rte_free(eth_dev->process_private); +- eth_dev->process_private = NULL; ++ __atomic_fetch_sub(&hw->secondary_cnt, 1, __ATOMIC_RELAXED); ++ hns3_mp_uninit(); + return 0; + } + +@@ -5116,9 +5227,7 @@ hns3_dev_close(struct rte_eth_dev *eth_dev) + hns3_uninit_pf(eth_dev); + hns3_free_all_queues(eth_dev); + rte_free(hw->reset.wait_data); +- rte_free(eth_dev->process_private); +- eth_dev->process_private = NULL; +- hns3_mp_uninit_primary(); ++ hns3_mp_uninit(); + hns3_warn(hw, "Close port %u finished", hw->data->port_id); + + return ret; +@@ -5132,8 +5241,11 @@ hns3_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) + + fc_conf->pause_time = pf->pause_time; + +- /* return fc current mode */ +- switch (hw->current_mode) { ++ /* ++ * If fc auto-negotiation is not supported, the configured fc mode ++ * from user is the current fc mode. 
++ */ ++ switch (hw->requested_fc_mode) { + case HNS3_FC_FULL: + fc_conf->mode = RTE_FC_FULL; + break; +@@ -5152,35 +5264,10 @@ hns3_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) + return 0; + } + +-static void +-hns3_get_fc_mode(struct hns3_hw *hw, enum rte_eth_fc_mode mode) +-{ +- switch (mode) { +- case RTE_FC_NONE: +- hw->requested_mode = HNS3_FC_NONE; +- break; +- case RTE_FC_RX_PAUSE: +- hw->requested_mode = HNS3_FC_RX_PAUSE; +- break; +- case RTE_FC_TX_PAUSE: +- hw->requested_mode = HNS3_FC_TX_PAUSE; +- break; +- case RTE_FC_FULL: +- hw->requested_mode = HNS3_FC_FULL; +- break; +- default: +- hw->requested_mode = HNS3_FC_NONE; +- hns3_warn(hw, "fc_mode(%u) exceeds member scope and is " +- "configured to RTE_FC_NONE", mode); +- break; +- } +-} +- + static int + hns3_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) + { + struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); +- struct hns3_pf *pf = HNS3_DEV_PRIVATE_TO_PF(dev->data->dev_private); + int ret; + + if (fc_conf->high_water || fc_conf->low_water || +@@ -5209,10 +5296,10 @@ hns3_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) + return -EOPNOTSUPP; + } + +- hns3_get_fc_mode(hw, fc_conf->mode); +- if (hw->requested_mode == hw->current_mode && +- pf->pause_time == fc_conf->pause_time) +- return 0; ++ if (hw->num_tc > 1) { ++ hns3_err(hw, "in multi-TC scenarios, MAC pause is not supported."); ++ return -EOPNOTSUPP; ++ } + + rte_spinlock_lock(&hw->lock); + ret = hns3_fc_enable(dev, fc_conf); +@@ -5226,8 +5313,6 @@ hns3_priority_flow_ctrl_set(struct rte_eth_dev *dev, + struct rte_eth_pfc_conf *pfc_conf) + { + struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); +- struct hns3_pf *pf = HNS3_DEV_PRIVATE_TO_PF(dev->data->dev_private); +- uint8_t priority; + int ret; + + if (!hns3_dev_dcb_supported(hw)) { +@@ -5262,13 +5347,6 @@ hns3_priority_flow_ctrl_set(struct rte_eth_dev *dev, + return -EOPNOTSUPP; + } + +- priority = pfc_conf->priority; +- hns3_get_fc_mode(hw, pfc_conf->fc.mode); +- if (hw->dcb_info.pfc_en & BIT(priority) && +- hw->requested_mode == hw->current_mode && +- pfc_conf->fc.pause_time == pf->pause_time) +- return 0; +- + rte_spinlock_lock(&hw->lock); + ret = hns3_dcb_pfc_enable(dev, pfc_conf); + rte_spinlock_unlock(&hw->lock); +@@ -5320,15 +5398,15 @@ hns3_reinit_dev(struct hns3_adapter *hns) + return ret; + } + +- ret = hns3_reset_all_tqps(hns); ++ ret = hns3_init_hardware(hns); + if (ret) { +- hns3_err(hw, "Failed to reset all queues: %d", ret); ++ hns3_err(hw, "Failed to init hardware: %d", ret); + return ret; + } + +- ret = hns3_init_hardware(hns); ++ ret = hns3_reset_all_tqps(hns); + if (ret) { +- hns3_err(hw, "Failed to init hardware: %d", ret); ++ hns3_err(hw, "Failed to reset all queues: %d", ret); + return ret; + } + +@@ -5404,7 +5482,7 @@ hns3_wait_hardware_ready(struct hns3_adapter *hns) + if (wait_data->result == HNS3_WAIT_SUCCESS) + return 0; + else if (wait_data->result == HNS3_WAIT_TIMEOUT) { +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + hns3_warn(hw, "Reset step4 hardware not ready after reset time=%ld.%.6ld", + tv.tv_sec, tv.tv_usec); + return -ETIME; +@@ -5414,7 +5492,7 @@ hns3_wait_hardware_ready(struct hns3_adapter *hns) + wait_data->hns = hns; + wait_data->check_completion = is_pf_reset_done; + wait_data->end_ms = (uint64_t)HNS3_RESET_WAIT_CNT * +- HNS3_RESET_WAIT_MS + get_timeofday_ms(); ++ HNS3_RESET_WAIT_MS + hns3_clock_gettime_ms(); + wait_data->interval = HNS3_RESET_WAIT_MS * 
USEC_PER_MSEC; + wait_data->count = HNS3_RESET_WAIT_CNT; + wait_data->result = HNS3_WAIT_REQUEST; +@@ -5453,7 +5531,7 @@ hns3_msix_process(struct hns3_adapter *hns, enum hns3_reset_level reset_level) + struct timeval tv; + uint32_t val; + +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + if (hns3_read_dev(hw, HNS3_GLOBAL_RESET_REG) || + hns3_read_dev(hw, HNS3_FUN_RST_ING)) { + hns3_warn(hw, "Don't process msix during resetting time=%ld.%.6ld", +@@ -5593,7 +5671,7 @@ hns3_stop_service(struct hns3_adapter *hns) + rte_wmb(); + /* Disable datapath on secondary process. */ + hns3_mp_req_stop_rxtx(eth_dev); +- rte_delay_ms(hw->tqps_num); ++ rte_delay_ms(hw->cfg_max_queues); + + rte_spinlock_lock(&hw->lock); + if (hns->hw.adapter_state == HNS3_NIC_STARTED || +@@ -5747,14 +5825,13 @@ hns3_reset_service(void *param) + */ + reset_level = hns3_get_reset_level(hns, &hw->reset.pending); + if (reset_level != HNS3_NONE_RESET) { +- gettimeofday(&tv_start, NULL); ++ hns3_clock_gettime(&tv_start); + ret = hns3_reset_process(hns, reset_level); +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + timersub(&tv, &tv_start, &tv_delta); +- msec = tv_delta.tv_sec * MSEC_PER_SEC + +- tv_delta.tv_usec / USEC_PER_MSEC; ++ msec = hns3_clock_calctime_ms(&tv_delta); + if (msec > HNS3_RESET_PROCESS_MS) +- hns3_err(hw, "%d handle long time delta %" PRIx64 ++ hns3_err(hw, "%d handle long time delta %" PRIu64 + " ms time=%ld.%.6ld", + hw->reset.level, msec, + tv.tv_sec, tv.tv_usec); +@@ -6038,10 +6115,12 @@ hns3_fec_set(struct rte_eth_dev *dev, uint32_t mode) + if (ret < 0) + return ret; + +- /* HNS3 PMD driver only support one bit set mode, e.g. 0x1, 0x4 */ +- if (!is_fec_mode_one_bit_set(mode)) +- hns3_err(hw, "FEC mode(0x%x) not supported in HNS3 PMD," ++ /* HNS3 PMD only support one bit set mode, e.g. 0x1, 0x4 */ ++ if (!is_fec_mode_one_bit_set(mode)) { ++ hns3_err(hw, "FEC mode(0x%x) not supported in HNS3 PMD, " + "FEC mode should be only one bit set", mode); ++ return -EINVAL; ++ } + + /* + * Check whether the configured mode is within the FEC capability. 
+@@ -6053,11 +6132,16 @@ hns3_fec_set(struct rte_eth_dev *dev, uint32_t mode) + return -EINVAL; + } + ++ rte_spinlock_lock(&hw->lock); + ret = hns3_set_fec_hw(hw, mode); +- if (ret) ++ if (ret) { ++ rte_spinlock_unlock(&hw->lock); + return ret; ++ } + + pf->fec_mode = mode; ++ rte_spinlock_unlock(&hw->lock); ++ + return 0; + } + +@@ -6091,6 +6175,39 @@ hns3_query_dev_fec_info(struct hns3_hw *hw) + return ret; + } + ++void ++hns3_clock_gettime(struct timeval *tv) ++{ ++#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ ++#define CLOCK_TYPE CLOCK_MONOTONIC_RAW ++#else ++#define CLOCK_TYPE CLOCK_MONOTONIC ++#endif ++#define NSEC_TO_USEC_DIV 1000 ++ ++ struct timespec spec; ++ (void)clock_gettime(CLOCK_TYPE, &spec); ++ ++ tv->tv_sec = spec.tv_sec; ++ tv->tv_usec = spec.tv_nsec / NSEC_TO_USEC_DIV; ++} ++ ++uint64_t ++hns3_clock_calctime_ms(struct timeval *tv) ++{ ++ return (uint64_t)tv->tv_sec * MSEC_PER_SEC + ++ tv->tv_usec / USEC_PER_MSEC; ++} ++ ++uint64_t ++hns3_clock_gettime_ms(void) ++{ ++ struct timeval tv; ++ ++ hns3_clock_gettime(&tv); ++ return hns3_clock_calctime_ms(&tv); ++} ++ + static const struct eth_dev_ops hns3_eth_dev_ops = { + .dev_configure = hns3_dev_configure, + .dev_start = hns3_dev_start, +@@ -6170,16 +6287,7 @@ hns3_dev_init(struct rte_eth_dev *eth_dev) + + PMD_INIT_FUNC_TRACE(); + +- eth_dev->process_private = (struct hns3_process_private *) +- rte_zmalloc_socket("hns3_filter_list", +- sizeof(struct hns3_process_private), +- RTE_CACHE_LINE_SIZE, eth_dev->device->numa_node); +- if (eth_dev->process_private == NULL) { +- PMD_INIT_LOG(ERR, "Failed to alloc memory for process private"); +- return -ENOMEM; +- } +- /* initialize flow filter lists */ +- hns3_filterlist_init(eth_dev); ++ hns3_flow_init(eth_dev); + + hns3_set_rxtx_function(eth_dev); + eth_dev->dev_ops = &hns3_eth_dev_ops; +@@ -6191,8 +6299,8 @@ hns3_dev_init(struct rte_eth_dev *eth_dev) + "process, ret = %d", ret); + goto err_mp_init_secondary; + } +- +- hw->secondary_cnt++; ++ __atomic_fetch_add(&hw->secondary_cnt, 1, __ATOMIC_RELAXED); ++ process_data.eth_dev_cnt++; + return 0; + } + +@@ -6205,6 +6313,7 @@ hns3_dev_init(struct rte_eth_dev *eth_dev) + ret); + goto err_mp_init_primary; + } ++ process_data.eth_dev_cnt++; + + hw->adapter_state = HNS3_NIC_UNINITIALIZED; + hns->is_vf = false; +@@ -6272,7 +6381,7 @@ hns3_dev_init(struct rte_eth_dev *eth_dev) + rte_free(hw->reset.wait_data); + + err_init_reset: +- hns3_mp_uninit_primary(); ++ hns3_mp_uninit(); + + err_mp_init_primary: + err_mp_init_secondary: +@@ -6280,8 +6389,6 @@ hns3_dev_init(struct rte_eth_dev *eth_dev) + eth_dev->rx_pkt_burst = NULL; + eth_dev->tx_pkt_burst = NULL; + eth_dev->tx_pkt_prepare = NULL; +- rte_free(eth_dev->process_private); +- eth_dev->process_private = NULL; + return ret; + } + +@@ -6294,8 +6401,8 @@ hns3_dev_uninit(struct rte_eth_dev *eth_dev) + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { +- rte_free(eth_dev->process_private); +- eth_dev->process_private = NULL; ++ __atomic_fetch_sub(&hw->secondary_cnt, 1, __ATOMIC_RELAXED); ++ hns3_mp_uninit(); + return 0; + } + +diff --git a/dpdk/drivers/net/hns3/hns3_ethdev.h b/dpdk/drivers/net/hns3/hns3_ethdev.h +index 4c40df1cbb..de0084c352 100644 +--- a/dpdk/drivers/net/hns3/hns3_ethdev.h ++++ b/dpdk/drivers/net/hns3/hns3_ethdev.h +@@ -1,10 +1,11 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. 
+ */ + + #ifndef _HNS3_ETHDEV_H_ + #define _HNS3_ETHDEV_H_ + ++#include + #include + #include + +@@ -43,6 +44,9 @@ + #define HNS3_UNLIMIT_PROMISC_MODE 0 + #define HNS3_LIMIT_PROMISC_MODE 1 + ++#define HNS3_SPECIAL_PORT_SW_CKSUM_MODE 0 ++#define HNS3_SPECIAL_PORT_HW_CKSUM_MODE 1 ++ + #define HNS3_UC_MACADDR_NUM 128 + #define HNS3_VF_UC_MACADDR_NUM 48 + #define HNS3_MC_MACADDR_NUM 128 +@@ -116,7 +120,7 @@ struct hns3_tc_info { + uint8_t tc_sch_mode; /* 0: sp; 1: dwrr */ + uint8_t pgid; + uint32_t bw_limit; +- uint8_t up_to_tc_map; /* user priority maping on the TC */ ++ uint8_t up_to_tc_map; /* user priority mapping on the TC */ + }; + + struct hns3_dcb_info { +@@ -144,7 +148,6 @@ struct hns3_tc_queue_info { + }; + + struct hns3_cfg { +- uint8_t vmdq_vport_num; + uint8_t tc_num; + uint16_t tqp_desc_num; + uint16_t rx_buf_len; +@@ -169,7 +172,6 @@ enum hns3_media_type { + + struct hns3_mac { + uint8_t mac_addr[RTE_ETHER_ADDR_LEN]; +- bool default_addr_setted; /* whether default addr(mac_addr) is set */ + uint8_t media_type; + uint8_t phy_addr; + uint8_t link_duplex : 1; /* ETH_LINK_[HALF/FULL]_DUPLEX */ +@@ -399,8 +401,8 @@ struct hns3_queue_intr { + * enable Rx interrupt. + * + * - HNS3_INTR_MAPPING_VEC_ALL +- * PMD driver can map/unmmap all interrupt vectors with queues When +- * Rx interrupt in enabled. ++ * PMD can map/unmmap all interrupt vectors with queues when ++ * Rx interrupt is enabled. + */ + uint8_t mapping_mode; + /* +@@ -424,12 +426,12 @@ struct hns3_hw { + struct hns3_cmq cmq; + struct hns3_mbx_resp_status mbx_resp; /* mailbox response */ + struct hns3_mbx_arq_ring arq; /* mailbox async rx queue */ +- pthread_t irq_thread_id; + struct hns3_mac mac; + unsigned int secondary_cnt; /* Number of secondary processes init'd. */ + struct hns3_tqp_stats tqp_stats; + /* Include Mac stats | Rx stats | Tx stats */ + struct hns3_mac_stats mac_stats; ++ uint32_t mac_stats_reg_num; + uint32_t fw_version; + + uint16_t num_msi; +@@ -448,14 +450,12 @@ struct hns3_hw { + + /* The configuration info of RSS */ + struct hns3_rss_conf rss_info; +- bool rss_dis_flag; /* disable rss flag. true: disable, false: enable */ + uint16_t rss_ind_tbl_size; + uint16_t rss_key_size; + + uint8_t num_tc; /* Total number of enabled TCs */ + uint8_t hw_tc_map; +- enum hns3_fc_mode current_mode; +- enum hns3_fc_mode requested_mode; ++ enum hns3_fc_mode requested_fc_mode; /* FC mode requested by user */ + struct hns3_dcb_info dcb_info; + enum hns3_fc_status current_fc_status; /* current flow control status */ + struct hns3_tc_queue_info tc_queue[HNS3_MAX_TC_NUM]; +@@ -497,18 +497,18 @@ struct hns3_hw { + /* + * vlan mode. + * value range: +- * HNS3_SW_SHIFT_AND_DISCARD_MODE/HNS3_HW_SHFIT_AND_DISCARD_MODE ++ * HNS3_SW_SHIFT_AND_DISCARD_MODE/HNS3_HW_SHIFT_AND_DISCARD_MODE + * + * - HNS3_SW_SHIFT_AND_DISCARD_MODE + * For some versions of hardware network engine, because of the +- * hardware limitation, PMD driver needs to detect the PVID status +- * to work with haredware to implement PVID-related functions. ++ * hardware limitation, PMD needs to detect the PVID status ++ * to work with hardware to implement PVID-related functions. + * For example, driver need discard the stripped PVID tag to ensure + * the PVID will not report to mbuf and shift the inserted VLAN tag + * to avoid port based VLAN covering it. 
+ * + * - HNS3_HW_SHIT_AND_DISCARD_MODE +- * PMD driver does not need to process PVID-related functions in ++ * PMD does not need to process PVID-related functions in + * I/O process, Hardware will adjust the sequence between port based + * VLAN tag and BD VLAN tag automatically and VLAN tag stripped by + * PVID will be invisible to driver. And in this mode, hns3 is able +@@ -535,8 +535,30 @@ struct hns3_hw { + */ + uint8_t promisc_mode; + uint8_t max_non_tso_bd_num; /* max BD number of one non-TSO packet */ ++ /* ++ * udp checksum mode. ++ * value range: ++ * HNS3_SPECIAL_PORT_HW_CKSUM_MODE/HNS3_SPECIAL_PORT_SW_CKSUM_MODE ++ * ++ * - HNS3_SPECIAL_PORT_SW_CKSUM_MODE ++ * In this mode, HW can not do checksum for special UDP port like ++ * 4789, 4790, 6081 for non-tunnel UDP packets and UDP tunnel ++ * packets without the PKT_TX_TUNEL_MASK in the mbuf. So, PMD need ++ * do the checksum for these packets to avoid a checksum error. ++ * ++ * - HNS3_SPECIAL_PORT_HW_CKSUM_MODE ++ * In this mode, HW does not have the preceding problems and can ++ * directly calculate the checksum of these UDP packets. ++ */ ++ uint8_t udp_cksum_mode; + + struct hns3_port_base_vlan_config port_base_vlan_cfg; ++ ++ pthread_mutex_t flows_lock; /* rte_flow ops lock */ ++ struct hns3_fdir_rule_list flow_fdir_list; /* flow fdir rule list */ ++ struct hns3_rss_filter_list flow_rss_list; /* flow RSS rule list */ ++ struct hns3_flow_mem_list flow_list; ++ + /* + * PMD setup and configuration is not thread safe. Since it is not + * performance sensitive, it is better to guarantee thread-safety +@@ -638,7 +660,7 @@ enum hns3_mp_req_type { + HNS3_MP_REQ_MAX + }; + +-/* Pameters for IPC. */ ++/* Parameters for IPC. */ + struct hns3_mp_param { + enum hns3_mp_req_type type; + int port_id; +@@ -659,12 +681,10 @@ struct hns3_mp_param { + #define HNS3_OL4TBL_NUM 16 + + struct hns3_ptype_table { +- uint32_t l2l3table[HNS3_L2TBL_NUM][HNS3_L3TBL_NUM]; ++ uint32_t l3table[HNS3_L3TBL_NUM]; + uint32_t l4table[HNS3_L4TBL_NUM]; +- uint32_t inner_l2table[HNS3_L2TBL_NUM]; + uint32_t inner_l3table[HNS3_L3TBL_NUM]; + uint32_t inner_l4table[HNS3_L4TBL_NUM]; +- uint32_t ol2table[HNS3_OL2TBL_NUM]; + uint32_t ol3table[HNS3_OL3TBL_NUM]; + uint32_t ol4table[HNS3_OL4TBL_NUM]; + }; +@@ -889,15 +909,9 @@ static inline uint32_t hns3_read_reg(void *base, uint32_t reg) + #define MSEC_PER_SEC 1000L + #define USEC_PER_MSEC 1000L + +-static inline uint64_t +-get_timeofday_ms(void) +-{ +- struct timeval tv; +- +- (void)gettimeofday(&tv, NULL); +- +- return (uint64_t)tv.tv_sec * MSEC_PER_SEC + tv.tv_usec / USEC_PER_MSEC; +-} ++void hns3_clock_gettime(struct timeval *tv); ++uint64_t hns3_clock_calctime_ms(struct timeval *tv); ++uint64_t hns3_clock_gettime_ms(void); + + static inline uint64_t + hns3_atomic_test_bit(unsigned int nr, volatile uint64_t *addr) +diff --git a/dpdk/drivers/net/hns3/hns3_ethdev_vf.c b/dpdk/drivers/net/hns3/hns3_ethdev_vf.c +index 9c84740d7b..c1cf7a8c60 100644 +--- a/dpdk/drivers/net/hns3/hns3_ethdev_vf.c ++++ b/dpdk/drivers/net/hns3/hns3_ethdev_vf.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. 
+ */ + + #include +@@ -153,9 +153,12 @@ hns3vf_enable_msix(const struct rte_pci_device *device, bool op) + if (ret < 0) { + PMD_INIT_LOG(ERR, "failed to write PCI offset 0x%x", + (pos + PCI_MSIX_FLAGS)); ++ return -ENXIO; + } ++ + return 0; + } ++ + return -ENXIO; + } + +@@ -316,7 +319,7 @@ hns3vf_set_default_mac_addr(struct rte_eth_dev *dev, + HNS3_TWO_ETHER_ADDR_LEN, true, NULL, 0); + if (ret) { + /* +- * The hns3 VF PMD driver depends on the hns3 PF kernel ethdev ++ * The hns3 VF PMD depends on the hns3 PF kernel ethdev + * driver. When user has configured a MAC address for VF device + * by "ip link set ..." command based on the PF device, the hns3 + * PF kernel ethdev driver does not allow VF driver to request +@@ -326,7 +329,7 @@ hns3vf_set_default_mac_addr(struct rte_eth_dev *dev, + if (ret == -EPERM) { + rte_ether_format_addr(mac_str, RTE_ETHER_ADDR_FMT_SIZE, + old_addr); +- hns3_warn(hw, "Has permanet mac addr(%s) for vf", ++ hns3_warn(hw, "Has permanent mac addr(%s) for vf", + mac_str); + } else { + rte_ether_format_addr(mac_str, RTE_ETHER_ADDR_FMT_SIZE, +@@ -570,25 +573,25 @@ hns3vf_set_promisc_mode(struct hns3_hw *hw, bool en_bc_pmc, + req = (struct hns3_mbx_vf_to_pf_cmd *)desc.data; + + /* +- * The hns3 VF PMD driver depends on the hns3 PF kernel ethdev driver, ++ * The hns3 VF PMD depends on the hns3 PF kernel ethdev driver, + * so there are some features for promiscuous/allmulticast mode in hns3 +- * VF PMD driver as below: ++ * VF PMD as below: + * 1. The promiscuous/allmulticast mode can be configured successfully + * only based on the trusted VF device. If based on the non trusted + * VF device, configuring promiscuous/allmulticast mode will fail. +- * The hns3 VF device can be confiruged as trusted device by hns3 PF ++ * The hns3 VF device can be configured as trusted device by hns3 PF + * kernel ethdev driver on the host by the following command: + * "ip link set vf turst on" + * 2. After the promiscuous mode is configured successfully, hns3 VF PMD +- * driver can receive the ingress and outgoing traffic. In the words, ++ * can receive the ingress and outgoing traffic. This includes + * all the ingress packets, all the packets sent from the PF and + * other VFs on the same physical port. + * 3. Note: Because of the hardware constraints, By default vlan filter + * is enabled and couldn't be turned off based on VF device, so vlan + * filter is still effective even in promiscuous mode. If upper + * applications don't call rte_eth_dev_vlan_filter API function to +- * set vlan based on VF device, hns3 VF PMD driver will can't receive +- * the packets with vlan tag in promiscuoue mode. ++ * set vlan based on VF device, hns3 VF PMD will can't receive ++ * the packets with vlan tag in promiscuous mode. + */ + hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_MBX_VF_TO_PF, false); + req->msg[0] = HNS3_MBX_SET_PROMISC_MODE; +@@ -796,13 +799,11 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) + * work as usual. But these fake queues are imperceptible, and can not + * be used by upper applications. 
+ */ +- if (!hns3_dev_indep_txrx_supported(hw)) { +- ret = hns3_set_fake_rx_or_tx_queues(dev, nb_rx_q, nb_tx_q); +- if (ret) { +- hns3_err(hw, "fail to set Rx/Tx fake queues, ret = %d.", +- ret); +- return ret; +- } ++ ret = hns3_set_fake_rx_or_tx_queues(dev, nb_rx_q, nb_tx_q); ++ if (ret) { ++ hns3_err(hw, "fail to set Rx/Tx fake queues, ret = %d.", ret); ++ hw->cfg_max_queues = 0; ++ return ret; + } + + hw->adapter_state = HNS3_NIC_CONFIGURING; +@@ -815,7 +816,6 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) + /* When RSS is not configured, redirect the packet queue 0 */ + if ((uint32_t)mq_mode & ETH_MQ_RX_RSS_FLAG) { + conf->rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; +- hw->rss_dis_flag = false; + rss_conf = conf->rx_adv_conf.rss_conf; + if (rss_conf.rss_key == NULL) { + rss_conf.rss_key = rss_cfg->key; +@@ -871,6 +871,7 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) + return 0; + + cfg_err: ++ hw->cfg_max_queues = 0; + (void)hns3_set_fake_rx_or_tx_queues(dev, 0, 0); + hw->adapter_state = HNS3_NIC_INITIALIZED; + +@@ -900,9 +901,9 @@ hns3vf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) + /* + * The hns3 PF/VF devices on the same port share the hardware MTU + * configuration. Currently, we send mailbox to inform hns3 PF kernel +- * ethdev driver to finish hardware MTU configuration in hns3 VF PMD +- * driver, there is no need to stop the port for hns3 VF device, and the +- * MTU value issued by hns3 VF PMD driver must be less than or equal to ++ * ethdev driver to finish hardware MTU configuration in hns3 VF PMD, ++ * there is no need to stop the port for hns3 VF device, and the ++ * MTU value issued by hns3 VF PMD must be less than or equal to + * PF's MTU. + */ + if (rte_atomic16_read(&hw->reset.resetting)) { +@@ -912,8 +913,8 @@ hns3vf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) + + /* + * when Rx of scattered packets is off, we have some possibility of +- * using vector Rx process function or simple Rx functions in hns3 PMD +- * driver. If the input MTU is increased and the maximum length of ++ * using vector Rx process function or simple Rx functions in hns3 PMD. ++ * If the input MTU is increased and the maximum length of + * received packets is greater than the length of a buffer for Rx + * packet, the hardware network engine needs to use multiple BDs and + * buffers to store these packets. 
This will cause problems when still +@@ -1027,8 +1028,6 @@ hns3vf_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) + .offloads = 0, + }; + +- info->vmdq_queue_num = 0; +- + info->reta_size = hw->rss_ind_tbl_size; + info->hash_key_size = HNS3_RSS_KEY_SIZE; + info->flow_type_rss_offloads = HNS3_ETH_RSS_SUPPORT; +@@ -1067,7 +1066,6 @@ hns3vf_check_event_cause(struct hns3_adapter *hns, uint32_t *clearval) + + /* Fetch the events from their corresponding regs */ + cmdq_stat_reg = hns3_read_dev(hw, HNS3_VECTOR0_CMDQ_STAT_REG); +- + if (BIT(HNS3_VECTOR0_RST_INT_B) & cmdq_stat_reg) { + rst_ing_reg = hns3_read_dev(hw, HNS3_FUN_RST_ING); + hns3_warn(hw, "resetting reg: 0x%x", rst_ing_reg); +@@ -1112,14 +1110,13 @@ hns3vf_interrupt_handler(void *param) + enum hns3vf_evt_cause event_cause; + uint32_t clearval; + +- if (hw->irq_thread_id == 0) +- hw->irq_thread_id = pthread_self(); +- + /* Disable interrupt */ + hns3vf_disable_irq0(hw); + + /* Read out interrupt causes */ + event_cause = hns3vf_check_event_cause(hns, &clearval); ++ /* Clear interrupt causes */ ++ hns3vf_clear_event_cause(hw, clearval); + + switch (event_cause) { + case HNS3VF_VECTOR0_EVENT_RST: +@@ -1132,9 +1129,6 @@ hns3vf_interrupt_handler(void *param) + break; + } + +- /* Clear interrupt causes */ +- hns3vf_clear_event_cause(hw, clearval); +- + /* Enable interrupt */ + hns3vf_enable_irq0(hw); + } +@@ -1400,7 +1394,6 @@ hns3vf_get_configuration(struct hns3_hw *hw) + int ret; + + hw->mac.media_type = HNS3_MEDIA_TYPE_NONE; +- hw->rss_dis_flag = false; + + /* Get device capability */ + ret = hns3vf_get_capability(hw); +@@ -1438,31 +1431,18 @@ hns3vf_set_tc_queue_mapping(struct hns3_adapter *hns, uint16_t nb_rx_q, + { + struct hns3_hw *hw = &hns->hw; + +- if (nb_rx_q < hw->num_tc) { +- hns3_err(hw, "number of Rx queues(%u) is less than tcs(%u).", +- nb_rx_q, hw->num_tc); +- return -EINVAL; +- } +- +- if (nb_tx_q < hw->num_tc) { +- hns3_err(hw, "number of Tx queues(%u) is less than tcs(%u).", +- nb_tx_q, hw->num_tc); +- return -EINVAL; +- } +- + return hns3_queue_to_tc_mapping(hw, nb_rx_q, nb_tx_q); + } + + static void + hns3vf_request_link_info(struct hns3_hw *hw) + { +- uint8_t resp_msg; + int ret; + + if (rte_atomic16_read(&hw->reset.resetting)) + return; + ret = hns3_send_mbx_msg(hw, HNS3_MBX_GET_LINK_STATUS, 0, NULL, 0, false, +- &resp_msg, sizeof(resp_msg)); ++ NULL, 0); + if (ret) + hns3_err(hw, "Failed to fetch link status from PF: %d", ret); + } +@@ -1518,7 +1498,8 @@ hns3vf_en_hw_strip_rxvtag(struct hns3_hw *hw, bool enable) + ret = hns3_send_mbx_msg(hw, HNS3_MBX_SET_VLAN, HNS3_MBX_VLAN_RX_OFF_CFG, + &msg_data, sizeof(msg_data), false, NULL, 0); + if (ret) +- hns3_err(hw, "vf enable strip failed, ret =%d", ret); ++ hns3_err(hw, "vf %s strip failed, ret = %d.", ++ enable ? 
"enable" : "disable", ret); + + return ret; + } +@@ -1658,11 +1639,10 @@ hns3vf_keep_alive_handler(void *param) + struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)param; + struct hns3_adapter *hns = eth_dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; +- uint8_t respmsg; + int ret; + + ret = hns3_send_mbx_msg(hw, HNS3_MBX_KEEP_ALIVE, 0, NULL, 0, +- false, &respmsg, sizeof(uint8_t)); ++ false, NULL, 0); + if (ret) + hns3_err(hw, "VF sends keeping alive cmd failed(=%d)", + ret); +@@ -1764,12 +1744,6 @@ hns3vf_init_hardware(struct hns3_adapter *hns) + goto err_init_hardware; + } + +- ret = hns3vf_set_alive(hw, true); +- if (ret) { +- PMD_INIT_LOG(ERR, "Failed to VF send alive to PF: %d", ret); +- goto err_init_hardware; +- } +- + return 0; + + err_init_hardware: +@@ -1861,6 +1835,12 @@ hns3vf_init_vf(struct rte_eth_dev *eth_dev) + + hns3_set_default_rss_args(hw); + ++ ret = hns3vf_set_alive(hw, true); ++ if (ret) { ++ PMD_INIT_LOG(ERR, "Failed to VF send alive to PF: %d", ret); ++ goto err_set_tc_queue; ++ } ++ + return 0; + + err_set_tc_queue: +@@ -1894,6 +1874,7 @@ hns3vf_uninit_vf(struct rte_eth_dev *eth_dev) + (void)hns3_config_gro(hw, false); + (void)hns3vf_set_alive(hw, false); + (void)hns3vf_set_promisc_mode(hw, false, false, false); ++ hns3_flow_uninit(eth_dev); + hns3_tqp_stats_uninit(hw); + hns3vf_disable_irq0(hw); + rte_intr_disable(&pci_dev->intr_handle); +@@ -1912,6 +1893,17 @@ hns3vf_do_stop(struct hns3_adapter *hns) + + hw->mac.link_status = ETH_LINK_DOWN; + ++ /* ++ * The "hns3vf_do_stop" function will also be called by .stop_service to ++ * prepare reset. At the time of global or IMP reset, the command cannot ++ * be sent to stop the tx/rx queues. The mbuf in Tx/Rx queues may be ++ * accessed during the reset process. So the mbuf can not be released ++ * during reset and is required to be released after the reset is ++ * completed. ++ */ ++ if (rte_atomic16_read(&hw->reset.resetting) == 0) ++ hns3_dev_release_mbufs(hns); ++ + if (rte_atomic16_read(&hw->reset.disable_cmd) == 0) { + hns3vf_configure_mac_addr(hns, true); + ret = hns3_reset_all_tqps(hns); +@@ -1974,14 +1966,13 @@ hns3vf_dev_stop(struct rte_eth_dev *dev) + /* Disable datapath on secondary process. */ + hns3_mp_req_stop_rxtx(dev); + /* Prevent crashes when queues are still in use. 
*/ +- rte_delay_ms(hw->tqps_num); ++ rte_delay_ms(hw->cfg_max_queues); + + rte_spinlock_lock(&hw->lock); + if (rte_atomic16_read(&hw->reset.resetting) == 0) { + hns3_stop_tqps(hw); + hns3vf_do_stop(hns); + hns3vf_unmap_rx_interrupt(dev); +- hns3_dev_release_mbufs(hns); + hw->adapter_state = HNS3_NIC_CONFIGURED; + } + hns3_rx_scattered_reset(dev); +@@ -1999,8 +1990,8 @@ hns3vf_dev_close(struct rte_eth_dev *eth_dev) + int ret = 0; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { +- rte_free(eth_dev->process_private); +- eth_dev->process_private = NULL; ++ __atomic_fetch_sub(&hw->secondary_cnt, 1, __ATOMIC_RELAXED); ++ hns3_mp_uninit(); + return 0; + } + +@@ -2016,9 +2007,7 @@ hns3vf_dev_close(struct rte_eth_dev *eth_dev) + hns3vf_uninit_vf(eth_dev); + hns3_free_all_queues(eth_dev); + rte_free(hw->reset.wait_data); +- rte_free(eth_dev->process_private); +- eth_dev->process_private = NULL; +- hns3_mp_uninit_primary(); ++ hns3_mp_uninit(); + hns3_warn(hw, "Close port %u finished", hw->data->port_id); + + return ret; +@@ -2042,8 +2031,11 @@ hns3vf_fw_version_get(struct rte_eth_dev *eth_dev, char *fw_version, + HNS3_FW_VERSION_BYTE1_S), + hns3_get_field(version, HNS3_FW_VERSION_BYTE0_M, + HNS3_FW_VERSION_BYTE0_S)); ++ if (ret < 0) ++ return -EINVAL; ++ + ret += 1; /* add the size of '\0' */ +- if (fw_size < (uint32_t)ret) ++ if (fw_size < (size_t)ret) + return ret; + else + return 0; +@@ -2069,13 +2061,18 @@ hns3vf_dev_link_update(struct rte_eth_dev *eth_dev, + case ETH_SPEED_NUM_50G: + case ETH_SPEED_NUM_100G: + case ETH_SPEED_NUM_200G: +- new_link.link_speed = mac->link_speed; ++ if (mac->link_status) ++ new_link.link_speed = mac->link_speed; + break; + default: +- new_link.link_speed = ETH_SPEED_NUM_100M; ++ if (mac->link_status) ++ new_link.link_speed = ETH_SPEED_NUM_UNKNOWN; + break; + } + ++ if (!mac->link_status) ++ new_link.link_speed = ETH_SPEED_NUM_NONE; ++ + new_link.link_duplex = mac->link_duplex; + new_link.link_status = mac->link_status ? ETH_LINK_UP : ETH_LINK_DOWN; + new_link.link_autoneg = +@@ -2163,7 +2160,7 @@ hns3vf_map_rx_interrupt(struct rte_eth_dev *dev) + vf_bind_vector_error: + rte_intr_efd_disable(intr_handle); + if (intr_handle->intr_vec) { +- free(intr_handle->intr_vec); ++ rte_free(intr_handle->intr_vec); + intr_handle->intr_vec = NULL; + } + return ret; +@@ -2223,11 +2220,8 @@ hns3vf_dev_start(struct rte_eth_dev *dev) + return ret; + } + ret = hns3vf_map_rx_interrupt(dev); +- if (ret) { +- hw->adapter_state = HNS3_NIC_CONFIGURED; +- rte_spinlock_unlock(&hw->lock); +- return ret; +- } ++ if (ret) ++ goto map_rx_inter_err; + + /* + * There are three register used to control the status of a TQP +@@ -2241,19 +2235,12 @@ hns3vf_dev_start(struct rte_eth_dev *dev) + * status of queue in the dpdk framework. 
+ */ + ret = hns3_start_all_txqs(dev); +- if (ret) { +- hw->adapter_state = HNS3_NIC_CONFIGURED; +- rte_spinlock_unlock(&hw->lock); +- return ret; +- } ++ if (ret) ++ goto map_rx_inter_err; + + ret = hns3_start_all_rxqs(dev); +- if (ret) { +- hns3_stop_all_txqs(dev); +- hw->adapter_state = HNS3_NIC_CONFIGURED; +- rte_spinlock_unlock(&hw->lock); +- return ret; +- } ++ if (ret) ++ goto start_all_rxqs_fail; + + hw->adapter_state = HNS3_NIC_STARTED; + rte_spinlock_unlock(&hw->lock); +@@ -2274,6 +2261,15 @@ hns3vf_dev_start(struct rte_eth_dev *dev) + */ + hns3_start_tqps(hw); + ++ return ret; ++ ++start_all_rxqs_fail: ++ hns3_stop_all_txqs(dev); ++map_rx_inter_err: ++ (void)hns3vf_do_stop(hns); ++ hw->adapter_state = HNS3_NIC_CONFIGURED; ++ rte_spinlock_unlock(&hw->lock); ++ + return ret; + } + +@@ -2333,6 +2329,7 @@ hns3vf_is_reset_pending(struct hns3_adapter *hns) + static int + hns3vf_wait_hardware_ready(struct hns3_adapter *hns) + { ++#define HNS3_WAIT_PF_RESET_READY_TIME 5 + struct hns3_hw *hw = &hns->hw; + struct hns3_wait_data *wait_data = hw->reset.wait_data; + struct timeval tv; +@@ -2353,15 +2350,17 @@ hns3vf_wait_hardware_ready(struct hns3_adapter *hns) + return 0; + + wait_data->check_completion = NULL; +- wait_data->interval = 1 * MSEC_PER_SEC * USEC_PER_MSEC; ++ wait_data->interval = HNS3_WAIT_PF_RESET_READY_TIME * ++ MSEC_PER_SEC * USEC_PER_MSEC; + wait_data->count = 1; + wait_data->result = HNS3_WAIT_REQUEST; + rte_eal_alarm_set(wait_data->interval, hns3_wait_callback, + wait_data); +- hns3_warn(hw, "hardware is ready, delay 1 sec for PF reset complete"); ++ hns3_warn(hw, "hardware is ready, delay %d sec for PF reset complete", ++ HNS3_WAIT_PF_RESET_READY_TIME); + return -EAGAIN; + } else if (wait_data->result == HNS3_WAIT_TIMEOUT) { +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + hns3_warn(hw, "Reset step4 hardware not ready after reset time=%ld.%.6ld", + tv.tv_sec, tv.tv_usec); + return -ETIME; +@@ -2371,7 +2370,7 @@ hns3vf_wait_hardware_ready(struct hns3_adapter *hns) + wait_data->hns = hns; + wait_data->check_completion = is_vf_reset_done; + wait_data->end_ms = (uint64_t)HNS3VF_RESET_WAIT_CNT * +- HNS3VF_RESET_WAIT_MS + get_timeofday_ms(); ++ HNS3VF_RESET_WAIT_MS + hns3_clock_gettime_ms(); + wait_data->interval = HNS3VF_RESET_WAIT_MS * USEC_PER_MSEC; + wait_data->count = HNS3VF_RESET_WAIT_CNT; + wait_data->result = HNS3_WAIT_REQUEST; +@@ -2411,7 +2410,7 @@ hns3vf_stop_service(struct hns3_adapter *hns) + rte_wmb(); + /* Disable datapath on secondary process. */ + hns3_mp_req_stop_rxtx(eth_dev); +- rte_delay_ms(hw->tqps_num); ++ rte_delay_ms(hw->cfg_max_queues); + + rte_spinlock_lock(&hw->lock); + if (hw->adapter_state == HNS3_NIC_STARTED || +@@ -2476,7 +2475,7 @@ hns3vf_check_default_mac_change(struct hns3_hw *hw) + * ethdev driver sets the MAC address for VF device after the + * initialization of the related VF device, the PF driver will notify + * VF driver to reset VF device to make the new MAC address effective +- * immediately. The hns3 VF PMD driver should check whether the MAC ++ * immediately. The hns3 VF PMD should check whether the MAC + * address has been changed by the PF kernel ethdev driver, if changed + * VF driver should configure hardware using the new MAC address in the + * recovering hardware configuration stage of the reset process. 
+@@ -2549,6 +2548,13 @@ hns3vf_restore_conf(struct hns3_adapter *hns) + hns3_info(hw, "hns3vf dev restart successful!"); + } else if (hw->adapter_state == HNS3_NIC_STOPPING) + hw->adapter_state = HNS3_NIC_CONFIGURED; ++ ++ ret = hns3vf_set_alive(hw, true); ++ if (ret) { ++ hns3_err(hw, "failed to VF send alive to PF: %d", ret); ++ goto err_vlan_table; ++ } ++ + return 0; + + err_vlan_table: +@@ -2617,14 +2623,13 @@ hns3vf_reset_service(void *param) + */ + reset_level = hns3vf_get_reset_level(hw, &hw->reset.pending); + if (reset_level != HNS3_NONE_RESET) { +- gettimeofday(&tv_start, NULL); ++ hns3_clock_gettime(&tv_start); + hns3_reset_process(hns, reset_level); +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + timersub(&tv, &tv_start, &tv_delta); +- msec = tv_delta.tv_sec * MSEC_PER_SEC + +- tv_delta.tv_usec / USEC_PER_MSEC; ++ msec = hns3_clock_calctime_ms(&tv_delta); + if (msec > HNS3_RESET_PROCESS_MS) +- hns3_err(hw, "%d handle long time delta %" PRIx64 ++ hns3_err(hw, "%d handle long time delta %" PRIu64 + " ms time=%ld.%.6ld", + hw->reset.level, msec, tv.tv_sec, tv.tv_usec); + } +@@ -2751,17 +2756,7 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev) + + PMD_INIT_FUNC_TRACE(); + +- eth_dev->process_private = (struct hns3_process_private *) +- rte_zmalloc_socket("hns3_filter_list", +- sizeof(struct hns3_process_private), +- RTE_CACHE_LINE_SIZE, eth_dev->device->numa_node); +- if (eth_dev->process_private == NULL) { +- PMD_INIT_LOG(ERR, "Failed to alloc memory for process private"); +- return -ENOMEM; +- } +- +- /* initialize flow filter lists */ +- hns3_filterlist_init(eth_dev); ++ hns3_flow_init(eth_dev); + + hns3_set_rxtx_function(eth_dev); + eth_dev->dev_ops = &hns3vf_eth_dev_ops; +@@ -2773,8 +2768,8 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev) + "process, ret = %d", ret); + goto err_mp_init_secondary; + } +- +- hw->secondary_cnt++; ++ __atomic_fetch_add(&hw->secondary_cnt, 1, __ATOMIC_RELAXED); ++ process_data.eth_dev_cnt++; + return 0; + } + +@@ -2787,6 +2782,7 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev) + ret); + goto err_mp_init_primary; + } ++ process_data.eth_dev_cnt++; + + hw->adapter_state = HNS3_NIC_UNINITIALIZED; + hns->is_vf = true; +@@ -2819,12 +2815,12 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev) + /* + * The hns3 PF ethdev driver in kernel support setting VF MAC address + * on the host by "ip link set ..." command. To avoid some incorrect +- * scenes, for example, hns3 VF PMD driver fails to receive and send ++ * scenes, for example, hns3 VF PMD fails to receive and send + * packets after user configure the MAC address by using the +- * "ip link set ..." command, hns3 VF PMD driver keep the same MAC ++ * "ip link set ..." command, hns3 VF PMD keep the same MAC + * address strategy as the hns3 kernel ethdev driver in the + * initialization. If user configure a MAC address by the ip command +- * for VF device, then hns3 VF PMD driver will start with it, otherwise ++ * for VF device, then hns3 VF PMD will start with it, otherwise + * start with a random MAC address in the initialization. 
+ */ + if (rte_is_zero_ether_addr((struct rte_ether_addr *)hw->mac.mac_addr)) +@@ -2852,7 +2848,7 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev) + rte_free(hw->reset.wait_data); + + err_init_reset: +- hns3_mp_uninit_primary(); ++ hns3_mp_uninit(); + + err_mp_init_primary: + err_mp_init_secondary: +@@ -2860,8 +2856,6 @@ hns3vf_dev_init(struct rte_eth_dev *eth_dev) + eth_dev->rx_pkt_burst = NULL; + eth_dev->tx_pkt_burst = NULL; + eth_dev->tx_pkt_prepare = NULL; +- rte_free(eth_dev->process_private); +- eth_dev->process_private = NULL; + + return ret; + } +@@ -2875,8 +2869,8 @@ hns3vf_dev_uninit(struct rte_eth_dev *eth_dev) + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { +- rte_free(eth_dev->process_private); +- eth_dev->process_private = NULL; ++ __atomic_fetch_sub(&hw->secondary_cnt, 1, __ATOMIC_RELAXED); ++ hns3_mp_uninit(); + return 0; + } + +diff --git a/dpdk/drivers/net/hns3/hns3_fdir.c b/dpdk/drivers/net/hns3/hns3_fdir.c +index 857cc94c98..4e1667d2f8 100644 +--- a/dpdk/drivers/net/hns3/hns3_fdir.c ++++ b/dpdk/drivers/net/hns3/hns3_fdir.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #include +@@ -618,7 +618,7 @@ static bool hns3_fd_convert_tuple(struct hns3_hw *hw, + break; + default: + hns3_warn(hw, "not support tuple of (%u)", tuple); +- break; ++ return false; + } + return true; + } +@@ -830,7 +830,6 @@ int hns3_fdir_filter_init(struct hns3_adapter *hns) + + fdir_hash_params.socket_id = rte_socket_id(); + TAILQ_INIT(&fdir_info->fdir_list); +- rte_spinlock_init(&fdir_info->flows_lock); + snprintf(fdir_hash_name, RTE_HASH_NAMESIZE, "%s", hns->hw.data->name); + fdir_info->hash_handle = rte_hash_create(&fdir_hash_params); + if (fdir_info->hash_handle == NULL) { +@@ -856,7 +855,6 @@ void hns3_fdir_filter_uninit(struct hns3_adapter *hns) + struct hns3_fdir_info *fdir_info = &pf->fdir; + struct hns3_fdir_rule_ele *fdir_filter; + +- rte_spinlock_lock(&fdir_info->flows_lock); + if (fdir_info->hash_map) { + rte_free(fdir_info->hash_map); + fdir_info->hash_map = NULL; +@@ -865,7 +863,6 @@ void hns3_fdir_filter_uninit(struct hns3_adapter *hns) + rte_hash_free(fdir_info->hash_handle); + fdir_info->hash_handle = NULL; + } +- rte_spinlock_unlock(&fdir_info->flows_lock); + + fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list); + while (fdir_filter) { +@@ -891,10 +888,8 @@ static int hns3_fdir_filter_lookup(struct hns3_fdir_info *fdir_info, + hash_sig_t sig; + int ret; + +- rte_spinlock_lock(&fdir_info->flows_lock); + sig = rte_hash_crc(key, sizeof(*key), 0); + ret = rte_hash_lookup_with_hash(fdir_info->hash_handle, key, sig); +- rte_spinlock_unlock(&fdir_info->flows_lock); + + return ret; + } +@@ -908,11 +903,9 @@ static int hns3_insert_fdir_filter(struct hns3_hw *hw, + int ret; + + key = &fdir_filter->fdir_conf.key_conf; +- rte_spinlock_lock(&fdir_info->flows_lock); + sig = rte_hash_crc(key, sizeof(*key), 0); + ret = rte_hash_add_key_with_hash(fdir_info->hash_handle, key, sig); + if (ret < 0) { +- rte_spinlock_unlock(&fdir_info->flows_lock); + hns3_err(hw, "Hash table full? 
err:%d(%s)!", ret, + strerror(-ret)); + return ret; +@@ -920,7 +913,6 @@ static int hns3_insert_fdir_filter(struct hns3_hw *hw, + + fdir_info->hash_map[ret] = fdir_filter; + TAILQ_INSERT_TAIL(&fdir_info->fdir_list, fdir_filter, entries); +- rte_spinlock_unlock(&fdir_info->flows_lock); + + return ret; + } +@@ -933,11 +925,9 @@ static int hns3_remove_fdir_filter(struct hns3_hw *hw, + hash_sig_t sig; + int ret; + +- rte_spinlock_lock(&fdir_info->flows_lock); + sig = rte_hash_crc(key, sizeof(*key), 0); + ret = rte_hash_del_key_with_hash(fdir_info->hash_handle, key, sig); + if (ret < 0) { +- rte_spinlock_unlock(&fdir_info->flows_lock); + hns3_err(hw, "Delete hash key fail ret=%d", ret); + return ret; + } +@@ -945,7 +935,6 @@ static int hns3_remove_fdir_filter(struct hns3_hw *hw, + fdir_filter = fdir_info->hash_map[ret]; + fdir_info->hash_map[ret] = NULL; + TAILQ_REMOVE(&fdir_info->fdir_list, fdir_filter, entries); +- rte_spinlock_unlock(&fdir_info->flows_lock); + + rte_free(fdir_filter); + +@@ -1000,11 +989,9 @@ int hns3_fdir_filter_program(struct hns3_adapter *hns, + rule->location = ret; + node->fdir_conf.location = ret; + +- rte_spinlock_lock(&fdir_info->flows_lock); + ret = hns3_config_action(hw, rule); + if (!ret) + ret = hns3_config_key(hns, rule); +- rte_spinlock_unlock(&fdir_info->flows_lock); + if (ret) { + hns3_err(hw, "Failed to config fdir: %u src_ip:%x dst_ip:%x " + "src_port:%u dst_port:%u ret = %d", +@@ -1026,27 +1013,37 @@ int hns3_clear_all_fdir_filter(struct hns3_adapter *hns) + struct hns3_fdir_info *fdir_info = &pf->fdir; + struct hns3_fdir_rule_ele *fdir_filter; + struct hns3_hw *hw = &hns->hw; ++ int succ_cnt = 0; ++ int fail_cnt = 0; + int ret = 0; + + /* flush flow director */ +- rte_spinlock_lock(&fdir_info->flows_lock); + rte_hash_reset(fdir_info->hash_handle); +- rte_spinlock_unlock(&fdir_info->flows_lock); ++ ++ memset(fdir_info->hash_map, 0, ++ sizeof(struct hns3_fdir_rule_ele *) * ++ fdir_info->fd_cfg.rule_num[HNS3_FD_STAGE_1]); + + fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list); + while (fdir_filter) { + TAILQ_REMOVE(&fdir_info->fdir_list, fdir_filter, entries); +- ret += hns3_fd_tcam_config(hw, true, +- fdir_filter->fdir_conf.location, +- NULL, false); ++ ret = hns3_fd_tcam_config(hw, true, ++ fdir_filter->fdir_conf.location, ++ NULL, false); ++ if (ret == 0) ++ succ_cnt++; ++ else ++ fail_cnt++; + rte_free(fdir_filter); + fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list); + } + +- if (ret) { +- hns3_err(hw, "Fail to delete FDIR filter, ret = %d", ret); ++ if (fail_cnt > 0) { ++ hns3_err(hw, "fail to delete all FDIR filter, success num = %d " ++ "fail num = %d", succ_cnt, fail_cnt); + ret = -EIO; + } ++ + return ret; + } + +@@ -1059,6 +1056,17 @@ int hns3_restore_all_fdir_filter(struct hns3_adapter *hns) + bool err = false; + int ret; + ++ /* ++ * This API is called in the reset recovery process, the parent function ++ * must hold hw->lock. ++ * There maybe deadlock if acquire hw->flows_lock directly because rte ++ * flow driver ops first acquire hw->flows_lock and then may acquire ++ * hw->lock. ++ * So here first release the hw->lock and then acquire the ++ * hw->flows_lock to avoid deadlock. 
++ */ ++ rte_spinlock_unlock(&hw->lock); ++ pthread_mutex_lock(&hw->flows_lock); + TAILQ_FOREACH(fdir_filter, &fdir_info->fdir_list, entries) { + ret = hns3_config_action(hw, &fdir_filter->fdir_conf); + if (!ret) +@@ -1069,6 +1077,8 @@ int hns3_restore_all_fdir_filter(struct hns3_adapter *hns) + break; + } + } ++ pthread_mutex_unlock(&hw->flows_lock); ++ rte_spinlock_lock(&hw->lock); + + if (err) { + hns3_err(hw, "Fail to restore FDIR filter, ret = %d", ret); +diff --git a/dpdk/drivers/net/hns3/hns3_fdir.h b/dpdk/drivers/net/hns3/hns3_fdir.h +index a5760a3ccf..839fdb3a88 100644 +--- a/dpdk/drivers/net/hns3/hns3_fdir.h ++++ b/dpdk/drivers/net/hns3/hns3_fdir.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #ifndef _HNS3_FDIR_H_ +@@ -149,7 +149,7 @@ struct hns3_fdir_rule { + uint32_t flags; + uint32_t fd_id; /* APP marked unique value for this rule. */ + uint8_t action; +- /* VF id, avaiblable when flags with HNS3_RULE_FLAG_VF_ID. */ ++ /* VF id, available when flags with HNS3_RULE_FLAG_VF_ID. */ + uint8_t vf_id; + /* + * equal 0 when action is drop. +@@ -189,17 +189,10 @@ TAILQ_HEAD(hns3_fdir_rule_list, hns3_fdir_rule_ele); + TAILQ_HEAD(hns3_rss_filter_list, hns3_rss_conf_ele); + TAILQ_HEAD(hns3_flow_mem_list, hns3_flow_mem); + +-struct hns3_process_private { +- struct hns3_fdir_rule_list fdir_list; +- struct hns3_rss_filter_list filter_rss_list; +- struct hns3_flow_mem_list flow_list; +-}; +- + /* + * A structure used to define fields of a FDIR related info. + */ + struct hns3_fdir_info { +- rte_spinlock_t flows_lock; + struct hns3_fdir_rule_list fdir_list; + struct hns3_fdir_rule_ele **hash_map; + struct rte_hash *hash_handle; +@@ -220,7 +213,8 @@ int hns3_fdir_filter_program(struct hns3_adapter *hns, + struct hns3_fdir_rule *rule, bool del); + int hns3_clear_all_fdir_filter(struct hns3_adapter *hns); + int hns3_get_count(struct hns3_hw *hw, uint32_t id, uint64_t *value); +-void hns3_filterlist_init(struct rte_eth_dev *dev); ++void hns3_flow_init(struct rte_eth_dev *dev); ++void hns3_flow_uninit(struct rte_eth_dev *dev); + int hns3_restore_all_fdir_filter(struct hns3_adapter *hns); + + #endif /* _HNS3_FDIR_H_ */ +diff --git a/dpdk/drivers/net/hns3/hns3_flow.c b/dpdk/drivers/net/hns3/hns3_flow.c +index 8e4519a425..a1acb0ebd5 100644 +--- a/dpdk/drivers/net/hns3/hns3_flow.c ++++ b/dpdk/drivers/net/hns3/hns3_flow.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. 
+ */ + + #include +@@ -9,15 +9,6 @@ + #include "hns3_ethdev.h" + #include "hns3_logs.h" + +-/* Default default keys */ +-static uint8_t hns3_hash_key[] = { +- 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, +- 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0, +- 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4, +- 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, +- 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA +-}; +- + static const uint8_t full_mask[VNI_OR_TNI_LEN] = { 0xFF, 0xFF, 0xFF }; + static const uint8_t zero_mask[VNI_OR_TNI_LEN] = { 0x00, 0x00, 0x00 }; + +@@ -158,7 +149,10 @@ hns3_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, + { + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_pf *pf = &hns->pf; ++ struct hns3_hw *hw = &hns->hw; + struct hns3_flow_counter *cnt; ++ uint64_t value; ++ int ret; + + cnt = hns3_counter_lookup(dev, id); + if (cnt) { +@@ -171,6 +165,13 @@ hns3_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, + return 0; + } + ++ /* Clear the counter by read ops because the counter is read-clear */ ++ ret = hns3_get_count(hw, id, &value); ++ if (ret) ++ return rte_flow_error_set(error, EIO, ++ RTE_FLOW_ERROR_TYPE_HANDLE, NULL, ++ "Clear counter failed!"); ++ + cnt = rte_zmalloc("hns3 counter", sizeof(*cnt), 0); + if (cnt == NULL) + return rte_flow_error_set(error, ENOMEM, +@@ -213,6 +214,8 @@ hns3_counter_query(struct rte_eth_dev *dev, struct rte_flow *flow, + } + qc->hits_set = 1; + qc->hits = value; ++ qc->bytes_set = 0; ++ qc->bytes = 0; + + return 0; + } +@@ -263,10 +266,10 @@ hns3_handle_action_queue(struct rte_eth_dev *dev, + struct hns3_hw *hw = &hns->hw; + + queue = (const struct rte_flow_action_queue *)action->conf; +- if (queue->index >= hw->used_rx_queues) { ++ if (queue->index >= hw->data->nb_rx_queues) { + hns3_err(hw, "queue ID(%u) is greater than number of " + "available queue (%u) in driver.", +- queue->index, hw->used_rx_queues); ++ queue->index, hw->data->nb_rx_queues); + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + action, "Invalid queue ID in PF"); +@@ -296,8 +299,8 @@ hns3_handle_action_queue_region(struct rte_eth_dev *dev, + + if ((!rte_is_power_of_2(conf->queue_num)) || + conf->queue_num > hw->rss_size_max || +- conf->queue[0] >= hw->used_rx_queues || +- conf->queue[0] + conf->queue_num > hw->used_rx_queues) { ++ conf->queue[0] >= hw->data->nb_rx_queues || ++ conf->queue[0] + conf->queue_num > hw->data->nb_rx_queues) { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, action, + "Invalid start queue ID and queue num! the start queue " +@@ -325,7 +328,7 @@ hns3_handle_action_queue_region(struct rte_eth_dev *dev, + * + * @param actions[in] + * @param rule[out] +- * NIC specfilc actions derived from the actions. ++ * NIC specific actions derived from the actions. + * @param error[out] + */ + static int +@@ -356,7 +359,7 @@ hns3_handle_actions(struct rte_eth_dev *dev, + * Queue region is implemented by FDIR + RSS in hns3 hardware, + * the FDIR's action is one queue region (start_queue_id and + * queue_num), then RSS spread packets to the queue region by +- * RSS algorigthm. ++ * RSS algorithm. 
+ */ + case RTE_FLOW_ACTION_TYPE_RSS: + ret = hns3_handle_action_queue_region(dev, actions, +@@ -962,7 +965,7 @@ hns3_parse_nvgre(const struct rte_flow_item *item, struct hns3_fdir_rule *rule, + if (nvgre_mask->protocol || nvgre_mask->c_k_s_rsvd0_ver) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, item, +- "Ver/protocal is not supported in NVGRE"); ++ "Ver/protocol is not supported in NVGRE"); + + /* TNI must be totally masked or not. */ + if (memcmp(nvgre_mask->tni, full_mask, VNI_OR_TNI_LEN) && +@@ -1016,7 +1019,7 @@ hns3_parse_geneve(const struct rte_flow_item *item, struct hns3_fdir_rule *rule, + if (geneve_mask->ver_opt_len_o_c_rsvd0 || geneve_mask->protocol) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, item, +- "Ver/protocal is not supported in GENEVE"); ++ "Ver/protocol is not supported in GENEVE"); + /* VNI must be totally masked or not. */ + if (memcmp(geneve_mask->vni, full_mask, VNI_OR_TNI_LEN) && + memcmp(geneve_mask->vni, zero_mask, VNI_OR_TNI_LEN)) +@@ -1149,42 +1152,29 @@ is_tunnel_packet(enum rte_flow_item_type type) + } + + /* +- * Parse the rule to see if it is a IP or MAC VLAN flow director rule. +- * And get the flow director filter info BTW. +- * UDP/TCP/SCTP PATTERN: +- * The first not void item can be ETH or IPV4 or IPV6 +- * The second not void item must be IPV4 or IPV6 if the first one is ETH. +- * The next not void item could be UDP or TCP or SCTP (optional) +- * The next not void item could be RAW (for flexbyte, optional) +- * The next not void item must be END. +- * A Fuzzy Match pattern can appear at any place before END. +- * Fuzzy Match is optional for IPV4 but is required for IPV6 +- * MAC VLAN PATTERN: +- * The first not void item must be ETH. +- * The second not void item must be MAC VLAN. +- * The next not void item must be END. +- * ACTION: +- * The first not void action should be QUEUE or DROP. +- * The second not void optional action should be MARK, +- * mark_id is a uint32_t number. +- * The next not void action should be END. +- * UDP/TCP/SCTP pattern example: +- * ITEM Spec Mask +- * ETH NULL NULL +- * IPV4 src_addr 192.168.1.20 0xFFFFFFFF +- * dst_addr 192.167.3.50 0xFFFFFFFF +- * UDP/TCP/SCTP src_port 80 0xFFFF +- * dst_port 80 0xFFFF +- * END +- * MAC VLAN pattern example: +- * ITEM Spec Mask +- * ETH dst_addr +- {0xAC, 0x7B, 0xA1, {0xFF, 0xFF, 0xFF, +- 0x2C, 0x6D, 0x36} 0xFF, 0xFF, 0xFF} +- * MAC VLAN tci 0x2016 0xEFFF +- * END +- * Other members in mask and spec should set to 0x00. +- * Item->last should be NULL. ++ * Parse the flow director rule. ++ * The supported PATTERN: ++ * case: non-tunnel packet: ++ * ETH : src-mac, dst-mac, ethertype ++ * VLAN: tag1, tag2 ++ * IPv4: src-ip, dst-ip, tos, proto ++ * IPv6: src-ip(last 32 bit addr), dst-ip(last 32 bit addr), proto ++ * UDP : src-port, dst-port ++ * TCP : src-port, dst-port ++ * SCTP: src-port, dst-port, tag ++ * case: tunnel packet: ++ * OUTER-ETH: ethertype ++ * OUTER-L3 : proto ++ * OUTER-L4 : src-port, dst-port ++ * TUNNEL : vni, flow-id(only valid when NVGRE) ++ * INNER-ETH/VLAN/IPv4/IPv6/UDP/TCP/SCTP: same as non-tunnel packet ++ * The supported ACTION: ++ * QUEUE ++ * DROP ++ * COUNT ++ * MARK: the id range [0, 4094] ++ * FLAG ++ * RSS: only valid if firmware support FD_QUEUE_REGION. 
+ */ + static int + hns3_parse_fdir_filter(struct rte_eth_dev *dev, +@@ -1230,45 +1220,34 @@ hns3_parse_fdir_filter(struct rte_eth_dev *dev, + return hns3_handle_actions(dev, actions, rule, error); + } + +-void +-hns3_filterlist_init(struct rte_eth_dev *dev) +-{ +- struct hns3_process_private *process_list = dev->process_private; +- +- TAILQ_INIT(&process_list->fdir_list); +- TAILQ_INIT(&process_list->filter_rss_list); +- TAILQ_INIT(&process_list->flow_list); +-} +- + static void + hns3_filterlist_flush(struct rte_eth_dev *dev) + { +- struct hns3_process_private *process_list = dev->process_private; ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct hns3_fdir_rule_ele *fdir_rule_ptr; + struct hns3_rss_conf_ele *rss_filter_ptr; + struct hns3_flow_mem *flow_node; + +- fdir_rule_ptr = TAILQ_FIRST(&process_list->fdir_list); ++ fdir_rule_ptr = TAILQ_FIRST(&hw->flow_fdir_list); + while (fdir_rule_ptr) { +- TAILQ_REMOVE(&process_list->fdir_list, fdir_rule_ptr, entries); ++ TAILQ_REMOVE(&hw->flow_fdir_list, fdir_rule_ptr, entries); + rte_free(fdir_rule_ptr); +- fdir_rule_ptr = TAILQ_FIRST(&process_list->fdir_list); ++ fdir_rule_ptr = TAILQ_FIRST(&hw->flow_fdir_list); + } + +- rss_filter_ptr = TAILQ_FIRST(&process_list->filter_rss_list); ++ rss_filter_ptr = TAILQ_FIRST(&hw->flow_rss_list); + while (rss_filter_ptr) { +- TAILQ_REMOVE(&process_list->filter_rss_list, rss_filter_ptr, +- entries); ++ TAILQ_REMOVE(&hw->flow_rss_list, rss_filter_ptr, entries); + rte_free(rss_filter_ptr); +- rss_filter_ptr = TAILQ_FIRST(&process_list->filter_rss_list); ++ rss_filter_ptr = TAILQ_FIRST(&hw->flow_rss_list); + } + +- flow_node = TAILQ_FIRST(&process_list->flow_list); ++ flow_node = TAILQ_FIRST(&hw->flow_list); + while (flow_node) { +- TAILQ_REMOVE(&process_list->flow_list, flow_node, entries); ++ TAILQ_REMOVE(&hw->flow_list, flow_node, entries); + rte_free(flow_node->flow); + rte_free(flow_node); +- flow_node = TAILQ_FIRST(&process_list->flow_list); ++ flow_node = TAILQ_FIRST(&hw->flow_list); + } + } + +@@ -1276,6 +1255,7 @@ static bool + hns3_action_rss_same(const struct rte_flow_action_rss *comp, + const struct rte_flow_action_rss *with) + { ++ bool rss_key_is_same; + bool func_is_same; + + /* +@@ -1289,13 +1269,19 @@ hns3_action_rss_same(const struct rte_flow_action_rss *comp, + if (comp->func == RTE_ETH_HASH_FUNCTION_MAX) + func_is_same = false; + else +- func_is_same = (with->func ? (comp->func == with->func) : true); ++ func_is_same = (with->func != RTE_ETH_HASH_FUNCTION_DEFAULT) ? ++ (comp->func == with->func) : true; + +- return (func_is_same && ++ if (with->key_len == 0 || with->key == NULL) ++ rss_key_is_same = 1; ++ else ++ rss_key_is_same = comp->key_len == with->key_len && ++ !memcmp(comp->key, with->key, with->key_len); ++ ++ return (func_is_same && rss_key_is_same && + comp->types == (with->types & HNS3_ETH_RSS_SUPPORT) && +- comp->level == with->level && comp->key_len == with->key_len && ++ comp->level == with->level && + comp->queue_num == with->queue_num && +- !memcmp(comp->key, with->key, with->key_len) && + !memcmp(comp->queue, with->queue, + sizeof(*with->queue) * with->queue_num)); + } +@@ -1325,7 +1311,7 @@ hns3_rss_conf_copy(struct hns3_rss_conf *out, + } + + /* +- * This function is used to parse rss action validatation. ++ * This function is used to parse rss action validation. 
+ */ + static int + hns3_parse_rss_filter(struct rte_eth_dev *dev, +@@ -1418,15 +1404,10 @@ hns3_disable_rss(struct hns3_hw *hw) + { + int ret; + +- /* Redirected the redirection table to queue 0 */ +- ret = hns3_rss_reset_indir_table(hw); ++ ret = hns3_set_rss_tuple_by_rss_hf(hw, 0); + if (ret) + return ret; + +- /* Disable RSS */ +- hw->rss_info.conf.types = 0; +- hw->rss_dis_flag = true; +- + return 0; + } + +@@ -1472,7 +1453,6 @@ hns3_parse_rss_algorithm(struct hns3_hw *hw, enum rte_eth_hash_function *func, + static int + hns3_hw_rss_hash_set(struct hns3_hw *hw, struct rte_flow_action_rss *rss_config) + { +- struct hns3_rss_tuple_cfg *tuple; + int ret; + + hns3_parse_rss_key(hw, rss_config); +@@ -1488,8 +1468,7 @@ hns3_hw_rss_hash_set(struct hns3_hw *hw, struct rte_flow_action_rss *rss_config) + + hw->rss_info.conf.func = rss_config->func; + +- tuple = &hw->rss_info.rss_tuple_sets; +- ret = hns3_set_rss_tuple_by_rss_hf(hw, tuple, rss_config->types); ++ ret = hns3_set_rss_tuple_by_rss_hf(hw, rss_config->types); + if (ret) + hns3_err(hw, "Update RSS tuples by rss hf failed %d", ret); + +@@ -1527,7 +1506,6 @@ static int + hns3_config_rss_filter(struct rte_eth_dev *dev, + const struct hns3_rss_conf *conf, bool add) + { +- struct hns3_process_private *process_list = dev->process_private; + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_rss_conf_ele *rss_filter_ptr; + struct hns3_hw *hw = &hns->hw; +@@ -1553,7 +1531,7 @@ hns3_config_rss_filter(struct rte_eth_dev *dev, + hw->rss_info.conf.types; + if (flow_types != rss_flow_conf.types) + hns3_warn(hw, "modified RSS types based on hardware support, " +- "requested:%" PRIx64 " configured:%" PRIx64, ++ "requested:0x%" PRIx64 " configured:0x%" PRIx64, + rss_flow_conf.types, flow_types); + /* Update the useful flow types */ + rss_flow_conf.types = flow_types; +@@ -1612,7 +1590,7 @@ hns3_config_rss_filter(struct rte_eth_dev *dev, + * When create a new RSS rule, the old rule will be overlaid and set + * invalid. + */ +- TAILQ_FOREACH(rss_filter_ptr, &process_list->filter_rss_list, entries) ++ TAILQ_FOREACH(rss_filter_ptr, &hw->flow_rss_list, entries) + rss_filter_ptr->filter_info.valid = false; + + rss_config_err: +@@ -1624,7 +1602,6 @@ hns3_config_rss_filter(struct rte_eth_dev *dev, + static int + hns3_clear_rss_filter(struct rte_eth_dev *dev) + { +- struct hns3_process_private *process_list = dev->process_private; + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_rss_conf_ele *rss_filter_ptr; + struct hns3_hw *hw = &hns->hw; +@@ -1632,10 +1609,9 @@ hns3_clear_rss_filter(struct rte_eth_dev *dev) + int rss_rule_fail_cnt = 0; /* count for failure of clearing RSS rules */ + int ret = 0; + +- rss_filter_ptr = TAILQ_FIRST(&process_list->filter_rss_list); ++ rss_filter_ptr = TAILQ_FIRST(&hw->flow_rss_list); + while (rss_filter_ptr) { +- TAILQ_REMOVE(&process_list->filter_rss_list, rss_filter_ptr, +- entries); ++ TAILQ_REMOVE(&hw->flow_rss_list, rss_filter_ptr, entries); + ret = hns3_config_rss_filter(dev, &rss_filter_ptr->filter_info, + false); + if (ret) +@@ -1643,7 +1619,7 @@ hns3_clear_rss_filter(struct rte_eth_dev *dev) + else + rss_rule_succ_cnt++; + rte_free(rss_filter_ptr); +- rss_filter_ptr = TAILQ_FIRST(&process_list->filter_rss_list); ++ rss_filter_ptr = TAILQ_FIRST(&hw->flow_rss_list); + } + + if (rss_rule_fail_cnt) { +@@ -1712,7 +1688,7 @@ hns3_flow_args_check(const struct rte_flow_attr *attr, + + /* + * Check if the flow rule is supported by hns3. +- * It only checkes the format. 
Don't guarantee the rule can be programmed into ++ * It only checks the format. Don't guarantee the rule can be programmed into + * the HW. Because there can be no enough room for the rule. + */ + static int +@@ -1747,7 +1723,6 @@ hns3_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + const struct rte_flow_action actions[], + struct rte_flow_error *error) + { +- struct hns3_process_private *process_list = dev->process_private; + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; + const struct hns3_rss_conf *rss_conf; +@@ -1779,7 +1754,7 @@ hns3_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + } + + flow_node->flow = flow; +- TAILQ_INSERT_TAIL(&process_list->flow_list, flow_node, entries); ++ TAILQ_INSERT_TAIL(&hw->flow_list, flow_node, entries); + + act = hns3_find_rss_general_action(pattern, actions); + if (act) { +@@ -1801,8 +1776,7 @@ hns3_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + hns3_rss_conf_copy(&rss_filter_ptr->filter_info, + &rss_conf->conf); + rss_filter_ptr->filter_info.valid = true; +- TAILQ_INSERT_TAIL(&process_list->filter_rss_list, +- rss_filter_ptr, entries); ++ TAILQ_INSERT_TAIL(&hw->flow_rss_list, rss_filter_ptr, entries); + + flow->rule = rss_filter_ptr; + flow->filter_type = RTE_ETH_FILTER_HASH; +@@ -1836,8 +1810,7 @@ hns3_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + if (!ret) { + memcpy(&fdir_rule_ptr->fdir_conf, &fdir_rule, + sizeof(struct hns3_fdir_rule)); +- TAILQ_INSERT_TAIL(&process_list->fdir_list, +- fdir_rule_ptr, entries); ++ TAILQ_INSERT_TAIL(&hw->flow_fdir_list, fdir_rule_ptr, entries); + flow->rule = fdir_rule_ptr; + flow->filter_type = RTE_ETH_FILTER_FDIR; + +@@ -1852,7 +1825,7 @@ hns3_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to create flow"); + out: +- TAILQ_REMOVE(&process_list->flow_list, flow_node, entries); ++ TAILQ_REMOVE(&hw->flow_list, flow_node, entries); + rte_free(flow_node); + rte_free(flow); + return NULL; +@@ -1863,13 +1836,13 @@ static int + hns3_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, + struct rte_flow_error *error) + { +- struct hns3_process_private *process_list = dev->process_private; + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_fdir_rule_ele *fdir_rule_ptr; + struct hns3_rss_conf_ele *rss_filter_ptr; + struct hns3_flow_mem *flow_node; + enum rte_filter_type filter_type; + struct hns3_fdir_rule fdir_rule; ++ struct hns3_hw *hw = &hns->hw; + int ret; + + if (flow == NULL) +@@ -1891,7 +1864,7 @@ hns3_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, + "Destroy FDIR fail.Try again"); + if (fdir_rule.flags & HNS3_RULE_FLAG_COUNTER) + hns3_counter_release(dev, fdir_rule.act_cnt.id); +- TAILQ_REMOVE(&process_list->fdir_list, fdir_rule_ptr, entries); ++ TAILQ_REMOVE(&hw->flow_fdir_list, fdir_rule_ptr, entries); + rte_free(fdir_rule_ptr); + fdir_rule_ptr = NULL; + break; +@@ -1904,8 +1877,7 @@ hns3_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, + RTE_FLOW_ERROR_TYPE_HANDLE, + flow, + "Destroy RSS fail.Try again"); +- TAILQ_REMOVE(&process_list->filter_rss_list, rss_filter_ptr, +- entries); ++ TAILQ_REMOVE(&hw->flow_rss_list, rss_filter_ptr, entries); + rte_free(rss_filter_ptr); + rss_filter_ptr = NULL; + break; +@@ -1915,10 +1887,9 @@ hns3_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, + "Unsupported filter type"); + } + 
+- TAILQ_FOREACH(flow_node, &process_list->flow_list, entries) { ++ TAILQ_FOREACH(flow_node, &hw->flow_list, entries) { + if (flow_node->flow == flow) { +- TAILQ_REMOVE(&process_list->flow_list, flow_node, +- entries); ++ TAILQ_REMOVE(&hw->flow_list, flow_node, entries); + rte_free(flow_node); + flow_node = NULL; + break; +@@ -2007,12 +1978,87 @@ hns3_flow_query(struct rte_eth_dev *dev, struct rte_flow *flow, + return 0; + } + ++static int ++hns3_flow_validate_wrap(struct rte_eth_dev *dev, ++ const struct rte_flow_attr *attr, ++ const struct rte_flow_item pattern[], ++ const struct rte_flow_action actions[], ++ struct rte_flow_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ pthread_mutex_lock(&hw->flows_lock); ++ ret = hns3_flow_validate(dev, attr, pattern, actions, error); ++ pthread_mutex_unlock(&hw->flows_lock); ++ ++ return ret; ++} ++ ++static struct rte_flow * ++hns3_flow_create_wrap(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, ++ const struct rte_flow_item pattern[], ++ const struct rte_flow_action actions[], ++ struct rte_flow_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ struct rte_flow *flow; ++ ++ pthread_mutex_lock(&hw->flows_lock); ++ flow = hns3_flow_create(dev, attr, pattern, actions, error); ++ pthread_mutex_unlock(&hw->flows_lock); ++ ++ return flow; ++} ++ ++static int ++hns3_flow_destroy_wrap(struct rte_eth_dev *dev, struct rte_flow *flow, ++ struct rte_flow_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ pthread_mutex_lock(&hw->flows_lock); ++ ret = hns3_flow_destroy(dev, flow, error); ++ pthread_mutex_unlock(&hw->flows_lock); ++ ++ return ret; ++} ++ ++static int ++hns3_flow_flush_wrap(struct rte_eth_dev *dev, struct rte_flow_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ pthread_mutex_lock(&hw->flows_lock); ++ ret = hns3_flow_flush(dev, error); ++ pthread_mutex_unlock(&hw->flows_lock); ++ ++ return ret; ++} ++ ++static int ++hns3_flow_query_wrap(struct rte_eth_dev *dev, struct rte_flow *flow, ++ const struct rte_flow_action *actions, void *data, ++ struct rte_flow_error *error) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ int ret; ++ ++ pthread_mutex_lock(&hw->flows_lock); ++ ret = hns3_flow_query(dev, flow, actions, data, error); ++ pthread_mutex_unlock(&hw->flows_lock); ++ ++ return ret; ++} ++ + static const struct rte_flow_ops hns3_flow_ops = { +- .validate = hns3_flow_validate, +- .create = hns3_flow_create, +- .destroy = hns3_flow_destroy, +- .flush = hns3_flow_flush, +- .query = hns3_flow_query, ++ .validate = hns3_flow_validate_wrap, ++ .create = hns3_flow_create_wrap, ++ .destroy = hns3_flow_destroy_wrap, ++ .flush = hns3_flow_flush_wrap, ++ .query = hns3_flow_query_wrap, + .isolate = NULL, + }; + +@@ -2047,3 +2093,30 @@ hns3_dev_filter_ctrl(struct rte_eth_dev *dev, enum rte_filter_type filter_type, + + return ret; + } ++ ++void ++hns3_flow_init(struct rte_eth_dev *dev) ++{ ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ pthread_mutexattr_t attr; ++ ++ if (rte_eal_process_type() != RTE_PROC_PRIMARY) ++ return; ++ ++ pthread_mutexattr_init(&attr); ++ pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); ++ pthread_mutex_init(&hw->flows_lock, &attr); ++ dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE; ++ ++ TAILQ_INIT(&hw->flow_fdir_list); ++ 
TAILQ_INIT(&hw->flow_rss_list); ++ TAILQ_INIT(&hw->flow_list); ++} ++ ++void ++hns3_flow_uninit(struct rte_eth_dev *dev) ++{ ++ struct rte_flow_error error; ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) ++ hns3_flow_flush_wrap(dev, &error); ++} +diff --git a/dpdk/drivers/net/hns3/hns3_intr.c b/dpdk/drivers/net/hns3/hns3_intr.c +index 99c500dba3..0fa21e4824 100644 +--- a/dpdk/drivers/net/hns3/hns3_intr.c ++++ b/dpdk/drivers/net/hns3/hns3_intr.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #include +@@ -1345,16 +1345,12 @@ enable_tm_err_intr(struct hns3_adapter *hns, bool en) + } + + /* configure TM QCN hw errors */ +- hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_TM_QCN_MEM_INT_CFG, true); +- ret = hns3_cmd_send(hw, &desc, 1); +- if (ret) { +- hns3_err(hw, "fail to read TM QCN CFG status, ret = %d\n", ret); +- return ret; +- } +- +- hns3_cmd_reuse_desc(&desc, false); +- if (en) ++ hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_TM_QCN_MEM_INT_CFG, false); ++ desc.data[0] = rte_cpu_to_le_32(HNS3_TM_QCN_ERR_INT_TYPE); ++ if (en) { ++ desc.data[0] |= rte_cpu_to_le_32(HNS3_TM_QCN_FIFO_INT_EN); + desc.data[1] = rte_cpu_to_le_32(HNS3_TM_QCN_MEM_ERR_INT_EN); ++ } + + ret = hns3_cmd_send(hw, &desc, 1); + if (ret) +@@ -1791,7 +1787,8 @@ hns3_schedule_reset(struct hns3_adapter *hns) + return; + if (rte_atomic16_read(&hns->hw.reset.schedule) == SCHEDULE_DEFERRED) + rte_eal_alarm_cancel(hw->reset.ops->reset_service, hns); +- rte_atomic16_set(&hns->hw.reset.schedule, SCHEDULE_REQUESTED); ++ else ++ rte_atomic16_set(&hns->hw.reset.schedule, SCHEDULE_REQUESTED); + + rte_eal_alarm_set(SWITCH_CONTEXT_US, hw->reset.ops->reset_service, hns); + } +@@ -1829,7 +1826,7 @@ hns3_wait_callback(void *param) + * Check if the current time exceeds the deadline + * or a pending reset coming, or reset during close. + */ +- msec = get_timeofday_ms(); ++ msec = hns3_clock_gettime_ms(); + if (msec > data->end_ms || is_reset_pending(hns) || + hw->adapter_state == HNS3_NIC_CLOSING) { + done = false; +@@ -1941,7 +1938,7 @@ hns3_clear_reset_level(struct hns3_hw *hw, uint64_t *levels) + if (merge_cnt != hw->reset.stats.merge_cnt) + hns3_warn(hw, + "No need to do low-level reset after %s reset. 
" +- "merge cnt: %" PRIx64 " total merge cnt: %" PRIx64, ++ "merge cnt: %" PRIu64 " total merge cnt: %" PRIu64, + reset_string[hw->reset.level], + hw->reset.stats.merge_cnt - merge_cnt, + hw->reset.stats.merge_cnt); +@@ -1950,7 +1947,7 @@ hns3_clear_reset_level(struct hns3_hw *hw, uint64_t *levels) + static bool + hns3_reset_err_handle(struct hns3_adapter *hns) + { +-#define MAX_RESET_FAIL_CNT 5 ++#define MAX_RESET_FAIL_CNT 30 + + struct hns3_hw *hw = &hns->hw; + +@@ -1961,7 +1958,7 @@ hns3_reset_err_handle(struct hns3_adapter *hns) + hw->reset.attempts = 0; + hw->reset.stats.fail_cnt++; + hns3_warn(hw, "%s reset fail because new Reset is pending " +- "attempts:%" PRIx64, ++ "attempts:%" PRIu64, + reset_string[hw->reset.level], + hw->reset.stats.fail_cnt); + hw->reset.level = HNS3_NONE_RESET; +@@ -1988,10 +1985,10 @@ hns3_reset_err_handle(struct hns3_adapter *hns) + reset_fail: + hw->reset.attempts = 0; + hw->reset.stats.fail_cnt++; +- hns3_warn(hw, "%s reset fail fail_cnt:%" PRIx64 " success_cnt:%" PRIx64 +- " global_cnt:%" PRIx64 " imp_cnt:%" PRIx64 +- " request_cnt:%" PRIx64 " exec_cnt:%" PRIx64 +- " merge_cnt:%" PRIx64 "adapter_state:%d", ++ hns3_warn(hw, "%s reset fail fail_cnt:%" PRIu64 " success_cnt:%" PRIu64 ++ " global_cnt:%" PRIu64 " imp_cnt:%" PRIu64 ++ " request_cnt:%" PRIu64 " exec_cnt:%" PRIu64 ++ " merge_cnt:%" PRIu64 "adapter_state:%d", + reset_string[hw->reset.level], hw->reset.stats.fail_cnt, + hw->reset.stats.success_cnt, hw->reset.stats.global_cnt, + hw->reset.stats.imp_cnt, hw->reset.stats.request_cnt, +@@ -2014,7 +2011,7 @@ hns3_reset_pre(struct hns3_adapter *hns) + rte_atomic16_set(&hns->hw.reset.resetting, 1); + hw->reset.stage = RESET_STAGE_DOWN; + ret = hw->reset.ops->stop_service(hns); +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + if (ret) { + hns3_warn(hw, "Reset step1 down fail=%d time=%ld.%.6ld", + ret, tv.tv_sec, tv.tv_usec); +@@ -2026,7 +2023,7 @@ hns3_reset_pre(struct hns3_adapter *hns) + } + if (hw->reset.stage == RESET_STAGE_PREWAIT) { + ret = hw->reset.ops->prepare_reset(hns); +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + if (ret) { + hns3_warn(hw, + "Reset step2 prepare wait fail=%d time=%ld.%.6ld", +@@ -2044,7 +2041,7 @@ hns3_reset_pre(struct hns3_adapter *hns) + static int + hns3_reset_post(struct hns3_adapter *hns) + { +-#define TIMEOUT_RETRIES_CNT 5 ++#define TIMEOUT_RETRIES_CNT 30 + struct hns3_hw *hw = &hns->hw; + struct timeval tv_delta; + struct timeval tv; +@@ -2064,7 +2061,7 @@ hns3_reset_post(struct hns3_adapter *hns) + } + ret = hw->reset.ops->reinit_dev(hns); + rte_spinlock_unlock(&hw->lock); +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + if (ret) { + hns3_warn(hw, "Reset step5 devinit fail=%d retries=%d", + ret, hw->reset.retries); +@@ -2082,7 +2079,7 @@ hns3_reset_post(struct hns3_adapter *hns) + rte_spinlock_lock(&hw->lock); + ret = hw->reset.ops->restore_conf(hns); + rte_spinlock_unlock(&hw->lock); +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + if (ret) { + hns3_warn(hw, + "Reset step6 restore fail=%d retries=%d", +@@ -2105,22 +2102,21 @@ hns3_reset_post(struct hns3_adapter *hns) + rte_spinlock_lock(&hw->lock); + hw->reset.ops->start_service(hns); + rte_spinlock_unlock(&hw->lock); +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + timersub(&tv, &hw->reset.start_time, &tv_delta); +- hns3_warn(hw, "%s reset done fail_cnt:%" PRIx64 +- " success_cnt:%" PRIx64 " global_cnt:%" PRIx64 +- " imp_cnt:%" PRIx64 " request_cnt:%" PRIx64 +- " exec_cnt:%" PRIx64 " merge_cnt:%" PRIx64, ++ 
hns3_warn(hw, "%s reset done fail_cnt:%" PRIu64 ++ " success_cnt:%" PRIu64 " global_cnt:%" PRIu64 ++ " imp_cnt:%" PRIu64 " request_cnt:%" PRIu64 ++ " exec_cnt:%" PRIu64 " merge_cnt:%" PRIu64, + reset_string[hw->reset.level], + hw->reset.stats.fail_cnt, hw->reset.stats.success_cnt, + hw->reset.stats.global_cnt, hw->reset.stats.imp_cnt, + hw->reset.stats.request_cnt, hw->reset.stats.exec_cnt, + hw->reset.stats.merge_cnt); + hns3_warn(hw, +- "%s reset done delta %ld ms time=%ld.%.6ld", ++ "%s reset done delta %" PRIu64 " ms time=%ld.%.6ld", + reset_string[hw->reset.level], +- tv_delta.tv_sec * MSEC_PER_SEC + +- tv_delta.tv_usec / USEC_PER_MSEC, ++ hns3_clock_calctime_ms(&tv_delta), + tv.tv_sec, tv.tv_usec); + hw->reset.level = HNS3_NONE_RESET; + } +@@ -2160,7 +2156,7 @@ hns3_reset_process(struct hns3_adapter *hns, enum hns3_reset_level new_level) + if (hw->reset.level == HNS3_NONE_RESET) { + hw->reset.level = new_level; + hw->reset.stats.exec_cnt++; +- gettimeofday(&hw->reset.start_time, NULL); ++ hns3_clock_gettime(&hw->reset.start_time); + hns3_warn(hw, "Start %s reset time=%ld.%.6ld", + reset_string[hw->reset.level], + hw->reset.start_time.tv_sec, +@@ -2168,7 +2164,7 @@ hns3_reset_process(struct hns3_adapter *hns, enum hns3_reset_level new_level) + } + + if (is_reset_pending(hns)) { +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + hns3_warn(hw, + "%s reset is aborted by high level time=%ld.%.6ld", + reset_string[hw->reset.level], tv.tv_sec, tv.tv_usec); +@@ -2186,7 +2182,7 @@ hns3_reset_process(struct hns3_adapter *hns, enum hns3_reset_level new_level) + ret = hns3_reset_req_hw_reset(hns); + if (ret == -EAGAIN) + return ret; +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + hns3_warn(hw, + "Reset step3 request IMP reset success time=%ld.%.6ld", + tv.tv_sec, tv.tv_usec); +@@ -2197,7 +2193,7 @@ hns3_reset_process(struct hns3_adapter *hns, enum hns3_reset_level new_level) + ret = hw->reset.ops->wait_hardware_ready(hns); + if (ret) + goto retry; +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + hns3_warn(hw, "Reset step4 reset wait success time=%ld.%.6ld", + tv.tv_sec, tv.tv_usec); + hw->reset.stage = RESET_STAGE_DEV_INIT; +@@ -2225,12 +2221,11 @@ hns3_reset_process(struct hns3_adapter *hns, enum hns3_reset_level new_level) + rte_spinlock_unlock(&hw->lock); + rte_atomic16_clear(&hns->hw.reset.resetting); + hw->reset.stage = RESET_STAGE_NONE; +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + timersub(&tv, &hw->reset.start_time, &tv_delta); +- hns3_warn(hw, "%s reset fail delta %ld ms time=%ld.%.6ld", ++ hns3_warn(hw, "%s reset fail delta %" PRIu64 " ms time=%ld.%.6ld", + reset_string[hw->reset.level], +- tv_delta.tv_sec * MSEC_PER_SEC + +- tv_delta.tv_usec / USEC_PER_MSEC, ++ hns3_clock_calctime_ms(&tv_delta), + tv.tv_sec, tv.tv_usec); + hw->reset.level = HNS3_NONE_RESET; + } +@@ -2262,7 +2257,7 @@ hns3_reset_abort(struct hns3_adapter *hns) + rte_eal_alarm_cancel(hns3_wait_callback, hw->reset.wait_data); + + if (hw->reset.level != HNS3_NONE_RESET) { +- gettimeofday(&tv, NULL); ++ hns3_clock_gettime(&tv); + hns3_err(hw, "Failed to terminate reset: %s time=%ld.%.6ld", + reset_string[hw->reset.level], tv.tv_sec, tv.tv_usec); + } +diff --git a/dpdk/drivers/net/hns3/hns3_intr.h b/dpdk/drivers/net/hns3/hns3_intr.h +index 19de1aa2d8..17bd0519c4 100644 +--- a/dpdk/drivers/net/hns3/hns3_intr.h ++++ b/dpdk/drivers/net/hns3/hns3_intr.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. 
++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #ifndef _HNS3_INTR_H_ +@@ -74,6 +74,8 @@ + #define HNS3_NCSI_ERR_INT_EN 0x3 + + #define HNS3_TM_SCH_ECC_ERR_INT_EN 0x3 ++#define HNS3_TM_QCN_ERR_INT_TYPE 0x29 ++#define HNS3_TM_QCN_FIFO_INT_EN 0xFFFF00 + #define HNS3_TM_QCN_MEM_ERR_INT_EN 0xFFFFFF + + #define HNS3_RESET_PROCESS_MS 200 +diff --git a/dpdk/drivers/net/hns3/hns3_logs.h b/dpdk/drivers/net/hns3/hns3_logs.h +index f3fc7b51d6..072a53bd69 100644 +--- a/dpdk/drivers/net/hns3/hns3_logs.h ++++ b/dpdk/drivers/net/hns3/hns3_logs.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #ifndef _HNS3_LOGS_H_ +diff --git a/dpdk/drivers/net/hns3/hns3_mbx.c b/dpdk/drivers/net/hns3/hns3_mbx.c +index d2a5db8aab..1c95eb219d 100644 +--- a/dpdk/drivers/net/hns3/hns3_mbx.c ++++ b/dpdk/drivers/net/hns3/hns3_mbx.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #include +@@ -41,37 +41,32 @@ hns3_resp_to_errno(uint16_t resp_code) + } + + static void +-hns3_poll_all_sync_msg(void) ++hns3_mbx_proc_timeout(struct hns3_hw *hw, uint16_t code, uint16_t subcode) + { +- struct rte_eth_dev *eth_dev; +- struct hns3_adapter *adapter; +- const char *name; +- uint16_t port_id; +- +- RTE_ETH_FOREACH_DEV(port_id) { +- eth_dev = &rte_eth_devices[port_id]; +- name = eth_dev->device->driver->name; +- if (strcmp(name, "net_hns3") && strcmp(name, "net_hns3_vf")) +- continue; +- adapter = eth_dev->data->dev_private; +- if (!adapter || adapter->hw.adapter_state == HNS3_NIC_CLOSED) +- continue; +- /* Synchronous msg, the mbx_resp.req_msg_data is non-zero */ +- if (adapter->hw.mbx_resp.req_msg_data) +- hns3_dev_handle_mbx_msg(&adapter->hw); ++ if (hw->mbx_resp.matching_scheme == ++ HNS3_MBX_RESP_MATCHING_SCHEME_OF_ORIGINAL) { ++ hw->mbx_resp.lost++; ++ hns3_err(hw, ++ "VF could not get mbx(%u,%u) head(%u) tail(%u) " ++ "lost(%u) from PF", ++ code, subcode, hw->mbx_resp.head, hw->mbx_resp.tail, ++ hw->mbx_resp.lost); ++ return; + } ++ ++ hns3_err(hw, "VF could not get mbx(%u,%u) from PF", code, subcode); + } + + static int +-hns3_get_mbx_resp(struct hns3_hw *hw, uint16_t code0, uint16_t code1, ++hns3_get_mbx_resp(struct hns3_hw *hw, uint16_t code, uint16_t subcode, + uint8_t *resp_data, uint16_t resp_len) + { +-#define HNS3_MAX_RETRY_MS 500 ++#define HNS3_MAX_RETRY_US 500000 ++#define HNS3_WAIT_RESP_US 100 + struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + struct hns3_mbx_resp_status *mbx_resp; +- bool in_irq = false; +- uint64_t now; +- uint64_t end; ++ uint32_t wait_time = 0; ++ bool received; + + if (resp_len > HNS3_MBX_MAX_RESP_DATA_SIZE) { + hns3_err(hw, "VF mbx response len(=%u) exceeds maximum(=%d)", +@@ -79,43 +74,37 @@ hns3_get_mbx_resp(struct hns3_hw *hw, uint16_t code0, uint16_t code1, + return -EINVAL; + } + +- now = get_timeofday_ms(); +- end = now + HNS3_MAX_RETRY_MS; +- while ((hw->mbx_resp.head != hw->mbx_resp.tail + hw->mbx_resp.lost) && +- (now < end)) { ++ while (wait_time < HNS3_MAX_RETRY_US) { + if (rte_atomic16_read(&hw->reset.disable_cmd)) { +- hns3_err(hw, "Don't wait for mbx respone because of " ++ hns3_err(hw, "Don't wait for mbx response because of " + "disable_cmd"); + return -EBUSY; + } + + if (is_reset_pending(hns)) { + hw->mbx_resp.req_msg_data = 0; +- hns3_err(hw, "Don't wait for mbx respone because of " ++ hns3_err(hw, "Don't wait for mbx response because of " 
+ "reset pending"); + return -EIO; + } + +- /* +- * The mbox response is running on the interrupt thread. +- * Sending mbox in the interrupt thread cannot wait for the +- * response, so polling the mbox response on the irq thread. +- */ +- if (pthread_equal(hw->irq_thread_id, pthread_self())) { +- in_irq = true; +- hns3_poll_all_sync_msg(); +- } else { +- rte_delay_ms(HNS3_POLL_RESPONE_MS); +- } +- now = get_timeofday_ms(); ++ hns3_dev_handle_mbx_msg(hw); ++ rte_delay_us(HNS3_WAIT_RESP_US); ++ ++ if (hw->mbx_resp.matching_scheme == ++ HNS3_MBX_RESP_MATCHING_SCHEME_OF_ORIGINAL) ++ received = (hw->mbx_resp.head == ++ hw->mbx_resp.tail + hw->mbx_resp.lost); ++ else ++ received = hw->mbx_resp.received_match_resp; ++ if (received) ++ break; ++ ++ wait_time += HNS3_WAIT_RESP_US; + } + hw->mbx_resp.req_msg_data = 0; +- if (now >= end) { +- hw->mbx_resp.lost++; +- hns3_err(hw, +- "VF could not get mbx(%u,%u) head(%u) tail(%u) lost(%u) from PF in_irq:%d", +- code0, code1, hw->mbx_resp.head, hw->mbx_resp.tail, +- hw->mbx_resp.lost, in_irq); ++ if (wait_time >= HNS3_MAX_RETRY_US) { ++ hns3_mbx_proc_timeout(hw, code, subcode); + return -ETIME; + } + rte_io_rmb(); +@@ -130,6 +119,29 @@ hns3_get_mbx_resp(struct hns3_hw *hw, uint16_t code0, uint16_t code1, + return 0; + } + ++static void ++hns3_mbx_prepare_resp(struct hns3_hw *hw, uint16_t code, uint16_t subcode) ++{ ++ /* ++ * Init both matching scheme fields because we may not know the exact ++ * scheme will be used when in the initial phase. ++ * ++ * Also, there are OK to init both matching scheme fields even though ++ * we get the exact scheme which is used. ++ */ ++ hw->mbx_resp.req_msg_data = (uint32_t)code << 16 | subcode; ++ hw->mbx_resp.head++; ++ ++ /* Update match_id and ensure the value of match_id is not zero */ ++ hw->mbx_resp.match_id++; ++ if (hw->mbx_resp.match_id == 0) ++ hw->mbx_resp.match_id = 1; ++ hw->mbx_resp.received_match_resp = false; ++ ++ hw->mbx_resp.resp_status = 0; ++ memset(hw->mbx_resp.additional_info, 0, HNS3_MBX_MAX_RESP_DATA_SIZE); ++} ++ + int + hns3_send_mbx_msg(struct hns3_hw *hw, uint16_t code, uint16_t subcode, + const uint8_t *msg_data, uint8_t msg_len, bool need_resp, +@@ -167,10 +179,11 @@ hns3_send_mbx_msg(struct hns3_hw *hw, uint16_t code, uint16_t subcode, + if (need_resp) { + req->mbx_need_resp |= HNS3_MBX_NEED_RESP_BIT; + rte_spinlock_lock(&hw->mbx_resp.lock); +- hw->mbx_resp.req_msg_data = (uint32_t)code << 16 | subcode; +- hw->mbx_resp.head++; ++ hns3_mbx_prepare_resp(hw, code, subcode); ++ req->match_id = hw->mbx_resp.match_id; + ret = hns3_cmd_send(hw, &desc, 1); + if (ret) { ++ hw->mbx_resp.head--; + rte_spinlock_unlock(&hw->mbx_resp.lock); + hns3_err(hw, "VF failed(=%d) to send mbx message to PF", + ret); +@@ -279,6 +292,46 @@ hns3_update_resp_position(struct hns3_hw *hw, uint32_t resp_msg) + resp->tail = tail; + } + ++static void ++hns3_handle_mbx_response(struct hns3_hw *hw, struct hns3_mbx_pf_to_vf_cmd *req) ++{ ++ struct hns3_mbx_resp_status *resp = &hw->mbx_resp; ++ uint32_t msg_data; ++ ++ if (req->match_id != 0) { ++ /* ++ * If match_id is not zero, it means PF support copy request's ++ * match_id to its response. So VF could use the match_id ++ * to match the request. 
++ */ ++ if (resp->matching_scheme != ++ HNS3_MBX_RESP_MATCHING_SCHEME_OF_MATCH_ID) { ++ resp->matching_scheme = ++ HNS3_MBX_RESP_MATCHING_SCHEME_OF_MATCH_ID; ++ hns3_info(hw, "detect mailbox support match id!"); ++ } ++ if (req->match_id == resp->match_id) { ++ resp->resp_status = hns3_resp_to_errno(req->msg[3]); ++ memcpy(resp->additional_info, &req->msg[4], ++ HNS3_MBX_MAX_RESP_DATA_SIZE); ++ rte_io_wmb(); ++ resp->received_match_resp = true; ++ } ++ return; ++ } ++ ++ /* ++ * If the below instructions can be executed, it means PF does not ++ * support copy request's match_id to its response. So VF follows the ++ * original scheme to process. ++ */ ++ resp->resp_status = hns3_resp_to_errno(req->msg[3]); ++ memcpy(resp->additional_info, &req->msg[4], ++ HNS3_MBX_MAX_RESP_DATA_SIZE); ++ msg_data = (uint32_t)req->msg[1] << 16 | req->msg[2]; ++ hns3_update_resp_position(hw, msg_data); ++} ++ + static void + hns3_link_fail_parse(struct hns3_hw *hw, uint8_t link_fail_code) + { +@@ -302,7 +355,7 @@ hns3_link_fail_parse(struct hns3_hw *hw, uint8_t link_fail_code) + + static void + hns3_handle_link_change_event(struct hns3_hw *hw, +- struct hns3_mbx_pf_to_vf_cmd *req) ++ struct hns3_mbx_vf_to_pf_cmd *req) + { + #define LINK_STATUS_OFFSET 1 + #define LINK_FAIL_CODE_OFFSET 2 +@@ -352,23 +405,89 @@ hns3_handle_promisc_info(struct hns3_hw *hw, uint16_t promisc_en) + } + } + ++static void ++hns3_handle_mbx_msg_out_intr(struct hns3_hw *hw) ++{ ++ struct hns3_cmq_ring *crq = &hw->cmq.crq; ++ struct hns3_mbx_pf_to_vf_cmd *req; ++ struct hns3_cmd_desc *desc; ++ uint32_t tail, next_to_use; ++ uint8_t opcode; ++ uint16_t flag; ++ ++ tail = hns3_read_dev(hw, HNS3_CMDQ_RX_TAIL_REG); ++ next_to_use = crq->next_to_use; ++ while (next_to_use != tail) { ++ desc = &crq->desc[next_to_use]; ++ req = (struct hns3_mbx_pf_to_vf_cmd *)desc->data; ++ opcode = req->msg[0] & 0xff; ++ ++ flag = rte_le_to_cpu_16(crq->desc[next_to_use].flag); ++ if (!hns3_get_bit(flag, HNS3_CMDQ_RX_OUTVLD_B)) ++ goto scan_next; ++ ++ if (crq->desc[next_to_use].opcode == 0) ++ goto scan_next; ++ ++ if (opcode == HNS3_MBX_PF_VF_RESP) { ++ hns3_handle_mbx_response(hw, req); ++ /* ++ * Clear opcode to inform intr thread don't process ++ * again. ++ */ ++ crq->desc[crq->next_to_use].opcode = 0; ++ } ++ ++scan_next: ++ next_to_use = (next_to_use + 1) % hw->cmq.crq.desc_num; ++ } ++ ++ crq->next_to_use = next_to_use; ++ hns3_write_dev(hw, HNS3_CMDQ_RX_HEAD_REG, crq->next_to_use); ++} ++ + void + hns3_dev_handle_mbx_msg(struct hns3_hw *hw) + { +- struct hns3_mbx_resp_status *resp = &hw->mbx_resp; ++ struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + struct hns3_cmq_ring *crq = &hw->cmq.crq; + struct hns3_mbx_pf_to_vf_cmd *req; + struct hns3_cmd_desc *desc; +- uint32_t msg_data; + uint16_t *msg_q; ++ bool handle_out; + uint8_t opcode; + uint16_t flag; +- uint8_t *temp; +- int i; ++ rte_spinlock_lock(&hw->cmq.crq.lock); ++ ++ handle_out = (rte_eal_process_type() != RTE_PROC_PRIMARY || ++ !rte_thread_is_intr()) && hns->is_vf; ++ if (handle_out) { ++ /* ++ * Currently, any threads in the primary and secondary processes ++ * could send mailbox sync request, so it will need to process ++ * the crq message (which is the HNS3_MBX_PF_VF_RESP) in there ++ * own thread context. It may also process other messages ++ * because it uses the policy of processing all pending messages ++ * at once. 
++ * But some messages such as HNS3_MBX_PUSH_LINK_STATUS could ++ * only process within the intr thread in primary process, ++ * otherwise it may lead to report lsc event in secondary ++ * process. ++ * So the threads other than intr thread in primary process ++ * could only process HNS3_MBX_PF_VF_RESP message, if the ++ * message processed, its opcode will rewrite with zero, then ++ * the intr thread in primary process will not process again. ++ */ ++ hns3_handle_mbx_msg_out_intr(hw); ++ rte_spinlock_unlock(&hw->cmq.crq.lock); ++ return; ++ } + + while (!hns3_cmd_crq_empty(hw)) { +- if (rte_atomic16_read(&hw->reset.disable_cmd)) ++ if (rte_atomic16_read(&hw->reset.disable_cmd)) { ++ rte_spinlock_unlock(&hw->cmq.crq.lock); + return; ++ } + + desc = &crq->desc[crq->next_to_use]; + req = (struct hns3_mbx_pf_to_vf_cmd *)desc->data; +@@ -386,17 +505,17 @@ hns3_dev_handle_mbx_msg(struct hns3_hw *hw) + continue; + } + ++ handle_out = hns->is_vf && desc->opcode == 0; ++ if (handle_out) { ++ /* Message already processed by other thread */ ++ crq->desc[crq->next_to_use].flag = 0; ++ hns3_mbx_ring_ptr_move_crq(crq); ++ continue; ++ } ++ + switch (opcode) { + case HNS3_MBX_PF_VF_RESP: +- resp->resp_status = hns3_resp_to_errno(req->msg[3]); +- +- temp = (uint8_t *)&req->msg[4]; +- for (i = 0; i < HNS3_MBX_MAX_RESP_DATA_SIZE; i++) { +- resp->additional_info[i] = *temp; +- temp++; +- } +- msg_data = (uint32_t)req->msg[1] << 16 | req->msg[2]; +- hns3_update_resp_position(hw, msg_data); ++ hns3_handle_mbx_response(hw, req); + break; + case HNS3_MBX_LINK_STAT_CHANGE: + case HNS3_MBX_ASSERTING_RESET: +@@ -408,7 +527,14 @@ hns3_dev_handle_mbx_msg(struct hns3_hw *hw) + hns3_mbx_handler(hw); + break; + case HNS3_MBX_PUSH_LINK_STATUS: +- hns3_handle_link_change_event(hw, req); ++ /* ++ * This message is reported by the firmware and is ++ * reported in 'struct hns3_mbx_vf_to_pf_cmd' format. ++ * Therefore, we should cast the req variable to ++ * 'struct hns3_mbx_vf_to_pf_cmd' and then process it. ++ */ ++ hns3_handle_link_change_event(hw, ++ (struct hns3_mbx_vf_to_pf_cmd *)req); + break; + case HNS3_MBX_PUSH_VLAN_INFO: + /* +@@ -427,9 +553,8 @@ hns3_dev_handle_mbx_msg(struct hns3_hw *hw) + hns3_handle_promisc_info(hw, req->msg[1]); + break; + default: +- hns3_err(hw, +- "VF received unsupported(%u) mbx msg from PF", +- req->msg[0]); ++ hns3_err(hw, "received unsupported(%u) mbx msg", ++ opcode); + break; + } + +@@ -439,4 +564,6 @@ hns3_dev_handle_mbx_msg(struct hns3_hw *hw) + + /* Write back CMDQ_RQ header pointer, IMP need this pointer */ + hns3_write_dev(hw, HNS3_CMDQ_RX_HEAD_REG, crq->next_to_use); ++ ++ rte_spinlock_unlock(&hw->cmq.crq.lock); + } +diff --git a/dpdk/drivers/net/hns3/hns3_mbx.h b/dpdk/drivers/net/hns3/hns3_mbx.h +index 7f7ade13fa..3f2e10c513 100644 +--- a/dpdk/drivers/net/hns3/hns3_mbx.h ++++ b/dpdk/drivers/net/hns3/hns3_mbx.h +@@ -1,12 +1,10 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. 
+ */ + + #ifndef _HNS3_MBX_H_ + #define _HNS3_MBX_H_ + +-#define HNS3_MBX_VF_MSG_DATA_NUM 16 +- + enum HNS3_MBX_OPCODE { + HNS3_MBX_RESET = 0x01, /* (VF -> PF) assert reset */ + HNS3_MBX_ASSERTING_RESET, /* (PF -> VF) PF is asserting reset */ +@@ -24,7 +22,7 @@ enum HNS3_MBX_OPCODE { + HNS3_MBX_GET_RETA, /* (VF -> PF) get RETA */ + HNS3_MBX_GET_RSS_KEY, /* (VF -> PF) get RSS key */ + HNS3_MBX_GET_MAC_ADDR, /* (VF -> PF) get MAC addr */ +- HNS3_MBX_PF_VF_RESP, /* (PF -> VF) generate respone to VF */ ++ HNS3_MBX_PF_VF_RESP, /* (PF -> VF) generate response to VF */ + HNS3_MBX_GET_BDNUM, /* (VF -> PF) get BD num */ + HNS3_MBX_GET_BUFSIZE, /* (VF -> PF) get buffer size */ + HNS3_MBX_GET_STREAMID, /* (VF -> PF) get stream id */ +@@ -80,15 +78,27 @@ enum hns3_mbx_link_fail_subcode { + + #define HNS3_MBX_MAX_MSG_SIZE 16 + #define HNS3_MBX_MAX_RESP_DATA_SIZE 8 +-#define HNS3_MBX_RING_MAP_BASIC_MSG_NUM 3 +-#define HNS3_MBX_RING_NODE_VARIABLE_NUM 3 ++ ++enum { ++ HNS3_MBX_RESP_MATCHING_SCHEME_OF_ORIGINAL = 0, ++ HNS3_MBX_RESP_MATCHING_SCHEME_OF_MATCH_ID ++}; + + struct hns3_mbx_resp_status { + rte_spinlock_t lock; /* protects against contending sync cmd resp */ ++ ++ uint8_t matching_scheme; ++ ++ /* The following fields used in the matching scheme for original */ + uint32_t req_msg_data; + uint32_t head; + uint32_t tail; + uint32_t lost; ++ ++ /* The following fields used in the matching scheme for match_id */ ++ uint16_t match_id; ++ bool received_match_resp; ++ + int resp_status; + uint8_t additional_info[HNS3_MBX_MAX_RESP_DATA_SIZE]; + }; +@@ -106,7 +116,8 @@ struct hns3_mbx_vf_to_pf_cmd { + uint8_t mbx_need_resp; + uint8_t rsv1; + uint8_t msg_len; +- uint8_t rsv2[3]; ++ uint8_t rsv2; ++ uint16_t match_id; + uint8_t msg[HNS3_MBX_MAX_MSG_SIZE]; + }; + +@@ -114,7 +125,8 @@ struct hns3_mbx_pf_to_vf_cmd { + uint8_t dest_vfid; + uint8_t rsv[3]; + uint8_t msg_len; +- uint8_t rsv1[3]; ++ uint8_t rsv1; ++ uint16_t match_id; + uint16_t msg[8]; + }; + +@@ -131,12 +143,6 @@ struct hns3_vf_bind_vector_msg { + struct hns3_ring_chain_param param[HNS3_MBX_MAX_RING_CHAIN_PARAM_NUM]; + }; + +-struct hns3_vf_rst_cmd { +- uint8_t dest_vfid; +- uint8_t vf_rst; +- uint8_t rsv[22]; +-}; +- + struct hns3_pf_rst_done_cmd { + uint8_t pf_rst_done; + uint8_t rsv[23]; +diff --git a/dpdk/drivers/net/hns3/hns3_mp.c b/dpdk/drivers/net/hns3/hns3_mp.c +index ed2567a8ff..8d7fb93065 100644 +--- a/dpdk/drivers/net/hns3/hns3_mp.c ++++ b/dpdk/drivers/net/hns3/hns3_mp.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #include +@@ -12,7 +12,8 @@ + #include "hns3_rxtx.h" + #include "hns3_mp.h" + +-static bool hns3_inited; ++/* local data for primary or secondary process. */ ++struct hns3_process_local_data process_data; + + /* + * Initialize IPC message. 
+@@ -86,8 +87,8 @@ mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer) + case HNS3_MP_REQ_START_RXTX: + PMD_INIT_LOG(INFO, "port %u starting datapath", + dev->data->port_id); +- rte_mb(); + hns3_set_rxtx_function(dev); ++ rte_mb(); + mp_init_msg(dev, &mp_res, param->type); + res->result = 0; + ret = rte_mp_reply(&mp_res, peer); +@@ -130,9 +131,11 @@ mp_req_on_rxtx(struct rte_eth_dev *dev, enum hns3_mp_req_type type) + int ret; + int i; + +- if (!hw->secondary_cnt) ++ if (rte_eal_process_type() == RTE_PROC_SECONDARY || ++ __atomic_load_n(&hw->secondary_cnt, __ATOMIC_RELAXED) == 0) + return; + if (type != HNS3_MP_REQ_START_RXTX && type != HNS3_MP_REQ_STOP_RXTX) { ++ + hns3_err(hw, "port %u unknown request (req_type %d)", + dev->data->port_id, type); + return; +@@ -196,25 +199,27 @@ int hns3_mp_init_primary(void) + { + int ret; + +- if (!hns3_inited) { +- /* primary is allowed to not support IPC */ +- ret = rte_mp_action_register(HNS3_MP_NAME, mp_primary_handle); +- if (ret && rte_errno != ENOTSUP) +- return ret; ++ if (process_data.init_done) ++ return 0; + +- hns3_inited = true; +- } ++ /* primary is allowed to not support IPC */ ++ ret = rte_mp_action_register(HNS3_MP_NAME, mp_primary_handle); ++ if (ret && rte_errno != ENOTSUP) ++ return ret; ++ ++ process_data.init_done = true; + + return 0; + } + +-/* +- * Un-initialize by primary process. +- */ +-void hns3_mp_uninit_primary(void) ++void hns3_mp_uninit(void) + { +- if (hns3_inited) ++ process_data.eth_dev_cnt--; ++ ++ if (process_data.eth_dev_cnt == 0) { + rte_mp_action_unregister(HNS3_MP_NAME); ++ process_data.init_done = false; ++ } + } + + /* +@@ -224,13 +229,14 @@ int hns3_mp_init_secondary(void) + { + int ret; + +- if (!hns3_inited) { +- ret = rte_mp_action_register(HNS3_MP_NAME, mp_secondary_handle); +- if (ret) +- return ret; ++ if (process_data.init_done) ++ return 0; + +- hns3_inited = true; +- } ++ ret = rte_mp_action_register(HNS3_MP_NAME, mp_secondary_handle); ++ if (ret) ++ return ret; ++ ++ process_data.init_done = true; + + return 0; + } +diff --git a/dpdk/drivers/net/hns3/hns3_mp.h b/dpdk/drivers/net/hns3/hns3_mp.h +index 036546ae11..94c0c957e0 100644 +--- a/dpdk/drivers/net/hns3/hns3_mp.h ++++ b/dpdk/drivers/net/hns3/hns3_mp.h +@@ -1,14 +1,21 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #ifndef _HNS3_MP_H_ + #define _HNS3_MP_H_ + ++/* Local data for primary or secondary process. */ ++struct hns3_process_local_data { ++ bool init_done; /* Process action register completed flag. */ ++ int eth_dev_cnt; /* Ethdev count under the current process. */ ++}; ++extern struct hns3_process_local_data process_data; ++ + void hns3_mp_req_start_rxtx(struct rte_eth_dev *dev); + void hns3_mp_req_stop_rxtx(struct rte_eth_dev *dev); + int hns3_mp_init_primary(void); +-void hns3_mp_uninit_primary(void); ++void hns3_mp_uninit(void); + int hns3_mp_init_secondary(void); + + #endif /* _HNS3_MP_H_ */ +diff --git a/dpdk/drivers/net/hns3/hns3_regs.c b/dpdk/drivers/net/hns3/hns3_regs.c +index 8afe132585..cbc377adee 100644 +--- a/dpdk/drivers/net/hns3/hns3_regs.c ++++ b/dpdk/drivers/net/hns3/hns3_regs.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. 
+ */ + + #include +@@ -322,11 +322,6 @@ hns3_get_regs(struct rte_eth_dev *eth_dev, struct rte_dev_reg_info *regs) + uint32_t *data; + int ret; + +- if (regs == NULL) { +- hns3_err(hw, "the input parameter regs is NULL!"); +- return -EINVAL; +- } +- + ret = hns3_get_regs_length(hw, &length); + if (ret) + return ret; +@@ -342,6 +337,8 @@ hns3_get_regs(struct rte_eth_dev *eth_dev, struct rte_dev_reg_info *regs) + if (regs->length && regs->length != length) + return -ENOTSUP; + ++ regs->version = hw->fw_version; ++ + /* fetching per-PF registers values from PF PCIe register space */ + data += hns3_direct_access_regs(hw, data); + +diff --git a/dpdk/drivers/net/hns3/hns3_regs.h b/dpdk/drivers/net/hns3/hns3_regs.h +index 39fc5d1b18..cfdd208c7b 100644 +--- a/dpdk/drivers/net/hns3/hns3_regs.h ++++ b/dpdk/drivers/net/hns3/hns3_regs.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #ifndef _HNS3_REGS_H_ +diff --git a/dpdk/drivers/net/hns3/hns3_rss.c b/dpdk/drivers/net/hns3/hns3_rss.c +index 7bd7745859..6a4ba26b7e 100644 +--- a/dpdk/drivers/net/hns3/hns3_rss.c ++++ b/dpdk/drivers/net/hns3/hns3_rss.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #include +@@ -9,10 +9,8 @@ + #include "hns3_ethdev.h" + #include "hns3_logs.h" + +-/* +- * The hash key used for rss initialization. +- */ +-static const uint8_t hns3_hash_key[] = { ++/* Default hash keys */ ++const uint8_t hns3_hash_key[] = { + 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, + 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0, + 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4, +@@ -152,10 +150,6 @@ static const struct { + BIT_ULL(HNS3_RSS_FIELD_IPV4_TCP_EN_IP_D) | + BIT_ULL(HNS3_RSS_FIELD_IPV4_TCP_EN_TCP_S) | + BIT_ULL(HNS3_RSS_FIELD_IPV4_TCP_EN_TCP_D) }, +- { ETH_RSS_NONFRAG_IPV4_TCP, BIT_ULL(HNS3_RSS_FIELD_IPV4_TCP_EN_IP_S) | +- BIT_ULL(HNS3_RSS_FIELD_IPV4_TCP_EN_IP_D) | +- BIT_ULL(HNS3_RSS_FIELD_IPV4_TCP_EN_TCP_S) | +- BIT_ULL(HNS3_RSS_FIELD_IPV4_TCP_EN_TCP_D) }, + { ETH_RSS_NONFRAG_IPV4_UDP, BIT_ULL(HNS3_RSS_FIELD_IPV4_UDP_EN_IP_S) | + BIT_ULL(HNS3_RSS_FIELD_IPV4_UDP_EN_IP_D) | + BIT_ULL(HNS3_RSS_FIELD_IPV4_UDP_EN_UDP_S) | +@@ -241,31 +235,6 @@ hns3_set_rss_algo_key(struct hns3_hw *hw, const uint8_t *key) + return 0; + } + +-/* +- * Used to configure the tuple selection for RSS hash input. +- */ +-static int +-hns3_set_rss_input_tuple(struct hns3_hw *hw) +-{ +- struct hns3_rss_conf *rss_config = &hw->rss_info; +- struct hns3_rss_input_tuple_cmd *req; +- struct hns3_cmd_desc desc_tuple; +- int ret; +- +- hns3_cmd_setup_basic_desc(&desc_tuple, HNS3_OPC_RSS_INPUT_TUPLE, false); +- +- req = (struct hns3_rss_input_tuple_cmd *)desc_tuple.data; +- +- req->tuple_field = +- rte_cpu_to_le_64(rss_config->rss_tuple_sets.rss_tuple_fields); +- +- ret = hns3_cmd_send(hw, &desc_tuple, 1); +- if (ret) +- hns3_err(hw, "Configure RSS input tuple mode failed %d", ret); +- +- return ret; +-} +- + /* + * rss_indirection_table command function, opcode:0x0D07. + * Used to configure the indirection table of rss. 
+@@ -339,8 +308,7 @@ hns3_rss_reset_indir_table(struct hns3_hw *hw) + } + + int +-hns3_set_rss_tuple_by_rss_hf(struct hns3_hw *hw, +- struct hns3_rss_tuple_cfg *tuple, uint64_t rss_hf) ++hns3_set_rss_tuple_by_rss_hf(struct hns3_hw *hw, uint64_t rss_hf) + { + struct hns3_rss_input_tuple_cmd *req; + struct hns3_cmd_desc desc; +@@ -385,7 +353,8 @@ hns3_set_rss_tuple_by_rss_hf(struct hns3_hw *hw, + return ret; + } + +- tuple->rss_tuple_fields = rte_le_to_cpu_64(req->tuple_field); ++ /* Update supported flow types when set tuple success */ ++ hw->rss_info.conf.types = rss_hf; + + return 0; + } +@@ -403,55 +372,36 @@ int + hns3_dev_rss_hash_update(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) + { +- struct hns3_adapter *hns = dev->data->dev_private; +- struct hns3_hw *hw = &hns->hw; +- struct hns3_rss_tuple_cfg *tuple = &hw->rss_info.rss_tuple_sets; +- struct hns3_rss_conf *rss_cfg = &hw->rss_info; ++ struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ uint64_t rss_hf_bk = hw->rss_info.conf.types; + uint8_t key_len = rss_conf->rss_key_len; + uint64_t rss_hf = rss_conf->rss_hf; + uint8_t *key = rss_conf->rss_key; + int ret; + +- if (hw->rss_dis_flag) ++ if (key && key_len != HNS3_RSS_KEY_SIZE) { ++ hns3_err(hw, "the hash key len(%u) is invalid, must be %u", ++ key_len, HNS3_RSS_KEY_SIZE); + return -EINVAL; ++ } + + rte_spinlock_lock(&hw->lock); +- ret = hns3_set_rss_tuple_by_rss_hf(hw, tuple, rss_hf); ++ ret = hns3_set_rss_tuple_by_rss_hf(hw, rss_hf); + if (ret) +- goto conf_err; +- +- if (rss_cfg->conf.types && rss_hf == 0) { +- /* Disable RSS, reset indirection table by local variable */ +- ret = hns3_rss_reset_indir_table(hw); +- if (ret) +- goto conf_err; +- } else if (rss_hf && rss_cfg->conf.types == 0) { +- /* Enable RSS, restore indirection table by hw's config */ +- ret = hns3_set_rss_indir_table(hw, rss_cfg->rss_indirection_tbl, +- hw->rss_ind_tbl_size); +- if (ret) +- goto conf_err; +- } +- +- /* Update supported flow types when set tuple success */ +- rss_cfg->conf.types = rss_hf; ++ goto set_tuple_fail; + + if (key) { +- if (key_len != HNS3_RSS_KEY_SIZE) { +- hns3_err(hw, "The hash key len(%u) is invalid", +- key_len); +- ret = -EINVAL; +- goto conf_err; +- } + ret = hns3_set_rss_algo_key(hw, key); + if (ret) +- goto conf_err; ++ goto set_algo_key_fail; + } + rte_spinlock_unlock(&hw->lock); + + return 0; + +-conf_err: ++set_algo_key_fail: ++ (void)hns3_set_rss_tuple_by_rss_hf(hw, rss_hf_bk); ++set_tuple_fail: + rte_spinlock_unlock(&hw->lock); + return ret; + } +@@ -582,33 +532,59 @@ hns3_dev_rss_reta_query(struct rte_eth_dev *dev, + return 0; + } + +-/* +- * Used to configure the tc_size and tc_offset. +- */ ++static void ++hns3_set_rss_tc_mode_entry(struct hns3_hw *hw, uint8_t *tc_valid, ++ uint16_t *tc_size, uint16_t *tc_offset, ++ uint8_t tc_num) ++{ ++ struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); ++ uint16_t rss_size = hw->alloc_rss_size; ++ uint16_t roundup_size; ++ uint16_t i; ++ ++ roundup_size = roundup_pow_of_two(rss_size); ++ roundup_size = ilog2(roundup_size); ++ ++ for (i = 0; i < tc_num; i++) { ++ if (hns->is_vf) { ++ /* ++ * For packets with VLAN priorities destined for the VF, ++ * hardware still assign Rx queue based on the Up-to-TC ++ * mapping PF configured. But VF has only one TC. If ++ * other TC don't enable, it causes that the priority ++ * packets that aren't destined for TC0 aren't received ++ * by RSS hash but is destined for queue 0. 
So driver ++ * has to enable the unused TC by using TC0 queue ++ * mapping configuration. ++ */ ++ tc_valid[i] = (hw->hw_tc_map & BIT(i)) ? ++ !!(hw->hw_tc_map & BIT(i)) : 1; ++ tc_size[i] = roundup_size; ++ tc_offset[i] = (hw->hw_tc_map & BIT(i)) ? ++ rss_size * i : 0; ++ } else { ++ tc_valid[i] = !!(hw->hw_tc_map & BIT(i)); ++ tc_size[i] = tc_valid[i] ? roundup_size : 0; ++ tc_offset[i] = tc_valid[i] ? rss_size * i : 0; ++ } ++ } ++} ++ + static int + hns3_set_rss_tc_mode(struct hns3_hw *hw) + { +- uint16_t rss_size = hw->alloc_rss_size; + struct hns3_rss_tc_mode_cmd *req; + uint16_t tc_offset[HNS3_MAX_TC_NUM]; + uint8_t tc_valid[HNS3_MAX_TC_NUM]; + uint16_t tc_size[HNS3_MAX_TC_NUM]; + struct hns3_cmd_desc desc; +- uint16_t roundup_size; + uint16_t i; + int ret; + +- req = (struct hns3_rss_tc_mode_cmd *)desc.data; +- +- roundup_size = roundup_pow_of_two(rss_size); +- roundup_size = ilog2(roundup_size); +- +- for (i = 0; i < HNS3_MAX_TC_NUM; i++) { +- tc_valid[i] = !!(hw->hw_tc_map & BIT(i)); +- tc_size[i] = roundup_size; +- tc_offset[i] = rss_size * i; +- } ++ hns3_set_rss_tc_mode_entry(hw, tc_valid, tc_size, ++ tc_offset, HNS3_MAX_TC_NUM); + ++ req = (struct hns3_rss_tc_mode_cmd *)desc.data; + hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_RSS_TC_MODE, false); + for (i = 0; i < HNS3_MAX_TC_NUM; i++) { + uint16_t mode = 0; +@@ -672,7 +648,7 @@ hns3_set_default_rss_args(struct hns3_hw *hw) + } + + /* +- * RSS initialization for hns3 pmd driver. ++ * RSS initialization for hns3 PMD. + */ + int + hns3_config_rss(struct hns3_adapter *hns) +@@ -680,7 +656,8 @@ hns3_config_rss(struct hns3_adapter *hns) + struct hns3_hw *hw = &hns->hw; + struct hns3_rss_conf *rss_cfg = &hw->rss_info; + uint8_t *hash_key = rss_cfg->key; +- int ret, ret1; ++ uint64_t rss_hf; ++ int ret; + + enum rte_eth_rx_mq_mode mq_mode = hw->data->dev_conf.rxmode.mq_mode; + +@@ -696,55 +673,34 @@ hns3_config_rss(struct hns3_adapter *hns) + break; + } + +- /* When RSS is off, redirect the packet queue 0 */ +- if (((uint32_t)mq_mode & ETH_MQ_RX_RSS_FLAG) == 0) +- hns3_rss_uninit(hns); +- + /* Configure RSS hash algorithm and hash key offset */ + ret = hns3_set_rss_algo_key(hw, hash_key); + if (ret) + return ret; + +- /* Configure the tuple selection for RSS hash input */ +- ret = hns3_set_rss_input_tuple(hw); ++ ret = hns3_set_rss_indir_table(hw, rss_cfg->rss_indirection_tbl, ++ hw->rss_ind_tbl_size); + if (ret) + return ret; + +- /* +- * When RSS is off, it doesn't need to configure rss redirection table +- * to hardware. +- */ +- if (((uint32_t)mq_mode & ETH_MQ_RX_RSS_FLAG)) { +- ret = hns3_set_rss_indir_table(hw, rss_cfg->rss_indirection_tbl, +- hw->rss_ind_tbl_size); +- if (ret) +- goto rss_tuple_uninit; +- } +- + ret = hns3_set_rss_tc_mode(hw); + if (ret) +- goto rss_indir_table_uninit; +- +- return ret; +- +-rss_indir_table_uninit: +- if (((uint32_t)mq_mode & ETH_MQ_RX_RSS_FLAG)) { +- ret1 = hns3_rss_reset_indir_table(hw); +- if (ret1 != 0) +- return ret; +- } +- +-rss_tuple_uninit: +- hns3_rss_tuple_uninit(hw); ++ return ret; + +- /* Disable RSS */ +- hw->rss_info.conf.types = 0; ++ /* ++ * When muli-queue RSS mode flag is not set or unsupported tuples are ++ * set, disable all tuples. ++ */ ++ rss_hf = hw->rss_info.conf.types; ++ if (!((uint32_t)mq_mode & ETH_MQ_RX_RSS_FLAG) || ++ !(rss_hf & HNS3_ETH_RSS_SUPPORT)) ++ rss_hf = 0; + +- return ret; ++ return hns3_set_rss_tuple_by_rss_hf(hw, rss_hf); + } + + /* +- * RSS uninitialization for hns3 pmd driver. ++ * RSS uninitialization for hns3 PMD. 
+ */ + void + hns3_rss_uninit(struct hns3_adapter *hns) +diff --git a/dpdk/drivers/net/hns3/hns3_rss.h b/dpdk/drivers/net/hns3/hns3_rss.h +index 798c5c62df..7493292a1a 100644 +--- a/dpdk/drivers/net/hns3/hns3_rss.h ++++ b/dpdk/drivers/net/hns3/hns3_rss.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #ifndef _HNS3_RSS_H_ +@@ -41,9 +41,8 @@ struct hns3_rss_tuple_cfg { + struct hns3_rss_conf { + /* RSS parameters :algorithm, flow_types, key, queue */ + struct rte_flow_action_rss conf; +- uint8_t hash_algo; /* hash function type definited by hardware */ ++ uint8_t hash_algo; /* hash function type defined by hardware */ + uint8_t key[HNS3_RSS_KEY_SIZE]; /* Hash key */ +- struct hns3_rss_tuple_cfg rss_tuple_sets; + uint16_t rss_indirection_tbl[HNS3_RSS_IND_TBL_SIZE_MAX]; + uint16_t queue[HNS3_RSS_QUEUES_BUFFER_NUM]; /* Queues indices to use */ + bool valid; /* check if RSS rule is valid */ +@@ -89,6 +88,8 @@ static inline uint32_t roundup_pow_of_two(uint32_t x) + return 1UL << fls(x - 1); + } + ++extern const uint8_t hns3_hash_key[]; ++ + struct hns3_adapter; + + int hns3_dev_rss_hash_update(struct rte_eth_dev *dev, +@@ -107,9 +108,7 @@ int hns3_set_rss_indir_table(struct hns3_hw *hw, uint16_t *indir, + int hns3_rss_reset_indir_table(struct hns3_hw *hw); + int hns3_config_rss(struct hns3_adapter *hns); + void hns3_rss_uninit(struct hns3_adapter *hns); +-int hns3_set_rss_tuple_by_rss_hf(struct hns3_hw *hw, +- struct hns3_rss_tuple_cfg *tuple, +- uint64_t rss_hf); ++int hns3_set_rss_tuple_by_rss_hf(struct hns3_hw *hw, uint64_t rss_hf); + int hns3_set_rss_algo_key(struct hns3_hw *hw, const uint8_t *key); + int hns3_restore_rss_filter(struct rte_eth_dev *dev); + +diff --git a/dpdk/drivers/net/hns3/hns3_rxtx.c b/dpdk/drivers/net/hns3/hns3_rxtx.c +index 896567c791..a1f301c344 100644 +--- a/dpdk/drivers/net/hns3/hns3_rxtx.c ++++ b/dpdk/drivers/net/hns3/hns3_rxtx.c +@@ -1,16 +1,17 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. 
+ */ + + #include + #include + #include ++#include + #include + #include + #include + #include + #include +-#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE) ++#if defined(RTE_ARCH_ARM64) + #include + #endif + +@@ -307,7 +308,7 @@ hns3_init_rx_queue_hw(struct hns3_rx_queue *rxq) + + hns3_write_dev(rxq, HNS3_RING_RX_BASEADDR_L_REG, (uint32_t)dma_addr); + hns3_write_dev(rxq, HNS3_RING_RX_BASEADDR_H_REG, +- (uint32_t)((dma_addr >> 31) >> 1)); ++ (uint32_t)(dma_addr >> 32)); + + hns3_write_dev(rxq, HNS3_RING_RX_BD_LEN_REG, + hns3_buf_size2type(rx_buf_len)); +@@ -322,7 +323,7 @@ hns3_init_tx_queue_hw(struct hns3_tx_queue *txq) + + hns3_write_dev(txq, HNS3_RING_TX_BASEADDR_L_REG, (uint32_t)dma_addr); + hns3_write_dev(txq, HNS3_RING_TX_BASEADDR_H_REG, +- (uint32_t)((dma_addr >> 31) >> 1)); ++ (uint32_t)(dma_addr >> 32)); + + hns3_write_dev(txq, HNS3_RING_TX_BD_NUM_REG, + HNS3_CFG_DESC_NUM(txq->nb_tx_desc)); +@@ -624,14 +625,10 @@ static int + hns3pf_reset_tqp(struct hns3_hw *hw, uint16_t queue_id) + { + #define HNS3_TQP_RESET_TRY_MS 200 ++ uint16_t wait_time = 0; + uint8_t reset_status; +- uint64_t end; + int ret; + +- ret = hns3_tqp_enable(hw, queue_id, false); +- if (ret) +- return ret; +- + /* + * In current version VF is not supported when PF is driven by DPDK + * driver, all task queue pairs are mapped to PF function, so PF's queue +@@ -642,17 +639,18 @@ hns3pf_reset_tqp(struct hns3_hw *hw, uint16_t queue_id) + hns3_err(hw, "Send reset tqp cmd fail, ret = %d", ret); + return ret; + } +- end = get_timeofday_ms() + HNS3_TQP_RESET_TRY_MS; ++ + do { + /* Wait for tqp hw reset */ + rte_delay_ms(HNS3_POLL_RESPONE_MS); ++ wait_time += HNS3_POLL_RESPONE_MS; + ret = hns3_get_tqp_reset_status(hw, queue_id, &reset_status); + if (ret) + goto tqp_reset_fail; + + if (reset_status) + break; +- } while (get_timeofday_ms() < end); ++ } while (wait_time < HNS3_TQP_RESET_TRY_MS); + + if (!reset_status) { + ret = -ETIMEDOUT; +@@ -678,11 +676,6 @@ hns3vf_reset_tqp(struct hns3_hw *hw, uint16_t queue_id) + uint8_t msg_data[2]; + int ret; + +- /* Disable VF's queue before send queue reset msg to PF */ +- ret = hns3_tqp_enable(hw, queue_id, false); +- if (ret) +- return ret; +- + memcpy(msg_data, &queue_id, sizeof(uint16_t)); + + ret = hns3_send_mbx_msg(hw, HNS3_MBX_QUEUE_RESET, 0, msg_data, +@@ -694,14 +687,105 @@ hns3vf_reset_tqp(struct hns3_hw *hw, uint16_t queue_id) + } + + static int +-hns3_reset_tqp(struct hns3_adapter *hns, uint16_t queue_id) ++hns3_reset_rcb_cmd(struct hns3_hw *hw, uint8_t *reset_status) + { +- struct hns3_hw *hw = &hns->hw; ++ struct hns3_reset_cmd *req; ++ struct hns3_cmd_desc desc; ++ int ret; + +- if (hns->is_vf) +- return hns3vf_reset_tqp(hw, queue_id); +- else +- return hns3pf_reset_tqp(hw, queue_id); ++ hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_CFG_RST_TRIGGER, false); ++ req = (struct hns3_reset_cmd *)desc.data; ++ hns3_set_bit(req->fun_reset_rcb, HNS3_CFG_RESET_RCB_B, 1); ++ ++ /* ++ * The start qid should be the global qid of the first tqp of the ++ * function which should be reset in this port. Since our PF not ++ * support take over of VFs, so we only need to reset function 0, ++ * and its start qid is always 0. 
++ */ ++ req->fun_reset_rcb_vqid_start = rte_cpu_to_le_16(0); ++ req->fun_reset_rcb_vqid_num = rte_cpu_to_le_16(hw->cfg_max_queues); ++ ++ ret = hns3_cmd_send(hw, &desc, 1); ++ if (ret) { ++ hns3_err(hw, "fail to send rcb reset cmd, ret = %d.", ret); ++ return ret; ++ } ++ ++ *reset_status = req->fun_reset_rcb_return_status; ++ return 0; ++} ++ ++static int ++hns3pf_reset_all_tqps(struct hns3_hw *hw) ++{ ++#define HNS3_RESET_RCB_NOT_SUPPORT 0U ++#define HNS3_RESET_ALL_TQP_SUCCESS 1U ++ uint8_t reset_status; ++ int ret; ++ int i; ++ ++ ret = hns3_reset_rcb_cmd(hw, &reset_status); ++ if (ret) ++ return ret; ++ ++ /* ++ * If the firmware version is low, it may not support the rcb reset ++ * which means reset all the tqps at a time. In this case, we should ++ * reset tqps one by one. ++ */ ++ if (reset_status == HNS3_RESET_RCB_NOT_SUPPORT) { ++ for (i = 0; i < hw->cfg_max_queues; i++) { ++ ret = hns3pf_reset_tqp(hw, i); ++ if (ret) { ++ hns3_err(hw, ++ "fail to reset tqp, queue_id = %d, ret = %d.", ++ i, ret); ++ return ret; ++ } ++ } ++ } else if (reset_status != HNS3_RESET_ALL_TQP_SUCCESS) { ++ hns3_err(hw, "fail to reset all tqps, reset_status = %u.", ++ reset_status); ++ return -EIO; ++ } ++ ++ return 0; ++} ++ ++static int ++hns3vf_reset_all_tqps(struct hns3_hw *hw) ++{ ++#define HNS3VF_RESET_ALL_TQP_DONE 1U ++ uint8_t reset_status; ++ uint8_t msg_data[2]; ++ int ret; ++ int i; ++ ++ memset(msg_data, 0, sizeof(msg_data)); ++ ret = hns3_send_mbx_msg(hw, HNS3_MBX_QUEUE_RESET, 0, msg_data, ++ sizeof(msg_data), true, &reset_status, ++ sizeof(reset_status)); ++ if (ret) { ++ hns3_err(hw, "fail to send rcb reset mbx, ret = %d.", ret); ++ return ret; ++ } ++ ++ if (reset_status == HNS3VF_RESET_ALL_TQP_DONE) ++ return 0; ++ ++ /* ++ * If the firmware version or kernel PF version is low, it may not ++ * support the rcb reset which means reset all the tqps at a time. ++ * In this case, we should reset tqps one by one. ++ */ ++ for (i = 1; i < hw->cfg_max_queues; i++) { ++ ret = hns3vf_reset_tqp(hw, i); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; + } + + int +@@ -710,14 +794,21 @@ hns3_reset_all_tqps(struct hns3_adapter *hns) + struct hns3_hw *hw = &hns->hw; + int ret, i; + ++ /* Disable all queues before reset all queues */ + for (i = 0; i < hw->cfg_max_queues; i++) { +- ret = hns3_reset_tqp(hns, i); ++ ret = hns3_tqp_enable(hw, i, false); + if (ret) { +- hns3_err(hw, "Failed to reset No.%d queue: %d", i, ret); ++ hns3_err(hw, ++ "fail to disable tqps before tqps reset, ret = %d.", ++ ret); + return ret; + } + } +- return 0; ++ ++ if (hns->is_vf) ++ return hns3vf_reset_all_tqps(hw); ++ else ++ return hns3pf_reset_all_tqps(hw); + } + + static int +@@ -1525,6 +1616,9 @@ hns3_set_fake_rx_or_tx_queues(struct rte_eth_dev *dev, uint16_t nb_rx_q, + uint16_t q; + int ret; + ++ if (hns3_dev_indep_txrx_supported(hw)) ++ return 0; ++ + /* Setup new number of fake RX/TX queues and reconfigure device. 
*/ + rx_need_add_nb_q = hw->cfg_max_queues - nb_rx_q; + tx_need_add_nb_q = hw->cfg_max_queues - nb_tx_q; +@@ -1655,7 +1749,8 @@ hns3_rxq_conf_runtime_check(struct hns3_hw *hw, uint16_t buf_size, + return -EINVAL; + } + +- if (pkt_burst == hns3_recv_pkts_vec) { ++ if (pkt_burst == hns3_recv_pkts_vec || ++ pkt_burst == hns3_recv_pkts_vec_sve) { + min_vec_bds = HNS3_DEFAULT_RXQ_REARM_THRESH + + HNS3_DEFAULT_RX_BURST; + if (nb_desc < min_vec_bds || +@@ -1791,11 +1886,11 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, + /* + * For hns3 PF device, if the VLAN mode is HW_SHIFT_AND_DISCARD_MODE, + * the pvid_sw_discard_en in the queue struct should not be changed, +- * because PVID-related operations do not need to be processed by PMD +- * driver. For hns3 VF device, whether it needs to process PVID depends ++ * because PVID-related operations do not need to be processed by PMD. ++ * For hns3 VF device, whether it needs to process PVID depends + * on the configuration of PF kernel mode netdevice driver. And the + * related PF configuration is delivered through the mailbox and finally +- * reflectd in port_base_vlan_cfg. ++ * reflected in port_base_vlan_cfg. + */ + if (hns->is_vf || hw->vlan_mode == HNS3_SW_SHIFT_AND_DISCARD_MODE) + rxq->pvid_sw_discard_en = hw->port_base_vlan_cfg.state == +@@ -1873,8 +1968,6 @@ hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev) + { + static const uint32_t ptypes[] = { + RTE_PTYPE_L2_ETHER, +- RTE_PTYPE_L2_ETHER_VLAN, +- RTE_PTYPE_L2_ETHER_QINQ, + RTE_PTYPE_L2_ETHER_LLDP, + RTE_PTYPE_L2_ETHER_ARP, + RTE_PTYPE_L3_IPV4, +@@ -1888,8 +1981,6 @@ hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev) + RTE_PTYPE_L4_UDP, + RTE_PTYPE_TUNNEL_GRE, + RTE_PTYPE_INNER_L2_ETHER, +- RTE_PTYPE_INNER_L2_ETHER_VLAN, +- RTE_PTYPE_INNER_L2_ETHER_QINQ, + RTE_PTYPE_INNER_L3_IPV4, + RTE_PTYPE_INNER_L3_IPV6, + RTE_PTYPE_INNER_L3_IPV4_EXT, +@@ -1915,32 +2006,12 @@ hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev) + static void + hns3_init_non_tunnel_ptype_tbl(struct hns3_ptype_table *tbl) + { +- tbl->l2l3table[0][0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4; +- tbl->l2l3table[0][1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6; +- tbl->l2l3table[0][2] = RTE_PTYPE_L2_ETHER_ARP; +- tbl->l2l3table[0][3] = RTE_PTYPE_L2_ETHER; +- tbl->l2l3table[0][4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT; +- tbl->l2l3table[0][5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT; +- tbl->l2l3table[0][6] = RTE_PTYPE_L2_ETHER_LLDP; +- tbl->l2l3table[0][15] = RTE_PTYPE_L2_ETHER; +- +- tbl->l2l3table[1][0] = RTE_PTYPE_L2_ETHER_VLAN | RTE_PTYPE_L3_IPV4; +- tbl->l2l3table[1][1] = RTE_PTYPE_L2_ETHER_VLAN | RTE_PTYPE_L3_IPV6; +- tbl->l2l3table[1][2] = RTE_PTYPE_L2_ETHER_ARP; +- tbl->l2l3table[1][3] = RTE_PTYPE_L2_ETHER_VLAN; +- tbl->l2l3table[1][4] = RTE_PTYPE_L2_ETHER_VLAN | RTE_PTYPE_L3_IPV4_EXT; +- tbl->l2l3table[1][5] = RTE_PTYPE_L2_ETHER_VLAN | RTE_PTYPE_L3_IPV6_EXT; +- tbl->l2l3table[1][6] = RTE_PTYPE_L2_ETHER_LLDP; +- tbl->l2l3table[1][15] = RTE_PTYPE_L2_ETHER_VLAN; +- +- tbl->l2l3table[2][0] = RTE_PTYPE_L2_ETHER_QINQ | RTE_PTYPE_L3_IPV4; +- tbl->l2l3table[2][1] = RTE_PTYPE_L2_ETHER_QINQ | RTE_PTYPE_L3_IPV6; +- tbl->l2l3table[2][2] = RTE_PTYPE_L2_ETHER_ARP; +- tbl->l2l3table[2][3] = RTE_PTYPE_L2_ETHER_QINQ; +- tbl->l2l3table[2][4] = RTE_PTYPE_L2_ETHER_QINQ | RTE_PTYPE_L3_IPV4_EXT; +- tbl->l2l3table[2][5] = RTE_PTYPE_L2_ETHER_QINQ | RTE_PTYPE_L3_IPV6_EXT; +- tbl->l2l3table[2][6] = RTE_PTYPE_L2_ETHER_LLDP; +- tbl->l2l3table[2][15] = RTE_PTYPE_L2_ETHER_QINQ; ++ tbl->l3table[0] = 
RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4; ++ tbl->l3table[1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6; ++ tbl->l3table[2] = RTE_PTYPE_L2_ETHER_ARP; ++ tbl->l3table[4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT; ++ tbl->l3table[5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT; ++ tbl->l3table[6] = RTE_PTYPE_L2_ETHER_LLDP; + + tbl->l4table[0] = RTE_PTYPE_L4_UDP; + tbl->l4table[1] = RTE_PTYPE_L4_TCP; +@@ -1953,17 +2024,17 @@ hns3_init_non_tunnel_ptype_tbl(struct hns3_ptype_table *tbl) + static void + hns3_init_tunnel_ptype_tbl(struct hns3_ptype_table *tbl) + { +- tbl->inner_l2table[0] = RTE_PTYPE_INNER_L2_ETHER; +- tbl->inner_l2table[1] = RTE_PTYPE_INNER_L2_ETHER_VLAN; +- tbl->inner_l2table[2] = RTE_PTYPE_INNER_L2_ETHER_QINQ; +- +- tbl->inner_l3table[0] = RTE_PTYPE_INNER_L3_IPV4; +- tbl->inner_l3table[1] = RTE_PTYPE_INNER_L3_IPV6; ++ tbl->inner_l3table[0] = RTE_PTYPE_INNER_L2_ETHER | ++ RTE_PTYPE_INNER_L3_IPV4; ++ tbl->inner_l3table[1] = RTE_PTYPE_INNER_L2_ETHER | ++ RTE_PTYPE_INNER_L3_IPV6; + /* There is not a ptype for inner ARP/RARP */ + tbl->inner_l3table[2] = RTE_PTYPE_UNKNOWN; + tbl->inner_l3table[3] = RTE_PTYPE_UNKNOWN; +- tbl->inner_l3table[4] = RTE_PTYPE_INNER_L3_IPV4_EXT; +- tbl->inner_l3table[5] = RTE_PTYPE_INNER_L3_IPV6_EXT; ++ tbl->inner_l3table[4] = RTE_PTYPE_INNER_L2_ETHER | ++ RTE_PTYPE_INNER_L3_IPV4_EXT; ++ tbl->inner_l3table[5] = RTE_PTYPE_INNER_L2_ETHER | ++ RTE_PTYPE_INNER_L3_IPV6_EXT; + + tbl->inner_l4table[0] = RTE_PTYPE_INNER_L4_UDP; + tbl->inner_l4table[1] = RTE_PTYPE_INNER_L4_TCP; +@@ -1974,19 +2045,15 @@ hns3_init_tunnel_ptype_tbl(struct hns3_ptype_table *tbl) + tbl->inner_l4table[4] = RTE_PTYPE_UNKNOWN; + tbl->inner_l4table[5] = RTE_PTYPE_INNER_L4_ICMP; + +- tbl->ol2table[0] = RTE_PTYPE_L2_ETHER; +- tbl->ol2table[1] = RTE_PTYPE_L2_ETHER_VLAN; +- tbl->ol2table[2] = RTE_PTYPE_L2_ETHER_QINQ; +- +- tbl->ol3table[0] = RTE_PTYPE_L3_IPV4; +- tbl->ol3table[1] = RTE_PTYPE_L3_IPV6; ++ tbl->ol3table[0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4; ++ tbl->ol3table[1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6; + tbl->ol3table[2] = RTE_PTYPE_UNKNOWN; + tbl->ol3table[3] = RTE_PTYPE_UNKNOWN; +- tbl->ol3table[4] = RTE_PTYPE_L3_IPV4_EXT; +- tbl->ol3table[5] = RTE_PTYPE_L3_IPV6_EXT; ++ tbl->ol3table[4] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT; ++ tbl->ol3table[5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT; + + tbl->ol4table[0] = RTE_PTYPE_UNKNOWN; +- tbl->ol4table[1] = RTE_PTYPE_TUNNEL_VXLAN; ++ tbl->ol4table[1] = RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN; + tbl->ol4table[2] = RTE_PTYPE_TUNNEL_NVGRE; + } + +@@ -2482,10 +2549,20 @@ hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, + return ret; + } + ++static bool ++hns3_get_default_vec_support(void) ++{ ++#if defined(RTE_ARCH_ARM64) ++ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) ++ return true; ++#endif ++ return false; ++} ++ + static bool + hns3_check_sve_support(void) + { +-#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE) ++#if defined(RTE_HAS_SVE_ACLE) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SVE)) + return true; + #endif +@@ -2498,9 +2575,12 @@ hns3_get_rx_function(struct rte_eth_dev *dev) + struct hns3_adapter *hns = dev->data->dev_private; + uint64_t offloads = dev->data->dev_conf.rxmode.offloads; + +- if (hns->rx_vec_allowed && hns3_rx_check_vec_support(dev) == 0) +- return hns3_check_sve_support() ? 
hns3_recv_pkts_vec_sve : +- hns3_recv_pkts_vec; ++ if (hns->rx_vec_allowed && hns3_rx_check_vec_support(dev) == 0) { ++ if (hns3_get_default_vec_support()) ++ return hns3_recv_pkts_vec; ++ else if (hns3_check_sve_support()) ++ return hns3_recv_pkts_vec_sve; ++ } + + if (hns->rx_simple_allowed && !dev->data->scattered_rx && + (offloads & DEV_RX_OFFLOAD_TCP_LRO) == 0) +@@ -2621,11 +2701,11 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, + /* + * For hns3 PF device, if the VLAN mode is HW_SHIFT_AND_DISCARD_MODE, + * the pvid_sw_shift_en in the queue struct should not be changed, +- * because PVID-related operations do not need to be processed by PMD +- * driver. For hns3 VF device, whether it needs to process PVID depends ++ * because PVID-related operations do not need to be processed by PMD. ++ * For hns3 VF device, whether it needs to process PVID depends + * on the configuration of PF kernel mode netdev driver. And the + * related PF configuration is delivered through the mailbox and finally +- * reflectd in port_base_vlan_cfg. ++ * reflected in port_base_vlan_cfg. + */ + if (hns->is_vf || hw->vlan_mode == HNS3_SW_SHIFT_AND_DISCARD_MODE) + txq->pvid_sw_shift_en = hw->port_base_vlan_cfg.state == +@@ -2640,8 +2720,11 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc, + HNS3_RING_TX_TAIL_REG); + txq->min_tx_pkt_len = hw->min_tx_pkt_len; + txq->tso_mode = hw->tso_mode; ++ txq->udp_cksum_mode = hw->udp_cksum_mode; + txq->over_length_pkt_cnt = 0; + txq->exceed_limit_bd_pkt_cnt = 0; ++ txq->mbuf_fast_free_en = !!(dev->data->dev_conf.txmode.offloads & ++ DEV_TX_OFFLOAD_MBUF_FAST_FREE); + txq->exceed_limit_bd_reassem_fail = 0; + txq->unsupported_tunnel_pkt_cnt = 0; + txq->queue_full_cnt = 0; +@@ -2773,7 +2856,7 @@ hns3_fill_first_desc(struct hns3_tx_queue *txq, struct hns3_desc *desc, + * in Tx direction based on hns3 network engine. So when the number of + * VLANs in the packets represented by rxm plus the number of VLAN + * offload by hardware such as PVID etc, exceeds two, the packets will +- * be discarded or the original VLAN of the packets will be overwitted ++ * be discarded or the original VLAN of the packets will be overwritten + * by hardware. When the PF PVID is enabled by calling the API function + * named rte_eth_dev_set_vlan_pvid or the VF PVID is enabled by the hns3 + * PF kernel ether driver, the outer VLAN tag will always be the PVID. +@@ -2954,7 +3037,7 @@ hns3_parse_inner_params(struct rte_mbuf *m, uint32_t *ol_type_vlan_len_msec, + /* + * The inner l2 length of mbuf is the sum of outer l4 length, + * tunneling header length and inner l2 length for a tunnel +- * packect. But in hns3 tx descriptor, the tunneling header ++ * packet. But in hns3 tx descriptor, the tunneling header + * length is contained in the field of outer L4 length. + * Therefore, driver need to calculate the outer L4 length and + * inner L2 length. +@@ -2970,7 +3053,7 @@ hns3_parse_inner_params(struct rte_mbuf *m, uint32_t *ol_type_vlan_len_msec, + tmp_outer |= hns3_gen_field_val(HNS3_TXD_TUNTYPE_M, + HNS3_TXD_TUNTYPE_S, HNS3_TUN_NVGRE); + /* +- * For NVGRE tunnel packect, the outer L4 is empty. So only ++ * For NVGRE tunnel packet, the outer L4 is empty. So only + * fill the NVGRE header length to the outer L4 field. + */ + tmp_outer |= hns3_gen_field_val(HNS3_TXD_L4LEN_M, +@@ -3011,7 +3094,7 @@ hns3_parse_tunneling_params(struct hns3_tx_queue *txq, struct rte_mbuf *m, + * mbuf, but for hns3 descriptor, it is contained in the outer L4. 
So, + * there is a need that switching between them. To avoid multiple + * calculations, the length of the L2 header include the outer and +- * inner, will be filled during the parsing of tunnel packects. ++ * inner, will be filled during the parsing of tunnel packets. + */ + if (!(m->ol_flags & PKT_TX_TUNNEL_MASK)) { + /* +@@ -3083,6 +3166,7 @@ hns3_parse_l4_cksum_params(struct rte_mbuf *m, uint32_t *type_cs_vlan_tso_len) + uint32_t tmp; + /* Enable L4 checksum offloads */ + switch (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG)) { ++ case PKT_TX_TCP_CKSUM | PKT_TX_TCP_SEG: + case PKT_TX_TCP_CKSUM: + case PKT_TX_TCP_SEG: + tmp = *type_cs_vlan_tso_len; +@@ -3292,6 +3376,69 @@ hns3_vld_vlan_chk(struct hns3_tx_queue *txq, struct rte_mbuf *m) + } + #endif + ++static uint16_t ++hns3_udp_cksum_help(struct rte_mbuf *m) ++{ ++ uint64_t ol_flags = m->ol_flags; ++ uint16_t cksum = 0; ++ uint32_t l4_len; ++ ++ if (ol_flags & PKT_TX_IPV4) { ++ struct rte_ipv4_hdr *ipv4_hdr = rte_pktmbuf_mtod_offset(m, ++ struct rte_ipv4_hdr *, m->l2_len); ++ l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - m->l3_len; ++ } else { ++ struct rte_ipv6_hdr *ipv6_hdr = rte_pktmbuf_mtod_offset(m, ++ struct rte_ipv6_hdr *, m->l2_len); ++ l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len); ++ } ++ ++ rte_raw_cksum_mbuf(m, m->l2_len + m->l3_len, l4_len, &cksum); ++ ++ cksum = ~cksum; ++ /* ++ * RFC 768:If the computed checksum is zero for UDP, it is transmitted ++ * as all ones ++ */ ++ if (cksum == 0) ++ cksum = 0xffff; ++ ++ return (uint16_t)cksum; ++} ++ ++static bool ++hns3_validate_tunnel_cksum(struct hns3_tx_queue *tx_queue, struct rte_mbuf *m) ++{ ++ uint64_t ol_flags = m->ol_flags; ++ struct rte_udp_hdr *udp_hdr; ++ uint16_t dst_port; ++ ++ if (tx_queue->udp_cksum_mode == HNS3_SPECIAL_PORT_HW_CKSUM_MODE || ++ ol_flags & PKT_TX_TUNNEL_MASK || ++ (ol_flags & PKT_TX_L4_MASK) != PKT_TX_UDP_CKSUM) ++ return true; ++ /* ++ * A UDP packet with the same dst_port as VXLAN\VXLAN_GPE\GENEVE will ++ * be recognized as a tunnel packet in HW. In this case, if UDP CKSUM ++ * offload is set and the tunnel mask has not been set, the CKSUM will ++ * be wrong since the header length is wrong and driver should complete ++ * the CKSUM to avoid CKSUM error. 
++ */ ++ udp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *, ++ m->l2_len + m->l3_len); ++ dst_port = rte_be_to_cpu_16(udp_hdr->dst_port); ++ switch (dst_port) { ++ case RTE_VXLAN_DEFAULT_PORT: ++ case RTE_VXLAN_GPE_DEFAULT_PORT: ++ case RTE_GENEVE_DEFAULT_PORT: ++ udp_hdr->dgram_cksum = hns3_udp_cksum_help(m); ++ m->ol_flags = ol_flags & ~PKT_TX_L4_MASK; ++ return false; ++ default: ++ return true; ++ } ++} ++ + static int + hns3_prep_pkt_proc(struct hns3_tx_queue *tx_queue, struct rte_mbuf *m) + { +@@ -3336,6 +3483,9 @@ hns3_prep_pkt_proc(struct hns3_tx_queue *tx_queue, struct rte_mbuf *m) + return ret; + } + ++ if (!hns3_validate_tunnel_cksum(tx_queue, m)) ++ return 0; ++ + hns3_outer_header_cksum_prepare(m); + + return 0; +@@ -3445,6 +3595,14 @@ hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq) + + tx_entry = &txq->sw_ring[txq->next_to_clean]; + ++ if (txq->mbuf_fast_free_en) { ++ rte_mempool_put_bulk(tx_entry->mbuf->pool, ++ (void **)tx_entry, txq->tx_rs_thresh); ++ for (i = 0; i < txq->tx_rs_thresh; i++) ++ tx_entry[i].mbuf = NULL; ++ goto update_field; ++ } ++ + for (i = 0; i < txq->tx_rs_thresh; i++) + rte_prefetch0((tx_entry + i)->mbuf); + for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) { +@@ -3452,6 +3610,7 @@ hns3_tx_free_buffer_simple(struct hns3_tx_queue *txq) + tx_entry->mbuf = NULL; + } + ++update_field: + txq->next_to_clean = (tx_next_clean + 1) % txq->nb_tx_desc; + txq->tx_bd_ready += txq->tx_rs_thresh; + } +@@ -3734,8 +3893,10 @@ hns3_get_tx_function(struct rte_eth_dev *dev, eth_tx_prep_t *prep) + + if (hns->tx_vec_allowed && hns3_tx_check_vec_support(dev) == 0) { + *prep = NULL; +- return hns3_check_sve_support() ? hns3_xmit_pkts_vec_sve : +- hns3_xmit_pkts_vec; ++ if (hns3_get_default_vec_support()) ++ return hns3_xmit_pkts_vec; ++ else if (hns3_check_sve_support()) ++ return hns3_xmit_pkts_vec_sve; + } + + if (hns->tx_simple_allowed && +@@ -3769,7 +3930,7 @@ void hns3_set_rxtx_function(struct rte_eth_dev *eth_dev) + } else { + eth_dev->rx_pkt_burst = hns3_dummy_rxtx_burst; + eth_dev->tx_pkt_burst = hns3_dummy_rxtx_burst; +- eth_dev->tx_pkt_prepare = hns3_dummy_rxtx_burst; ++ eth_dev->tx_pkt_prepare = NULL; + } + } + +@@ -3819,10 +3980,12 @@ hns3_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) + if (!hns3_dev_indep_txrx_supported(hw)) + return -ENOTSUP; + ++ rte_spinlock_lock(&hw->lock); + ret = hns3_reset_queue(hw, rx_queue_id, HNS3_RING_TYPE_RX); + if (ret) { + hns3_err(hw, "fail to reset Rx queue %u, ret = %d.", + rx_queue_id, ret); ++ rte_spinlock_unlock(&hw->lock); + return ret; + } + +@@ -3830,11 +3993,13 @@ hns3_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) + if (ret) { + hns3_err(hw, "fail to init Rx queue %u, ret = %d.", + rx_queue_id, ret); ++ rte_spinlock_unlock(&hw->lock); + return ret; + } + + hns3_enable_rxq(rxq, true); + dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; ++ rte_spinlock_unlock(&hw->lock); + + return ret; + } +@@ -3861,12 +4026,14 @@ hns3_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id) + if (!hns3_dev_indep_txrx_supported(hw)) + return -ENOTSUP; + ++ rte_spinlock_lock(&hw->lock); + hns3_enable_rxq(rxq, false); + + hns3_rx_queue_release_mbufs(rxq); + + hns3_reset_sw_rxq(rxq); + dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED; ++ rte_spinlock_unlock(&hw->lock); + + return 0; + } +@@ -3881,16 +4048,19 @@ hns3_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id) + if (!hns3_dev_indep_txrx_supported(hw)) + return -ENOTSUP; 
+ ++ rte_spinlock_lock(&hw->lock); + ret = hns3_reset_queue(hw, tx_queue_id, HNS3_RING_TYPE_TX); + if (ret) { + hns3_err(hw, "fail to reset Tx queue %u, ret = %d.", + tx_queue_id, ret); ++ rte_spinlock_unlock(&hw->lock); + return ret; + } + + hns3_init_txq(txq); + hns3_enable_txq(txq, true); + dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED; ++ rte_spinlock_unlock(&hw->lock); + + return ret; + } +@@ -3904,6 +4074,7 @@ hns3_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id) + if (!hns3_dev_indep_txrx_supported(hw)) + return -ENOTSUP; + ++ rte_spinlock_lock(&hw->lock); + hns3_enable_txq(txq, false); + hns3_tx_queue_release_mbufs(txq); + /* +@@ -3915,6 +4086,7 @@ hns3_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id) + */ + hns3_init_txq(txq); + dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED; ++ rte_spinlock_unlock(&hw->lock); + + return 0; + } +diff --git a/dpdk/drivers/net/hns3/hns3_rxtx.h b/dpdk/drivers/net/hns3/hns3_rxtx.h +index 5650a97c3a..73f613b17e 100644 +--- a/dpdk/drivers/net/hns3/hns3_rxtx.h ++++ b/dpdk/drivers/net/hns3/hns3_rxtx.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #ifndef _HNS3_RXTX_H_ +@@ -20,7 +20,7 @@ + #define HNS3_DEFAULT_TX_RS_THRESH 32 + #define HNS3_TX_FAST_FREE_AHEAD 64 + +-#define HNS3_DEFAULT_RX_BURST 32 ++#define HNS3_DEFAULT_RX_BURST 64 + #if (HNS3_DEFAULT_RX_BURST > 64) + #error "PMD HNS3: HNS3_DEFAULT_RX_BURST must <= 64\n" + #endif +@@ -102,9 +102,6 @@ + #define HNS3_RXD_LUM_B 9 + #define HNS3_RXD_CRCP_B 10 + #define HNS3_RXD_L3L4P_B 11 +-#define HNS3_RXD_TSIND_S 12 +-#define HNS3_RXD_TSIND_M (0x7 << HNS3_RXD_TSIND_S) +-#define HNS3_RXD_LKBK_B 15 + #define HNS3_RXD_GRO_SIZE_S 16 + #define HNS3_RXD_GRO_SIZE_M (0x3fff << HNS3_RXD_GRO_SIZE_S) + +@@ -306,7 +303,7 @@ struct hns3_rx_queue { + * should not be transitted to the upper-layer application. For hardware + * network engine whose vlan mode is HNS3_HW_SHIFT_AND_DISCARD_MODE, + * such as kunpeng 930, PVID will not be reported to the BDs. So, PMD +- * driver does not need to perform PVID-related operation in Rx. At this ++ * does not need to perform PVID-related operation in Rx. At this + * point, the pvid_sw_discard_en will be false. + */ + bool pvid_sw_discard_en; +@@ -387,6 +384,22 @@ struct hns3_tx_queue { + * not need to recalculate it. + */ + uint8_t tso_mode; ++ /* ++ * udp checksum mode. ++ * value range: ++ * HNS3_SPECIAL_PORT_HW_CKSUM_MODE/HNS3_SPECIAL_PORT_SW_CKSUM_MODE ++ * ++ * - HNS3_SPECIAL_PORT_SW_CKSUM_MODE ++ * In this mode, HW can not do checksum for special UDP port like ++ * 4789, 4790, 6081 for non-tunnel UDP packets and UDP tunnel ++ * packets without the PKT_TX_TUNEL_MASK in the mbuf. So, PMD need ++ * do the checksum for these packets to avoid a checksum error. ++ * ++ * - HNS3_SPECIAL_PORT_HW_CKSUM_MODE ++ * In this mode, HW does not have the preceding problems and can ++ * directly calculate the checksum of these UDP packets. ++ */ ++ uint8_t udp_cksum_mode; + /* + * The minimum length of the packet supported by hardware in the Tx + * direction. +@@ -404,12 +417,14 @@ struct hns3_tx_queue { + * PVID will overwrite the outer VLAN field of Tx BD. For the hardware + * network engine whose vlan mode is HNS3_HW_SHIFT_AND_DISCARD_MODE, + * such as kunpeng 930, if the PVID is set, the hardware will shift the +- * VLAN field automatically. 
So, PMD driver does not need to do ++ * VLAN field automatically. So, PMD does not need to do + * PVID-related operations in Tx. And pvid_sw_shift_en will be false at + * this point. + */ + bool pvid_sw_shift_en; + bool enabled; /* indicate if Tx queue has been enabled */ ++ /* check whether the mbuf fast free offload is enabled */ ++ uint16_t mbuf_fast_free_en:1; + + /* + * The following items are used for the abnormal errors statistics in +@@ -502,7 +517,7 @@ hns3_handle_bdinfo(struct hns3_rx_queue *rxq, struct rte_mbuf *rxm, + + /* + * If packet len bigger than mtu when recv with no-scattered algorithm, +- * the first n bd will without FE bit, we need process this sisution. ++ * the first n bd will without FE bit, we need process this situation. + * Note: we don't need add statistic counter because latest BD which + * with FE bit will mark HNS3_RXD_L2E_B bit. + */ +@@ -580,25 +595,21 @@ hns3_rx_calc_ptype(struct hns3_rx_queue *rxq, const uint32_t l234_info, + const uint32_t ol_info) + { + const struct hns3_ptype_table * const ptype_tbl = rxq->ptype_tbl; +- uint32_t l2id, l3id, l4id; +- uint32_t ol3id, ol4id, ol2id; ++ uint32_t ol3id, ol4id; ++ uint32_t l3id, l4id; + + ol4id = hns3_get_field(ol_info, HNS3_RXD_OL4ID_M, HNS3_RXD_OL4ID_S); + ol3id = hns3_get_field(ol_info, HNS3_RXD_OL3ID_M, HNS3_RXD_OL3ID_S); +- ol2id = hns3_get_field(ol_info, HNS3_RXD_OVLAN_M, HNS3_RXD_OVLAN_S); +- l2id = hns3_get_field(l234_info, HNS3_RXD_VLAN_M, HNS3_RXD_VLAN_S); + l3id = hns3_get_field(l234_info, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S); + l4id = hns3_get_field(l234_info, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S); + + if (unlikely(ptype_tbl->ol4table[ol4id])) +- return ptype_tbl->inner_l2table[l2id] | +- ptype_tbl->inner_l3table[l3id] | ++ return ptype_tbl->inner_l3table[l3id] | + ptype_tbl->inner_l4table[l4id] | + ptype_tbl->ol3table[ol3id] | +- ptype_tbl->ol4table[ol4id] | ptype_tbl->ol2table[ol2id]; ++ ptype_tbl->ol4table[ol4id]; + else +- return ptype_tbl->l2l3table[l2id][l3id] | +- ptype_tbl->l4table[l4id]; ++ return ptype_tbl->l3table[l3id] | ptype_tbl->l4table[l4id]; + } + + void hns3_dev_rx_queue_release(void *queue); +diff --git a/dpdk/drivers/net/hns3/hns3_rxtx_vec.c b/dpdk/drivers/net/hns3/hns3_rxtx_vec.c +index a26c83d146..63f910165e 100644 +--- a/dpdk/drivers/net/hns3/hns3_rxtx_vec.c ++++ b/dpdk/drivers/net/hns3/hns3_rxtx_vec.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2020 Hisilicon Limited. ++ * Copyright(c) 2020-2021 HiSilicon Limited. 
+ */ + + #include +@@ -104,14 +104,13 @@ hns3_recv_pkts_vec(void *__restrict rx_queue, + { + struct hns3_rx_queue *rxq = rx_queue; + struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use]; +- uint64_t bd_err_mask; /* bit mask indicate whick pkts is error */ ++ uint64_t pkt_err_mask; /* bit mask indicate whick pkts is error */ + uint16_t nb_rx; + +- nb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST); +- nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP); +- + rte_prefetch_non_temporal(rxdp); + ++ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP); ++ + if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH) + hns3_rxq_rearm_mbuf(rxq); + +@@ -124,10 +123,31 @@ hns3_recv_pkts_vec(void *__restrict rx_queue, + rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 2].mbuf); + rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 3].mbuf); + +- bd_err_mask = 0; +- nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts, &bd_err_mask); +- if (unlikely(bd_err_mask)) +- nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask); ++ if (likely(nb_pkts <= HNS3_DEFAULT_RX_BURST)) { ++ pkt_err_mask = 0; ++ nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts, ++ &pkt_err_mask); ++ nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, pkt_err_mask); ++ return nb_rx; ++ } ++ ++ nb_rx = 0; ++ while (nb_pkts > 0) { ++ uint16_t ret, n; ++ ++ n = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST); ++ pkt_err_mask = 0; ++ ret = hns3_recv_burst_vec(rxq, &rx_pkts[nb_rx], n, ++ &pkt_err_mask); ++ nb_pkts -= ret; ++ nb_rx += hns3_rx_reassemble_pkts(&rx_pkts[nb_rx], ret, ++ pkt_err_mask); ++ if (ret < n) ++ break; ++ ++ if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH) ++ hns3_rxq_rearm_mbuf(rxq); ++ } + + return nb_rx; + } +diff --git a/dpdk/drivers/net/hns3/hns3_rxtx_vec.h b/dpdk/drivers/net/hns3/hns3_rxtx_vec.h +index 35d99032f4..4985a7cae8 100644 +--- a/dpdk/drivers/net/hns3/hns3_rxtx_vec.h ++++ b/dpdk/drivers/net/hns3/hns3_rxtx_vec.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2020 Hisilicon Limited. ++ * Copyright(c) 2020-2021 HiSilicon Limited. + */ + + #ifndef _HNS3_RXTX_VEC_H_ +@@ -18,6 +18,14 @@ hns3_tx_bulk_free_buffers(struct hns3_tx_queue *txq) + int i; + + tx_entry = &txq->sw_ring[txq->next_to_clean]; ++ if (txq->mbuf_fast_free_en) { ++ rte_mempool_put_bulk(tx_entry->mbuf->pool, (void **)tx_entry, ++ txq->tx_rs_thresh); ++ for (i = 0; i < txq->tx_rs_thresh; i++) ++ tx_entry[i].mbuf = NULL; ++ goto update_field; ++ } ++ + for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) { + m = rte_pktmbuf_prefree_seg(tx_entry->mbuf); + tx_entry->mbuf = NULL; +@@ -36,6 +44,7 @@ hns3_tx_bulk_free_buffers(struct hns3_tx_queue *txq) + if (nb_free) + rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); + ++update_field: + /* Update numbers of available descriptor due to buffer freed */ + txq->tx_bd_ready += txq->tx_rs_thresh; + txq->next_to_clean += txq->tx_rs_thresh; +@@ -71,6 +80,9 @@ hns3_rx_reassemble_pkts(struct rte_mbuf **rx_pkts, + uint16_t count, i; + uint64_t mask; + ++ if (likely(pkt_err_mask == 0)) ++ return nb_pkts; ++ + count = 0; + for (i = 0; i < nb_pkts; i++) { + mask = ((uint64_t)1u) << i; +diff --git a/dpdk/drivers/net/hns3/hns3_rxtx_vec_neon.h b/dpdk/drivers/net/hns3/hns3_rxtx_vec_neon.h +index 54addbf240..4e94c7aa94 100644 +--- a/dpdk/drivers/net/hns3/hns3_rxtx_vec_neon.h ++++ b/dpdk/drivers/net/hns3/hns3_rxtx_vec_neon.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2020 Hisilicon Limited. ++ * Copyright(c) 2020-2021 HiSilicon Limited. 
+ */ + + #ifndef _HNS3_RXTX_VEC_NEON_H_ +diff --git a/dpdk/drivers/net/hns3/hns3_rxtx_vec_sve.c b/dpdk/drivers/net/hns3/hns3_rxtx_vec_sve.c +index 8c2c8f6108..67bf8ccdc5 100644 +--- a/dpdk/drivers/net/hns3/hns3_rxtx_vec_sve.c ++++ b/dpdk/drivers/net/hns3/hns3_rxtx_vec_sve.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2020 Hisilicon Limited. ++ * Copyright(c) 2020-2021 HiSilicon Limited. + */ + + #include +@@ -287,12 +287,11 @@ hns3_recv_pkts_vec_sve(void *__restrict rx_queue, + { + struct hns3_rx_queue *rxq = rx_queue; + struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use]; +- uint64_t bd_err_mask; /* bit mask indicate whick pkts is error */ ++ uint64_t pkt_err_mask; /* bit mask indicate whick pkts is error */ + uint16_t nb_rx; + + rte_prefetch_non_temporal(rxdp); + +- nb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST); + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_SVE_DEFAULT_DESCS_PER_LOOP); + + if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH) +@@ -304,10 +303,31 @@ hns3_recv_pkts_vec_sve(void *__restrict rx_queue, + + hns3_rx_prefetch_mbuf_sve(&rxq->sw_ring[rxq->next_to_use]); + +- bd_err_mask = 0; +- nb_rx = hns3_recv_burst_vec_sve(rxq, rx_pkts, nb_pkts, &bd_err_mask); +- if (unlikely(bd_err_mask)) +- nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask); ++ if (likely(nb_pkts <= HNS3_DEFAULT_RX_BURST)) { ++ pkt_err_mask = 0; ++ nb_rx = hns3_recv_burst_vec_sve(rxq, rx_pkts, nb_pkts, ++ &pkt_err_mask); ++ nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, pkt_err_mask); ++ return nb_rx; ++ } ++ ++ nb_rx = 0; ++ while (nb_pkts > 0) { ++ uint16_t ret, n; ++ ++ n = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST); ++ pkt_err_mask = 0; ++ ret = hns3_recv_burst_vec_sve(rxq, &rx_pkts[nb_rx], n, ++ &pkt_err_mask); ++ nb_pkts -= ret; ++ nb_rx += hns3_rx_reassemble_pkts(&rx_pkts[nb_rx], ret, ++ pkt_err_mask); ++ if (ret < n) ++ break; ++ ++ if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH) ++ hns3_rxq_rearm_mbuf_sve(rxq); ++ } + + return nb_rx; + } +diff --git a/dpdk/drivers/net/hns3/hns3_stats.c b/dpdk/drivers/net/hns3/hns3_stats.c +index 48ab6a38bb..3cb1e4cac1 100644 +--- a/dpdk/drivers/net/hns3/hns3_stats.c ++++ b/dpdk/drivers/net/hns3/hns3_stats.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. + */ + + #include +@@ -328,24 +328,21 @@ static const struct hns3_xstats_name_offset hns3_tx_queue_strings[] = { + + static void hns3_tqp_stats_clear(struct hns3_hw *hw); + +-/* +- * Query all the MAC statistics data of Network ICL command ,opcode id: 0x0034. +- * This command is used before send 'query_mac_stat command', the descriptor +- * number of 'query_mac_stat command' must match with reg_num in this command. +- * @praram hw +- * Pointer to structure hns3_hw. +- * @return +- * 0 on success. +- */ + static int +-hns3_update_mac_stats(struct hns3_hw *hw, const uint32_t desc_num) ++hns3_update_mac_stats(struct hns3_hw *hw) + { ++#define HNS3_MAC_STATS_REG_NUM_PER_DESC 4 ++ + uint64_t *data = (uint64_t *)(&hw->mac_stats); + struct hns3_cmd_desc *desc; ++ uint32_t stats_iterms; + uint64_t *desc_data; +- uint16_t i, k, n; ++ uint32_t desc_num; ++ uint16_t i; + int ret; + ++ /* The first desc has a 64-bit header, so need to consider it. 
*/ ++ desc_num = hw->mac_stats_reg_num / HNS3_MAC_STATS_REG_NUM_PER_DESC + 1; + desc = rte_malloc("hns3_mac_desc", + desc_num * sizeof(struct hns3_cmd_desc), 0); + if (desc == NULL) { +@@ -361,65 +358,71 @@ hns3_update_mac_stats(struct hns3_hw *hw, const uint32_t desc_num) + return ret; + } + +- for (i = 0; i < desc_num; i++) { +- /* For special opcode 0034, only the first desc has the head */ +- if (i == 0) { +- desc_data = (uint64_t *)(&desc[i].data[0]); +- n = HNS3_RD_FIRST_STATS_NUM; +- } else { +- desc_data = (uint64_t *)(&desc[i]); +- n = HNS3_RD_OTHER_STATS_NUM; +- } +- +- for (k = 0; k < n; k++) { +- *data += rte_le_to_cpu_64(*desc_data); +- data++; +- desc_data++; +- } ++ stats_iterms = RTE_MIN(sizeof(hw->mac_stats) / sizeof(uint64_t), ++ hw->mac_stats_reg_num); ++ desc_data = (uint64_t *)(&desc[0].data[0]); ++ for (i = 0; i < stats_iterms; i++) { ++ /* ++ * Data memory is continuous and only the first descriptor has a ++ * header in this command. ++ */ ++ *data += rte_le_to_cpu_64(*desc_data); ++ data++; ++ desc_data++; + } + rte_free(desc); + + return 0; + } + +-/* +- * Query Mac stat reg num command ,opcode id: 0x0033. +- * This command is used before send 'query_mac_stat command', the descriptor +- * number of 'query_mac_stat command' must match with reg_num in this command. +- * @praram rte_stats +- * Pointer to structure rte_eth_stats. +- * @return +- * 0 on success. +- */ + static int +-hns3_mac_query_reg_num(struct rte_eth_dev *dev, uint32_t *desc_num) ++hns3_mac_query_reg_num(struct hns3_hw *hw, uint32_t *reg_num) + { +- struct hns3_adapter *hns = dev->data->dev_private; +- struct hns3_hw *hw = &hns->hw; ++#define HNS3_MAC_STATS_RSV_REG_NUM_ON_HIP08_B 3 + struct hns3_cmd_desc desc; +- uint32_t *desc_data; +- uint32_t reg_num; + int ret; + + hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_QUERY_MAC_REG_NUM, true); + ret = hns3_cmd_send(hw, &desc, 1); +- if (ret) ++ if (ret) { ++ hns3_err(hw, "failed to query MAC statistic reg number, ret = %d", ++ ret); + return ret; ++ } + +- /* +- * The num of MAC statistics registers that are provided by IMP in this +- * version. +- */ +- desc_data = (uint32_t *)(&desc.data[0]); +- reg_num = rte_le_to_cpu_32(*desc_data); ++ /* The number of MAC statistics registers are provided by firmware. */ ++ *reg_num = rte_le_to_cpu_32(desc.data[0]); ++ if (*reg_num == 0) { ++ hns3_err(hw, "MAC statistic reg number is invalid!"); ++ return -ENODATA; ++ } + + /* +- * The descriptor number of 'query_additional_mac_stat command' is +- * '1 + (reg_num-3)/4 + ((reg_num-3)%4 !=0)'; +- * This value is 83 in this version ++ * If driver doesn't request the firmware to report more MAC statistics ++ * iterms and the total number of MAC statistics registers by using new ++ * method, firmware will only reports the number of valid statistics ++ * registers. However, structure hns3_mac_stats in driver contains valid ++ * and reserved statistics iterms. In this case, the total register ++ * number must be added to three reserved statistics registers. + */ +- *desc_num = 1 + ((reg_num - 3) >> 2) + +- (uint32_t)(((reg_num - 3) & 0x3) ? 
1 : 0); ++ *reg_num += HNS3_MAC_STATS_RSV_REG_NUM_ON_HIP08_B; ++ ++ return 0; ++} ++ ++int ++hns3_query_mac_stats_reg_num(struct hns3_hw *hw) ++{ ++ uint32_t mac_stats_reg_num = 0; ++ int ret; ++ ++ ret = hns3_mac_query_reg_num(hw, &mac_stats_reg_num); ++ if (ret) ++ return ret; ++ ++ hw->mac_stats_reg_num = mac_stats_reg_num; ++ if (hw->mac_stats_reg_num > sizeof(hw->mac_stats) / sizeof(uint64_t)) ++ hns3_warn(hw, "MAC stats reg number from firmware is greater than stats iterms in driver."); + + return 0; + } +@@ -429,15 +432,8 @@ hns3_query_update_mac_stats(struct rte_eth_dev *dev) + { + struct hns3_adapter *hns = dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; +- uint32_t desc_num; +- int ret; + +- ret = hns3_mac_query_reg_num(dev, &desc_num); +- if (ret == 0) +- ret = hns3_update_mac_stats(hw, desc_num); +- else +- hns3_err(hw, "Query mac reg num fail : %d", ret); +- return ret; ++ return hns3_update_mac_stats(hw); + } + + /* Get tqp stats from register */ +@@ -698,9 +694,13 @@ hns3_error_int_stats_add(struct hns3_adapter *hns, const char *err) + * @praram xstats + * A pointer to a table of structure of type *rte_eth_xstat* + * to be filled with device statistics ids and values. +- * This parameter can be set to NULL if n is 0. ++ * This parameter can be set to NULL if and only if n is 0. + * @param n + * The size of the xstats array (number of elements). ++ * If lower than the required number of elements, the function returns the ++ * required number of elements. ++ * If equal to zero, the xstats parameter must be NULL, the function returns ++ * the required number of elements. + * @return + * 0 on fail, count(The size of the statistics elements) on success. + */ +@@ -720,9 +720,6 @@ hns3_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, + int count; + int ret; + +- if (xstats == NULL) +- return 0; +- + count = hns3_xstats_calc_num(dev); + if ((int)n < count) + return count; +@@ -899,7 +896,7 @@ hns3_dev_xstats_get_names(struct rte_eth_dev *dev, + * A pointer to an ids array passed by application. This tells which + * statistics values function should retrieve. This parameter + * can be set to NULL if size is 0. In this case function will retrieve +- * all avalible statistics. ++ * all available statistics. + * @param values + * A pointer to a table to be filled with device statistics values. 
+ * @param size +@@ -943,7 +940,7 @@ hns3_dev_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids, + len = cnt_stats * sizeof(struct rte_eth_xstat); + values_copy = rte_zmalloc("hns3_xstats_values", len, 0); + if (values_copy == NULL) { +- hns3_err(hw, "Failed to allocate %" PRIx64 " bytes needed " ++ hns3_err(hw, "Failed to allocate 0x%" PRIx64 " bytes needed " + "to store statistics values", len); + return -ENOMEM; + } +@@ -965,7 +962,7 @@ hns3_dev_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids, + + for (i = 0; i < size; i++) { + if (ids[i] >= cnt_stats) { +- hns3_err(hw, "ids[%u] (%" PRIx64 ") is invalid, " ++ hns3_err(hw, "ids[%u] (%" PRIu64 ") is invalid, " + "should < %u", i, ids[i], cnt_stats); + rte_free(values_copy); + return -EINVAL; +@@ -1024,7 +1021,7 @@ hns3_dev_xstats_get_names_by_id(struct rte_eth_dev *dev, + len = cnt_stats * sizeof(struct rte_eth_xstat_name); + names_copy = rte_zmalloc("hns3_xstats_names", len, 0); + if (names_copy == NULL) { +- hns3_err(hw, "Failed to allocate %" PRIx64 " bytes needed " ++ hns3_err(hw, "Failed to allocate 0x%" PRIx64 " bytes needed " + "to store statistics names", len); + return -ENOMEM; + } +@@ -1033,7 +1030,7 @@ hns3_dev_xstats_get_names_by_id(struct rte_eth_dev *dev, + + for (i = 0; i < size; i++) { + if (ids[i] >= cnt_stats) { +- hns3_err(hw, "ids[%u] (%" PRIx64 ") is invalid, " ++ hns3_err(hw, "ids[%u] (%" PRIu64 ") is invalid, " + "should < %u", i, ids[i], cnt_stats); + rte_free(names_copy); + return -EINVAL; +diff --git a/dpdk/drivers/net/hns3/hns3_stats.h b/dpdk/drivers/net/hns3/hns3_stats.h +index 9fcd5f9bbf..436fac3b31 100644 +--- a/dpdk/drivers/net/hns3/hns3_stats.h ++++ b/dpdk/drivers/net/hns3/hns3_stats.h +@@ -1,15 +1,10 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2018-2019 Hisilicon Limited. ++ * Copyright(c) 2018-2021 HiSilicon Limited. 
+ */ + + #ifndef _HNS3_STATS_H_ + #define _HNS3_STATS_H_ + +-/* stats macro */ +-#define HNS3_MAC_CMD_NUM 21 +-#define HNS3_RD_FIRST_STATS_NUM 2 +-#define HNS3_RD_OTHER_STATS_NUM 4 +- + /* TQP stats */ + struct hns3_tqp_stats { + uint64_t rcb_tx_ring_pktnum_rcd; /* Total num of transmitted packets */ +@@ -22,6 +17,7 @@ struct hns3_tqp_stats { + struct hns3_mac_stats { + uint64_t mac_tx_mac_pause_num; + uint64_t mac_rx_mac_pause_num; ++ uint64_t rsv0; + uint64_t mac_tx_pfc_pri0_pkt_num; + uint64_t mac_tx_pfc_pri1_pkt_num; + uint64_t mac_tx_pfc_pri2_pkt_num; +@@ -58,7 +54,7 @@ struct hns3_mac_stats { + uint64_t mac_tx_1519_2047_oct_pkt_num; + uint64_t mac_tx_2048_4095_oct_pkt_num; + uint64_t mac_tx_4096_8191_oct_pkt_num; +- uint64_t rsv0; ++ uint64_t rsv1; + uint64_t mac_tx_8192_9216_oct_pkt_num; + uint64_t mac_tx_9217_12287_oct_pkt_num; + uint64_t mac_tx_12288_16383_oct_pkt_num; +@@ -85,7 +81,7 @@ struct hns3_mac_stats { + uint64_t mac_rx_1519_2047_oct_pkt_num; + uint64_t mac_rx_2048_4095_oct_pkt_num; + uint64_t mac_rx_4096_8191_oct_pkt_num; +- uint64_t rsv1; ++ uint64_t rsv2; + uint64_t mac_rx_8192_9216_oct_pkt_num; + uint64_t mac_rx_9217_12287_oct_pkt_num; + uint64_t mac_rx_12288_16383_oct_pkt_num; +@@ -140,8 +136,8 @@ int hns3_dev_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + __rte_unused unsigned int size); + int hns3_dev_xstats_get_by_id(struct rte_eth_dev *dev, +- __rte_unused const uint64_t *ids, +- __rte_unused uint64_t *values, ++ const uint64_t *ids, ++ uint64_t *values, + uint32_t size); + int hns3_dev_xstats_get_names_by_id(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, +@@ -151,5 +147,6 @@ int hns3_stats_reset(struct rte_eth_dev *dev); + void hns3_error_int_stats_add(struct hns3_adapter *hns, const char *err); + int hns3_tqp_stats_init(struct hns3_hw *hw); + void hns3_tqp_stats_uninit(struct hns3_hw *hw); ++int hns3_query_mac_stats_reg_num(struct hns3_hw *hw); + + #endif /* _HNS3_STATS_H_ */ +diff --git a/dpdk/drivers/net/hns3/meson.build b/dpdk/drivers/net/hns3/meson.build +index 5674d986ba..208527ea29 100644 +--- a/dpdk/drivers/net/hns3/meson.build ++++ b/dpdk/drivers/net/hns3/meson.build +@@ -1,5 +1,5 @@ + # SPDX-License-Identifier: BSD-3-Clause +-# Copyright(c) 2018-2019 Hisilicon Limited ++# Copyright(c) 2018-2021 Hisilicon Limited + + if not is_linux + build = false +@@ -31,7 +31,25 @@ deps += ['hash'] + + if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') + sources += files('hns3_rxtx_vec.c') +- if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != '' ++ ++ # compile SVE when: ++ # a. support SVE in minimum instruction set baseline ++ # b. 
it's not minimum instruction set, but compiler support ++ if dpdk_conf.has('RTE_HAS_SVE_ACLE') + sources += files('hns3_rxtx_vec_sve.c') ++ elif cc.has_argument('-march=armv8.2-a+sve') and cc.check_header('arm_sve.h') ++ cflags += ['-DRTE_HAS_SVE_ACLE=1'] ++ sve_cflags = [] ++ foreach flag: cflags ++ if not (flag.startswith('-march=') or flag.startswith('-mcpu=') or flag.startswith('-mtune=')) ++ sve_cflags += flag ++ endif ++ endforeach ++ hns3_sve_lib = static_library('hns3_sve_lib', ++ 'hns3_rxtx_vec_sve.c', ++ dependencies: [static_rte_ethdev], ++ include_directories: includes, ++ c_args: [sve_cflags, '-march=armv8.2-a+sve']) ++ objs += hns3_sve_lib.extract_objects('hns3_rxtx_vec_sve.c') + endif + endif +diff --git a/dpdk/drivers/net/i40e/base/i40e_adminq.c b/dpdk/drivers/net/i40e/base/i40e_adminq.c +index 0da45f03e4..c63a38e900 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_adminq.c ++++ b/dpdk/drivers/net/i40e/base/i40e_adminq.c +@@ -468,7 +468,7 @@ enum i40e_status_code i40e_init_arq(struct i40e_hw *hw) + /* initialize base registers */ + ret_code = i40e_config_arq_regs(hw); + if (ret_code != I40E_SUCCESS) +- goto init_adminq_free_rings; ++ goto init_config_regs; + + /* success! */ + hw->aq.arq.count = hw->aq.num_arq_entries; +@@ -476,6 +476,10 @@ enum i40e_status_code i40e_init_arq(struct i40e_hw *hw) + + init_adminq_free_rings: + i40e_free_adminq_arq(hw); ++ return ret_code; ++ ++init_config_regs: ++ i40e_free_arq_bufs(hw); + + init_adminq_exit: + return ret_code; +@@ -648,8 +652,10 @@ enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw) + { + struct i40e_adminq_info *aq = &hw->aq; + enum i40e_status_code ret_code; +- u16 cfg_ptr, oem_hi, oem_lo; +- u16 eetrack_lo, eetrack_hi; ++ u16 oem_hi = 0, oem_lo = 0; ++ u16 eetrack_hi = 0; ++ u16 eetrack_lo = 0; ++ u16 cfg_ptr = 0; + int retry = 0; + + /* verify input for valid configuration */ +diff --git a/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h b/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h +index 2ca41db5d3..4d80568050 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h ++++ b/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h +@@ -1947,8 +1947,10 @@ enum i40e_aq_phy_type { + I40E_PHY_TYPE_25GBASE_LR = 0x22, + I40E_PHY_TYPE_25GBASE_AOC = 0x23, + I40E_PHY_TYPE_25GBASE_ACC = 0x24, +- I40E_PHY_TYPE_2_5GBASE_T = 0x30, +- I40E_PHY_TYPE_5GBASE_T = 0x31, ++ I40E_PHY_TYPE_2_5GBASE_T = 0x26, ++ I40E_PHY_TYPE_5GBASE_T = 0x27, ++ I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS = 0x30, ++ I40E_PHY_TYPE_5GBASE_T_LINK_STATUS = 0x31, + I40E_PHY_TYPE_MAX, + I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP = 0xFD, + I40E_PHY_TYPE_EMPTY = 0xFE, +diff --git a/dpdk/drivers/net/i40e/base/i40e_common.c b/dpdk/drivers/net/i40e/base/i40e_common.c +index e20bb9ac35..f11d25d0d8 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_common.c ++++ b/dpdk/drivers/net/i40e/base/i40e_common.c +@@ -1276,12 +1276,15 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) + case I40E_PHY_TYPE_40GBASE_LR4: + case I40E_PHY_TYPE_25GBASE_LR: + case I40E_PHY_TYPE_25GBASE_SR: ++ case I40E_PHY_TYPE_10GBASE_AOC: ++ case I40E_PHY_TYPE_25GBASE_AOC: ++ case I40E_PHY_TYPE_40GBASE_AOC: + media = I40E_MEDIA_TYPE_FIBER; + break; + case I40E_PHY_TYPE_100BASE_TX: + case I40E_PHY_TYPE_1000BASE_T: +- case I40E_PHY_TYPE_2_5GBASE_T: +- case I40E_PHY_TYPE_5GBASE_T: ++ case I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS: ++ case I40E_PHY_TYPE_5GBASE_T_LINK_STATUS: + case I40E_PHY_TYPE_10GBASE_T: + media = I40E_MEDIA_TYPE_BASET; + break; +@@ -1290,10 +1293,7 @@ STATIC enum i40e_media_type 
i40e_get_media_type(struct i40e_hw *hw) + case I40E_PHY_TYPE_10GBASE_CR1: + case I40E_PHY_TYPE_40GBASE_CR4: + case I40E_PHY_TYPE_10GBASE_SFPP_CU: +- case I40E_PHY_TYPE_40GBASE_AOC: +- case I40E_PHY_TYPE_10GBASE_AOC: + case I40E_PHY_TYPE_25GBASE_CR: +- case I40E_PHY_TYPE_25GBASE_AOC: + case I40E_PHY_TYPE_25GBASE_ACC: + media = I40E_MEDIA_TYPE_DA; + break; +@@ -1341,7 +1341,7 @@ STATIC enum i40e_status_code i40e_poll_globr(struct i40e_hw *hw, + return I40E_ERR_RESET_FAILED; + } + +-#define I40E_PF_RESET_WAIT_COUNT 200 ++#define I40E_PF_RESET_WAIT_COUNT 1000 + /** + * i40e_pf_reset - Reset the PF + * @hw: pointer to the hardware structure +@@ -2078,6 +2078,9 @@ enum i40e_status_code i40e_aq_get_link_info(struct i40e_hw *hw, + hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) + hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; + ++ /* 'Get Link Status' response data structure from X722 FW has ++ * different format and does not contain this information ++ */ + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE && + hw->mac.type != I40E_MAC_X722) { + __le32 tmp; +@@ -2674,7 +2677,7 @@ enum i40e_status_code i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, + } + + /** +- * i40e_get_vsi_params - get VSI configuration info ++ * i40e_aq_get_vsi_params - get VSI configuration info + * @hw: pointer to the hw struct + * @vsi_ctx: pointer to a vsi context struct + * @cmd_details: pointer to command details structure or NULL +@@ -2935,7 +2938,7 @@ enum i40e_status_code i40e_get_link_status(struct i40e_hw *hw, bool *link_up) + } + + /** +- * i40e_updatelink_status - update status of the HW network link ++ * i40e_update_link_info - update status of the HW network link + * @hw: pointer to the hw struct + **/ + enum i40e_status_code i40e_update_link_info(struct i40e_hw *hw) +@@ -2948,10 +2951,13 @@ enum i40e_status_code i40e_update_link_info(struct i40e_hw *hw) + return status; + + /* extra checking needed to ensure link info to user is timely */ +- if ((hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) && +- ((hw->phy.link_info.link_info & I40E_AQ_LINK_UP) || +- !(hw->phy.link_info_old.link_info & I40E_AQ_LINK_UP))) { +- status = i40e_aq_get_phy_capabilities(hw, false, false, ++ if (((hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) && ++ ((hw->phy.link_info.link_info & I40E_AQ_LINK_UP) || ++ !(hw->phy.link_info_old.link_info & I40E_AQ_LINK_UP))) || ++ hw->mac.type == I40E_MAC_X722) { ++ status = i40e_aq_get_phy_capabilities(hw, false, ++ hw->mac.type == ++ I40E_MAC_X722, + &abilities, NULL); + if (status) + return status; +@@ -4717,7 +4723,7 @@ enum i40e_status_code i40e_aq_del_udp_tunnel(struct i40e_hw *hw, u8 index, + } + + /** +- * i40e_aq_get_switch_resource_alloc (0x0204) ++ * i40e_aq_get_switch_resource_alloc - command (0x0204) to get allocations + * @hw: pointer to the hw struct + * @num_entries: pointer to u8 to store the number of resource entries returned + * @buf: pointer to a user supplied buffer. This buffer must be large enough +@@ -5856,7 +5862,7 @@ enum i40e_status_code i40e_aq_add_cloud_filters(struct i40e_hw *hw, + * @filter_count: number of filters contained in the buffer + * + * Set the cloud filters for a given VSI. The contents of the +- * i40e_aqc_cloud_filters_element_bb are filled in by the caller of the ++ * i40e_aqc_cloud_filters_element_bb are filled in by the caller of + * the function. 
+ * + **/ +@@ -6864,7 +6870,7 @@ u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num) + } + + /** +- * i40e_blink_phy_led ++ * i40e_blink_phy_link_led + * @hw: pointer to the HW structure + * @time: time how long led will blinks in secs + * @interval: gap between LED on and off in msecs +@@ -7711,7 +7717,7 @@ enum i40e_status_code i40e_aq_set_arp_proxy_config(struct i40e_hw *hw, + } + + /** +- * i40e_aq_opc_set_ns_proxy_table_entry ++ * i40e_aq_set_ns_proxy_table_entry + * @hw: pointer to the HW structure + * @ns_proxy_table_entry: pointer to NS table entry command struct + * @cmd_details: pointer to command details +diff --git a/dpdk/drivers/net/i40e/base/i40e_dcb.c b/dpdk/drivers/net/i40e/base/i40e_dcb.c +index 388af3d64d..46add19c9f 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_dcb.c ++++ b/dpdk/drivers/net/i40e/base/i40e_dcb.c +@@ -235,7 +235,7 @@ static void i40e_parse_ieee_app_tlv(struct i40e_lldp_org_tlv *tlv, + } + + /** +- * i40e_parse_ieee_etsrec_tlv ++ * i40e_parse_ieee_tlv + * @tlv: IEEE 802.1Qaz TLV + * @dcbcfg: Local store to update ETS REC data + * +diff --git a/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c b/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c +index d3969396f0..d3bd683ff3 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c ++++ b/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c +@@ -516,7 +516,7 @@ enum i40e_status_code i40e_configure_lan_hmc(struct i40e_hw *hw, + } + + /** +- * i40e_delete_hmc_object - remove hmc objects ++ * i40e_delete_lan_hmc_object - remove hmc objects + * @hw: pointer to the HW structure + * @info: pointer to i40e_hmc_delete_obj_info struct + * +diff --git a/dpdk/drivers/net/i40e/base/i40e_nvm.c b/dpdk/drivers/net/i40e/base/i40e_nvm.c +index 561ed21136..f385042601 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_nvm.c ++++ b/dpdk/drivers/net/i40e/base/i40e_nvm.c +@@ -7,7 +7,7 @@ + #include "i40e_prototype.h" + + /** +- * i40e_init_nvm_ops - Initialize NVM function pointers ++ * i40e_init_nvm - Initialize NVM function pointers + * @hw: pointer to the HW structure + * + * Setup the function pointers and the NVM info structure. 
Should be called +@@ -755,10 +755,11 @@ enum i40e_status_code i40e_update_nvm_checksum(struct i40e_hw *hw) + DEBUGFUNC("i40e_update_nvm_checksum"); + + ret_code = i40e_calc_nvm_checksum(hw, &checksum); +- le_sum = CPU_TO_LE16(checksum); +- if (ret_code == I40E_SUCCESS) ++ if (ret_code == I40E_SUCCESS) { ++ le_sum = CPU_TO_LE16(checksum); + ret_code = i40e_write_nvm_aq(hw, 0x00, I40E_SR_SW_CHECKSUM_WORD, + 1, &le_sum, true); ++ } + + return ret_code; + } +diff --git a/dpdk/drivers/net/i40e/base/i40e_type.h b/dpdk/drivers/net/i40e/base/i40e_type.h +index cf41345834..4674715ed7 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_type.h ++++ b/dpdk/drivers/net/i40e/base/i40e_type.h +@@ -329,12 +329,8 @@ struct i40e_phy_info { + I40E_PHY_TYPE_OFFSET) + #define I40E_CAP_PHY_TYPE_25GBASE_ACC BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC + \ + I40E_PHY_TYPE_OFFSET) +-/* Offset for 2.5G/5G PHY Types value to bit number conversion */ +-#define I40E_PHY_TYPE_OFFSET2 (-10) +-#define I40E_CAP_PHY_TYPE_2_5GBASE_T BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T + \ +- I40E_PHY_TYPE_OFFSET2) +-#define I40E_CAP_PHY_TYPE_5GBASE_T BIT_ULL(I40E_PHY_TYPE_5GBASE_T + \ +- I40E_PHY_TYPE_OFFSET2) ++#define I40E_CAP_PHY_TYPE_2_5GBASE_T BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T) ++#define I40E_CAP_PHY_TYPE_5GBASE_T BIT_ULL(I40E_PHY_TYPE_5GBASE_T) + #define I40E_HW_CAP_MAX_GPIO 30 + #define I40E_HW_CAP_MDIO_PORT_MODE_MDIO 0 + #define I40E_HW_CAP_MDIO_PORT_MODE_I2C 1 +diff --git a/dpdk/drivers/net/i40e/base/virtchnl.h b/dpdk/drivers/net/i40e/base/virtchnl.h +index 9c64fd4690..648072f5bb 100644 +--- a/dpdk/drivers/net/i40e/base/virtchnl.h ++++ b/dpdk/drivers/net/i40e/base/virtchnl.h +@@ -402,9 +402,36 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_select); + * PF removes the filters and returns status. + */ + ++/* VIRTCHNL_ETHER_ADDR_LEGACY ++ * Prior to adding the @type member to virtchnl_ether_addr, there were 2 pad ++ * bytes. Moving forward all VF drivers should not set type to ++ * VIRTCHNL_ETHER_ADDR_LEGACY. This is only here to not break previous/legacy ++ * behavior. The control plane function (i.e. PF) can use a best effort method ++ * of tracking the primary/device unicast in this case, but there is no ++ * guarantee and functionality depends on the implementation of the PF. ++ */ ++ ++/* VIRTCHNL_ETHER_ADDR_PRIMARY ++ * All VF drivers should set @type to VIRTCHNL_ETHER_ADDR_PRIMARY for the ++ * primary/device unicast MAC address filter for VIRTCHNL_OP_ADD_ETH_ADDR and ++ * VIRTCHNL_OP_DEL_ETH_ADDR. This allows for the underlying control plane ++ * function (i.e. PF) to accurately track and use this MAC address for ++ * displaying on the host and for VM/function reset. ++ */ ++ ++/* VIRTCHNL_ETHER_ADDR_EXTRA ++ * All VF drivers should set @type to VIRTCHNL_ETHER_ADDR_EXTRA for any extra ++ * unicast and/or multicast filters that are being added/deleted via ++ * VIRTCHNL_OP_DEL_ETH_ADDR/VIRTCHNL_OP_ADD_ETH_ADDR respectively. 
++ */ + struct virtchnl_ether_addr { + u8 addr[VIRTCHNL_ETH_LENGTH_OF_ADDRESS]; +- u8 pad[2]; ++ u8 type; ++#define VIRTCHNL_ETHER_ADDR_LEGACY 0 ++#define VIRTCHNL_ETHER_ADDR_PRIMARY 1 ++#define VIRTCHNL_ETHER_ADDR_EXTRA 2 ++#define VIRTCHNL_ETHER_ADDR_TYPE_MASK 3 /* first two bits of type are valid */ ++ u8 pad; + }; + + VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_ether_addr); +diff --git a/dpdk/drivers/net/i40e/i40e_ethdev.c b/dpdk/drivers/net/i40e/i40e_ethdev.c +index ef4f28fe53..c2d52e4acc 100644 +--- a/dpdk/drivers/net/i40e/i40e_ethdev.c ++++ b/dpdk/drivers/net/i40e/i40e_ethdev.c +@@ -202,12 +202,12 @@ + #define I40E_TRANSLATE_INSET 0 + #define I40E_TRANSLATE_REG 1 + +-#define I40E_INSET_IPV4_TOS_MASK 0x0009FF00UL +-#define I40E_INSET_IPv4_TTL_MASK 0x000D00FFUL +-#define I40E_INSET_IPV4_PROTO_MASK 0x000DFF00UL +-#define I40E_INSET_IPV6_TC_MASK 0x0009F00FUL +-#define I40E_INSET_IPV6_HOP_LIMIT_MASK 0x000CFF00UL +-#define I40E_INSET_IPV6_NEXT_HDR_MASK 0x000C00FFUL ++#define I40E_INSET_IPV4_TOS_MASK 0x0000FF00UL ++#define I40E_INSET_IPV4_TTL_MASK 0x000000FFUL ++#define I40E_INSET_IPV4_PROTO_MASK 0x0000FF00UL ++#define I40E_INSET_IPV6_TC_MASK 0x0000F00FUL ++#define I40E_INSET_IPV6_HOP_LIMIT_MASK 0x0000FF00UL ++#define I40E_INSET_IPV6_NEXT_HDR_MASK 0x000000FFUL + + /* PCI offset for querying capability */ + #define PCI_DEV_CAP_REG 0xA4 +@@ -220,6 +220,25 @@ + /* Bit mask of Extended Tag enable/disable */ + #define PCI_DEV_CTRL_EXT_TAG_MASK (1 << PCI_DEV_CTRL_EXT_TAG_SHIFT) + ++#define I40E_GLQF_PIT_IPV4_START 2 ++#define I40E_GLQF_PIT_IPV4_COUNT 2 ++#define I40E_GLQF_PIT_IPV6_START 4 ++#define I40E_GLQF_PIT_IPV6_COUNT 2 ++ ++#define I40E_GLQF_PIT_SOURCE_OFF_GET(a) \ ++ (((a) & I40E_GLQF_PIT_SOURCE_OFF_MASK) >> \ ++ I40E_GLQF_PIT_SOURCE_OFF_SHIFT) ++ ++#define I40E_GLQF_PIT_DEST_OFF_GET(a) \ ++ (((a) & I40E_GLQF_PIT_DEST_OFF_MASK) >> \ ++ I40E_GLQF_PIT_DEST_OFF_SHIFT) ++ ++#define I40E_GLQF_PIT_FSIZE_GET(a) (((a) & I40E_GLQF_PIT_FSIZE_MASK) >> \ ++ I40E_GLQF_PIT_FSIZE_SHIFT) ++ ++#define I40E_GLQF_PIT_BUILD(off, mask) (((off) << 16) | (mask)) ++#define I40E_FDIR_FIELD_OFFSET(a) ((a) >> 1) ++ + static int eth_i40e_dev_init(struct rte_eth_dev *eth_dev, void *init_params); + static int eth_i40e_dev_uninit(struct rte_eth_dev *eth_dev); + static int i40e_dev_configure(struct rte_eth_dev *dev); +@@ -378,6 +397,7 @@ static int i40e_set_default_mac_addr(struct rte_eth_dev *dev, + struct rte_ether_addr *mac_addr); + + static int i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); ++static void i40e_set_mac_max_frame(struct rte_eth_dev *dev, uint16_t size); + + static int i40e_ethertype_filter_convert( + const struct rte_eth_ethertype_filter *input, +@@ -515,7 +535,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = { + /* store statistics names and its offset in stats structure */ + struct rte_i40e_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; +- unsigned offset; ++ int offset; + }; + + static const struct rte_i40e_xstats_name_off rte_i40e_stats_strings[] = { +@@ -525,6 +545,8 @@ static const struct rte_i40e_xstats_name_off rte_i40e_stats_strings[] = { + {"rx_dropped_packets", offsetof(struct i40e_eth_stats, rx_discards)}, + {"rx_unknown_protocol_packets", offsetof(struct i40e_eth_stats, + rx_unknown_protocol)}, ++ {"rx_size_error_packets", offsetof(struct i40e_pf, rx_err1) - ++ offsetof(struct i40e_pf, stats)}, + {"tx_unicast_packets", offsetof(struct i40e_eth_stats, tx_unicast)}, + {"tx_multicast_packets", offsetof(struct i40e_eth_stats, tx_multicast)}, + {"tx_broadcast_packets", 
offsetof(struct i40e_eth_stats, tx_broadcast)}, +@@ -708,10 +730,11 @@ i40e_write_global_rx_ctl(struct i40e_hw *hw, uint32_t reg_addr, + uint32_t reg_val) + { + uint32_t ori_reg_val; +- struct rte_eth_dev *dev; ++ struct rte_eth_dev_data *dev_data = ++ ((struct i40e_adapter *)hw->back)->pf.dev_data; ++ struct rte_eth_dev *dev = &rte_eth_devices[dev_data->port_id]; + + ori_reg_val = i40e_read_rx_ctl(hw, reg_addr); +- dev = ((struct i40e_adapter *)hw->back)->eth_dev; + i40e_write_rx_ctl(hw, reg_addr, reg_val); + if (ori_reg_val != reg_val) + PMD_DRV_LOG(WARNING, +@@ -830,6 +853,8 @@ floating_veb_list_handler(__rte_unused const char *key, + idx = strtoul(floating_veb_value, &end, 10); + if (errno || end == NULL) + return -1; ++ if (idx < 0) ++ return -1; + while (isblank(*end)) + end++; + if (*end == '-') { +@@ -1061,6 +1086,7 @@ i40e_init_fdir_filter_list(struct rte_eth_dev *dev) + char fdir_hash_name[RTE_HASH_NAMESIZE]; + uint32_t alloc = hw->func_caps.fd_filters_guaranteed; + uint32_t best = hw->func_caps.fd_filters_best_effort; ++ enum i40e_filter_pctype pctype; + struct rte_bitmap *bmp = NULL; + uint32_t bmp_size; + void *mem = NULL; +@@ -1109,6 +1135,10 @@ i40e_init_fdir_filter_list(struct rte_eth_dev *dev) + goto err_fdir_filter_array_alloc; + } + ++ for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; ++ pctype <= I40E_FILTER_PCTYPE_L2_PAYLOAD; pctype++) ++ pf->fdir.flow_count[pctype] = 0; ++ + fdir_info->fdir_space_size = alloc + best; + fdir_info->fdir_actual_cnt = 0; + fdir_info->fdir_guarantee_total_space = alloc; +@@ -1295,7 +1325,9 @@ i40e_aq_debug_write_global_register(struct i40e_hw *hw, + struct i40e_asq_cmd_details *cmd_details) + { + uint64_t ori_reg_val; +- struct rte_eth_dev *dev; ++ struct rte_eth_dev_data *dev_data = ++ ((struct i40e_adapter *)hw->back)->pf.dev_data; ++ struct rte_eth_dev *dev = &rte_eth_devices[dev_data->port_id]; + int ret; + + ret = i40e_aq_debug_read_register(hw, reg_addr, &ori_reg_val, NULL); +@@ -1305,7 +1337,6 @@ i40e_aq_debug_write_global_register(struct i40e_hw *hw, + reg_addr); + return -EIO; + } +- dev = ((struct i40e_adapter *)hw->back)->eth_dev; + + if (ori_reg_val != reg_val) + PMD_DRV_LOG(WARNING, +@@ -1477,10 +1508,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) + intr_handle = &pci_dev->intr_handle; + + rte_eth_copy_pci_info(dev, pci_dev); +- dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + + pf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); +- pf->adapter->eth_dev = dev; + pf->dev_data = dev->data; + + hw->back = I40E_PF_TO_ADAPTER(pf); +@@ -1747,11 +1776,6 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) + */ + i40e_add_tx_flow_control_drop_filter(pf); + +- /* Set the max frame size to 0x2600 by default, +- * in case other drivers changed the default value. 
+- */ +- i40e_aq_set_mac_config(hw, I40E_FRAME_SIZE_MAX, TRUE, false, 0, NULL); +- + /* initialize mirror rule list */ + TAILQ_INIT(&pf->mirror_list); + +@@ -1790,12 +1814,14 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) + return 0; + + err_init_fdir_filter_list: +- rte_free(pf->tunnel.hash_table); ++ rte_hash_free(pf->tunnel.hash_table); + rte_free(pf->tunnel.hash_map); + err_init_tunnel_filter_list: +- rte_free(pf->ethertype.hash_table); ++ rte_hash_free(pf->ethertype.hash_table); + rte_free(pf->ethertype.hash_map); + err_init_ethtype_filter_list: ++ rte_intr_callback_unregister(intr_handle, ++ i40e_dev_interrupt_handler, dev); + rte_free(dev->data->mac_addrs); + dev->data->mac_addrs = NULL; + err_mac_alloc: +@@ -1965,7 +1991,7 @@ i40e_dev_configure(struct rte_eth_dev *dev) + goto err; + + /* VMDQ setup. +- * General PMD driver call sequence are NIC init, configure, ++ * General PMD call sequence are NIC init, configure, + * rx/tx_queue_setup and dev_start. In rx/tx_queue_setup() function, it + * will try to lookup the VSI that specific queue belongs to if VMDQ + * applicable. So, VMDQ setting has to be done before +@@ -2013,7 +2039,7 @@ i40e_dev_configure(struct rte_eth_dev *dev) + void + i40e_vsi_queues_unbind_intr(struct i40e_vsi *vsi) + { +- struct rte_eth_dev *dev = vsi->adapter->eth_dev; ++ struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(vsi); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); +@@ -2129,7 +2155,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t msix_vect, + int + i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t itr_idx) + { +- struct rte_eth_dev *dev = vsi->adapter->eth_dev; ++ struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(vsi); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); +@@ -2205,7 +2231,7 @@ i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t itr_idx) + void + i40e_vsi_enable_queues_intr(struct i40e_vsi *vsi) + { +- struct rte_eth_dev *dev = vsi->adapter->eth_dev; ++ struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(vsi); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); +@@ -2232,7 +2258,7 @@ i40e_vsi_enable_queues_intr(struct i40e_vsi *vsi) + void + i40e_vsi_disable_queues_intr(struct i40e_vsi *vsi) + { +- struct rte_eth_dev *dev = vsi->adapter->eth_dev; ++ struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(vsi); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); +@@ -2345,7 +2371,8 @@ i40e_phy_conf_link(struct i40e_hw *hw, + phy_conf.phy_type = is_up ? cpu_to_le32(phy_type_mask) : 0; + phy_conf.phy_type_ext = is_up ? 
(I40E_AQ_PHY_TYPE_EXT_25G_KR | + I40E_AQ_PHY_TYPE_EXT_25G_CR | I40E_AQ_PHY_TYPE_EXT_25G_SR | +- I40E_AQ_PHY_TYPE_EXT_25G_LR) : 0; ++ I40E_AQ_PHY_TYPE_EXT_25G_LR | I40E_AQ_PHY_TYPE_EXT_25G_AOC | ++ I40E_AQ_PHY_TYPE_EXT_25G_ACC) : 0; + phy_conf.fec_config = phy_ab.fec_cfg_curr_mod_ext_info; + phy_conf.eee_capability = phy_ab.eee_capability; + phy_conf.eeer = phy_ab.eeer_val; +@@ -2403,6 +2430,7 @@ i40e_dev_start(struct rte_eth_dev *dev) + uint32_t intr_vector = 0; + struct i40e_vsi *vsi; + uint16_t nb_rxq, nb_txq; ++ uint16_t max_frame_size; + + hw->adapter_stopped = 0; + +@@ -2525,7 +2553,7 @@ i40e_dev_start(struct rte_eth_dev *dev) + if (ret != I40E_SUCCESS) + PMD_DRV_LOG(WARNING, "Fail to set phy mask"); + +- /* Call get_link_info aq commond to enable/disable LSE */ ++ /* Call get_link_info aq command to enable/disable LSE */ + i40e_dev_link_update(dev, 0); + } + +@@ -2544,6 +2572,9 @@ i40e_dev_start(struct rte_eth_dev *dev) + "please call hierarchy_commit() " + "before starting the port"); + ++ max_frame_size = dev->data->mtu + I40E_ETH_OVERHEAD; ++ i40e_set_mac_max_frame(dev, max_frame_size); ++ + return I40E_SUCCESS; + + tx_err: +@@ -2911,6 +2942,9 @@ i40e_dev_set_link_down(struct rte_eth_dev *dev) + return i40e_phy_conf_link(hw, abilities, speed, false); + } + ++#define CHECK_INTERVAL 100 /* 100ms */ ++#define MAX_REPEAT_TIME 10 /* 1s (10 * 100ms) in total */ ++ + static __rte_always_inline void + update_link_reg(struct i40e_hw *hw, struct rte_eth_link *link) + { +@@ -2978,8 +3012,6 @@ static __rte_always_inline void + update_link_aq(struct i40e_hw *hw, struct rte_eth_link *link, + bool enable_lse, int wait_to_complete) + { +-#define CHECK_INTERVAL 100 /* 100ms */ +-#define MAX_REPEAT_TIME 10 /* 1s (10 * 100ms) in total */ + uint32_t rep_cnt = MAX_REPEAT_TIME; + struct i40e_link_status link_status; + int status; +@@ -3265,6 +3297,10 @@ i40e_read_stats_registers(struct i40e_pf *pf, struct i40e_hw *hw) + pf->offset_loaded, + &os->eth.rx_unknown_protocol, + &ns->eth.rx_unknown_protocol); ++ i40e_stat_update_48(hw, I40E_GL_RXERR1_H(hw->pf_id + I40E_MAX_VF), ++ I40E_GL_RXERR1_L(hw->pf_id + I40E_MAX_VF), ++ pf->offset_loaded, &pf->rx_err1_offset, ++ &pf->rx_err1); + i40e_stat_update_48_in_64(hw, I40E_GLPRT_GOTCH(hw->port), + I40E_GLPRT_GOTCL(hw->port), + pf->offset_loaded, &os->eth.tx_bytes, +@@ -3464,7 +3500,8 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + stats->ipackets = pf->main_vsi->eth_stats.rx_unicast + + pf->main_vsi->eth_stats.rx_multicast + + pf->main_vsi->eth_stats.rx_broadcast - +- pf->main_vsi->eth_stats.rx_discards; ++ pf->main_vsi->eth_stats.rx_discards - ++ pf->rx_err1; + stats->opackets = ns->eth.tx_unicast + + ns->eth.tx_multicast + + ns->eth.tx_broadcast; +@@ -3478,7 +3515,8 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + pf->main_vsi->eth_stats.rx_discards; + stats->ierrors = ns->crc_errors + + ns->rx_length_errors + ns->rx_undersize + +- ns->rx_oversize + ns->rx_fragments + ns->rx_jabber; ++ ns->rx_oversize + ns->rx_fragments + ns->rx_jabber + ++ pf->rx_err1; + + if (pf->vfs) { + for (i = 0; i < pf->vf_num; i++) { +@@ -3613,7 +3651,7 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev, + count++; + } + +- /* Get individiual stats from i40e_hw_port struct */ ++ /* Get individual stats from i40e_hw_port struct */ + for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) { + strlcpy(xstats_names[count].name, + rte_i40e_hw_port_strings[i].name, +@@ -3671,7 +3709,7 @@ i40e_dev_xstats_get(struct 
rte_eth_dev *dev, struct rte_eth_xstat *xstats, + count++; + } + +- /* Get individiual stats from i40e_hw_port struct */ ++ /* Get individual stats from i40e_hw_port struct */ + for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) { + xstats[count].value = *(uint64_t *)(((char *)hw_stats) + + rte_i40e_hw_port_strings[i].offset); +@@ -3724,9 +3762,11 @@ i40e_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) + ((hw->nvm.version >> 4) & 0xff), + (hw->nvm.version & 0xf), hw->nvm.eetrack, + ver, build, patch); ++ if (ret < 0) ++ return -EINVAL; + + ret += 1; /* add the size of '\0' */ +- if (fw_size < (u32)ret) ++ if (fw_size < (size_t)ret) + return ret; + else + return 0; +@@ -3803,6 +3843,7 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + DEV_TX_OFFLOAD_IPIP_TNL_TSO | + DEV_TX_OFFLOAD_GENEVE_TNL_TSO | + DEV_TX_OFFLOAD_MULTI_SEGS | ++ DEV_TX_OFFLOAD_OUTER_UDP_CKSUM | + dev_info->tx_queue_offload_capa; + dev_info->dev_capa = + RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP | +@@ -4581,13 +4622,15 @@ i40e_allocate_dma_mem_d(__rte_unused struct i40e_hw *hw, + u64 size, + u32 alignment) + { ++ static uint64_t i40e_dma_memzone_id; + const struct rte_memzone *mz = NULL; + char z_name[RTE_MEMZONE_NAMESIZE]; + + if (!mem) + return I40E_ERR_PARAM; + +- snprintf(z_name, sizeof(z_name), "i40e_dma_%"PRIu64, rte_rand()); ++ snprintf(z_name, sizeof(z_name), "i40e_dma_%" PRIu64, ++ __atomic_fetch_add(&i40e_dma_memzone_id, 1, __ATOMIC_RELAXED)); + mz = rte_memzone_reserve_bounded(z_name, size, SOCKET_ID_ANY, + RTE_MEMZONE_IOVA_CONTIG, alignment, RTE_PGSIZE_2M); + if (!mz) +@@ -5596,7 +5639,7 @@ i40e_vsi_get_bw_config(struct i40e_vsi *vsi) + &ets_sla_config, NULL); + if (ret != I40E_SUCCESS) { + PMD_DRV_LOG(ERR, +- "VSI failed to get TC bandwdith configuration %u", ++ "VSI failed to get TC bandwidth configuration %u", + hw->aq.asq_last_status); + return ret; + } +@@ -6255,6 +6298,8 @@ i40e_pf_setup(struct i40e_pf *pf) + memset(&pf->stats_offset, 0, sizeof(struct i40e_hw_port_stats)); + memset(&pf->internal_stats, 0, sizeof(struct i40e_eth_stats)); + memset(&pf->internal_stats_offset, 0, sizeof(struct i40e_eth_stats)); ++ pf->rx_err1 = 0; ++ pf->rx_err1_offset = 0; + + ret = i40e_pf_get_switch_config(pf); + if (ret != I40E_SUCCESS) { +@@ -6444,8 +6489,7 @@ i40e_dev_tx_init(struct i40e_pf *pf) + break; + } + if (ret == I40E_SUCCESS) +- i40e_set_tx_function(container_of(pf, struct i40e_adapter, pf) +- ->eth_dev); ++ i40e_set_tx_function(&rte_eth_devices[pf->dev_data->port_id]); + + return ret; + } +@@ -6473,8 +6517,7 @@ i40e_dev_rx_init(struct i40e_pf *pf) + } + } + if (ret == I40E_SUCCESS) +- i40e_set_rx_function(container_of(pf, struct i40e_adapter, pf) +- ->eth_dev); ++ i40e_set_rx_function(&rte_eth_devices[pf->dev_data->port_id]); + + return ret; + } +@@ -6771,6 +6814,7 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev) + if (!ret) + rte_eth_dev_callback_process(dev, + RTE_ETH_EVENT_INTR_LSC, NULL); ++ + break; + default: + PMD_DRV_LOG(DEBUG, "Request %u is not supported yet", +@@ -6874,7 +6918,7 @@ i40e_handle_mdd_event(struct rte_eth_dev *dev) + * @param handle + * Pointer to interrupt handle. + * @param param +- * The address of parameter (struct rte_eth_dev *) regsitered before. ++ * The address of parameter (struct rte_eth_dev *) registered before. 
+ * + * @return + * void +@@ -7913,7 +7957,7 @@ i40e_status_code i40e_replace_mpls_l1_filter(struct i40e_pf *pf) + struct i40e_aqc_replace_cloud_filters_cmd filter_replace; + struct i40e_aqc_replace_cloud_filters_cmd_buf filter_replace_buf; + struct i40e_hw *hw = I40E_PF_TO_HW(pf); +- struct rte_eth_dev *dev = ((struct i40e_adapter *)hw->back)->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[pf->dev_data->port_id]; + enum i40e_status_code status = I40E_SUCCESS; + + if (pf->support_multi_driver) { +@@ -7974,7 +8018,7 @@ i40e_status_code i40e_replace_mpls_cloud_filter(struct i40e_pf *pf) + struct i40e_aqc_replace_cloud_filters_cmd filter_replace; + struct i40e_aqc_replace_cloud_filters_cmd_buf filter_replace_buf; + struct i40e_hw *hw = I40E_PF_TO_HW(pf); +- struct rte_eth_dev *dev = ((struct i40e_adapter *)hw->back)->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[pf->dev_data->port_id]; + enum i40e_status_code status = I40E_SUCCESS; + + if (pf->support_multi_driver) { +@@ -8049,7 +8093,7 @@ i40e_replace_gtp_l1_filter(struct i40e_pf *pf) + struct i40e_aqc_replace_cloud_filters_cmd filter_replace; + struct i40e_aqc_replace_cloud_filters_cmd_buf filter_replace_buf; + struct i40e_hw *hw = I40E_PF_TO_HW(pf); +- struct rte_eth_dev *dev = ((struct i40e_adapter *)hw->back)->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[pf->dev_data->port_id]; + enum i40e_status_code status = I40E_SUCCESS; + + if (pf->support_multi_driver) { +@@ -8137,7 +8181,7 @@ i40e_status_code i40e_replace_gtp_cloud_filter(struct i40e_pf *pf) + struct i40e_aqc_replace_cloud_filters_cmd filter_replace; + struct i40e_aqc_replace_cloud_filters_cmd_buf filter_replace_buf; + struct i40e_hw *hw = I40E_PF_TO_HW(pf); +- struct rte_eth_dev *dev = ((struct i40e_adapter *)hw->back)->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[pf->dev_data->port_id]; + enum i40e_status_code status = I40E_SUCCESS; + + if (pf->support_multi_driver) { +@@ -8212,7 +8256,7 @@ i40e_replace_port_l1_filter(struct i40e_pf *pf, + struct i40e_aqc_replace_cloud_filters_cmd filter_replace; + enum i40e_status_code status = I40E_SUCCESS; + struct i40e_hw *hw = I40E_PF_TO_HW(pf); +- struct rte_eth_dev *dev = ((struct i40e_adapter *)hw->back)->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[pf->dev_data->port_id]; + + if (pf->support_multi_driver) { + PMD_DRV_LOG(ERR, "Replace l1 filter is not supported."); +@@ -8284,7 +8328,7 @@ i40e_replace_port_cloud_filter(struct i40e_pf *pf, + struct i40e_aqc_replace_cloud_filters_cmd filter_replace; + enum i40e_status_code status = I40E_SUCCESS; + struct i40e_hw *hw = I40E_PF_TO_HW(pf); +- struct rte_eth_dev *dev = ((struct i40e_adapter *)hw->back)->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[pf->dev_data->port_id]; + + if (pf->support_multi_driver) { + PMD_DRV_LOG(ERR, "Replace cloud filter is not supported."); +@@ -9487,49 +9531,116 @@ i40e_translate_input_set_reg(enum i40e_mac_type type, uint64_t input) + return val; + } + ++static int ++i40e_get_inset_field_offset(struct i40e_hw *hw, uint32_t pit_reg_start, ++ uint32_t pit_reg_count, uint32_t hdr_off) ++{ ++ const uint32_t pit_reg_end = pit_reg_start + pit_reg_count; ++ uint32_t field_off = I40E_FDIR_FIELD_OFFSET(hdr_off); ++ uint32_t i, reg_val, src_off, count; ++ ++ for (i = pit_reg_start; i < pit_reg_end; i++) { ++ reg_val = i40e_read_rx_ctl(hw, I40E_GLQF_PIT(i)); ++ ++ src_off = I40E_GLQF_PIT_SOURCE_OFF_GET(reg_val); ++ count = I40E_GLQF_PIT_FSIZE_GET(reg_val); ++ ++ if (src_off <= field_off && (src_off + count) > field_off) 
++ break; ++ } ++ ++ if (i >= pit_reg_end) { ++ PMD_DRV_LOG(ERR, ++ "Hardware GLQF_PIT configuration does not support this field mask"); ++ return -1; ++ } ++ ++ return I40E_GLQF_PIT_DEST_OFF_GET(reg_val) + field_off - src_off; ++} ++ + int +-i40e_generate_inset_mask_reg(uint64_t inset, uint32_t *mask, uint8_t nb_elem) ++i40e_generate_inset_mask_reg(struct i40e_hw *hw, uint64_t inset, ++ uint32_t *mask, uint8_t nb_elem) + { +- uint8_t i, idx = 0; +- uint64_t inset_need_mask = inset; ++ static const uint64_t mask_inset[] = { ++ I40E_INSET_IPV4_PROTO | I40E_INSET_IPV4_TTL, ++ I40E_INSET_IPV6_NEXT_HDR | I40E_INSET_IPV6_HOP_LIMIT }; + + static const struct { + uint64_t inset; + uint32_t mask; +- } inset_mask_map[] = { +- {I40E_INSET_IPV4_TOS, I40E_INSET_IPV4_TOS_MASK}, +- {I40E_INSET_IPV4_PROTO | I40E_INSET_IPV4_TTL, 0}, +- {I40E_INSET_IPV4_PROTO, I40E_INSET_IPV4_PROTO_MASK}, +- {I40E_INSET_IPV4_TTL, I40E_INSET_IPv4_TTL_MASK}, +- {I40E_INSET_IPV6_TC, I40E_INSET_IPV6_TC_MASK}, +- {I40E_INSET_IPV6_NEXT_HDR | I40E_INSET_IPV6_HOP_LIMIT, 0}, +- {I40E_INSET_IPV6_NEXT_HDR, I40E_INSET_IPV6_NEXT_HDR_MASK}, +- {I40E_INSET_IPV6_HOP_LIMIT, I40E_INSET_IPV6_HOP_LIMIT_MASK}, ++ uint32_t offset; ++ } inset_mask_offset_map[] = { ++ { I40E_INSET_IPV4_TOS, I40E_INSET_IPV4_TOS_MASK, ++ offsetof(struct rte_ipv4_hdr, type_of_service) }, ++ ++ { I40E_INSET_IPV4_PROTO, I40E_INSET_IPV4_PROTO_MASK, ++ offsetof(struct rte_ipv4_hdr, next_proto_id) }, ++ ++ { I40E_INSET_IPV4_TTL, I40E_INSET_IPV4_TTL_MASK, ++ offsetof(struct rte_ipv4_hdr, time_to_live) }, ++ ++ { I40E_INSET_IPV6_TC, I40E_INSET_IPV6_TC_MASK, ++ offsetof(struct rte_ipv6_hdr, vtc_flow) }, ++ ++ { I40E_INSET_IPV6_NEXT_HDR, I40E_INSET_IPV6_NEXT_HDR_MASK, ++ offsetof(struct rte_ipv6_hdr, proto) }, ++ ++ { I40E_INSET_IPV6_HOP_LIMIT, I40E_INSET_IPV6_HOP_LIMIT_MASK, ++ offsetof(struct rte_ipv6_hdr, hop_limits) }, + }; + +- if (!inset || !mask || !nb_elem) ++ uint32_t i; ++ int idx = 0; ++ ++ assert(mask); ++ if (!inset) + return 0; + +- for (i = 0, idx = 0; i < RTE_DIM(inset_mask_map); i++) { ++ for (i = 0; i < RTE_DIM(mask_inset); i++) { + /* Clear the inset bit, if no MASK is required, + * for example proto + ttl + */ +- if ((inset & inset_mask_map[i].inset) == +- inset_mask_map[i].inset && inset_mask_map[i].mask == 0) +- inset_need_mask &= ~inset_mask_map[i].inset; +- if (!inset_need_mask) +- return 0; ++ if ((mask_inset[i] & inset) == mask_inset[i]) { ++ inset &= ~mask_inset[i]; ++ if (!inset) ++ return 0; ++ } + } +- for (i = 0, idx = 0; i < RTE_DIM(inset_mask_map); i++) { +- if ((inset_need_mask & inset_mask_map[i].inset) == +- inset_mask_map[i].inset) { +- if (idx >= nb_elem) { +- PMD_DRV_LOG(ERR, "exceed maximal number of bitmasks"); +- return -EINVAL; +- } +- mask[idx] = inset_mask_map[i].mask; +- idx++; ++ ++ for (i = 0; i < RTE_DIM(inset_mask_offset_map); i++) { ++ uint32_t pit_start, pit_count; ++ int offset; ++ ++ if (!(inset_mask_offset_map[i].inset & inset)) ++ continue; ++ ++ if (inset_mask_offset_map[i].inset & ++ (I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_PROTO | ++ I40E_INSET_IPV4_TTL)) { ++ pit_start = I40E_GLQF_PIT_IPV4_START; ++ pit_count = I40E_GLQF_PIT_IPV4_COUNT; ++ } else { ++ pit_start = I40E_GLQF_PIT_IPV6_START; ++ pit_count = I40E_GLQF_PIT_IPV6_COUNT; + } ++ ++ offset = i40e_get_inset_field_offset(hw, pit_start, pit_count, ++ inset_mask_offset_map[i].offset); ++ ++ if (offset < 0) ++ return -EINVAL; ++ ++ if (idx >= nb_elem) { ++ PMD_DRV_LOG(ERR, ++ "Configuration of inset mask out of range %u", ++ nb_elem); ++ return -ERANGE; ++ } ++ ++ 
mask[idx] = I40E_GLQF_PIT_BUILD((uint32_t)offset, ++ inset_mask_offset_map[i].mask); ++ idx++; + } + + return idx; +@@ -9551,9 +9662,10 @@ void + i40e_check_write_global_reg(struct i40e_hw *hw, uint32_t addr, uint32_t val) + { + uint32_t reg = i40e_read_rx_ctl(hw, addr); +- struct rte_eth_dev *dev; ++ struct rte_eth_dev_data *dev_data = ++ ((struct i40e_adapter *)hw->back)->pf.dev_data; ++ struct rte_eth_dev *dev = &rte_eth_devices[dev_data->port_id]; + +- dev = ((struct i40e_adapter *)hw->back)->eth_dev; + if (reg != val) { + i40e_write_rx_ctl(hw, addr, val); + PMD_DRV_LOG(WARNING, +@@ -9583,7 +9695,7 @@ i40e_filter_input_set_init(struct i40e_pf *pf) + + input_set = i40e_get_default_input_set(pctype); + +- num = i40e_generate_inset_mask_reg(input_set, mask_reg, ++ num = i40e_generate_inset_mask_reg(hw, input_set, mask_reg, + I40E_INSET_MASK_NUM_REG); + if (num < 0) + return; +@@ -9688,7 +9800,7 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw, + inset_reg |= i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, pctype)); + input_set |= pf->hash_input_set[pctype]; + } +- num = i40e_generate_inset_mask_reg(input_set, mask_reg, ++ num = i40e_generate_inset_mask_reg(hw, input_set, mask_reg, + I40E_INSET_MASK_NUM_REG); + if (num < 0) + return -EINVAL; +@@ -9728,7 +9840,7 @@ i40e_ethertype_filter_convert(const struct rte_eth_ethertype_filter *input, + return 0; + } + +-/* Check if there exists the ehtertype filter */ ++/* Check if there exists the ethertype filter */ + struct i40e_ethertype_filter * + i40e_sw_ethertype_filter_lookup(struct i40e_ethertype_rule *ethertype_rule, + const struct i40e_ethertype_filter_input *input) +@@ -11679,9 +11791,6 @@ static int i40e_get_module_eeprom(struct rte_eth_dev *dev, + uint32_t value = 0; + uint32_t i; + +- if (!info || !info->length || !info->data) +- return -EINVAL; +- + if (hw->phy.link_info.module_type[0] == I40E_MODULE_TYPE_SFP) + is_sfp = true; + +@@ -12401,7 +12510,7 @@ i40e_cloud_filter_qinq_create(struct i40e_pf *pf) + struct i40e_aqc_replace_cloud_filters_cmd filter_replace; + struct i40e_aqc_replace_cloud_filters_cmd_buf filter_replace_buf; + struct i40e_hw *hw = I40E_PF_TO_HW(pf); +- struct rte_eth_dev *dev = ((struct i40e_adapter *)hw->back)->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[pf->dev_data->port_id]; + + if (pf->support_multi_driver) { + PMD_DRV_LOG(ERR, "Replace cloud filter is not supported."); +@@ -13023,6 +13132,35 @@ i40e_config_rss_filter(struct i40e_pf *pf, + return 0; + } + ++static void ++i40e_set_mac_max_frame(struct rte_eth_dev *dev, uint16_t size) ++{ ++ struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ uint32_t rep_cnt = MAX_REPEAT_TIME; ++ struct rte_eth_link link; ++ enum i40e_status_code status; ++ bool can_be_set = true; ++ ++ /* I40E_MEDIA_TYPE_BASET link up can be ignored */ ++ if (hw->phy.media_type != I40E_MEDIA_TYPE_BASET) { ++ do { ++ update_link_reg(hw, &link); ++ if (link.link_status) ++ break; ++ rte_delay_ms(CHECK_INTERVAL); ++ } while (--rep_cnt); ++ can_be_set = !!link.link_status; ++ } ++ ++ if (can_be_set) { ++ status = i40e_aq_set_mac_config(hw, size, TRUE, 0, false, NULL); ++ if (status != I40E_SUCCESS) ++ PMD_DRV_LOG(ERR, "Failed to set max frame size at port level"); ++ } else { ++ PMD_DRV_LOG(ERR, "Set max frame size at port level not applicable on link down"); ++ } ++} ++ + RTE_LOG_REGISTER(i40e_logtype_init, pmd.net.i40e.init, NOTICE); + RTE_LOG_REGISTER(i40e_logtype_driver, pmd.net.i40e.driver, NOTICE); + #ifdef RTE_LIBRTE_I40E_DEBUG_RX +diff --git 
a/dpdk/drivers/net/i40e/i40e_ethdev.h b/dpdk/drivers/net/i40e/i40e_ethdev.h +index 20d051db8b..601520ca66 100644 +--- a/dpdk/drivers/net/i40e/i40e_ethdev.h ++++ b/dpdk/drivers/net/i40e/i40e_ethdev.h +@@ -17,6 +17,13 @@ + + #include "base/i40e_register.h" + ++/** ++ * _i=0...143, ++ * counters 0-127 are for the 128 VFs, ++ * counters 128-143 are for the 16 PFs ++ */ ++#define I40E_GL_RXERR1_H(_i) (0x00318004 + ((_i) * 8)) ++ + #define I40E_VLAN_TAG_SIZE 4 + + #define I40E_AQ_LEN 32 +@@ -88,8 +95,10 @@ + do { \ + uint32_t ori_val; \ + struct rte_eth_dev *dev; \ ++ struct rte_eth_dev_data *dev_data; \ + ori_val = I40E_READ_REG((hw), (reg)); \ +- dev = ((struct i40e_adapter *)hw->back)->eth_dev; \ ++ dev_data = ((struct i40e_adapter *)hw->back)->pf.dev_data; \ ++ dev = &rte_eth_devices[dev_data->port_id]; \ + I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((hw), \ + (reg)), (value)); \ + if (ori_val != value) \ +@@ -629,6 +638,7 @@ struct i40e_fdir_flow_ext { + uint8_t raw_id; + uint8_t is_vf; /* 1 for VF, 0 for port dev */ + uint16_t dst_id; /* VF ID, available when is_vf is 1*/ ++ uint64_t input_set; + bool inner_ip; /* If there is inner ip */ + enum i40e_fdir_ip_type iip_type; /* ip type for inner ip */ + enum i40e_fdir_ip_type oip_type; /* ip type for outer ip */ +@@ -785,7 +795,7 @@ struct i40e_fdir_info { + bool flex_pit_flag[I40E_MAX_FLXPLD_LAYER]; + bool flex_mask_flag[I40E_FILTER_PCTYPE_MAX]; + +- bool inset_flag[I40E_FILTER_PCTYPE_MAX]; /* Mark if input set is set */ ++ uint32_t flow_count[I40E_FILTER_PCTYPE_MAX]; + + uint32_t flex_flow_count[I40E_MAX_FLXPLD_LAYER]; + }; +@@ -886,7 +896,7 @@ struct i40e_tunnel_filter { + TAILQ_ENTRY(i40e_tunnel_filter) rules; + struct i40e_tunnel_filter_input input; + uint8_t is_to_vf; /* 0 - to PF, 1 - to VF */ +- uint16_t vf_id; /* VF id, avaiblable when is_to_vf is 1. */ ++ uint16_t vf_id; /* VF id, available when is_to_vf is 1. */ + uint16_t queue; /* Queue assigned to when match */ + }; + +@@ -955,7 +965,7 @@ struct i40e_tunnel_filter_conf { + uint32_t tenant_id; /**< Tenant ID to match. VNI, GRE key... */ + uint16_t queue_id; /**< Queue assigned to if match. */ + uint8_t is_to_vf; /**< 0 - to PF, 1 - to VF */ +- uint16_t vf_id; /**< VF id, avaiblable when is_to_vf is 1. */ ++ uint16_t vf_id; /**< VF id, available when is_to_vf is 1. */ + }; + + #define I40E_MIRROR_MAX_ENTRIES_PER_RULE 64 +@@ -1092,7 +1102,7 @@ struct i40e_vf_msg_cfg { + /* + * If message statistics from a VF exceed the maximal limitation, + * the PF will ignore any new message from that VF for +- * 'ignor_second' time. ++ * 'ignore_second' time. 
+ */ + uint32_t ignore_second; + }; +@@ -1111,6 +1121,9 @@ struct i40e_pf { + + struct i40e_hw_port_stats stats_offset; + struct i40e_hw_port_stats stats; ++ u64 rx_err1; /* rxerr1 */ ++ u64 rx_err1_offset; ++ + /* internal packet statistics, it should be excluded from the total */ + struct i40e_eth_stats internal_stats_offset; + struct i40e_eth_stats internal_stats; +@@ -1263,7 +1276,6 @@ struct i40e_vf { + struct i40e_adapter { + /* Common for both PF and VF */ + struct i40e_hw hw; +- struct rte_eth_dev *eth_dev; + + /* Specific for PF or VF */ + union { +@@ -1297,7 +1309,7 @@ struct i40e_adapter { + }; + + /** +- * Strucute to store private data for each VF representor instance ++ * Structure to store private data for each VF representor instance + */ + struct i40e_vf_representor { + uint16_t switch_domain_id; +@@ -1305,7 +1317,7 @@ struct i40e_vf_representor { + uint16_t vf_id; + /**< Virtual Function ID */ + struct i40e_adapter *adapter; +- /**< Private data store of assocaiated physical function */ ++ /**< Private data store of associated physical function */ + struct i40e_eth_stats stats_offset; + /**< Zero-point of VF statistics*/ + }; +@@ -1439,8 +1451,8 @@ bool is_i40evf_supported(struct rte_eth_dev *dev); + + int i40e_validate_input_set(enum i40e_filter_pctype pctype, + enum rte_filter_type filter, uint64_t inset); +-int i40e_generate_inset_mask_reg(uint64_t inset, uint32_t *mask, +- uint8_t nb_elem); ++int i40e_generate_inset_mask_reg(struct i40e_hw *hw, uint64_t inset, ++ uint32_t *mask, uint8_t nb_elem); + uint64_t i40e_translate_input_set_reg(enum i40e_mac_type type, uint64_t input); + void i40e_check_write_reg(struct i40e_hw *hw, uint32_t addr, uint32_t val); + void i40e_check_write_global_reg(struct i40e_hw *hw, +@@ -1513,7 +1525,7 @@ i40e_get_vsi_from_adapter(struct i40e_adapter *adapter) + #define I40E_VSI_TO_DEV_DATA(vsi) \ + (((struct i40e_vsi *)vsi)->adapter->pf.dev_data) + #define I40E_VSI_TO_ETH_DEV(vsi) \ +- (((struct i40e_vsi *)vsi)->adapter->eth_dev) ++ (&rte_eth_devices[((struct i40e_vsi *)vsi)->adapter->pf.dev_data->port_id]) + + /* I40E_PF_TO */ + #define I40E_PF_TO_HW(pf) \ +diff --git a/dpdk/drivers/net/i40e/i40e_ethdev_vf.c b/dpdk/drivers/net/i40e/i40e_ethdev_vf.c +index bca8cb80e4..b56f9f9149 100644 +--- a/dpdk/drivers/net/i40e/i40e_ethdev_vf.c ++++ b/dpdk/drivers/net/i40e/i40e_ethdev_vf.c +@@ -42,7 +42,7 @@ + /* busy wait delay in msec */ + #define I40EVF_BUSY_WAIT_DELAY 10 + #define I40EVF_BUSY_WAIT_COUNT 50 +-#define MAX_RESET_WAIT_CNT 20 ++#define MAX_RESET_WAIT_CNT 100 + + #define I40EVF_ALARM_INTERVAL 50000 /* us */ + +@@ -106,6 +106,9 @@ static int i40evf_dev_tx_queue_start(struct rte_eth_dev *dev, + uint16_t tx_queue_id); + static int i40evf_dev_tx_queue_stop(struct rte_eth_dev *dev, + uint16_t tx_queue_id); ++static int i40evf_add_del_eth_addr(struct rte_eth_dev *dev, ++ struct rte_ether_addr *addr, ++ bool add, uint8_t type); + static int i40evf_add_mac_addr(struct rte_eth_dev *dev, + struct rte_ether_addr *addr, + uint32_t index, +@@ -823,10 +826,9 @@ i40evf_stop_queues(struct rte_eth_dev *dev) + } + + static int +-i40evf_add_mac_addr(struct rte_eth_dev *dev, +- struct rte_ether_addr *addr, +- __rte_unused uint32_t index, +- __rte_unused uint32_t pool) ++i40evf_add_del_eth_addr(struct rte_eth_dev *dev, ++ struct rte_ether_addr *addr, ++ bool add, uint8_t type) + { + struct virtchnl_ether_addr_list *list; + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); +@@ -835,83 +837,70 @@ i40evf_add_mac_addr(struct rte_eth_dev *dev, + 
int err; + struct vf_cmd_info args; + +- if (rte_is_zero_ether_addr(addr)) { +- PMD_DRV_LOG(ERR, "Invalid mac:%x:%x:%x:%x:%x:%x", +- addr->addr_bytes[0], addr->addr_bytes[1], +- addr->addr_bytes[2], addr->addr_bytes[3], +- addr->addr_bytes[4], addr->addr_bytes[5]); +- return I40E_ERR_INVALID_MAC_ADDR; +- } +- + list = (struct virtchnl_ether_addr_list *)cmd_buffer; + list->vsi_id = vf->vsi_res->vsi_id; + list->num_elements = 1; ++ list->list[0].type = type; + rte_memcpy(list->list[0].addr, addr->addr_bytes, + sizeof(addr->addr_bytes)); + +- args.ops = VIRTCHNL_OP_ADD_ETH_ADDR; ++ args.ops = add ? VIRTCHNL_OP_ADD_ETH_ADDR : VIRTCHNL_OP_DEL_ETH_ADDR; + args.in_args = cmd_buffer; + args.in_args_size = sizeof(cmd_buffer); + args.out_buffer = vf->aq_resp; + args.out_size = I40E_AQ_BUF_SZ; + err = i40evf_execute_vf_cmd(dev, &args); + if (err) +- PMD_DRV_LOG(ERR, "fail to execute command " +- "OP_ADD_ETHER_ADDRESS"); +- else +- vf->vsi.mac_num++; +- ++ PMD_DRV_LOG(ERR, "fail to execute command %s", ++ add ? "OP_ADD_ETH_ADDR" : "OP_DEL_ETH_ADDR"); + return err; + } + +-static void +-i40evf_del_mac_addr_by_addr(struct rte_eth_dev *dev, +- struct rte_ether_addr *addr) ++static int ++i40evf_add_mac_addr(struct rte_eth_dev *dev, ++ struct rte_ether_addr *addr, ++ __rte_unused uint32_t index, ++ __rte_unused uint32_t pool) + { +- struct virtchnl_ether_addr_list *list; + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); +- uint8_t cmd_buffer[sizeof(struct virtchnl_ether_addr_list) + \ +- sizeof(struct virtchnl_ether_addr)]; + int err; +- struct vf_cmd_info args; + +- if (i40e_validate_mac_addr(addr->addr_bytes) != I40E_SUCCESS) { +- PMD_DRV_LOG(ERR, "Invalid mac:%x-%x-%x-%x-%x-%x", ++ if (rte_is_zero_ether_addr(addr)) { ++ PMD_DRV_LOG(ERR, "Invalid mac:%x:%x:%x:%x:%x:%x", + addr->addr_bytes[0], addr->addr_bytes[1], + addr->addr_bytes[2], addr->addr_bytes[3], + addr->addr_bytes[4], addr->addr_bytes[5]); +- return; ++ return I40E_ERR_INVALID_MAC_ADDR; + } + +- list = (struct virtchnl_ether_addr_list *)cmd_buffer; +- list->vsi_id = vf->vsi_res->vsi_id; +- list->num_elements = 1; +- rte_memcpy(list->list[0].addr, addr->addr_bytes, +- sizeof(addr->addr_bytes)); ++ err = i40evf_add_del_eth_addr(dev, addr, TRUE, VIRTCHNL_ETHER_ADDR_EXTRA); + +- args.ops = VIRTCHNL_OP_DEL_ETH_ADDR; +- args.in_args = cmd_buffer; +- args.in_args_size = sizeof(cmd_buffer); +- args.out_buffer = vf->aq_resp; +- args.out_size = I40E_AQ_BUF_SZ; +- err = i40evf_execute_vf_cmd(dev, &args); + if (err) +- PMD_DRV_LOG(ERR, "fail to execute command " +- "OP_DEL_ETHER_ADDRESS"); ++ PMD_DRV_LOG(ERR, "fail to add MAC address"); + else +- vf->vsi.mac_num--; +- return; ++ vf->vsi.mac_num++; ++ ++ return err; + } + + static void + i40evf_del_mac_addr(struct rte_eth_dev *dev, uint32_t index) + { ++ struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + struct rte_eth_dev_data *data = dev->data; + struct rte_ether_addr *addr; ++ int err; + + addr = &data->mac_addrs[index]; + +- i40evf_del_mac_addr_by_addr(dev, addr); ++ err = i40evf_add_del_eth_addr(dev, addr, FALSE, VIRTCHNL_ETHER_ADDR_EXTRA); ++ ++ if (err) ++ PMD_DRV_LOG(ERR, "fail to delete MAC address"); ++ else ++ vf->vsi.mac_num--; ++ ++ return; + } + + static int +@@ -1575,6 +1564,7 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev) + { + struct i40e_hw *hw + = I40E_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); ++ struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(eth_dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + + 
PMD_INIT_FUNC_TRACE(); +@@ -1605,12 +1595,17 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev) + hw->device_id = pci_dev->id.device_id; + hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id; + hw->subsystem_device_id = pci_dev->id.subsystem_device_id; ++ hw->bus.bus_id = pci_dev->addr.bus; + hw->bus.device = pci_dev->addr.devid; + hw->bus.func = pci_dev->addr.function; + hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; + hw->adapter_stopped = 1; + hw->adapter_closed = 0; + ++ vf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(eth_dev->data->dev_private); ++ vf->dev_data = eth_dev->data; ++ hw->back = I40E_DEV_PRIVATE_TO_ADAPTER(vf); ++ + if(i40evf_init_vf(eth_dev) != 0) { + PMD_INIT_LOG(ERR, "Init vf failed"); + return -1; +@@ -1890,7 +1885,7 @@ i40evf_rxq_init(struct rte_eth_dev *dev, struct i40e_rx_queue *rxq) + RTE_PKTMBUF_HEADROOM); + rxq->hs_mode = i40e_header_split_none; + rxq->rx_hdr_len = 0; +- rxq->rx_buf_len = RTE_ALIGN(buf_size, (1 << I40E_RXQ_CTX_DBUFF_SHIFT)); ++ rxq->rx_buf_len = RTE_ALIGN_FLOOR(buf_size, (1 << I40E_RXQ_CTX_DBUFF_SHIFT)); + len = rxq->rx_buf_len * I40E_MAX_CHAINED_RX_BUFFERS; + rxq->max_pkt_len = RTE_MIN(len, + dev_data->dev_conf.rxmode.max_rx_pkt_len); +@@ -2092,6 +2087,9 @@ i40evf_add_del_all_mac_addr(struct rte_eth_dev *dev, bool add) + continue; + rte_memcpy(list->list[j].addr, addr->addr_bytes, + sizeof(addr->addr_bytes)); ++ list->list[j].type = (j == 0 ? ++ VIRTCHNL_ETHER_ADDR_PRIMARY : ++ VIRTCHNL_ETHER_ADDR_EXTRA); + PMD_DRV_LOG(DEBUG, "add/rm mac:%x:%x:%x:%x:%x:%x", + addr->addr_bytes[0], addr->addr_bytes[1], + addr->addr_bytes[2], addr->addr_bytes[3], +@@ -2745,7 +2743,7 @@ i40evf_config_rss(struct i40e_vf *vf) + } + + for (i = 0; i < rss_lut_size; i++) +- lut_info[i] = i % vf->num_queue_pairs; ++ lut_info[i] = i % num; + + ret = i40evf_set_rss_lut(&vf->vsi, lut_info, + rss_lut_size); +@@ -2852,15 +2850,23 @@ i40evf_set_default_mac_addr(struct rte_eth_dev *dev, + struct rte_ether_addr *mac_addr) + { + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); ++ struct rte_ether_addr *old_addr; ++ int ret; ++ ++ old_addr = (struct rte_ether_addr *)hw->mac.addr; + + if (!rte_is_valid_assigned_ether_addr(mac_addr)) { + PMD_DRV_LOG(ERR, "Tried to set invalid MAC address."); + return -EINVAL; + } + +- i40evf_del_mac_addr_by_addr(dev, (struct rte_ether_addr *)hw->mac.addr); ++ if (rte_is_same_ether_addr(old_addr, mac_addr)) ++ return 0; ++ ++ i40evf_add_del_eth_addr(dev, old_addr, FALSE, VIRTCHNL_ETHER_ADDR_PRIMARY); + +- if (i40evf_add_mac_addr(dev, mac_addr, 0, 0) != 0) ++ ret = i40evf_add_del_eth_addr(dev, mac_addr, TRUE, VIRTCHNL_ETHER_ADDR_PRIMARY); ++ if (ret) + return -EIO; + + rte_ether_addr_copy(mac_addr, (struct rte_ether_addr *)hw->mac.addr); +@@ -2904,6 +2910,7 @@ i40evf_add_del_mc_addr_list(struct rte_eth_dev *dev, + + memcpy(list->list[i].addr, mc_addrs[i].addr_bytes, + sizeof(list->list[i].addr)); ++ list->list[i].type = VIRTCHNL_ETHER_ADDR_EXTRA; + } + + args.ops = add ? VIRTCHNL_OP_ADD_ETH_ADDR : VIRTCHNL_OP_DEL_ETH_ADDR; +diff --git a/dpdk/drivers/net/i40e/i40e_fdir.c b/dpdk/drivers/net/i40e/i40e_fdir.c +index f5defcf585..5e2bb64c14 100644 +--- a/dpdk/drivers/net/i40e/i40e_fdir.c ++++ b/dpdk/drivers/net/i40e/i40e_fdir.c +@@ -141,7 +141,7 @@ i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq) + I40E_QRX_TAIL(rxq->vsi->base_queue); + + rte_wmb(); +- /* Init the RX tail regieter. */ ++ /* Init the RX tail register. 
*/ + I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1); + + return err; +@@ -159,7 +159,7 @@ i40e_fdir_setup(struct i40e_pf *pf) + int err = I40E_SUCCESS; + char z_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *mz = NULL; +- struct rte_eth_dev *eth_dev = pf->adapter->eth_dev; ++ struct rte_eth_dev *eth_dev = &rte_eth_devices[pf->dev_data->port_id]; + uint16_t i; + + if ((pf->flags & I40E_FLAG_FDIR) == 0) { +@@ -283,7 +283,6 @@ i40e_fdir_teardown(struct i40e_pf *pf) + { + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct i40e_vsi *vsi; +- struct rte_eth_dev *dev = pf->adapter->eth_dev; + + vsi = pf->fdir.fdir_vsi; + if (!vsi) +@@ -301,10 +300,8 @@ i40e_fdir_teardown(struct i40e_pf *pf) + PMD_DRV_LOG(DEBUG, "Failed to do FDIR RX switch off"); + + i40e_dev_rx_queue_release(pf->fdir.rxq); +- rte_eth_dma_zone_free(dev, "fdir_rx_ring", pf->fdir.rxq->queue_id); + pf->fdir.rxq = NULL; + i40e_dev_tx_queue_release(pf->fdir.txq); +- rte_eth_dma_zone_free(dev, "fdir_tx_ring", pf->fdir.txq->queue_id); + pf->fdir.txq = NULL; + i40e_vsi_release(vsi); + pf->fdir.fdir_vsi = NULL; +@@ -432,7 +429,7 @@ i40e_check_fdir_flex_payload(const struct rte_eth_flex_payload_cfg *flex_cfg) + + for (i = 0; i < I40E_FDIR_MAX_FLEX_LEN; i++) { + if (flex_cfg->src_offset[i] >= I40E_MAX_FLX_SOURCE_OFF) { +- PMD_DRV_LOG(ERR, "exceeds maxmial payload limit."); ++ PMD_DRV_LOG(ERR, "exceeds maximal payload limit."); + return -EINVAL; + } + } +@@ -440,7 +437,7 @@ i40e_check_fdir_flex_payload(const struct rte_eth_flex_payload_cfg *flex_cfg) + memset(flex_pit, 0, sizeof(flex_pit)); + num = i40e_srcoff_to_flx_pit(flex_cfg->src_offset, flex_pit); + if (num > I40E_MAX_FLXPLD_FIED) { +- PMD_DRV_LOG(ERR, "exceeds maxmial number of flex fields."); ++ PMD_DRV_LOG(ERR, "exceeds maximal number of flex fields."); + return -EINVAL; + } + for (i = 0; i < num; i++) { +@@ -950,7 +947,7 @@ i40e_flow_fdir_construct_pkt(struct i40e_pf *pf, + uint8_t pctype = fdir_input->pctype; + struct i40e_customized_pctype *cus_pctype; + +- /* raw pcket template - just copy contents of the raw packet */ ++ /* raw packet template - just copy contents of the raw packet */ + if (fdir_input->flow_ext.pkt_template) { + memcpy(raw_pkt, fdir_input->flow.raw_flow.packet, + fdir_input->flow.raw_flow.length); +@@ -1588,6 +1585,86 @@ i40e_flow_set_fdir_flex_msk(struct i40e_pf *pf, + pf->fdir.flex_mask_flag[pctype] = 1; + } + ++static int ++i40e_flow_set_fdir_inset(struct i40e_pf *pf, ++ enum i40e_filter_pctype pctype, ++ uint64_t input_set) ++{ ++ uint32_t mask_reg[I40E_INSET_MASK_NUM_REG] = {0}; ++ struct i40e_hw *hw = I40E_PF_TO_HW(pf); ++ uint64_t inset_reg = 0; ++ int i, num; ++ ++ /* Check if the input set is valid */ ++ if (i40e_validate_input_set(pctype, RTE_ETH_FILTER_FDIR, ++ input_set) != 0) { ++ PMD_DRV_LOG(ERR, "Invalid input set"); ++ return -EINVAL; ++ } ++ ++ /* Check if the configuration is conflicted */ ++ if (pf->fdir.flow_count[pctype] && ++ memcmp(&pf->fdir.input_set[pctype], &input_set, sizeof(uint64_t))) { ++ PMD_DRV_LOG(ERR, "Conflict with the first rule's input set."); ++ return -EINVAL; ++ } ++ ++ if (pf->fdir.flow_count[pctype] && ++ !memcmp(&pf->fdir.input_set[pctype], &input_set, sizeof(uint64_t))) ++ return 0; ++ ++ num = i40e_generate_inset_mask_reg(hw, input_set, mask_reg, ++ I40E_INSET_MASK_NUM_REG); ++ if (num < 0) { ++ PMD_DRV_LOG(ERR, "Invalid pattern mask."); ++ return -EINVAL; ++ } ++ ++ if (pf->support_multi_driver) { ++ for (i = 0; i < num; i++) ++ if (i40e_read_rx_ctl(hw, ++ I40E_GLQF_FD_MSK(i, pctype)) != ++ 
mask_reg[i]) { ++ PMD_DRV_LOG(ERR, "Input set setting is not" ++ " supported with" ++ " `support-multi-driver`" ++ " enabled!"); ++ return -EPERM; ++ } ++ for (i = num; i < I40E_INSET_MASK_NUM_REG; i++) ++ if (i40e_read_rx_ctl(hw, ++ I40E_GLQF_FD_MSK(i, pctype)) != 0) { ++ PMD_DRV_LOG(ERR, "Input set setting is not" ++ " supported with" ++ " `support-multi-driver`" ++ " enabled!"); ++ return -EPERM; ++ } ++ ++ } else { ++ for (i = 0; i < num; i++) ++ i40e_check_write_reg(hw, I40E_GLQF_FD_MSK(i, pctype), ++ mask_reg[i]); ++ /*clear unused mask registers of the pctype */ ++ for (i = num; i < I40E_INSET_MASK_NUM_REG; i++) ++ i40e_check_write_reg(hw, ++ I40E_GLQF_FD_MSK(i, pctype), 0); ++ } ++ ++ inset_reg |= i40e_translate_input_set_reg(hw->mac.type, input_set); ++ ++ i40e_check_write_reg(hw, I40E_PRTQF_FD_INSET(pctype, 0), ++ (uint32_t)(inset_reg & UINT32_MAX)); ++ i40e_check_write_reg(hw, I40E_PRTQF_FD_INSET(pctype, 1), ++ (uint32_t)((inset_reg >> ++ I40E_32_BIT_WIDTH) & UINT32_MAX)); ++ ++ I40E_WRITE_FLUSH(hw); ++ ++ pf->fdir.input_set[pctype] = input_set; ++ return 0; ++} ++ + static inline unsigned char * + i40e_find_available_buffer(struct rte_eth_dev *dev) + { +@@ -1685,6 +1762,15 @@ i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev, + i40e_fdir_filter_convert(filter, &check_filter); + + if (add) { ++ /* configure the input set for common PCTYPEs*/ ++ if (!filter->input.flow_ext.customized_pctype && ++ !filter->input.flow_ext.pkt_template) { ++ ret = i40e_flow_set_fdir_inset(pf, pctype, ++ filter->input.flow_ext.input_set); ++ if (ret < 0) ++ return ret; ++ } ++ + if (filter->input.flow_ext.is_flex_flow) { + for (i = 0; i < filter->input.flow_ext.raw_id; i++) { + layer_idx = filter->input.flow_ext.layer_idx; +@@ -1744,7 +1830,7 @@ i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev, + &check_filter.fdir.input); + if (!node) { + PMD_DRV_LOG(ERR, +- "There's no corresponding flow firector filter!"); ++ "There's no corresponding flow director filter!"); + return -EINVAL; + } + +@@ -1799,11 +1885,13 @@ i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev, + } + + if (add) { ++ fdir_info->flow_count[pctype]++; + fdir_info->fdir_actual_cnt++; + if (fdir_info->fdir_invalprio == 1 && + fdir_info->fdir_guarantee_free_space > 0) + fdir_info->fdir_guarantee_free_space--; + } else { ++ fdir_info->flow_count[pctype]--; + fdir_info->fdir_actual_cnt--; + if (fdir_info->fdir_invalprio == 1 && + fdir_info->fdir_guarantee_free_space < +diff --git a/dpdk/drivers/net/i40e/i40e_flow.c b/dpdk/drivers/net/i40e/i40e_flow.c +index bbd666b7a0..b41a1fd3ca 100644 +--- a/dpdk/drivers/net/i40e/i40e_flow.c ++++ b/dpdk/drivers/net/i40e/i40e_flow.c +@@ -2243,82 +2243,6 @@ i40e_flow_check_raw_item(const struct rte_flow_item *item, + return 0; + } + +-static int +-i40e_flow_set_fdir_inset(struct i40e_pf *pf, +- enum i40e_filter_pctype pctype, +- uint64_t input_set) +-{ +- struct i40e_hw *hw = I40E_PF_TO_HW(pf); +- uint64_t inset_reg = 0; +- uint32_t mask_reg[I40E_INSET_MASK_NUM_REG] = {0}; +- int i, num; +- +- /* Check if the input set is valid */ +- if (i40e_validate_input_set(pctype, RTE_ETH_FILTER_FDIR, +- input_set) != 0) { +- PMD_DRV_LOG(ERR, "Invalid input set"); +- return -EINVAL; +- } +- +- /* Check if the configuration is conflicted */ +- if (pf->fdir.inset_flag[pctype] && +- memcmp(&pf->fdir.input_set[pctype], &input_set, sizeof(uint64_t))) +- return -1; +- +- if (pf->fdir.inset_flag[pctype] && +- !memcmp(&pf->fdir.input_set[pctype], &input_set, sizeof(uint64_t))) +- return 0; +- +- num = 
i40e_generate_inset_mask_reg(input_set, mask_reg, +- I40E_INSET_MASK_NUM_REG); +- if (num < 0) +- return -EINVAL; +- +- if (pf->support_multi_driver) { +- for (i = 0; i < num; i++) +- if (i40e_read_rx_ctl(hw, +- I40E_GLQF_FD_MSK(i, pctype)) != +- mask_reg[i]) { +- PMD_DRV_LOG(ERR, "Input set setting is not" +- " supported with" +- " `support-multi-driver`" +- " enabled!"); +- return -EPERM; +- } +- for (i = num; i < I40E_INSET_MASK_NUM_REG; i++) +- if (i40e_read_rx_ctl(hw, +- I40E_GLQF_FD_MSK(i, pctype)) != 0) { +- PMD_DRV_LOG(ERR, "Input set setting is not" +- " supported with" +- " `support-multi-driver`" +- " enabled!"); +- return -EPERM; +- } +- +- } else { +- for (i = 0; i < num; i++) +- i40e_check_write_reg(hw, I40E_GLQF_FD_MSK(i, pctype), +- mask_reg[i]); +- /*clear unused mask registers of the pctype */ +- for (i = num; i < I40E_INSET_MASK_NUM_REG; i++) +- i40e_check_write_reg(hw, +- I40E_GLQF_FD_MSK(i, pctype), 0); +- } +- +- inset_reg |= i40e_translate_input_set_reg(hw->mac.type, input_set); +- +- i40e_check_write_reg(hw, I40E_PRTQF_FD_INSET(pctype, 0), +- (uint32_t)(inset_reg & UINT32_MAX)); +- i40e_check_write_reg(hw, I40E_PRTQF_FD_INSET(pctype, 1), +- (uint32_t)((inset_reg >> +- I40E_32_BIT_WIDTH) & UINT32_MAX)); +- +- I40E_WRITE_FLUSH(hw); +- +- pf->fdir.input_set[pctype] = input_set; +- pf->fdir.inset_flag[pctype] = 1; +- return 0; +-} + + static uint8_t + i40e_flow_fdir_get_pctype_value(struct i40e_pf *pf, +@@ -2433,7 +2357,7 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + const struct rte_flow_item *item = pattern; + const struct rte_flow_item_eth *eth_spec, *eth_mask; + const struct rte_flow_item_vlan *vlan_spec, *vlan_mask; +- const struct rte_flow_item_ipv4 *ipv4_spec, *ipv4_mask; ++ const struct rte_flow_item_ipv4 *ipv4_spec, *ipv4_last, *ipv4_mask; + const struct rte_flow_item_ipv6 *ipv6_spec, *ipv6_mask; + const struct rte_flow_item_tcp *tcp_spec, *tcp_mask; + const struct rte_flow_item_udp *udp_spec, *udp_mask; +@@ -2446,7 +2370,6 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + + uint8_t pctype = 0; + uint64_t input_set = I40E_INSET_NONE; +- uint16_t frag_off; + enum rte_flow_item_type item_type; + enum rte_flow_item_type next_type; + enum rte_flow_item_type l3 = RTE_FLOW_ITEM_TYPE_END; +@@ -2472,7 +2395,7 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + memset(len_arr, 0, sizeof(len_arr)); + filter->input.flow_ext.customized_pctype = false; + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { +- if (item->last) { ++ if (item->last && item->type != RTE_FLOW_ITEM_TYPE_IPV4) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, +@@ -2611,15 +2534,40 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + l3 = RTE_FLOW_ITEM_TYPE_IPV4; + ipv4_spec = item->spec; + ipv4_mask = item->mask; ++ ipv4_last = item->last; + pctype = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER; + layer_idx = I40E_FLXPLD_L3_IDX; + ++ if (ipv4_last) { ++ if (!ipv4_spec || !ipv4_mask || !outer_ip) { ++ rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, ++ item, ++ "Not support range"); ++ return -rte_errno; ++ } ++ /* Only fragment_offset supports range */ ++ if (ipv4_last->hdr.version_ihl || ++ ipv4_last->hdr.type_of_service || ++ ipv4_last->hdr.total_length || ++ ipv4_last->hdr.packet_id || ++ ipv4_last->hdr.time_to_live || ++ ipv4_last->hdr.next_proto_id || ++ ipv4_last->hdr.hdr_checksum || ++ ipv4_last->hdr.src_addr || ++ ipv4_last->hdr.dst_addr) { ++ rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, ++ item, ++ "Not 
support range"); ++ return -rte_errno; ++ } ++ } + if (ipv4_spec && ipv4_mask && outer_ip) { + /* Check IPv4 mask and update input set */ + if (ipv4_mask->hdr.version_ihl || + ipv4_mask->hdr.total_length || + ipv4_mask->hdr.packet_id || +- ipv4_mask->hdr.fragment_offset || + ipv4_mask->hdr.hdr_checksum) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, +@@ -2640,11 +2588,56 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + input_set |= I40E_INSET_IPV4_PROTO; + + /* Check if it is fragment. */ +- frag_off = ipv4_spec->hdr.fragment_offset; +- frag_off = rte_be_to_cpu_16(frag_off); +- if (frag_off & RTE_IPV4_HDR_OFFSET_MASK || +- frag_off & RTE_IPV4_HDR_MF_FLAG) +- pctype = I40E_FILTER_PCTYPE_FRAG_IPV4; ++ uint16_t frag_mask = ++ ipv4_mask->hdr.fragment_offset; ++ uint16_t frag_spec = ++ ipv4_spec->hdr.fragment_offset; ++ uint16_t frag_last = 0; ++ if (ipv4_last) ++ frag_last = ++ ipv4_last->hdr.fragment_offset; ++ if (frag_mask) { ++ frag_mask = rte_be_to_cpu_16(frag_mask); ++ frag_spec = rte_be_to_cpu_16(frag_spec); ++ frag_last = rte_be_to_cpu_16(frag_last); ++ /* frag_off mask has to be 0x3fff */ ++ if (frag_mask != ++ (RTE_IPV4_HDR_OFFSET_MASK | ++ RTE_IPV4_HDR_MF_FLAG)) { ++ rte_flow_error_set(error, ++ EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, ++ item, ++ "Invalid IPv4 fragment_offset mask"); ++ return -rte_errno; ++ } ++ /* ++ * non-frag rule: ++ * mask=0x3fff,spec=0 ++ * frag rule: ++ * mask=0x3fff,spec=0x8,last=0x2000 ++ */ ++ if (frag_spec == ++ (1 << RTE_IPV4_HDR_FO_SHIFT) && ++ frag_last == RTE_IPV4_HDR_MF_FLAG) { ++ pctype = ++ I40E_FILTER_PCTYPE_FRAG_IPV4; ++ } else if (frag_spec || frag_last) { ++ rte_flow_error_set(error, ++ EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, ++ item, ++ "Invalid IPv4 fragment_offset rule"); ++ return -rte_errno; ++ } ++ } else if (frag_spec || frag_last) { ++ rte_flow_error_set(error, ++ EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, ++ item, ++ "Invalid fragment_offset"); ++ return -rte_errno; ++ } + + if (input_set & (I40E_INSET_DMAC | I40E_INSET_SMAC)) { + if (input_set & (I40E_INSET_IPV4_SRC | +@@ -3050,12 +3043,15 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, +- "Exceeds maxmial payload limit."); ++ "Exceeds maximal payload limit."); + return -rte_errno; + } + + for (i = 0; i < raw_spec->length; i++) { + j = i + next_dst_off; ++ if (j >= RTE_ETH_FDIR_MAX_FLEXLEN || ++ j >= I40E_FDIR_MAX_FLEX_LEN) ++ break; + filter->input.flow_ext.flexbytes[j] = + raw_spec->pattern[i]; + filter->input.flow_ext.flex_mask[j] = +@@ -3143,18 +3139,17 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + + /* If customized pctype is not used, set fdir configuration.*/ + if (!filter->input.flow_ext.customized_pctype) { +- ret = i40e_flow_set_fdir_inset(pf, pctype, input_set); +- if (ret == -1) { +- rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ITEM, item, +- "Conflict with the first rule's input set."); +- return -rte_errno; +- } else if (ret == -EINVAL) { ++ /* Check if the input set is valid */ ++ if (i40e_validate_input_set(pctype, RTE_ETH_FILTER_FDIR, ++ input_set) != 0) { + rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ITEM, item, +- "Invalid pattern mask."); ++ RTE_FLOW_ERROR_TYPE_ITEM, ++ item, ++ "Invalid input set"); + return -rte_errno; + } ++ ++ filter->input.flow_ext.input_set = input_set; + } + + filter->input.pctype = pctype; +@@ -5469,7 +5464,7 @@ i40e_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error) + static int + 
i40e_flow_flush_fdir_filter(struct i40e_pf *pf) + { +- struct rte_eth_dev *dev = pf->adapter->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[pf->dev_data->port_id]; + struct i40e_fdir_info *fdir_info = &pf->fdir; + struct i40e_fdir_filter *fdir_filter; + enum i40e_filter_pctype pctype; +@@ -5512,7 +5507,7 @@ i40e_flow_flush_fdir_filter(struct i40e_pf *pf) + + for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + pctype <= I40E_FILTER_PCTYPE_L2_PAYLOAD; pctype++) { +- pf->fdir.inset_flag[pctype] = 0; ++ pf->fdir.flow_count[pctype] = 0; + pf->fdir.flex_mask_flag[pctype] = 0; + } + +diff --git a/dpdk/drivers/net/i40e/i40e_pf.c b/dpdk/drivers/net/i40e/i40e_pf.c +index 65d649b627..7ec8209f09 100644 +--- a/dpdk/drivers/net/i40e/i40e_pf.c ++++ b/dpdk/drivers/net/i40e/i40e_pf.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #include "i40e_logs.h" + #include "base/i40e_prototype.h" +@@ -29,6 +30,28 @@ + + #define I40E_CFG_CRCSTRIP_DEFAULT 1 + ++/* Supported RSS offloads */ ++#define I40E_DEFAULT_RSS_HENA ( \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_L2_PAYLOAD)) ++ ++#define I40E_DEFAULT_RSS_HENA_EXPANDED (I40E_DEFAULT_RSS_HENA | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ ++ BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) ++ + static int + i40e_pf_host_switch_queues(struct i40e_pf_vf *vf, + struct virtchnl_queue_select *qsel, +@@ -321,7 +344,7 @@ i40e_pf_host_process_cmd_get_vf_resource(struct i40e_pf_vf *vf, uint8_t *msg, + vf->request_caps = *(uint32_t *)msg; + + /* enable all RSS by default, +- * doesn't support hena setting by virtchnnl yet. ++ * doesn't support hena setting by virtchnl yet. + */ + if (vf->request_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { + I40E_WRITE_REG(hw, I40E_VFQF_HENA1(0, vf->vf_idx), +@@ -333,6 +356,10 @@ i40e_pf_host_process_cmd_get_vf_resource(struct i40e_pf_vf *vf, uint8_t *msg, + + vf_res->vf_cap_flags = vf->request_caps & + I40E_VIRTCHNL_OFFLOAD_CAPS; ++ ++ if (vf->request_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES) ++ vf_res->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; ++ + /* For X722, it supports write back on ITR + * without binding queue to interrupt vector. 
+ */ +@@ -571,14 +598,14 @@ i40e_pf_config_irq_link_list(struct i40e_pf_vf *vf, + tempmap = vvm->rxq_map; + for (i = 0; i < sizeof(vvm->rxq_map) * BITS_PER_CHAR; i++) { + if (tempmap & 0x1) +- linklistmap |= (1 << (2 * i)); ++ linklistmap |= RTE_BIT64(2 * i); + tempmap >>= 1; + } + + tempmap = vvm->txq_map; + for (i = 0; i < sizeof(vvm->txq_map) * BITS_PER_CHAR; i++) { + if (tempmap & 0x1) +- linklistmap |= (1 << (2 * i + 1)); ++ linklistmap |= RTE_BIT64(2 * i + 1); + tempmap >>= 1; + } + +@@ -699,7 +726,7 @@ i40e_pf_host_process_cmd_config_irq_map(struct i40e_pf_vf *vf, + if ((map->rxq_map < qbit_max) && (map->txq_map < qbit_max)) { + i40e_pf_config_irq_link_list(vf, map); + } else { +- /* configured queue size excceed limit */ ++ /* configured queue size exceed limit */ + ret = I40E_ERR_PARAM; + goto send_msg; + } +@@ -1284,6 +1311,37 @@ i40e_pf_host_process_cmd_request_queues(struct i40e_pf_vf *vf, uint8_t *msg) + (u8 *)vfres, sizeof(*vfres)); + } + ++static void ++i40e_pf_host_process_cmd_get_rss_hena(struct i40e_pf_vf *vf) ++{ ++ struct virtchnl_rss_hena vrh = {0}; ++ struct i40e_pf *pf = vf->pf; ++ ++ if (pf->adapter->hw.mac.type == I40E_MAC_X722) ++ vrh.hena = I40E_DEFAULT_RSS_HENA_EXPANDED; ++ else ++ vrh.hena = I40E_DEFAULT_RSS_HENA; ++ ++ i40e_pf_host_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HENA_CAPS, ++ I40E_SUCCESS, (uint8_t *)&vrh, sizeof(vrh)); ++} ++ ++static void ++i40e_pf_host_process_cmd_set_rss_hena(struct i40e_pf_vf *vf, uint8_t *msg) ++{ ++ struct virtchnl_rss_hena *vrh = ++ (struct virtchnl_rss_hena *)msg; ++ struct i40e_hw *hw = &vf->pf->adapter->hw; ++ ++ i40e_write_rx_ctl(hw, I40E_VFQF_HENA1(0, vf->vf_idx), ++ (uint32_t)vrh->hena); ++ i40e_write_rx_ctl(hw, I40E_VFQF_HENA1(1, vf->vf_idx), ++ (uint32_t)(vrh->hena >> 32)); ++ ++ i40e_pf_host_send_msg_to_vf(vf, VIRTCHNL_OP_SET_RSS_HENA, ++ I40E_SUCCESS, NULL, 0); ++} ++ + void + i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, + uint16_t abs_vf_id, uint32_t opcode, +@@ -1454,6 +1512,14 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, + PMD_DRV_LOG(INFO, "OP_REQUEST_QUEUES received"); + i40e_pf_host_process_cmd_request_queues(vf, msg); + break; ++ case VIRTCHNL_OP_GET_RSS_HENA_CAPS: ++ PMD_DRV_LOG(INFO, "OP_GET_RSS_HENA_CAPS received"); ++ i40e_pf_host_process_cmd_get_rss_hena(vf); ++ break; ++ case VIRTCHNL_OP_SET_RSS_HENA: ++ PMD_DRV_LOG(INFO, "OP_SET_RSS_HENA received"); ++ i40e_pf_host_process_cmd_set_rss_hena(vf, msg); ++ break; + + /* Don't add command supported below, which will + * return an error code. 
+diff --git a/dpdk/drivers/net/i40e/i40e_rxtx.c b/dpdk/drivers/net/i40e/i40e_rxtx.c +index b8859bbff2..8a687803b9 100644 +--- a/dpdk/drivers/net/i40e/i40e_rxtx.c ++++ b/dpdk/drivers/net/i40e/i40e_rxtx.c +@@ -67,6 +67,7 @@ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT | \ + PKT_TX_TUNNEL_MASK | \ ++ PKT_TX_OUTER_UDP_CKSUM | \ + I40E_TX_IEEE1588_TMST) + + #define I40E_TX_OFFLOAD_NOTSUP_MASK \ +@@ -418,7 +419,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq) + uint16_t pkt_len; + uint64_t qword1; + uint32_t rx_status; +- int32_t s[I40E_LOOK_AHEAD], nb_dd; ++ int32_t s[I40E_LOOK_AHEAD], var, nb_dd; + int32_t i, j, nb_rx = 0; + uint64_t pkt_flags; + uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; +@@ -451,8 +452,18 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq) + rte_smp_rmb(); + + /* Compute how many status bits were set */ +- for (j = 0, nb_dd = 0; j < I40E_LOOK_AHEAD; j++) +- nb_dd += s[j] & (1 << I40E_RX_DESC_STATUS_DD_SHIFT); ++ for (j = 0, nb_dd = 0; j < I40E_LOOK_AHEAD; j++) { ++ var = s[j] & (1 << I40E_RX_DESC_STATUS_DD_SHIFT); ++#ifdef RTE_ARCH_ARM ++ /* For Arm platforms, only compute continuous status bits */ ++ if (var) ++ nb_dd += 1; ++ else ++ break; ++#else ++ nb_dd += var; ++#endif ++ } + + nb_rx += nb_dd; + +@@ -558,7 +569,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq) + rxdp[i].read.pkt_addr = dma_addr; + } + +- /* Update rx tail regsiter */ ++ /* Update rx tail register */ + I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->rx_free_trigger); + + rxq->rx_free_trigger = +@@ -693,6 +704,12 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + break; + } + ++ /** ++ * Use acquire fence to ensure that qword1 which includes DD ++ * bit is loaded before loading of other descriptor words. ++ */ ++ rte_atomic_thread_fence(__ATOMIC_ACQUIRE); ++ + rxd = *rxdp; + nb_hold++; + rxe = &sw_ring[rx_id]; +@@ -809,6 +826,12 @@ i40e_recv_scattered_pkts(void *rx_queue, + break; + } + ++ /** ++ * Use acquire fence to ensure that qword1 which includes DD ++ * bit is loaded before loading of other descriptor words. ++ */ ++ rte_atomic_thread_fence(__ATOMIC_ACQUIRE); ++ + rxd = *rxdp; + nb_hold++; + rxe = &sw_ring[rx_id]; +@@ -932,7 +955,7 @@ i40e_recv_scattered_pkts(void *rx_queue, + * threshold of the queue, advance the Receive Descriptor Tail (RDT) + * register. Update the RDT with the value of the last processed RX + * descriptor minus 1, to guarantee that the RDT register is never +- * equal to the RDH register, which creates a "full" ring situtation ++ * equal to the RDH register, which creates a "full" ring situation + * from the hardware point of view. + */ + nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold); +@@ -1386,7 +1409,7 @@ tx_xmit_pkts(struct i40e_tx_queue *txq, + i40e_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n)); + txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n)); + +- /* Determin if RS bit needs to be set */ ++ /* Determine if RS bit needs to be set */ + if (txq->tx_tail > txq->tx_next_rs) { + txr[txq->tx_next_rs].cmd_type_offset_bsz |= + rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) << +@@ -1579,7 +1602,7 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) + } + + if (rxq->rx_deferred_start) +- PMD_DRV_LOG(WARNING, "RX queue %u is deferrd start", ++ PMD_DRV_LOG(WARNING, "RX queue %u is deferred start", + rx_queue_id); + + err = i40e_alloc_rx_queue_mbufs(rxq); +@@ -1588,7 +1611,7 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) + return err; + } + +- /* Init the RX tail regieter. 
*/ ++ /* Init the RX tail register. */ + I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1); + + err = i40e_switch_rx_queue(hw, rxq->reg_idx, TRUE); +@@ -1653,7 +1676,7 @@ i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id) + } + + if (txq->tx_deferred_start) +- PMD_DRV_LOG(WARNING, "TX queue %u is deferrd start", ++ PMD_DRV_LOG(WARNING, "TX queue %u is deferred start", + tx_queue_id); + + /* +@@ -1808,7 +1831,7 @@ i40e_dev_rx_queue_setup_runtime(struct rte_eth_dev *dev, + PMD_DRV_LOG(ERR, "Can't use default burst."); + return -EINVAL; + } +- /* check scatterred conflict */ ++ /* check scattered conflict */ + if (!dev->data->scattered_rx && use_scattered_rx) { + PMD_DRV_LOG(ERR, "Scattered rx is required."); + return -EINVAL; +@@ -1902,7 +1925,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, + rxq->rx_deferred_start = rx_conf->rx_deferred_start; + rxq->offloads = offloads; + +- /* Allocate the maximun number of RX ring hardware descriptor. */ ++ /* Allocate the maximum number of RX ring hardware descriptor. */ + len = I40E_MAX_RING_DESC; + + /** +@@ -1922,6 +1945,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, + return -ENOMEM; + } + ++ rxq->mz = rz; + /* Zero all the descriptors in the ring. */ + memset(rz->addr, 0, ring_size); + +@@ -2001,6 +2025,7 @@ i40e_dev_rx_queue_release(void *rxq) + + i40e_rx_queue_release_mbufs(q); + rte_free(q->sw_ring); ++ rte_memzone_free(q->mz); + rte_free(q); + } + +@@ -2187,8 +2212,6 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, + if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) { + vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + vsi = &vf->vsi; +- if (!vsi) +- return -EINVAL; + reg_idx = queue_idx; + } else { + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); +@@ -2232,7 +2255,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, + */ + tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ? + tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH); +- /* force tx_rs_thresh to adapt an aggresive tx_free_thresh */ ++ /* force tx_rs_thresh to adapt an aggressive tx_free_thresh */ + tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ? + nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH; + if (tx_conf->tx_rs_thresh > 0) +@@ -2323,6 +2346,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, + return -ENOMEM; + } + ++ txq->mz = tz; + txq->nb_tx_desc = nb_desc; + txq->tx_rs_thresh = tx_rs_thresh; + txq->tx_free_thresh = tx_free_thresh; +@@ -2396,6 +2420,7 @@ i40e_dev_tx_queue_release(void *txq) + + i40e_tx_queue_release_mbufs(q); + rte_free(q->sw_ring); ++ rte_memzone_free(q->mz); + rte_free(q); + } + +@@ -2480,6 +2505,10 @@ i40e_reset_rx_queue(struct i40e_rx_queue *rxq) + #endif /* RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC */ + rxq->rx_tail = 0; + rxq->nb_rx_hold = 0; ++ ++ if (rxq->pkt_first_seg != NULL) ++ rte_pktmbuf_free(rxq->pkt_first_seg); ++ + rxq->pkt_first_seg = NULL; + rxq->pkt_last_seg = NULL; + +@@ -2889,7 +2918,7 @@ i40e_rx_queue_init(struct i40e_rx_queue *rxq) + if (rxq->max_pkt_len > buf_size) + dev_data->scattered_rx = 1; + +- /* Init the RX tail regieter. */ ++ /* Init the RX tail register. 
*/ + I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1); + + return 0; +@@ -2929,7 +2958,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) + continue; + i40e_dev_rx_queue_release(dev->data->rx_queues[i]); + dev->data->rx_queues[i] = NULL; +- rte_eth_dma_zone_free(dev, "rx_ring", i); + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { +@@ -2937,7 +2965,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) + continue; + i40e_dev_tx_queue_release(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; +- rte_eth_dma_zone_free(dev, "tx_ring", i); + } + } + +@@ -2954,7 +2981,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf) + return I40E_ERR_BAD_PTR; + } + +- dev = pf->adapter->eth_dev; ++ dev = &rte_eth_devices[pf->dev_data->port_id]; + + /* Allocate the TX queue data structure. */ + txq = rte_zmalloc_socket("i40e fdir tx queue", +@@ -2980,6 +3007,7 @@ i40e_fdir_setup_tx_resources(struct i40e_pf *pf) + return I40E_ERR_NO_MEMORY; + } + ++ txq->mz = tz; + txq->nb_tx_desc = I40E_FDIR_NUM_TX_DESC; + txq->queue_id = I40E_FDIR_QUEUE_ID; + txq->reg_idx = pf->fdir.fdir_vsi->base_queue; +@@ -3012,7 +3040,7 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf) + return I40E_ERR_BAD_PTR; + } + +- dev = pf->adapter->eth_dev; ++ dev = &rte_eth_devices[pf->dev_data->port_id]; + + /* Allocate the RX queue data structure. */ + rxq = rte_zmalloc_socket("i40e fdir rx queue", +@@ -3038,6 +3066,7 @@ i40e_fdir_setup_rx_resources(struct i40e_pf *pf) + return I40E_ERR_NO_MEMORY; + } + ++ rxq->mz = rz; + rxq->nb_rx_desc = I40E_FDIR_NUM_RX_DESC; + rxq->queue_id = I40E_FDIR_QUEUE_ID; + rxq->reg_idx = pf->fdir.fdir_vsi->base_queue; +diff --git a/dpdk/drivers/net/i40e/i40e_rxtx.h b/dpdk/drivers/net/i40e/i40e_rxtx.h +index 57d7b4160b..0671750779 100644 +--- a/dpdk/drivers/net/i40e/i40e_rxtx.h ++++ b/dpdk/drivers/net/i40e/i40e_rxtx.h +@@ -121,6 +121,7 @@ struct i40e_rx_queue { + uint16_t rx_using_sse; /**rx_descriptor_fields1, + pkt_mb1); + desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl); +- /* C.4 calc avaialbe number of desc */ ++ /* C.4 calc available number of desc */ + if (unlikely(stat == 0)) { + nb_pkts_recd += RTE_I40E_DESCS_PER_LOOP; + } else { +diff --git a/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c b/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c +index 0bcb48e24e..a8aa5bc0cc 100644 +--- a/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c ++++ b/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c +@@ -282,7 +282,7 @@ desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp, + l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e); + /* then we shift left 1 bit */ + l3_l4e = _mm_slli_epi32(l3_l4e, 1); +- /* we need to mask out the reduntant bits */ ++ /* we need to mask out the redundant bits */ + l3_l4e = _mm_and_si128(l3_l4e, cksum_mask); + + vlan0 = _mm_or_si128(vlan0, rss); +@@ -297,7 +297,7 @@ desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp, + __m128i v_fdir_ol_flags = descs_to_fdir_16b(desc_fltstat, + descs, rx_pkts); + #endif +- /* OR in ol_flag bits after descriptor speicific extraction */ ++ /* OR in ol_flag bits after descriptor specific extraction */ + vlan0 = _mm_or_si128(vlan0, v_fdir_ol_flags); + } + +@@ -462,7 +462,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, + /* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */ + mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); + /* Read desc statuses backwards to avoid race condition */ +- /* A.1 load 4 pkts desc */ ++ /* A.1 load desc[3] */ + descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + 
rte_compiler_barrier(); + +@@ -474,9 +474,9 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, + mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]); + #endif + ++ /* A.1 load desc[2-0] */ + descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); +- /* B.1 load 2 mbuf point */ + descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); + descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); +@@ -557,7 +557,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, + /* and with mask to extract bits, flipping 1-0 */ + __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); + /* the staterr values are not in order, as the count +- * count of dd bits doesn't care. However, for end of ++ * of dd bits doesn't care. However, for end of + * packet tracking, we do care, so shuffle. This also + * compresses the 32-bit values to 8-bit + */ +@@ -577,7 +577,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, + _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, + pkt_mb1); + desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl); +- /* C.4 calc avaialbe number of desc */ ++ /* C.4 calc available number of desc */ + var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); + nb_pkts_recd += var; + if (likely(var != RTE_I40E_DESCS_PER_LOOP)) +diff --git a/dpdk/drivers/net/i40e/i40e_vf_representor.c b/dpdk/drivers/net/i40e/i40e_vf_representor.c +index 9e40406a3d..5daa7ddcfd 100644 +--- a/dpdk/drivers/net/i40e/i40e_vf_representor.c ++++ b/dpdk/drivers/net/i40e/i40e_vf_representor.c +@@ -19,15 +19,18 @@ i40e_vf_representor_link_update(struct rte_eth_dev *ethdev, + int wait_to_complete) + { + struct i40e_vf_representor *representor = ethdev->data->dev_private; ++ struct rte_eth_dev *dev = ++ &rte_eth_devices[representor->adapter->pf.dev_data->port_id]; + +- return i40e_dev_link_update(representor->adapter->eth_dev, +- wait_to_complete); ++ return i40e_dev_link_update(dev, wait_to_complete); + } + static int + i40e_vf_representor_dev_infos_get(struct rte_eth_dev *ethdev, + struct rte_eth_dev_info *dev_info) + { + struct i40e_vf_representor *representor = ethdev->data->dev_private; ++ struct rte_eth_dev_data *pf_dev_data = ++ representor->adapter->pf.dev_data; + + /* get dev info for the vdev */ + dev_info->device = ethdev->device; +@@ -99,7 +102,7 @@ i40e_vf_representor_dev_infos_get(struct rte_eth_dev *ethdev, + }; + + dev_info->switch_info.name = +- representor->adapter->eth_dev->device->name; ++ rte_eth_devices[pf_dev_data->port_id].device->name; + dev_info->switch_info.domain_id = representor->switch_domain_id; + dev_info->switch_info.port_id = representor->vf_id; + +@@ -213,7 +216,7 @@ i40e_vf_representor_stats_get(struct rte_eth_dev *ethdev, + int ret; + + ret = rte_pmd_i40e_get_vf_native_stats( +- representor->adapter->eth_dev->data->port_id, ++ representor->adapter->pf.dev_data->port_id, + representor->vf_id, &native_stats); + if (ret == 0) { + i40evf_stat_update_48( +@@ -273,7 +276,7 @@ i40e_vf_representor_stats_reset(struct rte_eth_dev *ethdev) + struct i40e_vf_representor *representor = ethdev->data->dev_private; + + return rte_pmd_i40e_get_vf_native_stats( +- representor->adapter->eth_dev->data->port_id, ++ representor->adapter->pf.dev_data->port_id, + representor->vf_id, &representor->stats_offset); + } + +@@ -283,7 +286,7 @@ i40e_vf_representor_promiscuous_enable(struct rte_eth_dev *ethdev) + struct i40e_vf_representor *representor = ethdev->data->dev_private; + + return 
rte_pmd_i40e_set_vf_unicast_promisc( +- representor->adapter->eth_dev->data->port_id, ++ representor->adapter->pf.dev_data->port_id, + representor->vf_id, 1); + } + +@@ -293,7 +296,7 @@ i40e_vf_representor_promiscuous_disable(struct rte_eth_dev *ethdev) + struct i40e_vf_representor *representor = ethdev->data->dev_private; + + return rte_pmd_i40e_set_vf_unicast_promisc( +- representor->adapter->eth_dev->data->port_id, ++ representor->adapter->pf.dev_data->port_id, + representor->vf_id, 0); + } + +@@ -303,7 +306,7 @@ i40e_vf_representor_allmulticast_enable(struct rte_eth_dev *ethdev) + struct i40e_vf_representor *representor = ethdev->data->dev_private; + + return rte_pmd_i40e_set_vf_multicast_promisc( +- representor->adapter->eth_dev->data->port_id, ++ representor->adapter->pf.dev_data->port_id, + representor->vf_id, 1); + } + +@@ -313,7 +316,7 @@ i40e_vf_representor_allmulticast_disable(struct rte_eth_dev *ethdev) + struct i40e_vf_representor *representor = ethdev->data->dev_private; + + return rte_pmd_i40e_set_vf_multicast_promisc( +- representor->adapter->eth_dev->data->port_id, ++ representor->adapter->pf.dev_data->port_id, + representor->vf_id, 0); + } + +@@ -323,7 +326,7 @@ i40e_vf_representor_mac_addr_remove(struct rte_eth_dev *ethdev, uint32_t index) + struct i40e_vf_representor *representor = ethdev->data->dev_private; + + rte_pmd_i40e_remove_vf_mac_addr( +- representor->adapter->eth_dev->data->port_id, ++ representor->adapter->pf.dev_data->port_id, + representor->vf_id, ðdev->data->mac_addrs[index]); + } + +@@ -334,7 +337,7 @@ i40e_vf_representor_mac_addr_set(struct rte_eth_dev *ethdev, + struct i40e_vf_representor *representor = ethdev->data->dev_private; + + return rte_pmd_i40e_set_vf_mac_addr( +- representor->adapter->eth_dev->data->port_id, ++ representor->adapter->pf.dev_data->port_id, + representor->vf_id, mac_addr); + } + +@@ -346,7 +349,7 @@ i40e_vf_representor_vlan_filter_set(struct rte_eth_dev *ethdev, + uint64_t vf_mask = 1ULL << representor->vf_id; + + return rte_pmd_i40e_set_vf_vlan_filter( +- representor->adapter->eth_dev->data->port_id, ++ representor->adapter->pf.dev_data->port_id, + vlan_id, vf_mask, on); + } + +@@ -360,7 +363,7 @@ i40e_vf_representor_vlan_offload_set(struct rte_eth_dev *ethdev, int mask) + struct i40e_pf *pf; + uint32_t vfid; + +- pdev = representor->adapter->eth_dev; ++ pdev = &rte_eth_devices[representor->adapter->pf.dev_data->port_id]; + vfid = representor->vf_id; + + if (!is_i40e_supported(pdev)) { +@@ -410,7 +413,7 @@ i40e_vf_representor_vlan_strip_queue_set(struct rte_eth_dev *ethdev, + struct i40e_vf_representor *representor = ethdev->data->dev_private; + + rte_pmd_i40e_set_vf_vlan_stripq( +- representor->adapter->eth_dev->data->port_id, ++ representor->adapter->pf.dev_data->port_id, + representor->vf_id, on); + } + +@@ -421,7 +424,7 @@ i40e_vf_representor_vlan_pvid_set(struct rte_eth_dev *ethdev, uint16_t vlan_id, + struct i40e_vf_representor *representor = ethdev->data->dev_private; + + return rte_pmd_i40e_set_vf_vlan_insert( +- representor->adapter->eth_dev->data->port_id, ++ representor->adapter->pf.dev_data->port_id, + representor->vf_id, vlan_id); + } + +@@ -487,7 +490,7 @@ i40e_vf_representor_init(struct rte_eth_dev *ethdev, void *init_params) + ((struct i40e_vf_representor *)init_params)->adapter; + + pf = I40E_DEV_PRIVATE_TO_PF( +- representor->adapter->eth_dev->data->dev_private); ++ representor->adapter->pf.dev_data->dev_private); + + if (representor->vf_id >= pf->vf_num) + return -ENODEV; +@@ -508,8 +511,7 @@ 
i40e_vf_representor_init(struct rte_eth_dev *ethdev, void *init_params) + return -ENODEV; + } + +- ethdev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR | +- RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; ++ ethdev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR; + ethdev->data->representor_id = representor->vf_id; + + /* Setting the number queues allocated to the VF */ +@@ -519,7 +521,7 @@ i40e_vf_representor_init(struct rte_eth_dev *ethdev, void *init_params) + ethdev->data->mac_addrs = &vf->mac_addr; + + /* Link state. Inherited from PF */ +- link = &representor->adapter->eth_dev->data->dev_link; ++ link = &representor->adapter->pf.dev_data->dev_link; + + ethdev->data->dev_link.link_speed = link->link_speed; + ethdev->data->dev_link.link_duplex = link->link_duplex; +diff --git a/dpdk/drivers/net/i40e/rte_pmd_i40e.c b/dpdk/drivers/net/i40e/rte_pmd_i40e.c +index 2e34140c5b..2481043bf5 100644 +--- a/dpdk/drivers/net/i40e/rte_pmd_i40e.c ++++ b/dpdk/drivers/net/i40e/rte_pmd_i40e.c +@@ -1427,7 +1427,7 @@ rte_pmd_i40e_set_tc_strict_prio(uint16_t port, uint8_t tc_map) + /* Get all TCs' bandwidth. */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (veb->enabled_tc & BIT_ULL(i)) { +- /* For rubust, if bandwidth is 0, use 1 instead. */ ++ /* For robust, if bandwidth is 0, use 1 instead. */ + if (veb->bw_info.bw_ets_share_credits[i]) + ets_data.tc_bw_share_credits[i] = + veb->bw_info.bw_ets_share_credits[i]; +diff --git a/dpdk/drivers/net/iavf/iavf.h b/dpdk/drivers/net/iavf/iavf.h +index 3328bd9327..bc49f1fa17 100644 +--- a/dpdk/drivers/net/iavf/iavf.h ++++ b/dpdk/drivers/net/iavf/iavf.h +@@ -14,7 +14,7 @@ + + #define IAVF_AQ_LEN 32 + #define IAVF_AQ_BUF_SZ 4096 +-#define IAVF_RESET_WAIT_CNT 50 ++#define IAVF_RESET_WAIT_CNT 500 + #define IAVF_BUF_SIZE_MIN 1024 + #define IAVF_FRAME_SIZE_MAX 9728 + #define IAVF_QUEUE_BASE_ADDR_UNIT 128 +@@ -136,7 +136,7 @@ struct iavf_info { + uint64_t supported_rxdid; + uint8_t *proto_xtr; /* proto xtr type for all queues */ + volatile enum virtchnl_ops pend_cmd; /* pending command not finished */ +- uint32_t cmd_retval; /* return value of the cmd response from PF */ ++ int cmd_retval; /* return value of the cmd response from PF */ + uint8_t *aq_resp; /* buffer to store the adminq response from PF */ + + /* Event from pf */ +@@ -166,6 +166,8 @@ struct iavf_info { + struct iavf_fdir_info fdir; /* flow director info */ + /* indicate large VF support enabled or not */ + bool lv_enabled; ++ ++ struct rte_eth_dev *eth_dev; + }; + + #define IAVF_MAX_PKT_TYPE 1024 +@@ -194,14 +196,14 @@ struct iavf_devargs { + /* Structure to store private data for each VF instance. */ + struct iavf_adapter { + struct iavf_hw hw; +- struct rte_eth_dev *eth_dev; ++ struct rte_eth_dev_data *dev_data; + struct iavf_info vf; + + bool rx_bulk_alloc_allowed; + /* For vector PMD */ + bool rx_vec_allowed; + bool tx_vec_allowed; +- const uint32_t *ptype_tbl; ++ uint32_t ptype_tbl[IAVF_MAX_PKT_TYPE] __rte_cache_min_aligned; + bool stopped; + uint16_t fdir_ref_cnt; + struct iavf_devargs devargs; +@@ -220,8 +222,6 @@ struct iavf_adapter { + (&(((struct iavf_vsi *)vsi)->adapter->hw)) + #define IAVF_VSI_TO_VF(vsi) \ + (&(((struct iavf_vsi *)vsi)->adapter->vf)) +-#define IAVF_VSI_TO_ETH_DEV(vsi) \ +- (((struct iavf_vsi *)vsi)->adapter->eth_dev) + + static inline void + iavf_init_adminq_parameter(struct iavf_hw *hw) +@@ -255,7 +255,7 @@ struct iavf_cmd_info { + * _atomic_set_cmd successfully. 
+ */ + static inline void +-_notify_cmd(struct iavf_info *vf, uint32_t msg_ret) ++_notify_cmd(struct iavf_info *vf, int msg_ret) + { + vf->cmd_retval = msg_ret; + rte_wmb(); +@@ -314,7 +314,7 @@ int iavf_query_stats(struct iavf_adapter *adapter, + int iavf_config_promisc(struct iavf_adapter *adapter, bool enable_unicast, + bool enable_multicast); + int iavf_add_del_eth_addr(struct iavf_adapter *adapter, +- struct rte_ether_addr *addr, bool add); ++ struct rte_ether_addr *addr, bool add, uint8_t type); + int iavf_add_del_vlan(struct iavf_adapter *adapter, uint16_t vlanid, bool add); + int iavf_fdir_add(struct iavf_adapter *adapter, struct iavf_fdir_conf *filter); + int iavf_fdir_del(struct iavf_adapter *adapter, struct iavf_fdir_conf *filter); +@@ -325,6 +325,6 @@ int iavf_add_del_rss_cfg(struct iavf_adapter *adapter, + int iavf_add_del_mc_addr_list(struct iavf_adapter *adapter, + struct rte_ether_addr *mc_addrs, + uint32_t mc_addrs_num, bool add); +-int iavf_request_queues(struct iavf_adapter *adapter, uint16_t num); ++int iavf_request_queues(struct rte_eth_dev *dev, uint16_t num); + int iavf_get_max_rss_queue_region(struct iavf_adapter *adapter); + #endif /* _IAVF_ETHDEV_H_ */ +diff --git a/dpdk/drivers/net/iavf/iavf_ethdev.c b/dpdk/drivers/net/iavf/iavf_ethdev.c +index ed69ba483e..060b3e3707 100644 +--- a/dpdk/drivers/net/iavf/iavf_ethdev.c ++++ b/dpdk/drivers/net/iavf/iavf_ethdev.c +@@ -250,15 +250,15 @@ iavf_init_rss(struct iavf_adapter *adapter) + uint16_t i, j, nb_q; + int ret; + +- rss_conf = &adapter->eth_dev->data->dev_conf.rx_adv_conf.rss_conf; +- nb_q = RTE_MIN(adapter->eth_dev->data->nb_rx_queues, ++ rss_conf = &adapter->dev_data->dev_conf.rx_adv_conf.rss_conf; ++ nb_q = RTE_MIN(adapter->dev_data->nb_rx_queues, + vf->max_rss_qregion); + + if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF)) { + PMD_DRV_LOG(DEBUG, "RSS is not supported"); + return -ENOTSUP; + } +- if (adapter->eth_dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS) { ++ if (adapter->dev_data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS) { + PMD_DRV_LOG(WARNING, "RSS is enabled by PF by default"); + /* set all lut items to default queue */ + for (i = 0; i < vf->vf_res->rss_lut_size; i++) +@@ -274,7 +274,7 @@ iavf_init_rss(struct iavf_adapter *adapter) + /* configure RSS key */ + if (!rss_conf->rss_key) { + /* Calculate the default hash key */ +- for (i = 0; i <= vf->vf_res->rss_key_size; i++) ++ for (i = 0; i < vf->vf_res->rss_key_size; i++) + vf->rss_key[i] = (uint8_t)rte_rand(); + } else + rte_memcpy(vf->rss_key, rss_conf->rss_key, +@@ -287,7 +287,7 @@ iavf_init_rss(struct iavf_adapter *adapter) + j = 0; + vf->rss_lut[i] = j; + } +- /* send virtchnnl ops to configure rss*/ ++ /* send virtchnl ops to configure RSS */ + ret = iavf_configure_rss_lut(adapter); + if (ret) + return ret; +@@ -306,7 +306,7 @@ iavf_queues_req_reset(struct rte_eth_dev *dev, uint16_t num) + struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(ad); + int ret; + +- ret = iavf_request_queues(ad, num); ++ ret = iavf_request_queues(dev, num); + if (ret) { + PMD_DRV_LOG(ERR, "request queues from PF failed"); + return ret; +@@ -408,13 +408,14 @@ iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq) + { + struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_eth_dev_data *dev_data = dev->data; +- uint16_t buf_size, max_pkt_len, len; ++ uint16_t buf_size, max_pkt_len; + + buf_size = rte_pktmbuf_data_room_size(rxq->mp) - RTE_PKTMBUF_HEADROOM; + + /* Calculate the maximum packet length allowed */ +- len = 
rxq->rx_buf_len * IAVF_MAX_CHAINED_RX_BUFFERS; +- max_pkt_len = RTE_MIN(len, dev->data->dev_conf.rxmode.max_rx_pkt_len); ++ max_pkt_len = RTE_MIN((uint32_t) ++ rxq->rx_buf_len * IAVF_MAX_CHAINED_RX_BUFFERS, ++ dev->data->dev_conf.rxmode.max_rx_pkt_len); + + /* Check if the jumbo frame and maximum packet length are set + * correctly. +@@ -508,7 +509,7 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev, + if (!qv_map) { + PMD_DRV_LOG(ERR, "Failed to allocate %d queue-vector map", + dev->data->nb_rx_queues); +- return -1; ++ goto qv_map_alloc_err; + } + + if (!dev->data->dev_conf.intr_conf.rxq || +@@ -569,7 +570,7 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev, + "vector %u are mapping to all Rx queues", + vf->msix_base); + } else { +- /* If Rx interrupt is reuquired, and we can use ++ /* If Rx interrupt is required, and we can use + * multi interrupts, then the vec is from 1 + */ + vf->nb_msix = RTE_MIN(intr_handle->nb_efd, +@@ -593,7 +594,7 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev, + if (!vf->lv_enabled) { + if (iavf_config_irq_map(adapter)) { + PMD_DRV_LOG(ERR, "config interrupt mapping failed"); +- return -1; ++ goto config_irq_map_err; + } + } else { + uint16_t num_qv_maps = dev->data->nb_rx_queues; +@@ -603,7 +604,7 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev, + if (iavf_config_irq_map_lv(adapter, + IAVF_IRQ_MAP_NUM_PER_BUF, index)) { + PMD_DRV_LOG(ERR, "config interrupt mapping for large VF failed"); +- return -1; ++ goto config_irq_map_err; + } + num_qv_maps -= IAVF_IRQ_MAP_NUM_PER_BUF; + index += IAVF_IRQ_MAP_NUM_PER_BUF; +@@ -611,10 +612,20 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev, + + if (iavf_config_irq_map_lv(adapter, num_qv_maps, index)) { + PMD_DRV_LOG(ERR, "config interrupt mapping for large VF failed"); +- return -1; ++ goto config_irq_map_err; + } + } + return 0; ++ ++config_irq_map_err: ++ rte_free(vf->qv_map); ++ vf->qv_map = NULL; ++ ++qv_map_alloc_err: ++ rte_free(intr_handle->intr_vec); ++ intr_handle->intr_vec = NULL; ++ ++ return -1; + } + + static int +@@ -623,28 +634,38 @@ iavf_start_queues(struct rte_eth_dev *dev) + struct iavf_rx_queue *rxq; + struct iavf_tx_queue *txq; + int i; ++ uint16_t nb_txq, nb_rxq; + +- for (i = 0; i < dev->data->nb_tx_queues; i++) { +- txq = dev->data->tx_queues[i]; ++ for (nb_txq = 0; nb_txq < dev->data->nb_tx_queues; nb_txq++) { ++ txq = dev->data->tx_queues[nb_txq]; + if (txq->tx_deferred_start) + continue; +- if (iavf_dev_tx_queue_start(dev, i) != 0) { +- PMD_DRV_LOG(ERR, "Fail to start queue %u", i); +- return -1; ++ if (iavf_dev_tx_queue_start(dev, nb_txq) != 0) { ++ PMD_DRV_LOG(ERR, "Fail to start tx queue %u", nb_txq); ++ goto tx_err; + } + } + +- for (i = 0; i < dev->data->nb_rx_queues; i++) { +- rxq = dev->data->rx_queues[i]; ++ for (nb_rxq = 0; nb_rxq < dev->data->nb_rx_queues; nb_rxq++) { ++ rxq = dev->data->rx_queues[nb_rxq]; + if (rxq->rx_deferred_start) + continue; +- if (iavf_dev_rx_queue_start(dev, i) != 0) { +- PMD_DRV_LOG(ERR, "Fail to start queue %u", i); +- return -1; ++ if (iavf_dev_rx_queue_start(dev, nb_rxq) != 0) { ++ PMD_DRV_LOG(ERR, "Fail to start rx queue %u", nb_rxq); ++ goto rx_err; + } + } + + return 0; ++ ++rx_err: ++ for (i = 0; i < nb_rxq; i++) ++ iavf_dev_rx_queue_stop(dev, i); ++tx_err: ++ for (i = 0; i < nb_txq; i++) ++ iavf_dev_tx_queue_stop(dev, i); ++ ++ return -1; + } + + static int +@@ -954,7 +975,7 @@ iavf_dev_add_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *addr, + return -EINVAL; + } 
+ +- err = iavf_add_del_eth_addr(adapter, addr, true); ++ err = iavf_add_del_eth_addr(adapter, addr, true, VIRTCHNL_ETHER_ADDR_EXTRA); + if (err) { + PMD_DRV_LOG(ERR, "fail to add MAC address"); + return -EIO; +@@ -976,7 +997,7 @@ iavf_dev_del_mac_addr(struct rte_eth_dev *dev, uint32_t index) + + addr = &dev->data->mac_addrs[index]; + +- err = iavf_add_del_eth_addr(adapter, addr, false); ++ err = iavf_add_del_eth_addr(adapter, addr, false, VIRTCHNL_ETHER_ADDR_EXTRA); + if (err) + PMD_DRV_LOG(ERR, "fail to delete MAC address"); + +@@ -1064,7 +1085,7 @@ iavf_dev_rss_reta_update(struct rte_eth_dev *dev, + } + + rte_memcpy(vf->rss_lut, lut, reta_size); +- /* send virtchnnl ops to configure rss*/ ++ /* send virtchnl ops to configure RSS */ + ret = iavf_configure_rss_lut(adapter); + if (ret) /* revert back */ + rte_memcpy(vf->rss_lut, lut, reta_size); +@@ -1188,17 +1209,15 @@ iavf_dev_set_default_mac_addr(struct rte_eth_dev *dev, + struct iavf_adapter *adapter = + IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); + struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(adapter); +- struct rte_ether_addr *perm_addr, *old_addr; ++ struct rte_ether_addr *old_addr; + int ret; + + old_addr = (struct rte_ether_addr *)hw->mac.addr; +- perm_addr = (struct rte_ether_addr *)hw->mac.perm_addr; + +- /* If the MAC address is configured by host, skip the setting */ +- if (rte_is_valid_assigned_ether_addr(perm_addr)) +- return -EPERM; ++ if (rte_is_same_ether_addr(old_addr, mac_addr)) ++ return 0; + +- ret = iavf_add_del_eth_addr(adapter, old_addr, false); ++ ret = iavf_add_del_eth_addr(adapter, old_addr, false, VIRTCHNL_ETHER_ADDR_PRIMARY); + if (ret) + PMD_DRV_LOG(ERR, "Fail to delete old MAC:" + " %02X:%02X:%02X:%02X:%02X:%02X", +@@ -1209,7 +1228,7 @@ iavf_dev_set_default_mac_addr(struct rte_eth_dev *dev, + old_addr->addr_bytes[4], + old_addr->addr_bytes[5]); + +- ret = iavf_add_del_eth_addr(adapter, mac_addr, true); ++ ret = iavf_add_del_eth_addr(adapter, mac_addr, true, VIRTCHNL_ETHER_ADDR_PRIMARY); + if (ret) + PMD_DRV_LOG(ERR, "Fail to add new MAC:" + " %02X:%02X:%02X:%02X:%02X:%02X", +@@ -1412,7 +1431,7 @@ iavf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) + + IAVF_WRITE_REG(hw, + IAVF_VFINT_DYN_CTLN1(msix_intr - IAVF_RX_VEC_START), +- 0); ++ IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_MASK); + + IAVF_WRITE_FLUSH(hw); + return 0; +@@ -1791,6 +1810,8 @@ iavf_init_vf(struct rte_eth_dev *dev) + struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + ++ vf->eth_dev = dev; ++ + err = iavf_parse_devargs(dev); + if (err) { + PMD_INIT_LOG(ERR, "Failed to parse devargs"); +@@ -1974,7 +1995,6 @@ iavf_dev_init(struct rte_eth_dev *eth_dev) + return 0; + } + rte_eth_copy_pci_info(eth_dev, pci_dev); +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + + hw->vendor_id = pci_dev->id.vendor_id; + hw->device_id = pci_dev->id.device_id; +@@ -1985,7 +2005,7 @@ iavf_dev_init(struct rte_eth_dev *eth_dev) + hw->bus.func = pci_dev->addr.function; + hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; + hw->back = IAVF_DEV_PRIVATE_TO_ADAPTER(eth_dev->data->dev_private); +- adapter->eth_dev = eth_dev; ++ adapter->dev_data = eth_dev->data; + adapter->stopped = 1; + + if (iavf_init_vf(eth_dev) != 0) { +@@ -1994,7 +2014,7 @@ iavf_dev_init(struct rte_eth_dev *eth_dev) + } + + /* set default ptype table */ +- adapter->ptype_tbl = iavf_get_default_ptype_table(); ++ iavf_set_default_ptype_table(eth_dev); + + /* copy mac addr */ + 
eth_dev->data->mac_addrs = rte_zmalloc( +diff --git a/dpdk/drivers/net/iavf/iavf_fdir.c b/dpdk/drivers/net/iavf/iavf_fdir.c +index 253213f8b5..01a3fc3f98 100644 +--- a/dpdk/drivers/net/iavf/iavf_fdir.c ++++ b/dpdk/drivers/net/iavf/iavf_fdir.c +@@ -287,7 +287,7 @@ iavf_fdir_parse_action_qregion(struct iavf_adapter *ad, + } + } + +- if (rss->queue[rss->queue_num - 1] >= ad->eth_dev->data->nb_rx_queues) { ++ if (rss->queue[rss->queue_num - 1] >= ad->dev_data->nb_rx_queues) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, act, + "Invalid queue region indexes."); +@@ -367,7 +367,7 @@ iavf_fdir_parse_action(struct iavf_adapter *ad, + filter_action->act_conf.queue.index = act_q->index; + + if (filter_action->act_conf.queue.index >= +- ad->eth_dev->data->nb_rx_queues) { ++ ad->dev_data->nb_rx_queues) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, "Invalid queue for FDIR."); +diff --git a/dpdk/drivers/net/iavf/iavf_hash.c b/dpdk/drivers/net/iavf/iavf_hash.c +index 72b0117230..eb7fd3f66f 100644 +--- a/dpdk/drivers/net/iavf/iavf_hash.c ++++ b/dpdk/drivers/net/iavf/iavf_hash.c +@@ -901,10 +901,9 @@ iavf_any_invalid_rss_type(enum rte_eth_hash_function rss_func, + static int + iavf_hash_parse_action(struct iavf_pattern_match_item *match_item, + const struct rte_flow_action actions[], +- uint64_t pattern_hint, void **meta, ++ uint64_t pattern_hint, struct iavf_rss_meta *rss_meta, + struct rte_flow_error *error) + { +- struct iavf_rss_meta *rss_meta = (struct iavf_rss_meta *)*meta; + struct virtchnl_proto_hdrs *proto_hdrs; + enum rte_flow_action_type action_type; + const struct rte_flow_action_rss *rss; +@@ -1018,7 +1017,7 @@ iavf_hash_parse_pattern_action(__rte_unused struct iavf_adapter *ad, + goto error; + + ret = iavf_hash_parse_action(pattern_match_item, actions, phint, +- (void **)&rss_meta_ptr, error); ++ rss_meta_ptr, error); + + error: + if (!ret && meta) +diff --git a/dpdk/drivers/net/iavf/iavf_rxtx.c b/dpdk/drivers/net/iavf/iavf_rxtx.c +index 21d508b3f4..7689d7fa46 100644 +--- a/dpdk/drivers/net/iavf/iavf_rxtx.c ++++ b/dpdk/drivers/net/iavf/iavf_rxtx.c +@@ -192,6 +192,10 @@ reset_rx_queue(struct iavf_rx_queue *rxq) + + rxq->rx_tail = 0; + rxq->nb_rx_hold = 0; ++ ++ if (rxq->pkt_first_seg != NULL) ++ rte_pktmbuf_free(rxq->pkt_first_seg); ++ + rxq->pkt_first_seg = NULL; + rxq->pkt_last_seg = NULL; + rxq->rxrearm_nb = 0; +@@ -241,11 +245,15 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq) + volatile union iavf_rx_desc *rxd; + struct rte_mbuf *mbuf = NULL; + uint64_t dma_addr; +- uint16_t i; ++ uint16_t i, j; + + for (i = 0; i < rxq->nb_rx_desc; i++) { + mbuf = rte_mbuf_raw_alloc(rxq->mp); + if (unlikely(!mbuf)) { ++ for (j = 0; j < i; j++) { ++ rte_pktmbuf_free_seg(rxq->sw_ring[j]); ++ rxq->sw_ring[j] = NULL; ++ } + PMD_DRV_LOG(ERR, "Failed to allocate mbuf for RX"); + return -ENOMEM; + } +@@ -318,12 +326,24 @@ release_txq_mbufs(struct iavf_tx_queue *txq) + } + } + +-static const struct iavf_rxq_ops def_rxq_ops = { +- .release_mbufs = release_rxq_mbufs, ++static const ++struct iavf_rxq_ops iavf_rxq_release_mbufs_ops[] = { ++ [IAVF_REL_MBUFS_DEFAULT].release_mbufs = release_rxq_mbufs, ++#ifdef RTE_ARCH_X86 ++ [IAVF_REL_MBUFS_SSE_VEC].release_mbufs = iavf_rx_queue_release_mbufs_sse, ++#endif + }; + +-static const struct iavf_txq_ops def_txq_ops = { +- .release_mbufs = release_txq_mbufs, ++static const ++struct iavf_txq_ops iavf_txq_release_mbufs_ops[] = { ++ [IAVF_REL_MBUFS_DEFAULT].release_mbufs = release_txq_mbufs, ++#ifdef RTE_ARCH_X86 ++ 
[IAVF_REL_MBUFS_SSE_VEC].release_mbufs = iavf_tx_queue_release_mbufs_sse, ++#ifdef CC_AVX512_SUPPORT ++ [IAVF_REL_MBUFS_AVX512_VEC].release_mbufs = iavf_tx_queue_release_mbufs_avx512, ++#endif ++#endif ++ + }; + + static inline void +@@ -431,48 +451,54 @@ iavf_rxd_to_pkt_fields_by_comms_aux_v2(struct iavf_rx_queue *rxq, + #endif + } + ++static const ++iavf_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[IAVF_RXDID_LAST + 1] = { ++ [IAVF_RXDID_LEGACY_0] = iavf_rxd_to_pkt_fields_by_comms_ovs, ++ [IAVF_RXDID_LEGACY_1] = iavf_rxd_to_pkt_fields_by_comms_ovs, ++ [IAVF_RXDID_COMMS_AUX_VLAN] = iavf_rxd_to_pkt_fields_by_comms_aux_v1, ++ [IAVF_RXDID_COMMS_AUX_IPV4] = iavf_rxd_to_pkt_fields_by_comms_aux_v1, ++ [IAVF_RXDID_COMMS_AUX_IPV6] = iavf_rxd_to_pkt_fields_by_comms_aux_v1, ++ [IAVF_RXDID_COMMS_AUX_IPV6_FLOW] = ++ iavf_rxd_to_pkt_fields_by_comms_aux_v1, ++ [IAVF_RXDID_COMMS_AUX_TCP] = iavf_rxd_to_pkt_fields_by_comms_aux_v1, ++ [IAVF_RXDID_COMMS_AUX_IP_OFFSET] = ++ iavf_rxd_to_pkt_fields_by_comms_aux_v2, ++ [IAVF_RXDID_COMMS_OVS_1] = iavf_rxd_to_pkt_fields_by_comms_ovs, ++}; ++ + static void + iavf_select_rxd_to_pkt_fields_handler(struct iavf_rx_queue *rxq, uint32_t rxdid) + { ++ rxq->rxdid = rxdid; ++ + switch (rxdid) { + case IAVF_RXDID_COMMS_AUX_VLAN: + rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_vlan_mask; +- rxq->rxd_to_pkt_fields = +- iavf_rxd_to_pkt_fields_by_comms_aux_v1; + break; + case IAVF_RXDID_COMMS_AUX_IPV4: + rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_ipv4_mask; +- rxq->rxd_to_pkt_fields = +- iavf_rxd_to_pkt_fields_by_comms_aux_v1; + break; + case IAVF_RXDID_COMMS_AUX_IPV6: + rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_ipv6_mask; +- rxq->rxd_to_pkt_fields = +- iavf_rxd_to_pkt_fields_by_comms_aux_v1; + break; + case IAVF_RXDID_COMMS_AUX_IPV6_FLOW: + rxq->xtr_ol_flag = + rte_pmd_ifd_dynflag_proto_xtr_ipv6_flow_mask; +- rxq->rxd_to_pkt_fields = +- iavf_rxd_to_pkt_fields_by_comms_aux_v1; + break; + case IAVF_RXDID_COMMS_AUX_TCP: + rxq->xtr_ol_flag = rte_pmd_ifd_dynflag_proto_xtr_tcp_mask; +- rxq->rxd_to_pkt_fields = +- iavf_rxd_to_pkt_fields_by_comms_aux_v1; + break; + case IAVF_RXDID_COMMS_AUX_IP_OFFSET: + rxq->xtr_ol_flag = + rte_pmd_ifd_dynflag_proto_xtr_ip_offset_mask; +- rxq->rxd_to_pkt_fields = +- iavf_rxd_to_pkt_fields_by_comms_aux_v2; + break; + case IAVF_RXDID_COMMS_OVS_1: +- rxq->rxd_to_pkt_fields = iavf_rxd_to_pkt_fields_by_comms_ovs; ++ case IAVF_RXDID_LEGACY_0: ++ case IAVF_RXDID_LEGACY_1: + break; + default: + /* update this according to the RXDID for FLEX_DESC_NONE */ +- rxq->rxd_to_pkt_fields = iavf_rxd_to_pkt_fields_by_comms_ovs; ++ rxq->rxdid = IAVF_RXDID_COMMS_OVS_1; + break; + } + +@@ -556,7 +582,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + rxq->vsi = vsi; + + len = rte_pktmbuf_data_room_size(rxq->mp) - RTE_PKTMBUF_HEADROOM; +- rxq->rx_buf_len = RTE_ALIGN(len, (1 << IAVF_RXQ_CTX_DBUFF_SHIFT)); ++ rxq->rx_buf_len = RTE_ALIGN_FLOOR(len, (1 << IAVF_RXQ_CTX_DBUFF_SHIFT)); + + /* Allocate the software ring. */ + len = nb_desc + IAVF_RX_MAX_BURST; +@@ -571,8 +597,8 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + return -ENOMEM; + } + +- /* Allocate the maximun number of RX ring hardware descriptor with +- * a liitle more to support bulk allocate. ++ /* Allocate the maximum number of RX ring hardware descriptor with ++ * a little more to support bulk allocate. 
+ */ + len = IAVF_MAX_RING_DESC + IAVF_RX_MAX_BURST; + ring_size = RTE_ALIGN(len * sizeof(union iavf_rx_desc), +@@ -596,7 +622,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + rxq->q_set = true; + dev->data->rx_queues[queue_idx] = rxq; + rxq->qrx_tail = hw->hw_addr + IAVF_QRX_TAIL1(rxq->queue_id); +- rxq->ops = &def_rxq_ops; ++ rxq->rel_mbufs_type = IAVF_REL_MBUFS_DEFAULT; + + if (check_rx_bulk_allow(rxq) == true) { + PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are " +@@ -647,7 +673,8 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev, + tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH); + tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ? + tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH); +- check_tx_thresh(nb_desc, tx_rs_thresh, tx_rs_thresh); ++ if (check_tx_thresh(nb_desc, tx_rs_thresh, tx_free_thresh) != 0) ++ return -EINVAL; + + /* Free memory if needed. */ + if (dev->data->tx_queues[queue_idx]) { +@@ -706,7 +733,7 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev, + txq->q_set = true; + dev->data->tx_queues[queue_idx] = txq; + txq->qtx_tail = hw->hw_addr + IAVF_QTX_TAIL1(queue_idx); +- txq->ops = &def_txq_ops; ++ txq->rel_mbufs_type = IAVF_REL_MBUFS_DEFAULT; + + if (check_tx_vec_allow(txq) == false) { + struct iavf_adapter *ad = +@@ -752,12 +779,14 @@ iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) + else + err = iavf_switch_queue_lv(adapter, rx_queue_id, true, true); + +- if (err) ++ if (err) { ++ release_rxq_mbufs(rxq); + PMD_DRV_LOG(ERR, "Failed to switch RX queue %u on", + rx_queue_id); +- else ++ } else { + dev->data->rx_queue_state[rx_queue_id] = + RTE_ETH_QUEUE_STATE_STARTED; ++ } + + return err; + } +@@ -820,7 +849,7 @@ iavf_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id) + } + + rxq = dev->data->rx_queues[rx_queue_id]; +- rxq->ops->release_mbufs(rxq); ++ iavf_rxq_release_mbufs_ops[rxq->rel_mbufs_type].release_mbufs(rxq); + reset_rx_queue(rxq); + dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED; + +@@ -848,7 +877,7 @@ iavf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id) + } + + txq = dev->data->tx_queues[tx_queue_id]; +- txq->ops->release_mbufs(txq); ++ iavf_txq_release_mbufs_ops[txq->rel_mbufs_type].release_mbufs(txq); + reset_tx_queue(txq); + dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED; + +@@ -863,7 +892,7 @@ iavf_dev_rx_queue_release(void *rxq) + if (!q) + return; + +- q->ops->release_mbufs(q); ++ iavf_rxq_release_mbufs_ops[q->rel_mbufs_type].release_mbufs(q); + rte_free(q->sw_ring); + rte_memzone_free(q->mz); + rte_free(q); +@@ -877,7 +906,7 @@ iavf_dev_tx_queue_release(void *txq) + if (!q) + return; + +- q->ops->release_mbufs(q); ++ iavf_txq_release_mbufs_ops[q->rel_mbufs_type].release_mbufs(q); + rte_free(q->sw_ring); + rte_memzone_free(q->mz); + rte_free(q); +@@ -911,7 +940,7 @@ iavf_stop_queues(struct rte_eth_dev *dev) + txq = dev->data->tx_queues[i]; + if (!txq) + continue; +- txq->ops->release_mbufs(txq); ++ iavf_txq_release_mbufs_ops[txq->rel_mbufs_type].release_mbufs(txq); + reset_tx_queue(txq); + dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; + } +@@ -919,7 +948,7 @@ iavf_stop_queues(struct rte_eth_dev *dev) + rxq = dev->data->rx_queues[i]; + if (!rxq) + continue; +- rxq->ops->release_mbufs(rxq); ++ iavf_rxq_release_mbufs_ops[rxq->rel_mbufs_type].release_mbufs(rxq); + reset_rx_queue(rxq); + dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; + } +@@ -1151,6 +1180,7 @@ iavf_recv_pkts(void *rx_queue, 
struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + rxd = *rxdp; + nb_hold++; + rxe = rxq->sw_ring[rx_id]; ++ rxq->sw_ring[rx_id] = nmb; + rx_id++; + if (unlikely(rx_id == rxq->nb_rx_desc)) + rx_id = 0; +@@ -1256,6 +1286,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue, + rxd = *rxdp; + nb_hold++; + rxe = rxq->sw_ring[rx_id]; ++ rxq->sw_ring[rx_id] = nmb; + rx_id++; + if (unlikely(rx_id == rxq->nb_rx_desc)) + rx_id = 0; +@@ -1291,7 +1322,7 @@ iavf_recv_pkts_flex_rxd(void *rx_queue, + rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M & + rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)]; + iavf_flex_rxd_to_vlan_tci(rxm, &rxd); +- rxq->rxd_to_pkt_fields(rxq, rxm, &rxd); ++ rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd); + pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); + rxm->ol_flags |= pkt_flags; + +@@ -1347,6 +1378,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts, + rxd = *rxdp; + nb_hold++; + rxe = rxq->sw_ring[rx_id]; ++ rxq->sw_ring[rx_id] = nmb; + rx_id++; + if (rx_id == rxq->nb_rx_desc) + rx_id = 0; +@@ -1432,7 +1464,7 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts, + first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M & + rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)]; + iavf_flex_rxd_to_vlan_tci(first_seg, &rxd); +- rxq->rxd_to_pkt_fields(rxq, first_seg, &rxd); ++ rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd); + pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); + + first_seg->ol_flags |= pkt_flags; +@@ -1500,6 +1532,7 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + rxd = *rxdp; + nb_hold++; + rxe = rxq->sw_ring[rx_id]; ++ rxq->sw_ring[rx_id] = nmb; + rx_id++; + if (rx_id == rxq->nb_rx_desc) + rx_id = 0; +@@ -1622,7 +1655,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq) + struct rte_mbuf *mb; + uint16_t stat_err0; + uint16_t pkt_len; +- int32_t s[IAVF_LOOK_AHEAD], nb_dd; ++ int32_t s[IAVF_LOOK_AHEAD], var, nb_dd; + int32_t i, j, nb_rx = 0; + uint64_t pkt_flags; + const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; +@@ -1647,9 +1680,27 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq) + + rte_smp_rmb(); + +- /* Compute how many status bits were set */ +- for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++) +- nb_dd += s[j] & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S); ++ /* Compute how many contiguous DD bits were set */ ++ for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++) { ++ var = s[j] & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S); ++#ifdef RTE_ARCH_ARM ++ /* For Arm platforms, count only contiguous descriptors ++ * whose DD bit is set to 1. On Arm platforms, reads of ++ * descriptors can be reordered. Since the CPU may ++ * be reading the descriptors as the NIC updates them ++ * in memory, it is possbile that the DD bit for a ++ * descriptor earlier in the queue is read as not set ++ * while the DD bit for a descriptor later in the queue ++ * is read as set. 
++ */ ++ if (var) ++ nb_dd += 1; ++ else ++ break; ++#else ++ nb_dd += var; ++#endif ++ } + + nb_rx += nb_dd; + +@@ -1669,7 +1720,7 @@ iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq) + mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M & + rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)]; + iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]); +- rxq->rxd_to_pkt_fields(rxq, mb, &rxdp[j]); ++ rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]); + stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0); + pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0); + +@@ -1699,7 +1750,7 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq) + uint16_t pkt_len; + uint64_t qword1; + uint32_t rx_status; +- int32_t s[IAVF_LOOK_AHEAD], nb_dd; ++ int32_t s[IAVF_LOOK_AHEAD], var, nb_dd; + int32_t i, j, nb_rx = 0; + uint64_t pkt_flags; + const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; +@@ -1730,9 +1781,27 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq) + + rte_smp_rmb(); + +- /* Compute how many status bits were set */ +- for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++) +- nb_dd += s[j] & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT); ++ /* Compute how many contiguous DD bits were set */ ++ for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++) { ++ var = s[j] & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT); ++#ifdef RTE_ARCH_ARM ++ /* For Arm platforms, count only contiguous descriptors ++ * whose DD bit is set to 1. On Arm platforms, reads of ++ * descriptors can be reordered. Since the CPU may ++ * be reading the descriptors as the NIC updates them ++ * in memory, it is possbile that the DD bit for a ++ * descriptor earlier in the queue is read as not set ++ * while the DD bit for a descriptor later in the queue ++ * is read as set. ++ */ ++ if (var) ++ nb_dd += 1; ++ else ++ break; ++#else ++ nb_dd += var; ++#endif ++ } + + nb_rx += nb_dd; + +@@ -2173,6 +2242,11 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + (volatile struct iavf_tx_context_desc *) + &txr[tx_id]; + ++ /* clear QW0 or the previous writeback value ++ * may impact next write ++ */ ++ *(volatile uint64_t *)ctx_txd = 0; ++ + txn = &sw_ring[txe->next_id]; + RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf); + if (txe->mbuf) { +@@ -2308,10 +2382,24 @@ iavf_set_rx_function(struct rte_eth_dev *dev) + struct iavf_adapter *adapter = + IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); + struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); ++ int i; ++ struct iavf_rx_queue *rxq; ++ bool use_flex = true; ++ ++ for (i = 0; i < dev->data->nb_rx_queues; i++) { ++ rxq = dev->data->rx_queues[i]; ++ if (rxq->rxdid <= IAVF_RXDID_LEGACY_1) { ++ PMD_DRV_LOG(NOTICE, "request RXDID[%d] in Queue[%d] is legacy, " ++ "set rx_pkt_burst as legacy for all queues", rxq->rxdid, i); ++ use_flex = false; ++ } else if (!(vf->supported_rxdid & BIT(rxq->rxdid))) { ++ PMD_DRV_LOG(NOTICE, "request RXDID[%d] in Queue[%d] is not supported, " ++ "set rx_pkt_burst as legacy for all queues", rxq->rxdid, i); ++ use_flex = false; ++ } ++ } + + #ifdef RTE_ARCH_X86 +- struct iavf_rx_queue *rxq; +- int i; + bool use_avx2 = false; + #ifdef CC_AVX512_SUPPORT + bool use_avx512 = false; +@@ -2393,7 +2481,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev) + if (dev->data->scattered_rx) { + PMD_DRV_LOG(DEBUG, "Using a Scattered Rx callback (port=%d).", + dev->data->port_id); +- if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) ++ if (use_flex) + dev->rx_pkt_burst = iavf_recv_scattered_pkts_flex_rxd; + else + dev->rx_pkt_burst = 
iavf_recv_scattered_pkts; +@@ -2404,7 +2492,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev) + } else { + PMD_DRV_LOG(DEBUG, "Using Basic Rx callback (port=%d).", + dev->data->port_id); +- if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) ++ if (use_flex) + dev->rx_pkt_burst = iavf_recv_pkts_flex_rxd; + else + dev->rx_pkt_burst = iavf_recv_pkts; +@@ -2660,8 +2748,8 @@ iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset) + return RTE_ETH_TX_DESC_FULL; + } + +-const uint32_t * +-iavf_get_default_ptype_table(void) ++static inline uint32_t ++iavf_get_default_ptype(uint16_t ptype) + { + static const uint32_t ptype_tbl[IAVF_MAX_PKT_TYPE] + __rte_cache_aligned = { +@@ -3196,5 +3284,16 @@ iavf_get_default_ptype_table(void) + /* All others reserved */ + }; + +- return ptype_tbl; ++ return ptype_tbl[ptype]; ++} ++ ++void __rte_cold ++iavf_set_default_ptype_table(struct rte_eth_dev *dev) ++{ ++ struct iavf_adapter *ad = ++ IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); ++ int i; ++ ++ for (i = 0; i < IAVF_MAX_PKT_TYPE; i++) ++ ad->ptype_tbl[i] = iavf_get_default_ptype(i); + } +diff --git a/dpdk/drivers/net/iavf/iavf_rxtx.h b/dpdk/drivers/net/iavf/iavf_rxtx.h +index d4b4935be6..557a7e46a7 100644 +--- a/dpdk/drivers/net/iavf/iavf_rxtx.h ++++ b/dpdk/drivers/net/iavf/iavf_rxtx.h +@@ -34,7 +34,7 @@ + #define DEFAULT_TX_RS_THRESH 32 + #define DEFAULT_TX_FREE_THRESH 32 + +-#define IAVF_MIN_TSO_MSS 256 ++#define IAVF_MIN_TSO_MSS 88 + #define IAVF_MAX_TSO_MSS 9668 + #define IAVF_TSO_MAX_SEG UINT8_MAX + #define IAVF_TX_MAX_MTU_SEG 8 +@@ -166,6 +166,7 @@ struct iavf_rx_queue { + struct rte_mbuf *pkt_last_seg; /* last segment of current packet */ + struct rte_mbuf fake_mbuf; /* dummy mbuf */ + uint8_t rxdid; ++ uint8_t rel_mbufs_type; + + /* used for VPMD */ + uint16_t rxrearm_nb; /* number of remaining to be re-armed */ +@@ -193,8 +194,6 @@ struct iavf_rx_queue { + uint8_t proto_xtr; /* protocol extraction type */ + uint64_t xtr_ol_flag; + /* flexible descriptor metadata extraction offload flag */ +- iavf_rxd_to_pkt_fields_t rxd_to_pkt_fields; +- /* handle flexible descriptor by RXDID */ + }; + + struct iavf_tx_entry { +@@ -222,6 +221,7 @@ struct iavf_tx_queue { + uint16_t last_desc_cleaned; /* last desc have been cleaned*/ + uint16_t free_thresh; + uint16_t rs_thresh; ++ uint8_t rel_mbufs_type; + + uint16_t port_id; + uint16_t queue_id; +@@ -326,6 +326,12 @@ struct iavf_32b_rx_flex_desc_comms_ovs { + } flex_ts; + }; + ++enum iavf_rxtx_rel_mbufs_type { ++ IAVF_REL_MBUFS_DEFAULT = 0, ++ IAVF_REL_MBUFS_SSE_VEC = 1, ++ IAVF_REL_MBUFS_AVX512_VEC = 2, ++}; ++ + /* Receive Flex Descriptor profile IDs: There are a total + * of 64 profiles where profile IDs 0/1 are for legacy; and + * profiles 2-63 are flex profiles that can be programmed +@@ -484,7 +490,10 @@ int iavf_txq_vec_setup_avx512(struct iavf_tx_queue *txq); + + uint8_t iavf_proto_xtr_type_to_rxdid(uint8_t xtr_type); + +-const uint32_t *iavf_get_default_ptype_table(void); ++void iavf_set_default_ptype_table(struct rte_eth_dev *dev); ++void iavf_tx_queue_release_mbufs_avx512(struct iavf_tx_queue *txq); ++void iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq); ++void iavf_tx_queue_release_mbufs_sse(struct iavf_tx_queue *txq); + + static inline + void iavf_dump_rx_descriptor(struct iavf_rx_queue *rxq, +@@ -540,8 +549,8 @@ void iavf_dump_tx_descriptor(const struct iavf_tx_queue *txq, + + #define FDIR_PROC_ENABLE_PER_QUEUE(ad, on) do { \ + int i; \ +- for (i = 0; i < (ad)->eth_dev->data->nb_rx_queues; i++) { \ +- struct 
iavf_rx_queue *rxq = (ad)->eth_dev->data->rx_queues[i]; \ ++ for (i = 0; i < (ad)->dev_data->nb_rx_queues; i++) { \ ++ struct iavf_rx_queue *rxq = (ad)->dev_data->rx_queues[i]; \ + if (!rxq) \ + continue; \ + rxq->fdir_enabled = on; \ +diff --git a/dpdk/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/dpdk/drivers/net/iavf/iavf_rxtx_vec_avx2.c +index 8f28afc8c5..a006d90a24 100644 +--- a/dpdk/drivers/net/iavf/iavf_rxtx_vec_avx2.c ++++ b/dpdk/drivers/net/iavf/iavf_rxtx_vec_avx2.c +@@ -10,126 +10,10 @@ + #pragma GCC diagnostic ignored "-Wcast-qual" + #endif + +-static inline void ++static __rte_always_inline void + iavf_rxq_rearm(struct iavf_rx_queue *rxq) + { +- int i; +- uint16_t rx_id; +- volatile union iavf_rx_desc *rxdp; +- struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start]; +- +- rxdp = rxq->rx_ring + rxq->rxrearm_start; +- +- /* Pull 'n' more MBUFs into the software ring */ +- if (rte_mempool_get_bulk(rxq->mp, +- (void *)rxp, +- IAVF_RXQ_REARM_THRESH) < 0) { +- if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >= +- rxq->nb_rx_desc) { +- __m128i dma_addr0; +- +- dma_addr0 = _mm_setzero_si128(); +- for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) { +- rxp[i] = &rxq->fake_mbuf; +- _mm_store_si128((__m128i *)&rxdp[i].read, +- dma_addr0); +- } +- } +- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += +- IAVF_RXQ_REARM_THRESH; +- return; +- } +- +-#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC +- struct rte_mbuf *mb0, *mb1; +- __m128i dma_addr0, dma_addr1; +- __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, +- RTE_PKTMBUF_HEADROOM); +- /* Initialize the mbufs in vector, process 2 mbufs in one loop */ +- for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) { +- __m128i vaddr0, vaddr1; +- +- mb0 = rxp[0]; +- mb1 = rxp[1]; +- +- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ +- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != +- offsetof(struct rte_mbuf, buf_addr) + 8); +- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); +- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); +- +- /* convert pa to dma_addr hdr/data */ +- dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0); +- dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1); +- +- /* add headroom to pa values */ +- dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room); +- dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room); +- +- /* flush desc with pa dma_addr */ +- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0); +- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1); +- } +-#else +- struct rte_mbuf *mb0, *mb1, *mb2, *mb3; +- __m256i dma_addr0_1, dma_addr2_3; +- __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM); +- /* Initialize the mbufs in vector, process 4 mbufs in one loop */ +- for (i = 0; i < IAVF_RXQ_REARM_THRESH; +- i += 4, rxp += 4, rxdp += 4) { +- __m128i vaddr0, vaddr1, vaddr2, vaddr3; +- __m256i vaddr0_1, vaddr2_3; +- +- mb0 = rxp[0]; +- mb1 = rxp[1]; +- mb2 = rxp[2]; +- mb3 = rxp[3]; +- +- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ +- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != +- offsetof(struct rte_mbuf, buf_addr) + 8); +- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); +- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); +- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr); +- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr); +- +- /** +- * merge 0 & 1, by casting 0 to 256-bit and inserting 1 +- * into the high lanes. 
Similarly for 2 & 3 +- */ +- vaddr0_1 = +- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), +- vaddr1, 1); +- vaddr2_3 = +- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2), +- vaddr3, 1); +- +- /* convert pa to dma_addr hdr/data */ +- dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1); +- dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3); +- +- /* add headroom to pa values */ +- dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room); +- dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room); +- +- /* flush desc with pa dma_addr */ +- _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1); +- _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3); +- } +- +-#endif +- +- rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH; +- if (rxq->rxrearm_start >= rxq->nb_rx_desc) +- rxq->rxrearm_start = 0; +- +- rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH; +- +- rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? +- (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); +- +- /* Update the tail pointer on the NIC */ +- IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id); ++ return iavf_rxq_rearm_common(rxq, false); + } + + #define PKTLEN_SHIFT 10 +@@ -640,7 +524,10 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, + { + #define IAVF_DESCS_PER_LOOP_AVX 8 + +- const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl; ++ struct iavf_adapter *adapter = rxq->vsi->adapter; ++ ++ uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads; ++ const uint32_t *type_table = adapter->ptype_tbl; + + const __m256i mbuf_init = _mm256_set_epi64x(0, 0, + 0, rxq->mbuf_initializer); +@@ -996,8 +883,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, + * needs to load 2nd 16B of each desc for RSS hash parsing, + * will cause performance drop to get into this context. + */ +- if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads & +- DEV_RX_OFFLOAD_RSS_HASH) { ++ if (offloads & DEV_RX_OFFLOAD_RSS_HASH) { + /* load bottom half of every 32B desc */ + const __m128i raw_desc_bh7 = + _mm_load_si128 +diff --git a/dpdk/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/dpdk/drivers/net/iavf/iavf_rxtx_vec_avx512.c +index 584d12ea36..d56a523940 100644 +--- a/dpdk/drivers/net/iavf/iavf_rxtx_vec_avx512.c ++++ b/dpdk/drivers/net/iavf/iavf_rxtx_vec_avx512.c +@@ -13,7 +13,7 @@ + #define IAVF_DESCS_PER_LOOP_AVX 8 + #define PKTLEN_SHIFT 10 + +-static inline void ++static __rte_always_inline void + iavf_rxq_rearm(struct iavf_rx_queue *rxq) + { + int i; +@@ -25,6 +25,9 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq) + + rxdp = rxq->rx_ring + rxq->rxrearm_start; + ++ if (unlikely(!cache)) ++ return iavf_rxq_rearm_common(rxq, true); ++ + /* We need to pull 'n' more MBUFs into the software ring from mempool + * We inline the mempool function here, so we can vectorize the copy + * from the cache into the shadow ring. 
+@@ -380,7 +383,7 @@ _iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq, + len4_7); + __m512i mb4_7 = _mm512_shuffle_epi8(desc4_7, shuf_msk); + +- mb4_7 = _mm512_add_epi16(mb4_7, crc_adjust); ++ mb4_7 = _mm512_add_epi32(mb4_7, crc_adjust); + /** + * to get packet types, shift 64-bit values down 30 bits + * and so ptype is in lower 8-bits in each +@@ -411,7 +414,7 @@ _iavf_recv_raw_pkts_vec_avx512(struct iavf_rx_queue *rxq, + len0_3); + __m512i mb0_3 = _mm512_shuffle_epi8(desc0_3, shuf_msk); + +- mb0_3 = _mm512_add_epi16(mb0_3, crc_adjust); ++ mb0_3 = _mm512_add_epi32(mb0_3, crc_adjust); + /* get the packet types */ + const __m512i ptypes0_3 = _mm512_srli_epi64(desc0_3, 30); + const __m256i ptypes2_3 = _mm512_extracti64x4_epi64(ptypes0_3, 1); +@@ -638,7 +641,10 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts, uint8_t *split_packet) + { +- const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl; ++ struct iavf_adapter *adapter = rxq->vsi->adapter; ++ ++ uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads; ++ const uint32_t *type_table = adapter->ptype_tbl; + + const __m256i mbuf_init = _mm256_set_epi64x(0, 0, 0, + rxq->mbuf_initializer); +@@ -869,7 +875,7 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq, + */ + __m512i mb4_7 = _mm512_shuffle_epi8(raw_desc4_7, shuf_msk); + +- mb4_7 = _mm512_add_epi16(mb4_7, crc_adjust); ++ mb4_7 = _mm512_add_epi32(mb4_7, crc_adjust); + /** + * to get packet types, ptype is located in bit16-25 + * of each 128bits +@@ -898,7 +904,7 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq, + */ + __m512i mb0_3 = _mm512_shuffle_epi8(raw_desc0_3, shuf_msk); + +- mb0_3 = _mm512_add_epi16(mb0_3, crc_adjust); ++ mb0_3 = _mm512_add_epi32(mb0_3, crc_adjust); + /** + * to get packet types, ptype is located in bit16-25 + * of each 128bits +@@ -1011,8 +1017,7 @@ _iavf_recv_raw_pkts_vec_avx512_flex_rxd(struct iavf_rx_queue *rxq, + * needs to load 2nd 16B of each desc for RSS hash parsing, + * will cause performance drop to get into this context. 
+ */ +- if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads & +- DEV_RX_OFFLOAD_RSS_HASH) { ++ if (offloads & DEV_RX_OFFLOAD_RSS_HASH) { + /* load bottom half of every 32B desc */ + const __m128i raw_desc_bh7 = + _mm_load_si128 +@@ -1676,7 +1681,7 @@ iavf_xmit_pkts_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts, + return nb_tx; + } + +-static inline void ++void __rte_cold + iavf_tx_queue_release_mbufs_avx512(struct iavf_tx_queue *txq) + { + unsigned int i; +@@ -1696,13 +1701,9 @@ iavf_tx_queue_release_mbufs_avx512(struct iavf_tx_queue *txq) + } + } + +-static const struct iavf_txq_ops avx512_vec_txq_ops = { +- .release_mbufs = iavf_tx_queue_release_mbufs_avx512, +-}; +- + int __rte_cold + iavf_txq_vec_setup_avx512(struct iavf_tx_queue *txq) + { +- txq->ops = &avx512_vec_txq_ops; ++ txq->rel_mbufs_type = IAVF_REL_MBUFS_AVX512_VEC; + return 0; + } +diff --git a/dpdk/drivers/net/iavf/iavf_rxtx_vec_common.h b/dpdk/drivers/net/iavf/iavf_rxtx_vec_common.h +index 7ad1e0f68a..7629474508 100644 +--- a/dpdk/drivers/net/iavf/iavf_rxtx_vec_common.h ++++ b/dpdk/drivers/net/iavf/iavf_rxtx_vec_common.h +@@ -11,6 +11,10 @@ + #include "iavf.h" + #include "iavf_rxtx.h" + ++#ifndef __INTEL_COMPILER ++#pragma GCC diagnostic ignored "-Wcast-qual" ++#endif ++ + static inline uint16_t + reassemble_packets(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_bufs, + uint16_t nb_bufs, uint8_t *split_flags) +@@ -276,4 +280,203 @@ iavf_tx_vec_dev_check_default(struct rte_eth_dev *dev) + return 0; + } + ++#ifdef CC_AVX2_SUPPORT ++static __rte_always_inline void ++iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512) ++{ ++ int i; ++ uint16_t rx_id; ++ volatile union iavf_rx_desc *rxdp; ++ struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start]; ++ ++ rxdp = rxq->rx_ring + rxq->rxrearm_start; ++ ++ /* Pull 'n' more MBUFs into the software ring */ ++ if (rte_mempool_get_bulk(rxq->mp, ++ (void *)rxp, ++ IAVF_RXQ_REARM_THRESH) < 0) { ++ if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >= ++ rxq->nb_rx_desc) { ++ __m128i dma_addr0; ++ ++ dma_addr0 = _mm_setzero_si128(); ++ for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) { ++ rxp[i] = &rxq->fake_mbuf; ++ _mm_store_si128((__m128i *)&rxdp[i].read, ++ dma_addr0); ++ } ++ } ++ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += ++ IAVF_RXQ_REARM_THRESH; ++ return; ++ } ++ ++#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC ++ struct rte_mbuf *mb0, *mb1; ++ __m128i dma_addr0, dma_addr1; ++ __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, ++ RTE_PKTMBUF_HEADROOM); ++ /* Initialize the mbufs in vector, process 2 mbufs in one loop */ ++ for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) { ++ __m128i vaddr0, vaddr1; ++ ++ mb0 = rxp[0]; ++ mb1 = rxp[1]; ++ ++ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ ++ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != ++ offsetof(struct rte_mbuf, buf_addr) + 8); ++ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); ++ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); ++ ++ /* convert pa to dma_addr hdr/data */ ++ dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0); ++ dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1); ++ ++ /* add headroom to pa values */ ++ dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room); ++ dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room); ++ ++ /* flush desc with pa dma_addr */ ++ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0); ++ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1); ++ } ++#else ++#ifdef CC_AVX512_SUPPORT ++ if (avx512) { ++ struct rte_mbuf *mb0, *mb1, 
*mb2, *mb3; ++ struct rte_mbuf *mb4, *mb5, *mb6, *mb7; ++ __m512i dma_addr0_3, dma_addr4_7; ++ __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM); ++ /* Initialize the mbufs in vector, process 8 mbufs in one loop */ ++ for (i = 0; i < IAVF_RXQ_REARM_THRESH; ++ i += 8, rxp += 8, rxdp += 8) { ++ __m128i vaddr0, vaddr1, vaddr2, vaddr3; ++ __m128i vaddr4, vaddr5, vaddr6, vaddr7; ++ __m256i vaddr0_1, vaddr2_3; ++ __m256i vaddr4_5, vaddr6_7; ++ __m512i vaddr0_3, vaddr4_7; ++ ++ mb0 = rxp[0]; ++ mb1 = rxp[1]; ++ mb2 = rxp[2]; ++ mb3 = rxp[3]; ++ mb4 = rxp[4]; ++ mb5 = rxp[5]; ++ mb6 = rxp[6]; ++ mb7 = rxp[7]; ++ ++ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ ++ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != ++ offsetof(struct rte_mbuf, buf_addr) + 8); ++ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); ++ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); ++ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr); ++ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr); ++ vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr); ++ vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr); ++ vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr); ++ vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr); ++ ++ /** ++ * merge 0 & 1, by casting 0 to 256-bit and inserting 1 ++ * into the high lanes. Similarly for 2 & 3, and so on. ++ */ ++ vaddr0_1 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), ++ vaddr1, 1); ++ vaddr2_3 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2), ++ vaddr3, 1); ++ vaddr4_5 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4), ++ vaddr5, 1); ++ vaddr6_7 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6), ++ vaddr7, 1); ++ vaddr0_3 = ++ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1), ++ vaddr2_3, 1); ++ vaddr4_7 = ++ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5), ++ vaddr6_7, 1); ++ ++ /* convert pa to dma_addr hdr/data */ ++ dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3); ++ dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7); ++ ++ /* add headroom to pa values */ ++ dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room); ++ dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room); ++ ++ /* flush desc with pa dma_addr */ ++ _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3); ++ _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7); ++ } ++ } else ++#endif ++ { ++ struct rte_mbuf *mb0, *mb1, *mb2, *mb3; ++ __m256i dma_addr0_1, dma_addr2_3; ++ __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM); ++ /* Initialize the mbufs in vector, process 4 mbufs in one loop */ ++ for (i = 0; i < IAVF_RXQ_REARM_THRESH; ++ i += 4, rxp += 4, rxdp += 4) { ++ __m128i vaddr0, vaddr1, vaddr2, vaddr3; ++ __m256i vaddr0_1, vaddr2_3; ++ ++ mb0 = rxp[0]; ++ mb1 = rxp[1]; ++ mb2 = rxp[2]; ++ mb3 = rxp[3]; ++ ++ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ ++ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != ++ offsetof(struct rte_mbuf, buf_addr) + 8); ++ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); ++ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); ++ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr); ++ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr); ++ ++ /** ++ * merge 0 & 1, by casting 0 to 256-bit and inserting 1 ++ * into the high lanes. 
Similarly for 2 & 3 ++ */ ++ vaddr0_1 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), ++ vaddr1, 1); ++ vaddr2_3 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2), ++ vaddr3, 1); ++ ++ /* convert pa to dma_addr hdr/data */ ++ dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1); ++ dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3); ++ ++ /* add headroom to pa values */ ++ dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room); ++ dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room); ++ ++ /* flush desc with pa dma_addr */ ++ _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1); ++ _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3); ++ } ++ } ++ ++#endif ++ ++ rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH; ++ if (rxq->rxrearm_start >= rxq->nb_rx_desc) ++ rxq->rxrearm_start = 0; ++ ++ rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH; ++ ++ rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? ++ (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); ++ ++ /* Update the tail pointer on the NIC */ ++ IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id); ++} ++#endif ++ + #endif +diff --git a/dpdk/drivers/net/iavf/iavf_rxtx_vec_sse.c b/dpdk/drivers/net/iavf/iavf_rxtx_vec_sse.c +index 75c77f9d32..b3ea8bc86d 100644 +--- a/dpdk/drivers/net/iavf/iavf_rxtx_vec_sse.c ++++ b/dpdk/drivers/net/iavf/iavf_rxtx_vec_sse.c +@@ -159,7 +159,7 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], + l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e); + /* then we shift left 1 bit */ + l3_l4e = _mm_slli_epi32(l3_l4e, 1); +- /* we need to mask out the reduntant bits */ ++ /* we need to mask out the redundant bits */ + l3_l4e = _mm_and_si128(l3_l4e, cksum_mask); + + vlan0 = _mm_or_si128(vlan0, rss); +@@ -494,7 +494,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, + /* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */ + mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); + /* Read desc statuses backwards to avoid race condition */ +- /* A.1 load 4 pkts desc */ ++ /* A.1 load desc[3] */ + descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); + +@@ -506,9 +506,9 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, + mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]); + #endif + ++ /* A.1 load desc[2-0] */ + descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); +- /* B.1 load 2 mbuf point */ + descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); + descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); +@@ -590,7 +590,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, + /* and with mask to extract bits, flipping 1-0 */ + __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); + /* the staterr values are not in order, as the count +- * count of dd bits doesn't care. However, for end of ++ * of dd bits doesn't care. However, for end of + * packet tracking, we do care, so shuffle. 
This also + * compresses the 32-bit values to 8-bit + */ +@@ -611,7 +611,7 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, + _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, + pkt_mb1); + desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl); +- /* C.4 calc avaialbe number of desc */ ++ /* C.4 calc available number of desc */ + var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); + nb_pkts_recd += var; + if (likely(var != IAVF_VPMD_DESCS_PER_LOOP)) +@@ -644,7 +644,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq, + uint16_t nb_pkts_recd; + int pos; + uint64_t var; +- const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; ++ struct iavf_adapter *adapter = rxq->vsi->adapter; ++ uint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads; ++ const uint32_t *ptype_tbl = adapter->ptype_tbl; + __m128i crc_adjust = _mm_set_epi16 + (0, 0, 0, /* ignore non-length fields */ + -rxq->crc_len, /* sub crc on data_len */ +@@ -755,7 +757,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq, + /* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */ + mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); + /* Read desc statuses backwards to avoid race condition */ +- /* A.1 load 4 pkts desc */ ++ /* A.1 load desc[3] */ + descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); + +@@ -767,9 +769,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq, + mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]); + #endif + ++ /* A.1 load desc[2-0] */ + descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); +- /* B.1 load 2 mbuf point */ + descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); + descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); +@@ -817,8 +819,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq, + * needs to load 2nd 16B of each desc for RSS hash parsing, + * will cause performance drop to get into this context. + */ +- if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads & +- DEV_RX_OFFLOAD_RSS_HASH) { ++ if (offloads & DEV_RX_OFFLOAD_RSS_HASH) { + /* load bottom half of every 32B desc */ + const __m128i raw_desc_bh3 = + _mm_load_si128 +@@ -884,7 +885,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq, + /* and with mask to extract bits, flipping 1-0 */ + __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); + /* the staterr values are not in order, as the count +- * count of dd bits doesn't care. However, for end of ++ * of dd bits doesn't care. However, for end of + * packet tracking, we do care, so shuffle. 
This also + * compresses the 32-bit values to 8-bit + */ +@@ -1197,37 +1198,29 @@ iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, + return nb_tx; + } + +-static void __rte_cold ++void __rte_cold + iavf_rx_queue_release_mbufs_sse(struct iavf_rx_queue *rxq) + { + _iavf_rx_queue_release_mbufs_vec(rxq); + } + +-static void __rte_cold ++void __rte_cold + iavf_tx_queue_release_mbufs_sse(struct iavf_tx_queue *txq) + { + _iavf_tx_queue_release_mbufs_vec(txq); + } + +-static const struct iavf_rxq_ops sse_vec_rxq_ops = { +- .release_mbufs = iavf_rx_queue_release_mbufs_sse, +-}; +- +-static const struct iavf_txq_ops sse_vec_txq_ops = { +- .release_mbufs = iavf_tx_queue_release_mbufs_sse, +-}; +- + int __rte_cold + iavf_txq_vec_setup(struct iavf_tx_queue *txq) + { +- txq->ops = &sse_vec_txq_ops; ++ txq->rel_mbufs_type = IAVF_REL_MBUFS_SSE_VEC; + return 0; + } + + int __rte_cold + iavf_rxq_vec_setup(struct iavf_rx_queue *rxq) + { +- rxq->ops = &sse_vec_rxq_ops; ++ rxq->rel_mbufs_type = IAVF_REL_MBUFS_SSE_VEC; + return iavf_rxq_vec_setup_default(rxq); + } + +diff --git a/dpdk/drivers/net/iavf/iavf_vchnl.c b/dpdk/drivers/net/iavf/iavf_vchnl.c +index c17ae06227..f6da2cf4bd 100644 +--- a/dpdk/drivers/net/iavf/iavf_vchnl.c ++++ b/dpdk/drivers/net/iavf/iavf_vchnl.c +@@ -71,7 +71,6 @@ iavf_read_msg_from_pf(struct iavf_adapter *adapter, uint16_t buf_len, + { + struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(adapter); + struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter); +- struct rte_eth_dev *dev = adapter->eth_dev; + struct iavf_arq_event_info event; + enum iavf_aq_result result = IAVF_MSG_NON; + enum virtchnl_ops opcode; +@@ -113,7 +112,7 @@ iavf_read_msg_from_pf(struct iavf_adapter *adapter, uint16_t buf_len, + speed = vpe->event_data.link_event.link_speed; + vf->link_speed = iavf_convert_link_speed(speed); + } +- iavf_dev_link_update(dev, 0); ++ iavf_dev_link_update(vf->eth_dev, 0); + PMD_DRV_LOG(INFO, "Link status update:%s", + vf->link_up ? 
"up" : "down"); + break; +@@ -180,7 +179,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args) + args->out_buffer); + if (result == IAVF_MSG_CMD) + break; +- rte_delay_ms(ASQ_DELAY_MS); ++ iavf_msec_delay(ASQ_DELAY_MS); + } while (i++ < MAX_TRY_TIMES); + if (i >= MAX_TRY_TIMES || + vf->cmd_retval != VIRTCHNL_STATUS_SUCCESS) { +@@ -206,7 +205,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args) + err = -1; + break; + } +- rte_delay_ms(ASQ_DELAY_MS); ++ iavf_msec_delay(ASQ_DELAY_MS); + /* If don't read msg or read sys event, continue */ + } while (i++ < MAX_TRY_TIMES); + if (i >= MAX_TRY_TIMES || +@@ -224,16 +223,22 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args) + do { + if (vf->pend_cmd == VIRTCHNL_OP_UNKNOWN) + break; +- rte_delay_ms(ASQ_DELAY_MS); ++ iavf_msec_delay(ASQ_DELAY_MS); + /* If don't read msg or read sys event, continue */ + } while (i++ < MAX_TRY_TIMES); +- /* If there's no response is received, clear command */ +- if (i >= MAX_TRY_TIMES || +- vf->cmd_retval != VIRTCHNL_STATUS_SUCCESS) { +- err = -1; +- PMD_DRV_LOG(ERR, "No response or return failure (%d)" +- " for cmd %d", vf->cmd_retval, args->ops); ++ ++ if (i >= MAX_TRY_TIMES) { ++ PMD_DRV_LOG(ERR, "No response for cmd %d", args->ops); + _clear_cmd(vf); ++ err = -EIO; ++ } else if (vf->cmd_retval == ++ VIRTCHNL_STATUS_ERR_NOT_SUPPORTED) { ++ PMD_DRV_LOG(ERR, "Cmd %d not supported", args->ops); ++ err = -ENOTSUP; ++ } else if (vf->cmd_retval != VIRTCHNL_STATUS_SUCCESS) { ++ PMD_DRV_LOG(ERR, "Return failure %d for cmd %d", ++ vf->cmd_retval, args->ops); ++ err = -EINVAL; + } + break; + } +@@ -421,7 +426,7 @@ iavf_check_api_version(struct iavf_adapter *adapter) + (vf->virtchnl_version.major == VIRTCHNL_VERSION_MAJOR_START && + vf->virtchnl_version.minor < VIRTCHNL_VERSION_MINOR_START)) { + PMD_INIT_LOG(ERR, "VIRTCHNL API version should not be lower" +- " than (%u.%u) to support Adapative VF", ++ " than (%u.%u) to support Adaptive VF", + VIRTCHNL_VERSION_MAJOR_START, + VIRTCHNL_VERSION_MAJOR_START); + return -1; +@@ -532,8 +537,8 @@ iavf_enable_queues(struct iavf_adapter *adapter) + memset(&queue_select, 0, sizeof(queue_select)); + queue_select.vsi_id = vf->vsi_res->vsi_id; + +- queue_select.rx_queues = BIT(adapter->eth_dev->data->nb_rx_queues) - 1; +- queue_select.tx_queues = BIT(adapter->eth_dev->data->nb_tx_queues) - 1; ++ queue_select.rx_queues = BIT(adapter->dev_data->nb_rx_queues) - 1; ++ queue_select.tx_queues = BIT(adapter->dev_data->nb_tx_queues) - 1; + + args.ops = VIRTCHNL_OP_ENABLE_QUEUES; + args.in_args = (u8 *)&queue_select; +@@ -560,8 +565,8 @@ iavf_disable_queues(struct iavf_adapter *adapter) + memset(&queue_select, 0, sizeof(queue_select)); + queue_select.vsi_id = vf->vsi_res->vsi_id; + +- queue_select.rx_queues = BIT(adapter->eth_dev->data->nb_rx_queues) - 1; +- queue_select.tx_queues = BIT(adapter->eth_dev->data->nb_tx_queues) - 1; ++ queue_select.rx_queues = BIT(adapter->dev_data->nb_rx_queues) - 1; ++ queue_select.tx_queues = BIT(adapter->dev_data->nb_tx_queues) - 1; + + args.ops = VIRTCHNL_OP_DISABLE_QUEUES; + args.in_args = (u8 *)&queue_select; +@@ -631,12 +636,12 @@ iavf_enable_queues_lv(struct iavf_adapter *adapter) + queue_chunk[VIRTCHNL_QUEUE_TYPE_TX].type = VIRTCHNL_QUEUE_TYPE_TX; + queue_chunk[VIRTCHNL_QUEUE_TYPE_TX].start_queue_id = 0; + queue_chunk[VIRTCHNL_QUEUE_TYPE_TX].num_queues = +- adapter->eth_dev->data->nb_tx_queues; ++ adapter->dev_data->nb_tx_queues; + + queue_chunk[VIRTCHNL_QUEUE_TYPE_RX].type 
= VIRTCHNL_QUEUE_TYPE_RX; + queue_chunk[VIRTCHNL_QUEUE_TYPE_RX].start_queue_id = 0; + queue_chunk[VIRTCHNL_QUEUE_TYPE_RX].num_queues = +- adapter->eth_dev->data->nb_rx_queues; ++ adapter->dev_data->nb_rx_queues; + + args.ops = VIRTCHNL_OP_ENABLE_QUEUES_V2; + args.in_args = (u8 *)queue_select; +@@ -675,12 +680,12 @@ iavf_disable_queues_lv(struct iavf_adapter *adapter) + queue_chunk[VIRTCHNL_QUEUE_TYPE_TX].type = VIRTCHNL_QUEUE_TYPE_TX; + queue_chunk[VIRTCHNL_QUEUE_TYPE_TX].start_queue_id = 0; + queue_chunk[VIRTCHNL_QUEUE_TYPE_TX].num_queues = +- adapter->eth_dev->data->nb_tx_queues; ++ adapter->dev_data->nb_tx_queues; + + queue_chunk[VIRTCHNL_QUEUE_TYPE_RX].type = VIRTCHNL_QUEUE_TYPE_RX; + queue_chunk[VIRTCHNL_QUEUE_TYPE_RX].start_queue_id = 0; + queue_chunk[VIRTCHNL_QUEUE_TYPE_RX].num_queues = +- adapter->eth_dev->data->nb_rx_queues; ++ adapter->dev_data->nb_rx_queues; + + args.ops = VIRTCHNL_OP_DISABLE_QUEUES_V2; + args.in_args = (u8 *)queue_select; +@@ -811,9 +816,9 @@ iavf_configure_queues(struct iavf_adapter *adapter, + uint16_t num_queue_pairs, uint16_t index) + { + struct iavf_rx_queue **rxq = +- (struct iavf_rx_queue **)adapter->eth_dev->data->rx_queues; ++ (struct iavf_rx_queue **)adapter->dev_data->rx_queues; + struct iavf_tx_queue **txq = +- (struct iavf_tx_queue **)adapter->eth_dev->data->tx_queues; ++ (struct iavf_tx_queue **)adapter->dev_data->tx_queues; + struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter); + struct virtchnl_vsi_queue_config_info *vc_config; + struct virtchnl_queue_pair_info *vc_qp; +@@ -837,7 +842,7 @@ iavf_configure_queues(struct iavf_adapter *adapter, + vc_qp->txq.queue_id = i; + + /* Virtchnnl configure tx queues by pairs */ +- if (i < adapter->eth_dev->data->nb_tx_queues) { ++ if (i < adapter->dev_data->nb_tx_queues) { + vc_qp->txq.ring_len = txq[i]->nb_tx_desc; + vc_qp->txq.dma_ring_addr = txq[i]->tx_ring_phys_addr; + } +@@ -846,7 +851,7 @@ iavf_configure_queues(struct iavf_adapter *adapter, + vc_qp->rxq.queue_id = i; + vc_qp->rxq.max_pkt_size = vf->max_pkt_len; + +- if (i >= adapter->eth_dev->data->nb_rx_queues) ++ if (i >= adapter->dev_data->nb_rx_queues) + continue; + + /* Virtchnnl configure rx queues by pairs */ +@@ -915,7 +920,7 @@ iavf_config_irq_map(struct iavf_adapter *adapter) + return -ENOMEM; + + map_info->num_vectors = vf->nb_msix; +- for (i = 0; i < adapter->eth_dev->data->nb_rx_queues; i++) { ++ for (i = 0; i < adapter->dev_data->nb_rx_queues; i++) { + vecmap = + &map_info->vecmap[vf->qv_map[i].vector_id - vf->msix_base]; + vecmap->vsi_id = vf->vsi_res->vsi_id; +@@ -994,7 +999,7 @@ iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add) + j = 0; + len = sizeof(struct virtchnl_ether_addr_list); + for (i = begin; i < IAVF_NUM_MACADDR_MAX; i++, next_begin++) { +- addr = &adapter->eth_dev->data->mac_addrs[i]; ++ addr = &adapter->dev_data->mac_addrs[i]; + if (rte_is_zero_ether_addr(addr)) + continue; + len += sizeof(struct virtchnl_ether_addr); +@@ -1011,11 +1016,14 @@ iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add) + } + + for (i = begin; i < next_begin; i++) { +- addr = &adapter->eth_dev->data->mac_addrs[i]; ++ addr = &adapter->dev_data->mac_addrs[i]; + if (rte_is_zero_ether_addr(addr)) + continue; + rte_memcpy(list->list[j].addr, addr->addr_bytes, + sizeof(addr->addr_bytes)); ++ list->list[j].type = (j == 0 ? 
++ VIRTCHNL_ETHER_ADDR_PRIMARY : ++ VIRTCHNL_ETHER_ADDR_EXTRA); + PMD_DRV_LOG(DEBUG, "add/rm mac:%x:%x:%x:%x:%x:%x", + addr->addr_bytes[0], addr->addr_bytes[1], + addr->addr_bytes[2], addr->addr_bytes[3], +@@ -1098,8 +1106,8 @@ iavf_config_promisc(struct iavf_adapter *adapter, + PMD_DRV_LOG(ERR, + "fail to execute command CONFIG_PROMISCUOUS_MODE"); + +- if (err == IAVF_NOT_SUPPORTED) +- return -ENOTSUP; ++ if (err == -ENOTSUP) ++ return err; + + return -EAGAIN; + } +@@ -1111,7 +1119,7 @@ iavf_config_promisc(struct iavf_adapter *adapter, + + int + iavf_add_del_eth_addr(struct iavf_adapter *adapter, struct rte_ether_addr *addr, +- bool add) ++ bool add, uint8_t type) + { + struct virtchnl_ether_addr_list *list; + struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter); +@@ -1123,6 +1131,7 @@ iavf_add_del_eth_addr(struct iavf_adapter *adapter, struct rte_ether_addr *addr, + list = (struct virtchnl_ether_addr_list *)cmd_buffer; + list->vsi_id = vf->vsi_res->vsi_id; + list->num_elements = 1; ++ list->list[0].type = type; + rte_memcpy(list->list[0].addr, addr->addr_bytes, + sizeof(addr->addr_bytes)); + +@@ -1294,7 +1303,7 @@ iavf_fdir_check(struct iavf_adapter *adapter, + + err = iavf_execute_vf_cmd(adapter, &args); + if (err) { +- PMD_DRV_LOG(ERR, "fail to check flow direcotor rule"); ++ PMD_DRV_LOG(ERR, "fail to check flow director rule"); + return err; + } + +@@ -1377,6 +1386,7 @@ iavf_add_del_mc_addr_list(struct iavf_adapter *adapter, + + memcpy(list->list[i].addr, mc_addrs[i].addr_bytes, + sizeof(list->list[i].addr)); ++ list->list[i].type = VIRTCHNL_ETHER_ADDR_EXTRA; + } + + args.ops = add ? VIRTCHNL_OP_ADD_ETH_ADDR : VIRTCHNL_OP_DEL_ETH_ADDR; +@@ -1397,9 +1407,10 @@ iavf_add_del_mc_addr_list(struct iavf_adapter *adapter, + } + + int +-iavf_request_queues(struct iavf_adapter *adapter, uint16_t num) ++iavf_request_queues(struct rte_eth_dev *dev, uint16_t num) + { +- struct rte_eth_dev *dev = adapter->eth_dev; ++ struct iavf_adapter *adapter = ++ IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); + struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct virtchnl_vf_res_request vfres; +diff --git a/dpdk/drivers/net/ice/base/ice_common.c b/dpdk/drivers/net/ice/base/ice_common.c +index 304e55e210..2b53f78512 100644 +--- a/dpdk/drivers/net/ice/base/ice_common.c ++++ b/dpdk/drivers/net/ice/base/ice_common.c +@@ -2017,6 +2017,23 @@ ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, + ice_recalc_port_limited_caps(hw, &func_p->common_cap); + } + ++/** ++ * ice_func_id_to_logical_id - map from function id to logical pf id ++ * @active_function_bitmap: active function bitmap ++ * @pf_id: function number of device ++ */ ++static int ice_func_id_to_logical_id(u32 active_function_bitmap, u8 pf_id) ++{ ++ u8 logical_id = 0; ++ u8 i; ++ ++ for (i = 0; i < pf_id; i++) ++ if (active_function_bitmap & BIT(i)) ++ logical_id++; ++ ++ return logical_id; ++} ++ + /** + * ice_parse_valid_functions_cap - Parse ICE_AQC_CAPS_VALID_FUNCTIONS caps + * @hw: pointer to the HW struct +@@ -2034,6 +2051,8 @@ ice_parse_valid_functions_cap(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, + dev_p->num_funcs = ice_hweight32(number); + ice_debug(hw, ICE_DBG_INIT, "dev caps: num_funcs = %d\n", + dev_p->num_funcs); ++ ++ hw->logical_pf_id = ice_func_id_to_logical_id(number, hw->pf_id); + } + + /** +@@ -2913,7 +2932,7 @@ ice_phy_caps_equals_cfg(struct ice_aqc_get_phy_caps_data *phy_caps, + /** + * ice_copy_phy_caps_to_cfg - Copy PHY ability 
data to configuration data + * @pi: port information structure +- * @caps: PHY ability structure to copy date from ++ * @caps: PHY ability structure to copy data from + * @cfg: PHY configuration structure to copy data to + * + * Helper function to copy AQC PHY get ability data to PHY set configuration +diff --git a/dpdk/drivers/net/ice/base/ice_flex_pipe.c b/dpdk/drivers/net/ice/base/ice_flex_pipe.c +index d74fecbf5b..ed3363c869 100644 +--- a/dpdk/drivers/net/ice/base/ice_flex_pipe.c ++++ b/dpdk/drivers/net/ice/base/ice_flex_pipe.c +@@ -1597,8 +1597,12 @@ static enum ice_prof_type + ice_get_sw_prof_type(struct ice_hw *hw, struct ice_fv *fv) + { + u16 i; ++ bool valid_prof = false; + + for (i = 0; i < hw->blk[ICE_BLK_SW].es.fvw; i++) { ++ if (fv->ew[i].off != ICE_NAN_OFFSET) ++ valid_prof = true; ++ + /* UDP tunnel will have UDP_OF protocol ID and VNI offset */ + if (fv->ew[i].prot_id == (u8)ICE_PROT_UDP_OF && + fv->ew[i].off == ICE_VNI_OFFSET) +@@ -1613,7 +1617,7 @@ ice_get_sw_prof_type(struct ice_hw *hw, struct ice_fv *fv) + return ICE_PROF_TUN_PPPOE; + } + +- return ICE_PROF_NON_TUN; ++ return valid_prof ? ICE_PROF_NON_TUN : ICE_PROF_INVALID; + } + + /** +@@ -1630,11 +1634,6 @@ ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type req_profs, + struct ice_seg *ice_seg; + struct ice_fv *fv; + +- if (req_profs == ICE_PROF_ALL) { +- ice_bitmap_set(bm, 0, ICE_MAX_NUM_PROFILES); +- return; +- } +- + ice_memset(&state, 0, sizeof(state), ICE_NONDMA_MEM); + ice_zero_bitmap(bm, ICE_MAX_NUM_PROFILES); + ice_seg = hw->seg; +@@ -2253,7 +2252,7 @@ enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all) + * @off: variable to receive the protocol offset + */ + enum ice_status +-ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 fv_idx, ++ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u8 fv_idx, + u8 *prot, u16 *off) + { + struct ice_fv_word *fv_ext; +@@ -3125,7 +3124,7 @@ static void ice_init_prof_masks(struct ice_hw *hw, enum ice_block blk) + per_pf = ICE_PROF_MASK_COUNT / hw->dev_caps.num_funcs; + + hw->blk[blk].masks.count = per_pf; +- hw->blk[blk].masks.first = hw->pf_id * per_pf; ++ hw->blk[blk].masks.first = hw->logical_pf_id * per_pf; + + ice_memset(hw->blk[blk].masks.masks, 0, + sizeof(hw->blk[blk].masks.masks), ICE_NONDMA_MEM); +diff --git a/dpdk/drivers/net/ice/base/ice_flex_pipe.h b/dpdk/drivers/net/ice/base/ice_flex_pipe.h +index 214c7a2837..9ae3c82b40 100644 +--- a/dpdk/drivers/net/ice/base/ice_flex_pipe.h ++++ b/dpdk/drivers/net/ice/base/ice_flex_pipe.h +@@ -25,7 +25,7 @@ enum ice_status + ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access); + void ice_release_change_lock(struct ice_hw *hw); + enum ice_status +-ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 fv_idx, ++ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u8 fv_idx, + u8 *prot, u16 *off); + enum ice_status + ice_find_label_value(struct ice_seg *ice_seg, char const *name, u32 type, +diff --git a/dpdk/drivers/net/ice/base/ice_flex_type.h b/dpdk/drivers/net/ice/base/ice_flex_type.h +index 1dd57baccd..a0030183ac 100644 +--- a/dpdk/drivers/net/ice/base/ice_flex_type.h ++++ b/dpdk/drivers/net/ice/base/ice_flex_type.h +@@ -779,6 +779,7 @@ struct ice_chs_chg { + #define ICE_FLOW_PTYPE_MAX ICE_XLT1_CNT + + enum ice_prof_type { ++ ICE_PROF_INVALID = 0x0, + ICE_PROF_NON_TUN = 0x1, + ICE_PROF_TUN_UDP = 0x2, + ICE_PROF_TUN_GRE = 0x4, +diff --git a/dpdk/drivers/net/ice/base/ice_flow.c 
b/dpdk/drivers/net/ice/base/ice_flow.c +index 1b36c2b897..c75f58659c 100644 +--- a/dpdk/drivers/net/ice/base/ice_flow.c ++++ b/dpdk/drivers/net/ice/base/ice_flow.c +@@ -1732,9 +1732,14 @@ ice_flow_acl_free_act_cntr(struct ice_hw *hw, struct ice_flow_action *acts, + if (acts[i].type == ICE_FLOW_ACT_CNTR_PKT || + acts[i].type == ICE_FLOW_ACT_CNTR_BYTES || + acts[i].type == ICE_FLOW_ACT_CNTR_PKT_BYTES) { +- struct ice_acl_cntrs cntrs; ++ struct ice_acl_cntrs cntrs = { 0 }; + enum ice_status status; + ++ /* amount is unused in the dealloc path but the common ++ * parameter check routine wants a value set, as zero ++ * is invalid for the check. Just set it. ++ */ ++ cntrs.amount = 1; + cntrs.bank = 0; /* Only bank0 for the moment */ + cntrs.first_cntr = + LE16_TO_CPU(acts[i].data.acl_act.value); +@@ -2333,7 +2338,7 @@ ice_flow_acl_check_actions(struct ice_hw *hw, struct ice_flow_action *acts, + if (acts[i].type == ICE_FLOW_ACT_CNTR_PKT || + acts[i].type == ICE_FLOW_ACT_CNTR_BYTES || + acts[i].type == ICE_FLOW_ACT_CNTR_PKT_BYTES) { +- struct ice_acl_cntrs cntrs; ++ struct ice_acl_cntrs cntrs = { 0 }; + enum ice_status status; + + cntrs.amount = 1; +@@ -3228,7 +3233,7 @@ ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len, + } + + #define ICE_FLOW_RSS_SEG_HDR_L2_MASKS \ +-(ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN) ++(ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_ETH_NON_IP | ICE_FLOW_SEG_HDR_VLAN) + + #define ICE_FLOW_RSS_SEG_HDR_L3_MASKS \ + (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6) +diff --git a/dpdk/drivers/net/ice/base/ice_lan_tx_rx.h b/dpdk/drivers/net/ice/base/ice_lan_tx_rx.h +index ec0c9f3ab0..7039cc8d92 100644 +--- a/dpdk/drivers/net/ice/base/ice_lan_tx_rx.h ++++ b/dpdk/drivers/net/ice/base/ice_lan_tx_rx.h +@@ -1353,7 +1353,7 @@ static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = { + /* Non Tunneled IPv6 */ + ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), + ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), +- ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY3), ++ ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), + ICE_PTT_UNUSED_ENTRY(91), + ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), + ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), +diff --git a/dpdk/drivers/net/ice/base/ice_osdep.h b/dpdk/drivers/net/ice/base/ice_osdep.h +index c0f1e77257..be51c64b67 100644 +--- a/dpdk/drivers/net/ice/base/ice_osdep.h ++++ b/dpdk/drivers/net/ice/base/ice_osdep.h +@@ -21,7 +21,6 @@ + #include + #include + #include +-#include + #include + + #include "ice_alloc.h" +@@ -192,7 +191,7 @@ struct ice_virt_mem { + } __rte_packed; + + #define ice_malloc(h, s) rte_zmalloc(NULL, s, 0) +-#define ice_calloc(h, c, s) rte_zmalloc(NULL, (c) * (s), 0) ++#define ice_calloc(h, c, s) rte_calloc(NULL, c, s, 0) + #define ice_free(h, m) rte_free(m) + + #define ice_memset(a, b, c, d) memset((a), (b), (c)) +@@ -245,13 +244,15 @@ static inline void * + ice_alloc_dma_mem(__rte_unused struct ice_hw *hw, + struct ice_dma_mem *mem, u64 size) + { ++ static uint64_t ice_dma_memzone_id; + const struct rte_memzone *mz = NULL; + char z_name[RTE_MEMZONE_NAMESIZE]; + + if (!mem) + return NULL; + +- snprintf(z_name, sizeof(z_name), "ice_dma_%"PRIu64, rte_rand()); ++ snprintf(z_name, sizeof(z_name), "ice_dma_%" PRIu64, ++ __atomic_fetch_add(&ice_dma_memzone_id, 1, __ATOMIC_RELAXED)); + mz = rte_memzone_reserve_bounded(z_name, size, SOCKET_ID_ANY, 0, + 0, RTE_PGSIZE_2M); + if (!mz) +diff --git a/dpdk/drivers/net/ice/base/ice_protocol_type.h 
b/dpdk/drivers/net/ice/base/ice_protocol_type.h +index e8caefd8f9..8e0557b212 100644 +--- a/dpdk/drivers/net/ice/base/ice_protocol_type.h ++++ b/dpdk/drivers/net/ice/base/ice_protocol_type.h +@@ -164,6 +164,7 @@ enum ice_prot_id { + + #define ICE_VNI_OFFSET 12 /* offset of VNI from ICE_PROT_UDP_OF */ + ++#define ICE_NAN_OFFSET 511 + #define ICE_MAC_OFOS_HW 1 + #define ICE_MAC_IL_HW 4 + #define ICE_ETYPE_OL_HW 9 +@@ -393,7 +394,7 @@ struct ice_recp_grp_entry { + #define ICE_INVAL_CHAIN_IND 0xFF + u16 rid; + u8 chain_idx; +- u16 fv_idx[ICE_NUM_WORDS_RECIPE]; ++ u8 fv_idx[ICE_NUM_WORDS_RECIPE]; + u16 fv_mask[ICE_NUM_WORDS_RECIPE]; + struct ice_pref_recipe_group r_group; + }; +diff --git a/dpdk/drivers/net/ice/base/ice_sched.c b/dpdk/drivers/net/ice/base/ice_sched.c +index 882448671e..9196628cf1 100644 +--- a/dpdk/drivers/net/ice/base/ice_sched.c ++++ b/dpdk/drivers/net/ice/base/ice_sched.c +@@ -4728,12 +4728,12 @@ ice_sched_get_node_by_id_type(struct ice_port_info *pi, u32 id, + + case ICE_AGG_TYPE_Q: + /* The current implementation allows single queue to modify */ +- node = ice_sched_get_node(pi, id); ++ node = ice_sched_find_node_by_teid(pi->root, id); + break; + + case ICE_AGG_TYPE_QG: + /* The current implementation allows single qg to modify */ +- child_node = ice_sched_get_node(pi, id); ++ child_node = ice_sched_find_node_by_teid(pi->root, id); + if (!child_node) + break; + node = child_node->parent; +diff --git a/dpdk/drivers/net/ice/base/ice_switch.c b/dpdk/drivers/net/ice/base/ice_switch.c +index 247c3acb67..d7a8836293 100644 +--- a/dpdk/drivers/net/ice/base/ice_switch.c ++++ b/dpdk/drivers/net/ice/base/ice_switch.c +@@ -2936,6 +2936,10 @@ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw) + ICE_AQC_GET_SW_CONF_RESP_TYPE_S); + + switch (res_type) { ++ case ICE_AQC_GET_SW_CONF_RESP_VSI: ++ if (hw->dcf_enabled && !is_vf) ++ hw->pf_id = pf_vf_num; ++ break; + case ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT: + case ICE_AQC_GET_SW_CONF_RESP_VIRT_PORT: + if (j == num_total_ports) { +@@ -5602,7 +5606,7 @@ ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle, + &remove_list_head); + ice_release_lock(rule_lock); + if (status) +- return; ++ goto free_fltr_list; + + switch (lkup) { + case ICE_SW_LKUP_MAC: +@@ -5630,6 +5634,7 @@ ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle, + break; + } + ++free_fltr_list: + LIST_FOR_EACH_ENTRY_SAFE(fm_entry, tmp, &remove_list_head, + ice_fltr_list_entry, list_entry) { + LIST_DEL(&fm_entry->list_entry); +@@ -7017,6 +7022,7 @@ ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo, + bool ice_is_prof_rule(enum ice_sw_tunnel_type type) + { + switch (type) { ++ case ICE_SW_TUN_AND_NON_TUN: + case ICE_SW_TUN_PROFID_IPV6_ESP: + case ICE_SW_TUN_PROFID_IPV6_AH: + case ICE_SW_TUN_PROFID_MAC_IPV6_L2TPV3: +diff --git a/dpdk/drivers/net/ice/base/ice_type.h b/dpdk/drivers/net/ice/base/ice_type.h +index 6b8d44f0b4..1aae9fe721 100644 +--- a/dpdk/drivers/net/ice/base/ice_type.h ++++ b/dpdk/drivers/net/ice/base/ice_type.h +@@ -864,6 +864,7 @@ struct ice_hw { + u8 revision_id; + + u8 pf_id; /* device profile info */ ++ u8 logical_pf_id; + + u16 max_burst_size; /* driver sets this value */ + +diff --git a/dpdk/drivers/net/ice/base/meson.build b/dpdk/drivers/net/ice/base/meson.build +index 22963ce31d..8f7d4384e7 100644 +--- a/dpdk/drivers/net/ice/base/meson.build ++++ b/dpdk/drivers/net/ice/base/meson.build +@@ -20,6 +20,11 @@ error_cflags = ['-Wno-unused-value', + '-Wno-unused-variable', + '-Wno-unused-parameter', + ] ++# Bugzilla ID: 678 
++if (toolchain == 'gcc' and cc.version().version_compare('>=11.0.0')) ++ error_cflags += ['-Wno-array-bounds'] ++endif ++ + c_args = cflags + + foreach flag: error_cflags +diff --git a/dpdk/drivers/net/ice/ice_acl_filter.c b/dpdk/drivers/net/ice/ice_acl_filter.c +index f7dbe53574..7e44751d64 100644 +--- a/dpdk/drivers/net/ice/ice_acl_filter.c ++++ b/dpdk/drivers/net/ice/ice_acl_filter.c +@@ -45,7 +45,7 @@ static struct ice_flow_parser ice_acl_parser; + + struct acl_rule { + enum ice_fltr_ptype flow_type; +- uint32_t entry_id[4]; ++ uint64_t entry_id[4]; + }; + + static struct +@@ -430,7 +430,7 @@ ice_acl_hw_set_conf(struct ice_pf *pf, struct ice_fdir_fltr *input, + /* For IPV4_OTHER type, should add entry for all types. + * For IPV4_UDP/TCP/SCTP type, only add entry for each. + */ +- if (slot_id < MAX_ACL_ENTRIES) { ++ if (slot_id < MAX_ACL_NORMAL_ENTRIES) { + entry_id = ((uint64_t)flow_type << 32) | slot_id; + ret = ice_flow_add_entry(hw, blk, flow_type, + entry_id, pf->main_vsi->idx, +@@ -440,29 +440,39 @@ ice_acl_hw_set_conf(struct ice_pf *pf, struct ice_fdir_fltr *input, + PMD_DRV_LOG(ERR, "Fail to add entry."); + return ret; + } +- rule->entry_id[entry_idx] = slot_id; ++ rule->entry_id[entry_idx] = entry_id; + pf->acl.hw_entry_id[slot_id] = hw_entry; + } else { + PMD_DRV_LOG(ERR, "Exceed the maximum entry number(%d)" +- " HW supported!", MAX_ACL_ENTRIES); ++ " HW supported!", MAX_ACL_NORMAL_ENTRIES); + return -1; + } + + return 0; + } + ++static inline void ++ice_acl_del_entry(struct ice_hw *hw, uint64_t entry_id) ++{ ++ uint64_t hw_entry; ++ ++ hw_entry = ice_flow_find_entry(hw, ICE_BLK_ACL, entry_id); ++ ice_flow_rem_entry(hw, ICE_BLK_ACL, hw_entry); ++} ++ + static inline void + ice_acl_hw_rem_conf(struct ice_pf *pf, struct acl_rule *rule, int32_t entry_idx) + { + uint32_t slot_id; + int32_t i; ++ uint64_t entry_id; + struct ice_hw *hw = ICE_PF_TO_HW(pf); + + for (i = 0; i < entry_idx; i++) { +- slot_id = rule->entry_id[i]; ++ entry_id = rule->entry_id[i]; ++ slot_id = ICE_LO_DWORD(entry_id); + rte_bitmap_set(pf->acl.slots, slot_id); +- ice_flow_rem_entry(hw, ICE_BLK_ACL, +- pf->acl.hw_entry_id[slot_id]); ++ ice_acl_del_entry(hw, entry_id); + } + } + +@@ -562,6 +572,7 @@ ice_acl_destroy_filter(struct ice_adapter *ad, + { + struct acl_rule *rule = (struct acl_rule *)flow->rule; + uint32_t slot_id, i; ++ uint64_t entry_id; + struct ice_pf *pf = &ad->pf; + struct ice_hw *hw = ICE_PF_TO_HW(pf); + int ret = 0; +@@ -569,19 +580,19 @@ ice_acl_destroy_filter(struct ice_adapter *ad, + switch (rule->flow_type) { + case ICE_FLTR_PTYPE_NONF_IPV4_OTHER: + for (i = 0; i < 4; i++) { +- slot_id = rule->entry_id[i]; ++ entry_id = rule->entry_id[i]; ++ slot_id = ICE_LO_DWORD(entry_id); + rte_bitmap_set(pf->acl.slots, slot_id); +- ice_flow_rem_entry(hw, ICE_BLK_ACL, +- pf->acl.hw_entry_id[slot_id]); ++ ice_acl_del_entry(hw, entry_id); + } + break; + case ICE_FLTR_PTYPE_NONF_IPV4_UDP: + case ICE_FLTR_PTYPE_NONF_IPV4_TCP: + case ICE_FLTR_PTYPE_NONF_IPV4_SCTP: +- slot_id = rule->entry_id[0]; ++ entry_id = rule->entry_id[0]; ++ slot_id = ICE_LO_DWORD(entry_id); + rte_bitmap_set(pf->acl.slots, slot_id); +- ice_flow_rem_entry(hw, ICE_BLK_ACL, +- pf->acl.hw_entry_id[slot_id]); ++ ice_acl_del_entry(hw, entry_id); + break; + default: + rte_flow_error_set(error, EINVAL, +diff --git a/dpdk/drivers/net/ice/ice_dcf.c b/dpdk/drivers/net/ice/ice_dcf.c +index 294ddcd2e1..7aeb9c82ca 100644 +--- a/dpdk/drivers/net/ice/ice_dcf.c ++++ b/dpdk/drivers/net/ice/ice_dcf.c +@@ -529,15 +529,26 @@ int + 
ice_dcf_handle_vsi_update_event(struct ice_dcf_hw *hw) + { + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(hw->eth_dev); +- int err = 0; ++ int i = 0; ++ int err = -1; + + rte_spinlock_lock(&hw->vc_cmd_send_lock); + + rte_intr_disable(&pci_dev->intr_handle); + ice_dcf_disable_irq0(hw); + +- if (ice_dcf_get_vf_resource(hw) || ice_dcf_get_vf_vsi_map(hw) < 0) +- err = -1; ++ for (;;) { ++ if (ice_dcf_get_vf_resource(hw) == 0 && ++ ice_dcf_get_vf_vsi_map(hw) >= 0) { ++ err = 0; ++ break; ++ } ++ ++ if (++i >= ICE_DCF_ARQ_MAX_RETRIES) ++ break; ++ ++ rte_delay_ms(ICE_DCF_ARQ_CHECK_TIME); ++ } + + rte_intr_enable(&pci_dev->intr_handle); + ice_dcf_enable_irq0(hw); +@@ -815,7 +826,7 @@ ice_dcf_init_rss(struct ice_dcf_hw *hw) + j = 0; + hw->rss_lut[i] = j; + } +- /* send virtchnnl ops to configure rss*/ ++ /* send virtchnl ops to configure RSS */ + ret = ice_dcf_configure_rss_lut(hw); + if (ret) + return ret; +@@ -828,7 +839,7 @@ ice_dcf_init_rss(struct ice_dcf_hw *hw) + + #define IAVF_RXDID_LEGACY_0 0 + #define IAVF_RXDID_LEGACY_1 1 +-#define IAVF_RXDID_COMMS_GENERIC 16 ++#define IAVF_RXDID_COMMS_OVS_1 22 + + int + ice_dcf_configure_queues(struct ice_dcf_hw *hw) +@@ -863,11 +874,11 @@ ice_dcf_configure_queues(struct ice_dcf_hw *hw) + } + vc_qp->rxq.vsi_id = hw->vsi_res->vsi_id; + vc_qp->rxq.queue_id = i; +- vc_qp->rxq.max_pkt_size = rxq[i]->max_pkt_len; + + if (i >= hw->eth_dev->data->nb_rx_queues) + continue; + ++ vc_qp->rxq.max_pkt_size = rxq[i]->max_pkt_len; + vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc; + vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_dma; + vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len; +@@ -876,8 +887,8 @@ ice_dcf_configure_queues(struct ice_dcf_hw *hw) + if (hw->vf_res->vf_cap_flags & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC && + hw->supported_rxdid & +- BIT(IAVF_RXDID_COMMS_GENERIC)) { +- vc_qp->rxq.rxdid = IAVF_RXDID_COMMS_GENERIC; ++ BIT(IAVF_RXDID_COMMS_OVS_1)) { ++ vc_qp->rxq.rxdid = IAVF_RXDID_COMMS_OVS_1; + PMD_DRV_LOG(NOTICE, "request RXDID == %d in " + "Queue[%d]", vc_qp->rxq.rxdid, i); + } else { +diff --git a/dpdk/drivers/net/ice/ice_dcf_ethdev.c b/dpdk/drivers/net/ice/ice_dcf_ethdev.c +index e5c877805f..429057a862 100644 +--- a/dpdk/drivers/net/ice/ice_dcf_ethdev.c ++++ b/dpdk/drivers/net/ice/ice_dcf_ethdev.c +@@ -48,13 +48,14 @@ ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq) + struct ice_dcf_adapter *dcf_ad = dev->data->dev_private; + struct rte_eth_dev_data *dev_data = dev->data; + struct iavf_hw *hw = &dcf_ad->real_hw.avf; +- uint16_t buf_size, max_pkt_len, len; ++ uint16_t buf_size, max_pkt_len; + + buf_size = rte_pktmbuf_data_room_size(rxq->mp) - RTE_PKTMBUF_HEADROOM; + rxq->rx_hdr_len = 0; + rxq->rx_buf_len = RTE_ALIGN(buf_size, (1 << ICE_RLAN_CTX_DBUF_S)); +- len = ICE_SUPPORT_CHAIN_NUM * rxq->rx_buf_len; +- max_pkt_len = RTE_MIN(len, dev->data->dev_conf.rxmode.max_rx_pkt_len); ++ max_pkt_len = RTE_MIN((uint32_t) ++ ICE_SUPPORT_CHAIN_NUM * rxq->rx_buf_len, ++ dev->data->dev_conf.rxmode.max_rx_pkt_len); + + /* Check if the jumbo frame and maximum packet length are set + * correctly. +@@ -165,10 +166,15 @@ ice_dcf_config_rx_queues_irqs(struct rte_eth_dev *dev, + VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) { + /* If WB_ON_ITR supports, enable it */ + hw->msix_base = IAVF_RX_VEC_START; ++ /* Set the ITR for index zero, to 2us to make sure that ++ * we leave time for aggregation to occur, but don't ++ * increase latency dramatically. 
++ */ + IAVF_WRITE_REG(&hw->avf, + IAVF_VFINT_DYN_CTLN1(hw->msix_base - 1), +- IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | +- IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_MASK); ++ (0 << IAVF_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | ++ IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_MASK | ++ (2UL << IAVF_VFINT_DYN_CTLN1_INTERVAL_SHIFT)); + } else { + /* If no WB_ON_ITR offload flags, need to set + * interrupt for descriptor write back. +@@ -201,7 +207,7 @@ ice_dcf_config_rx_queues_irqs(struct rte_eth_dev *dev, + "vector %u are mapping to all Rx queues", + hw->msix_base); + } else { +- /* If Rx interrupt is reuquired, and we can use ++ /* If Rx interrupt is required, and we can use + * multi interrupts, then the vec is from 1 + */ + hw->nb_msix = RTE_MIN(hw->vf_res->max_vectors, +@@ -646,6 +652,8 @@ ice_dcf_dev_info_get(struct rte_eth_dev *dev, + dev_info->hash_key_size = hw->vf_res->rss_key_size; + dev_info->reta_size = hw->vf_res->rss_lut_size; + dev_info->flow_type_rss_offloads = ICE_RSS_OFFLOAD_ALL; ++ dev_info->max_mtu = dev_info->max_rx_pktlen - ICE_ETH_OVERHEAD; ++ dev_info->min_mtu = RTE_ETHER_MIN_MTU; + + dev_info->rx_offload_capa = + DEV_RX_OFFLOAD_VLAN_STRIP | +@@ -664,6 +672,7 @@ ice_dcf_dev_info_get(struct rte_eth_dev *dev, + DEV_TX_OFFLOAD_TCP_CKSUM | + DEV_TX_OFFLOAD_SCTP_CKSUM | + DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | ++ DEV_TX_OFFLOAD_OUTER_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_TSO | + DEV_TX_OFFLOAD_VXLAN_TNL_TSO | + DEV_TX_OFFLOAD_GRE_TNL_TSO | +@@ -863,6 +872,13 @@ ice_dcf_dev_close(struct rte_eth_dev *dev) + return 0; + } + ++bool ++ice_dcf_adminq_need_retry(struct ice_adapter *ad) ++{ ++ return ad->hw.dcf_enabled && ++ !__atomic_load_n(&ad->dcf_state_on, __ATOMIC_RELAXED); ++} ++ + static int + ice_dcf_link_update(__rte_unused struct rte_eth_dev *dev, + __rte_unused int wait_to_complete) +@@ -898,6 +914,7 @@ static int + ice_dcf_dev_init(struct rte_eth_dev *eth_dev) + { + struct ice_dcf_adapter *adapter = eth_dev->data->dev_private; ++ struct ice_adapter *parent_adapter = &adapter->parent; + + eth_dev->dev_ops = &ice_dcf_eth_dev_ops; + eth_dev->rx_pkt_burst = ice_dcf_recv_pkts; +@@ -906,14 +923,16 @@ ice_dcf_dev_init(struct rte_eth_dev *eth_dev) + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; +- + adapter->real_hw.vc_event_msg_cb = ice_dcf_handle_pf_event_msg; + if (ice_dcf_init_hw(eth_dev, &adapter->real_hw) != 0) { + PMD_INIT_LOG(ERR, "Failed to init DCF hardware"); ++ __atomic_store_n(&parent_adapter->dcf_state_on, false, ++ __ATOMIC_RELAXED); + return -1; + } + ++ __atomic_store_n(&parent_adapter->dcf_state_on, true, __ATOMIC_RELAXED); ++ + if (ice_dcf_init_parent_adapter(eth_dev) != 0) { + PMD_INIT_LOG(ERR, "Failed to init DCF parent adapter"); + ice_dcf_uninit_hw(eth_dev, &adapter->real_hw); +diff --git a/dpdk/drivers/net/ice/ice_dcf_ethdev.h b/dpdk/drivers/net/ice/ice_dcf_ethdev.h +index b54528beae..0d25ff315e 100644 +--- a/dpdk/drivers/net/ice/ice_dcf_ethdev.h ++++ b/dpdk/drivers/net/ice/ice_dcf_ethdev.h +@@ -26,5 +26,6 @@ void ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw, + uint8_t *msg, uint16_t msglen); + int ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev); + void ice_dcf_uninit_parent_adapter(struct rte_eth_dev *eth_dev); ++bool ice_dcf_adminq_need_retry(struct ice_adapter *ad); + + #endif /* _ICE_DCF_ETHDEV_H_ */ +diff --git a/dpdk/drivers/net/ice/ice_dcf_parent.c b/dpdk/drivers/net/ice/ice_dcf_parent.c +index 30ead4c9fd..c7c1111a67 100644 +--- a/dpdk/drivers/net/ice/ice_dcf_parent.c ++++ 
b/dpdk/drivers/net/ice/ice_dcf_parent.c +@@ -111,15 +111,22 @@ static void* + ice_dcf_vsi_update_service_handler(void *param) + { + struct ice_dcf_hw *hw = param; ++ struct ice_dcf_adapter *adapter = ++ container_of(hw, struct ice_dcf_adapter, real_hw); ++ struct ice_adapter *parent_adapter = &adapter->parent; + ++ pthread_detach(pthread_self()); + usleep(ICE_DCF_VSI_UPDATE_SERVICE_INTERVAL); + ++ + rte_spinlock_lock(&vsi_update_lock); + + if (!ice_dcf_handle_vsi_update_event(hw)) { + struct ice_dcf_adapter *dcf_ad = + container_of(hw, struct ice_dcf_adapter, real_hw); + ++ __atomic_store_n(&parent_adapter->dcf_state_on, true, ++ __ATOMIC_RELAXED); + ice_dcf_update_vf_vsi_map(&dcf_ad->parent.hw, + hw->num_vfs, hw->vf_vsi_map); + } +@@ -135,6 +142,9 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw, + { + struct virtchnl_pf_event *pf_msg = (struct virtchnl_pf_event *)msg; + pthread_t thread; ++ struct ice_dcf_adapter *adapter = ++ container_of(dcf_hw, struct ice_dcf_adapter, real_hw); ++ struct ice_adapter *parent_adapter = &adapter->parent; + + if (msglen < sizeof(struct virtchnl_pf_event)) { + PMD_DRV_LOG(DEBUG, "Invalid event message length : %u", msglen); +@@ -159,6 +169,8 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw, + pf_msg->event_data.vf_vsi_map.vsi_id); + pthread_create(&thread, NULL, + ice_dcf_vsi_update_service_handler, dcf_hw); ++ __atomic_store_n(&parent_adapter->dcf_state_on, false, ++ __ATOMIC_RELAXED); + break; + default: + PMD_DRV_LOG(ERR, "Unknown event received %u", pf_msg->event); +@@ -208,7 +220,7 @@ ice_dcf_init_parent_hw(struct ice_hw *hw) + goto err_unroll_alloc; + + /* Initialize port_info struct with link information */ +- status = ice_aq_get_link_info(hw->port_info, false, NULL, NULL); ++ status = ice_aq_get_link_info(hw->port_info, true, NULL, NULL); + if (status) + goto err_unroll_alloc; + +@@ -361,7 +373,6 @@ ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev) + const struct rte_ether_addr *mac; + int err; + +- parent_adapter->eth_dev = eth_dev; + parent_adapter->pf.adapter = parent_adapter; + parent_adapter->pf.dev_data = eth_dev->data; + /* create a dummy main_vsi */ +diff --git a/dpdk/drivers/net/ice/ice_ethdev.c b/dpdk/drivers/net/ice/ice_ethdev.c +index 70e5f74b2f..3d46c727cc 100644 +--- a/dpdk/drivers/net/ice/ice_ethdev.c ++++ b/dpdk/drivers/net/ice/ice_ethdev.c +@@ -10,6 +10,8 @@ + #include + #include + ++#include ++ + #include "base/ice_sched.h" + #include "base/ice_flow.h" + #include "base/ice_dcb.h" +@@ -805,7 +807,7 @@ ice_init_mac_address(struct rte_eth_dev *dev) + (struct rte_ether_addr *)hw->port_info[0].mac.perm_addr); + + dev->data->mac_addrs = +- rte_zmalloc(NULL, sizeof(struct rte_ether_addr), 0); ++ rte_zmalloc(NULL, sizeof(struct rte_ether_addr) * ICE_NUM_MACADDR_MAX, 0); + if (!dev->data->mac_addrs) { + PMD_INIT_LOG(ERR, + "Failed to allocate memory to store mac address"); +@@ -1088,12 +1090,13 @@ ice_remove_all_mac_vlan_filters(struct ice_vsi *vsi) + { + struct ice_mac_filter *m_f; + struct ice_vlan_filter *v_f; ++ void *temp; + int ret = 0; + + if (!vsi || !vsi->mac_num) + return -EINVAL; + +- TAILQ_FOREACH(m_f, &vsi->mac_list, next) { ++ TAILQ_FOREACH_SAFE(m_f, &vsi->mac_list, next, temp) { + ret = ice_remove_mac_filter(vsi, &m_f->mac_info.mac_addr); + if (ret != ICE_SUCCESS) { + ret = -EINVAL; +@@ -1104,7 +1107,7 @@ ice_remove_all_mac_vlan_filters(struct ice_vsi *vsi) + if (vsi->vlan_num == 0) + return 0; + +- TAILQ_FOREACH(v_f, &vsi->vlan_list, next) { ++ TAILQ_FOREACH_SAFE(v_f, &vsi->vlan_list, next, temp) { + 
ret = ice_remove_vlan_filter(vsi, v_f->vlan_info.vlan_id); + if (ret != ICE_SUCCESS) { + ret = -EINVAL; +@@ -1336,7 +1339,7 @@ ice_handle_aq_msg(struct rte_eth_dev *dev) + * @param handle + * Pointer to interrupt handle. + * @param param +- * The address of parameter (struct rte_eth_dev *) regsitered before. ++ * The address of parameter (struct rte_eth_dev *) registered before. + * + * @return + * void +@@ -1689,7 +1692,7 @@ ice_setup_vsi(struct ice_pf *pf, enum ice_vsi_type type) + } + + /* At the beginning, only TC0. */ +- /* What we need here is the maximam number of the TX queues. ++ /* What we need here is the maximum number of the TX queues. + * Currently vsi->nb_qps means it. + * Correct it if any change. + */ +@@ -1768,8 +1771,14 @@ ice_pkg_file_search_path(struct rte_pci_device *pci_dev, char *pkg_file) + pos = rte_pci_find_ext_capability(pci_dev, RTE_PCI_EXT_CAP_ID_DSN); + + if (pos) { +- rte_pci_read_config(pci_dev, &dsn_low, 4, pos + 4); +- rte_pci_read_config(pci_dev, &dsn_high, 4, pos + 8); ++ if (rte_pci_read_config(pci_dev, &dsn_low, 4, pos + 4) < 0) { ++ PMD_INIT_LOG(ERR, "Failed to read pci config space\n"); ++ return -1; ++ } ++ if (rte_pci_read_config(pci_dev, &dsn_high, 4, pos + 8) < 0) { ++ PMD_INIT_LOG(ERR, "Failed to read pci config space\n"); ++ return -1; ++ } + snprintf(opt_ddp_filename, ICE_MAX_PKG_FILENAME_SIZE, + "ice-%08x%08x.pkg", dsn_high, dsn_low); + } else { +@@ -1831,7 +1840,11 @@ static int ice_load_pkg(struct rte_eth_dev *dev) + struct ice_adapter *ad = + ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); + +- ice_pkg_file_search_path(pci_dev, pkg_file); ++ err = ice_pkg_file_search_path(pci_dev, pkg_file); ++ if (err) { ++ PMD_INIT_LOG(ERR, "failed to search file path\n"); ++ return err; ++ } + + file = fopen(pkg_file, "rb"); + if (!file) { +@@ -2144,7 +2157,6 @@ ice_dev_init(struct rte_eth_dev *dev) + intr_handle = &pci_dev->intr_handle; + + pf->adapter = ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); +- pf->adapter->eth_dev = dev; + pf->dev_data = dev->data; + hw->back = pf->adapter; + hw->hw_addr = (uint8_t *)pci_dev->mem_resource[0].addr; +@@ -2174,7 +2186,7 @@ ice_dev_init(struct rte_eth_dev *dev) + if (ad->devargs.safe_mode_support == 0) { + PMD_INIT_LOG(ERR, "Failed to load the DDP package," + "Use safe-mode-support=1 to enter Safe Mode"); +- return ret; ++ goto err_init_fw; + } + + PMD_INIT_LOG(WARNING, "Failed to load the DDP package," +@@ -2246,30 +2258,37 @@ ice_dev_init(struct rte_eth_dev *dev) + ret = ice_flow_init(ad); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to initialize flow"); +- return ret; ++ goto err_flow_init; + } + } + + ret = ice_reset_fxp_resource(hw); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to reset fxp resource"); +- return ret; ++ goto err_flow_init; + } + + pf->supported_rxdid = ice_get_supported_rxdid(hw); + + return 0; + ++err_flow_init: ++ ice_flow_uninit(ad); ++ rte_intr_disable(intr_handle); ++ ice_pf_disable_irq0(hw); ++ rte_intr_callback_unregister(intr_handle, ++ ice_interrupt_handler, dev); + err_pf_setup: + ice_res_pool_destroy(&pf->msix_pool); + err_msix_pool_init: + rte_free(dev->data->mac_addrs); + dev->data->mac_addrs = NULL; + err_init_mac: +- ice_sched_cleanup_all(hw); +- rte_free(hw->port_info); +- ice_shutdown_all_ctrlq(hw); + rte_free(pf->proto_xtr); ++#ifndef RTE_EXEC_ENV_WINDOWS ++err_init_fw: ++#endif ++ ice_deinit_hw(hw); + + return ret; + } +@@ -2308,7 +2327,7 @@ ice_release_vsi(struct ice_vsi *vsi) + void + ice_vsi_disable_queues_intr(struct ice_vsi *vsi) + { +- struct rte_eth_dev *dev = 
vsi->adapter->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[vsi->adapter->pf.dev_data->port_id]; + struct rte_pci_device *pci_dev = ICE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + struct ice_hw *hw = ICE_VSI_TO_HW(vsi); +@@ -3164,11 +3183,36 @@ ice_rss_hash_set(struct ice_pf *pf, uint64_t rss_hf) + pf->rss_hf = rss_hf & ICE_RSS_HF_ALL; + } + ++static void ++ice_get_default_rss_key(uint8_t *rss_key, uint32_t rss_key_size) ++{ ++ static struct ice_aqc_get_set_rss_keys default_key; ++ static bool default_key_done; ++ uint8_t *key = (uint8_t *)&default_key; ++ size_t i; ++ ++ if (rss_key_size > sizeof(default_key)) { ++ PMD_DRV_LOG(WARNING, ++ "requested size %u is larger than default %zu, " ++ "only %zu bytes are gotten for key\n", ++ rss_key_size, sizeof(default_key), ++ sizeof(default_key)); ++ } ++ ++ if (!default_key_done) { ++ /* Calculate the default hash key */ ++ for (i = 0; i < sizeof(default_key); i++) ++ key[i] = (uint8_t)rte_rand(); ++ default_key_done = true; ++ } ++ rte_memcpy(rss_key, key, RTE_MIN(rss_key_size, sizeof(default_key))); ++} ++ + static int ice_init_rss(struct ice_pf *pf) + { + struct ice_hw *hw = ICE_PF_TO_HW(pf); + struct ice_vsi *vsi = pf->main_vsi; +- struct rte_eth_dev *dev = pf->adapter->eth_dev; ++ struct rte_eth_dev_data *dev_data = pf->dev_data; + struct ice_aq_get_set_rss_lut_params lut_params; + struct rte_eth_rss_conf *rss_conf; + struct ice_aqc_get_set_rss_keys key; +@@ -3177,8 +3221,8 @@ static int ice_init_rss(struct ice_pf *pf) + bool is_safe_mode = pf->adapter->is_safe_mode; + uint32_t reg; + +- rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf; +- nb_q = dev->data->nb_rx_queues; ++ rss_conf = &dev_data->dev_conf.rx_adv_conf.rss_conf; ++ nb_q = dev_data->nb_rx_queues; + vsi->rss_key_size = ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE; + vsi->rss_lut_size = pf->hash_lut_size; + +@@ -3212,16 +3256,15 @@ static int ice_init_rss(struct ice_pf *pf) + } + } + /* configure RSS key */ +- if (!rss_conf->rss_key) { +- /* Calculate the default hash key */ +- for (i = 0; i <= vsi->rss_key_size; i++) +- vsi->rss_key[i] = (uint8_t)rte_rand(); +- } else { ++ if (!rss_conf->rss_key) ++ ice_get_default_rss_key(vsi->rss_key, vsi->rss_key_size); ++ else + rte_memcpy(vsi->rss_key, rss_conf->rss_key, + RTE_MIN(rss_conf->rss_key_len, + vsi->rss_key_size)); +- } +- rte_memcpy(key.standard_rss_key, vsi->rss_key, vsi->rss_key_size); ++ ++ rte_memcpy(key.standard_rss_key, vsi->rss_key, ++ RTE_MIN(sizeof(key.standard_rss_key), vsi->rss_key_size)); + ret = ice_aq_set_rss_key(hw, vsi->idx, &key); + if (ret) + goto out; +@@ -3312,7 +3355,7 @@ __vsi_queues_bind_intr(struct ice_vsi *vsi, uint16_t msix_vect, + void + ice_vsi_queues_bind_intr(struct ice_vsi *vsi) + { +- struct rte_eth_dev *dev = vsi->adapter->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[vsi->adapter->pf.dev_data->port_id]; + struct rte_pci_device *pci_dev = ICE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + struct ice_hw *hw = ICE_VSI_TO_HW(vsi); +@@ -3365,7 +3408,7 @@ ice_vsi_queues_bind_intr(struct ice_vsi *vsi) + void + ice_vsi_enable_queues_intr(struct ice_vsi *vsi) + { +- struct rte_eth_dev *dev = vsi->adapter->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[vsi->adapter->pf.dev_data->port_id]; + struct rte_pci_device *pci_dev = ICE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + struct ice_hw *hw = ICE_VSI_TO_HW(vsi); +@@ -3499,7 +3542,7 @@ ice_dev_start(struct rte_eth_dev *dev) 
+ goto rx_err; + } + +- /* enable Rx interrput and mapping Rx queue to interrupt vector */ ++ /* enable Rx interrupt and mapping Rx queue to interrupt vector */ + if (ice_rxq_intr_setup(dev)) + return -EIO; + +@@ -3526,8 +3569,8 @@ ice_dev_start(struct rte_eth_dev *dev) + + ice_dev_set_link_up(dev); + +- /* Call get_link_info aq commond to enable/disable LSE */ +- ice_link_update(dev, 0); ++ /* Call get_link_info aq command to enable/disable LSE */ ++ ice_link_update(dev, 1); + + pf->adapter_stopped = false; + +@@ -3629,7 +3672,7 @@ ice_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + } + + dev_info->rx_queue_offload_capa = 0; +- dev_info->tx_queue_offload_capa = 0; ++ dev_info->tx_queue_offload_capa = DEV_TX_OFFLOAD_MBUF_FAST_FREE; + + dev_info->reta_size = pf->hash_lut_size; + dev_info->hash_key_size = (VSIQF_HKEY_MAX_INDEX + 1) * sizeof(uint32_t); +@@ -4039,20 +4082,16 @@ ice_vsi_config_vlan_filter(struct ice_vsi *vsi, bool on) + { + struct ice_hw *hw = ICE_VSI_TO_HW(vsi); + struct ice_vsi_ctx ctxt; +- uint8_t sec_flags, sw_flags2; ++ uint8_t sw_flags2; + int ret = 0; + +- sec_flags = ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << +- ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S; + sw_flags2 = ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + +- if (on) { +- vsi->info.sec_flags |= sec_flags; ++ if (on) + vsi->info.sw_flags2 |= sw_flags2; +- } else { +- vsi->info.sec_flags &= ~sec_flags; ++ else + vsi->info.sw_flags2 &= ~sw_flags2; +- } ++ + vsi->info.sw_id = hw->port_info->sw_id; + (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info)); + ctxt.info.valid_sections = +@@ -4388,8 +4427,10 @@ ice_rss_hash_update(struct rte_eth_dev *dev, + if (status) + return status; + +- if (rss_conf->rss_hf == 0) ++ if (rss_conf->rss_hf == 0) { ++ pf->rss_hf = 0; + return 0; ++ } + + /* RSS hash configuration */ + ice_rss_hash_set(pf, rss_conf->rss_hf); +@@ -4448,8 +4489,11 @@ ice_promisc_disable(struct rte_eth_dev *dev) + uint8_t pmask; + int ret = 0; + +- pmask = ICE_PROMISC_UCAST_RX | ICE_PROMISC_UCAST_TX | +- ICE_PROMISC_MCAST_RX | ICE_PROMISC_MCAST_TX; ++ if (dev->data->all_multicast == 1) ++ pmask = ICE_PROMISC_UCAST_RX | ICE_PROMISC_UCAST_TX; ++ else ++ pmask = ICE_PROMISC_UCAST_RX | ICE_PROMISC_UCAST_TX | ++ ICE_PROMISC_MCAST_RX | ICE_PROMISC_MCAST_TX; + + status = ice_clear_vsi_promisc(hw, vsi->idx, pmask, 0); + if (status != ICE_SUCCESS) { +@@ -4565,10 +4609,12 @@ ice_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) + hw->flash.nvm.minor, + hw->flash.nvm.eetrack, + ver, build, patch); ++ if (ret < 0) ++ return -EINVAL; + + /* add the size of '\0' */ + ret += 1; +- if (fw_size < (u32)ret) ++ if (fw_size < (size_t)ret) + return ret; + else + return 0; +@@ -5154,7 +5200,7 @@ ice_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, + count++; + } + +- /* Get individiual stats from ice_hw_port struct */ ++ /* Get individual stats from ice_hw_port struct */ + for (i = 0; i < ICE_NB_HW_PORT_XSTATS; i++) { + xstats[count].value = + *(uint64_t *)((char *)hw_stats + +@@ -5185,7 +5231,7 @@ static int ice_xstats_get_names(__rte_unused struct rte_eth_dev *dev, + count++; + } + +- /* Get individiual stats from ice_hw_port struct */ ++ /* Get individual stats from ice_hw_port struct */ + for (i = 0; i < ICE_NB_HW_PORT_XSTATS; i++) { + strlcpy(xstats_names[count].name, ice_hw_port_strings[i].name, + sizeof(xstats_names[count].name)); +diff --git a/dpdk/drivers/net/ice/ice_ethdev.h b/dpdk/drivers/net/ice/ice_ethdev.h +index 2b03c59671..fd81ede8a1 100644 +--- 
a/dpdk/drivers/net/ice/ice_ethdev.h ++++ b/dpdk/drivers/net/ice/ice_ethdev.h +@@ -50,7 +50,7 @@ + #define ICE_PKG_FILE_SEARCH_PATH_UPDATES "/lib/firmware/updates/intel/ice/ddp/" + #define ICE_MAX_PKG_FILENAME_SIZE 256 + +-#define MAX_ACL_ENTRIES 512 ++#define MAX_ACL_NORMAL_ENTRIES 256 + + /** + * vlan_id is a 12 bit number. +@@ -222,7 +222,7 @@ struct ice_vsi { + * needs to add, HW needs to know the layout that VSIs are organized. + * Besides that, VSI isan element and can't switch packets, which needs + * to add new component VEB to perform switching. So, a new VSI needs +- * to specify the the uplink VSI (Parent VSI) before created. The ++ * to specify the uplink VSI (Parent VSI) before created. The + * uplink VSI will check whether it had a VEB to switch packets. If no, + * it will try to create one. Then, uplink VSI will move the new VSI + * into its' sib_vsi_list to manage all the downlink VSI. +@@ -400,7 +400,7 @@ struct ice_acl_conf { + struct ice_acl_info { + struct ice_acl_conf conf; + struct rte_bitmap *slots; +- uint64_t hw_entry_id[MAX_ACL_ENTRIES]; ++ uint64_t hw_entry_id[MAX_ACL_NORMAL_ENTRIES]; + }; + + struct ice_pf { +@@ -467,7 +467,6 @@ struct ice_devargs { + struct ice_adapter { + /* Common for both PF and VF */ + struct ice_hw hw; +- struct rte_eth_dev *eth_dev; + struct ice_pf pf; + bool rx_bulk_alloc_allowed; + bool rx_vec_allowed; +@@ -479,6 +478,14 @@ struct ice_adapter { + struct ice_devargs devargs; + enum ice_pkg_type active_pkg_type; /* loaded ddp package type */ + uint16_t fdir_ref_cnt; ++ /* True if DCF state of the associated PF is on */ ++ bool dcf_state_on; ++#ifdef RTE_ARCH_X86 ++ bool rx_use_avx2; ++ bool rx_use_avx512; ++ bool tx_use_avx2; ++ bool tx_use_avx512; ++#endif + }; + + struct ice_vsi_vlan_pvid_info { +@@ -512,8 +519,6 @@ struct ice_vsi_vlan_pvid_info { + (&(((struct ice_vsi *)vsi)->adapter->hw)) + #define ICE_VSI_TO_PF(vsi) \ + (&(((struct ice_vsi *)vsi)->adapter->pf)) +-#define ICE_VSI_TO_ETH_DEV(vsi) \ +- (((struct ice_vsi *)vsi)->adapter->eth_dev) + + /* ICE_PF_TO */ + #define ICE_PF_TO_HW(pf) \ +diff --git a/dpdk/drivers/net/ice/ice_fdir_filter.c b/dpdk/drivers/net/ice/ice_fdir_filter.c +index 175abcdd5c..4a071254ce 100644 +--- a/dpdk/drivers/net/ice/ice_fdir_filter.c ++++ b/dpdk/drivers/net/ice/ice_fdir_filter.c +@@ -382,7 +382,7 @@ ice_fdir_counter_free(__rte_unused struct ice_pf *pf, + static int + ice_fdir_init_filter_list(struct ice_pf *pf) + { +- struct rte_eth_dev *dev = pf->adapter->eth_dev; ++ struct rte_eth_dev *dev = &rte_eth_devices[pf->dev_data->port_id]; + struct ice_fdir_info *fdir_info = &pf->fdir; + char fdir_hash_name[RTE_HASH_NAMESIZE]; + int ret; +@@ -444,7 +444,7 @@ ice_fdir_release_filter_list(struct ice_pf *pf) + static int + ice_fdir_setup(struct ice_pf *pf) + { +- struct rte_eth_dev *eth_dev = pf->adapter->eth_dev; ++ struct rte_eth_dev *eth_dev = &rte_eth_devices[pf->dev_data->port_id]; + struct ice_hw *hw = ICE_PF_TO_HW(pf); + const struct rte_memzone *mz = NULL; + char z_name[RTE_MEMZONE_NAMESIZE]; +@@ -632,7 +632,7 @@ ice_fdir_prof_rm_all(struct ice_pf *pf) + static void + ice_fdir_teardown(struct ice_pf *pf) + { +- struct rte_eth_dev *eth_dev = pf->adapter->eth_dev; ++ struct rte_eth_dev *eth_dev = &rte_eth_devices[pf->dev_data->port_id]; + struct ice_hw *hw = ICE_PF_TO_HW(pf); + struct ice_vsi *vsi; + int err; +diff --git a/dpdk/drivers/net/ice/ice_generic_flow.c b/dpdk/drivers/net/ice/ice_generic_flow.c +index 1429cbc3b6..311fd7d162 100644 +--- a/dpdk/drivers/net/ice/ice_generic_flow.c ++++ 
b/dpdk/drivers/net/ice/ice_generic_flow.c +@@ -2323,7 +2323,9 @@ ice_flow_flush(struct rte_eth_dev *dev, + ret = ice_flow_destroy(dev, p_flow, error); + if (ret) { + PMD_DRV_LOG(ERR, "Failed to flush flows"); +- return -EINVAL; ++ if (ret != -EAGAIN) ++ ret = -EINVAL; ++ return ret; + } + } + +@@ -2360,15 +2362,16 @@ ice_flow_query(struct rte_eth_dev *dev, + ret = flow->engine->query_count(ad, flow, count, error); + break; + default: +- return rte_flow_error_set(error, ENOTSUP, ++ ret = rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "action not supported"); ++ goto out; + } + } + ++out: + rte_spinlock_unlock(&pf->flow_ops_lock); +- + return ret; + } + +@@ -2379,7 +2382,7 @@ ice_flow_redirect(struct ice_adapter *ad, + struct ice_pf *pf = &ad->pf; + struct rte_flow *p_flow; + void *temp; +- int ret; ++ int ret = 0; + + rte_spinlock_lock(&pf->flow_ops_lock); + +@@ -2389,11 +2392,11 @@ ice_flow_redirect(struct ice_adapter *ad, + ret = p_flow->engine->redirect(ad, p_flow, rd); + if (ret) { + PMD_DRV_LOG(ERR, "Failed to redirect flows"); +- return ret; ++ break; + } + } + + rte_spinlock_unlock(&pf->flow_ops_lock); + +- return 0; ++ return ret; + } +diff --git a/dpdk/drivers/net/ice/ice_hash.c b/dpdk/drivers/net/ice/ice_hash.c +index fe3e06c579..1bb7d2c7c6 100644 +--- a/dpdk/drivers/net/ice/ice_hash.c ++++ b/dpdk/drivers/net/ice/ice_hash.c +@@ -312,6 +312,9 @@ struct rss_type_match_hdr hint_eth_pppoes_ipv6_tcp = { + struct rss_type_match_hdr hint_eth_pppoes = { + ICE_FLOW_SEG_HDR_PPPOE, + ETH_RSS_ETH | ETH_RSS_PPPOE}; ++struct rss_type_match_hdr hint_ethertype = { ++ ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_ETH_NON_IP, ++ ETH_RSS_ETH}; + + /* Supported pattern for os default package. */ + static struct ice_pattern_match_item ice_hash_pattern_list_os[] = { +@@ -444,6 +447,8 @@ static struct ice_pattern_match_item ice_hash_pattern_list_comms[] = { + &hint_eth_pppoes_ipv6_tcp}, + {pattern_eth_pppoes, ICE_INSET_NONE, + &hint_eth_pppoes}, ++ {pattern_ethertype, ICE_INSET_NONE, ++ &hint_ethertype}, + }; + + /** +@@ -1140,6 +1145,15 @@ ice_hash_parse_action(struct ice_pattern_match_item *pattern_match_item, + "Not supported flow"); + } + ++ /* update hash field for eth-non-ip. */ ++ if (rss_type & ETH_RSS_ETH) { ++ if (hash_meta->pkt_hdr & ++ ICE_FLOW_SEG_HDR_ETH_NON_IP) { ++ hash_meta->hash_flds |= ++ BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_TYPE); ++ } ++ } ++ + /* update hash field for nat-t esp. 
*/ + if (rss_type & ETH_RSS_ESP && + (m->eth_rss_hint & ETH_RSS_NONFRAG_IPV4_UDP || +diff --git a/dpdk/drivers/net/ice/ice_rxtx.c b/dpdk/drivers/net/ice/ice_rxtx.c +index c98328ce0b..5f94b4174d 100644 +--- a/dpdk/drivers/net/ice/ice_rxtx.c ++++ b/dpdk/drivers/net/ice/ice_rxtx.c +@@ -129,6 +129,8 @@ ice_rxd_to_pkt_fields_by_comms_aux_v1(struct ice_rx_queue *rxq, + *RTE_NET_ICE_DYNF_PROTO_XTR_METADATA(mb) = metadata; + } + } ++#else ++ RTE_SET_USED(rxq); + #endif + } + +@@ -167,54 +169,60 @@ ice_rxd_to_pkt_fields_by_comms_aux_v2(struct ice_rx_queue *rxq, + *RTE_NET_ICE_DYNF_PROTO_XTR_METADATA(mb) = metadata; + } + } ++#else ++ RTE_SET_USED(rxq); + #endif + } + ++static const ice_rxd_to_pkt_fields_t rxd_to_pkt_fields_ops[] = { ++ [ICE_RXDID_COMMS_AUX_VLAN] = ice_rxd_to_pkt_fields_by_comms_aux_v1, ++ [ICE_RXDID_COMMS_AUX_IPV4] = ice_rxd_to_pkt_fields_by_comms_aux_v1, ++ [ICE_RXDID_COMMS_AUX_IPV6] = ice_rxd_to_pkt_fields_by_comms_aux_v1, ++ [ICE_RXDID_COMMS_AUX_IPV6_FLOW] = ice_rxd_to_pkt_fields_by_comms_aux_v1, ++ [ICE_RXDID_COMMS_AUX_TCP] = ice_rxd_to_pkt_fields_by_comms_aux_v1, ++ [ICE_RXDID_COMMS_AUX_IP_OFFSET] = ice_rxd_to_pkt_fields_by_comms_aux_v2, ++ [ICE_RXDID_COMMS_GENERIC] = ice_rxd_to_pkt_fields_by_comms_generic, ++ [ICE_RXDID_COMMS_OVS] = ice_rxd_to_pkt_fields_by_comms_ovs, ++}; ++ + void + ice_select_rxd_to_pkt_fields_handler(struct ice_rx_queue *rxq, uint32_t rxdid) + { ++ rxq->rxdid = rxdid; ++ + switch (rxdid) { + case ICE_RXDID_COMMS_AUX_VLAN: + rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_vlan_mask; +- rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_aux_v1; + break; + + case ICE_RXDID_COMMS_AUX_IPV4: + rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_ipv4_mask; +- rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_aux_v1; + break; + + case ICE_RXDID_COMMS_AUX_IPV6: + rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_ipv6_mask; +- rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_aux_v1; + break; + + case ICE_RXDID_COMMS_AUX_IPV6_FLOW: + rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_ipv6_flow_mask; +- rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_aux_v1; + break; + + case ICE_RXDID_COMMS_AUX_TCP: + rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_tcp_mask; +- rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_aux_v1; + break; + + case ICE_RXDID_COMMS_AUX_IP_OFFSET: + rxq->xtr_ol_flag = rte_net_ice_dynflag_proto_xtr_ip_offset_mask; +- rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_aux_v2; + break; + + case ICE_RXDID_COMMS_GENERIC: +- rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_generic; +- break; +- ++ /* fallthrough */ + case ICE_RXDID_COMMS_OVS: +- rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_ovs; + break; + + default: + /* update this according to the RXDID for PROTO_XTR_NONE */ +- rxq->rxd_to_pkt_fields = ice_rxd_to_pkt_fields_by_comms_ovs; ++ rxq->rxdid = ICE_RXDID_COMMS_OVS; + break; + } + +@@ -228,11 +236,11 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq) + struct ice_vsi *vsi = rxq->vsi; + struct ice_hw *hw = ICE_VSI_TO_HW(vsi); + struct ice_pf *pf = ICE_VSI_TO_PF(vsi); +- struct rte_eth_dev *dev = ICE_VSI_TO_ETH_DEV(rxq->vsi); ++ struct rte_eth_dev_data *dev_data = rxq->vsi->adapter->pf.dev_data; + struct ice_rlan_ctx rx_ctx; + enum ice_status err; +- uint16_t buf_size, len; +- struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; ++ uint16_t buf_size; ++ struct rte_eth_rxmode *rxmode = &dev_data->dev_conf.rxmode; + uint32_t rxdid = ICE_RXDID_COMMS_OVS; + uint32_t regval; + +@@ -241,9 
+249,9 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq) + RTE_PKTMBUF_HEADROOM); + rxq->rx_hdr_len = 0; + rxq->rx_buf_len = RTE_ALIGN(buf_size, (1 << ICE_RLAN_CTX_DBUF_S)); +- len = ICE_SUPPORT_CHAIN_NUM * rxq->rx_buf_len; +- rxq->max_pkt_len = RTE_MIN(len, +- dev->data->dev_conf.rxmode.max_rx_pkt_len); ++ rxq->max_pkt_len = RTE_MIN((uint32_t) ++ ICE_SUPPORT_CHAIN_NUM * rxq->rx_buf_len, ++ dev_data->dev_conf.rxmode.max_rx_pkt_len); + + if (rxmode->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { + if (rxq->max_pkt_len <= ICE_ETH_MAX_LEN || +@@ -339,7 +347,7 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq) + + /* Check if scattered RX needs to be used. */ + if (rxq->max_pkt_len > buf_size) +- dev->data->scattered_rx = 1; ++ dev_data->scattered_rx = 1; + + rxq->qrx_tail = hw->hw_addr + QRX_TAIL(rxq->reg_idx); + +@@ -1075,7 +1083,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev, + rxq->proto_xtr = pf->proto_xtr != NULL ? + pf->proto_xtr[queue_idx] : PROTO_XTR_NONE; + +- /* Allocate the maximun number of RX ring hardware descriptor. */ ++ /* Allocate the maximum number of RX ring hardware descriptor. */ + len = ICE_MAX_RING_DESC; + + /** +@@ -1096,6 +1104,7 @@ ice_rx_queue_setup(struct rte_eth_dev *dev, + return -ENOMEM; + } + ++ rxq->mz = rz; + /* Zero all the descriptors in the ring. */ + memset(rz->addr, 0, ring_size); + +@@ -1151,6 +1160,7 @@ ice_rx_queue_release(void *rxq) + + q->rx_rel_mbufs(q); + rte_free(q->sw_ring); ++ rte_memzone_free(q->mz); + rte_free(q); + } + +@@ -1203,7 +1213,7 @@ ice_tx_queue_setup(struct rte_eth_dev *dev, + tx_free_thresh = (uint16_t)(tx_conf->tx_free_thresh ? + tx_conf->tx_free_thresh : + ICE_DEFAULT_TX_FREE_THRESH); +- /* force tx_rs_thresh to adapt an aggresive tx_free_thresh */ ++ /* force tx_rs_thresh to adapt an aggressive tx_free_thresh */ + tx_rs_thresh = + (ICE_DEFAULT_TX_RSBIT_THRESH + tx_free_thresh > nb_desc) ? 
+ nb_desc - tx_free_thresh : ICE_DEFAULT_TX_RSBIT_THRESH; +@@ -1297,6 +1307,7 @@ ice_tx_queue_setup(struct rte_eth_dev *dev, + return -ENOMEM; + } + ++ txq->mz = tz; + txq->nb_tx_desc = nb_desc; + txq->tx_rs_thresh = tx_rs_thresh; + txq->tx_free_thresh = tx_free_thresh; +@@ -1347,6 +1358,7 @@ ice_tx_queue_release(void *txq) + + q->tx_rel_mbufs(q); + rte_free(q->sw_ring); ++ rte_memzone_free(q->mz); + rte_free(q); + } + +@@ -1434,7 +1446,9 @@ ice_rxd_error_to_pkt_flags(uint16_t stat_err0) + return 0; + + if (likely(!(stat_err0 & ICE_RX_FLEX_ERR0_BITS))) { +- flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD); ++ flags |= (PKT_RX_IP_CKSUM_GOOD | ++ PKT_RX_L4_CKSUM_GOOD | ++ PKT_RX_OUTER_L4_CKSUM_GOOD); + return flags; + } + +@@ -1548,7 +1562,7 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq) + mb->packet_type = ptype_tbl[ICE_RX_FLEX_DESC_PTYPE_M & + rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)]; + ice_rxd_to_vlan_tci(mb, &rxdp[j]); +- rxq->rxd_to_pkt_fields(rxq, mb, &rxdp[j]); ++ rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]); + + mb->ol_flags |= pkt_flags; + } +@@ -1628,7 +1642,7 @@ ice_rx_alloc_bufs(struct ice_rx_queue *rxq) + rxdp[i].read.pkt_addr = dma_addr; + } + +- /* Update rx tail regsiter */ ++ /* Update Rx tail register */ + ICE_PCI_REG_WRITE(rxq->qrx_tail, rxq->rx_free_trigger); + + rxq->rx_free_trigger = +@@ -1644,7 +1658,6 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + { + struct ice_rx_queue *rxq = (struct ice_rx_queue *)rx_queue; + uint16_t nb_rx = 0; +- struct rte_eth_dev *dev; + + if (!nb_pkts) + return 0; +@@ -1661,8 +1674,7 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + if (ice_rx_alloc_bufs(rxq) != 0) { + uint16_t i, j; + +- dev = ICE_VSI_TO_ETH_DEV(rxq->vsi); +- dev->data->rx_mbuf_alloc_failed += ++ rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed += + rxq->rx_free_thresh; + PMD_RX_LOG(DEBUG, "Rx mbuf alloc failed for " + "port_id=%u, queue_id=%u", +@@ -1735,7 +1747,6 @@ ice_recv_scattered_pkts(void *rx_queue, + uint64_t dma_addr; + uint64_t pkt_flags; + uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; +- struct rte_eth_dev *dev; + + while (nb_rx < nb_pkts) { + rxdp = &rx_ring[rx_id]; +@@ -1748,8 +1759,7 @@ ice_recv_scattered_pkts(void *rx_queue, + /* allocate mbuf */ + nmb = rte_mbuf_raw_alloc(rxq->mp); + if (unlikely(!nmb)) { +- dev = ICE_VSI_TO_ETH_DEV(rxq->vsi); +- dev->data->rx_mbuf_alloc_failed++; ++ rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++; + break; + } + rxd = *rxdp; /* copy descriptor in ring to temp variable*/ +@@ -1845,7 +1855,7 @@ ice_recv_scattered_pkts(void *rx_queue, + first_seg->packet_type = ptype_tbl[ICE_RX_FLEX_DESC_PTYPE_M & + rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)]; + ice_rxd_to_vlan_tci(first_seg, &rxd); +- rxq->rxd_to_pkt_fields(rxq, first_seg, &rxd); ++ rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd); + pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0); + first_seg->ol_flags |= pkt_flags; + /* Prefetch data of first segment, if configured to do so. */ +@@ -1865,7 +1875,7 @@ ice_recv_scattered_pkts(void *rx_queue, + * threshold of the queue, advance the Receive Descriptor Tail (RDT) + * register. Update the RDT with the value of the last processed RX + * descriptor minus 1, to guarantee that the RDT register is never +- * equal to the RDH register, which creates a "full" ring situtation ++ * equal to the RDH register, which creates a "full" ring situation + * from the hardware point of view. 
+ */ + nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold); +@@ -2041,7 +2051,6 @@ ice_free_queues(struct rte_eth_dev *dev) + continue; + ice_rx_queue_release(dev->data->rx_queues[i]); + dev->data->rx_queues[i] = NULL; +- rte_eth_dma_zone_free(dev, "rx_ring", i); + } + dev->data->nb_rx_queues = 0; + +@@ -2050,7 +2059,6 @@ ice_free_queues(struct rte_eth_dev *dev) + continue; + ice_tx_queue_release(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; +- rte_eth_dma_zone_free(dev, "tx_ring", i); + } + dev->data->nb_tx_queues = 0; + } +@@ -2071,7 +2079,7 @@ ice_fdir_setup_tx_resources(struct ice_pf *pf) + return -EINVAL; + } + +- dev = pf->adapter->eth_dev; ++ dev = &rte_eth_devices[pf->adapter->pf.dev_data->port_id]; + + /* Allocate the TX queue data structure. */ + txq = rte_zmalloc_socket("ice fdir tx queue", +@@ -2097,6 +2105,7 @@ ice_fdir_setup_tx_resources(struct ice_pf *pf) + return -ENOMEM; + } + ++ txq->mz = tz; + txq->nb_tx_desc = ICE_FDIR_NUM_TX_DESC; + txq->queue_id = ICE_FDIR_QUEUE_ID; + txq->reg_idx = pf->fdir.fdir_vsi->base_queue; +@@ -2129,7 +2138,7 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf) + return -EINVAL; + } + +- dev = pf->adapter->eth_dev; ++ dev = &rte_eth_devices[pf->adapter->pf.dev_data->port_id]; + + /* Allocate the RX queue data structure. */ + rxq = rte_zmalloc_socket("ice fdir rx queue", +@@ -2155,6 +2164,7 @@ ice_fdir_setup_rx_resources(struct ice_pf *pf) + return -ENOMEM; + } + ++ rxq->mz = rz; + rxq->nb_rx_desc = ICE_FDIR_NUM_RX_DESC; + rxq->queue_id = ICE_FDIR_QUEUE_ID; + rxq->reg_idx = pf->fdir.fdir_vsi->base_queue; +@@ -2198,7 +2208,6 @@ ice_recv_pkts(void *rx_queue, + uint64_t dma_addr; + uint64_t pkt_flags; + uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; +- struct rte_eth_dev *dev; + + while (nb_rx < nb_pkts) { + rxdp = &rx_ring[rx_id]; +@@ -2211,8 +2220,7 @@ ice_recv_pkts(void *rx_queue, + /* allocate mbuf */ + nmb = rte_mbuf_raw_alloc(rxq->mp); + if (unlikely(!nmb)) { +- dev = ICE_VSI_TO_ETH_DEV(rxq->vsi); +- dev->data->rx_mbuf_alloc_failed++; ++ rxq->vsi->adapter->pf.dev_data->rx_mbuf_alloc_failed++; + break; + } + rxd = *rxdp; /* copy descriptor in ring to temp variable*/ +@@ -2249,7 +2257,7 @@ ice_recv_pkts(void *rx_queue, + rxm->packet_type = ptype_tbl[ICE_RX_FLEX_DESC_PTYPE_M & + rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)]; + ice_rxd_to_vlan_tci(rxm, &rxd); +- rxq->rxd_to_pkt_fields(rxq, rxm, &rxd); ++ rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd); + pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0); + rxm->ol_flags |= pkt_flags; + /* copy old mbuf to rx_pkts */ +@@ -2351,15 +2359,15 @@ ice_txd_enable_checksum(uint64_t ol_flags, + if (ol_flags & PKT_TX_IP_CKSUM) { + *td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM; + *td_offset |= (tx_offload.l3_len >> 2) << +- ICE_TX_DESC_LEN_IPLEN_S; ++ ICE_TX_DESC_LEN_IPLEN_S; + } else if (ol_flags & PKT_TX_IPV4) { + *td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV4; + *td_offset |= (tx_offload.l3_len >> 2) << +- ICE_TX_DESC_LEN_IPLEN_S; ++ ICE_TX_DESC_LEN_IPLEN_S; + } else if (ol_flags & PKT_TX_IPV6) { + *td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV6; + *td_offset |= (tx_offload.l3_len >> 2) << +- ICE_TX_DESC_LEN_IPLEN_S; ++ ICE_TX_DESC_LEN_IPLEN_S; + } + + if (ol_flags & PKT_TX_TCP_SEG) { +@@ -2970,7 +2978,7 @@ tx_xmit_pkts(struct ice_tx_queue *txq, + ice_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n)); + txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n)); + +- /* Determin if RS bit needs to be set */ ++ /* Determine if RS bit needs to be set */ + if (txq->tx_tail > txq->tx_next_rs) { + 
txr[txq->tx_next_rs].cmd_type_offset_bsz |= + rte_cpu_to_le_64(((uint64_t)ICE_TX_DESC_CMD_RS) << +@@ -3025,10 +3033,10 @@ ice_set_rx_function(struct rte_eth_dev *dev) + #ifdef RTE_ARCH_X86 + struct ice_rx_queue *rxq; + int i; +- bool use_avx512 = false; +- bool use_avx2 = false; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { ++ ad->rx_use_avx512 = false; ++ ad->rx_use_avx2 = false; + if (!ice_rx_vec_dev_check(dev) && ad->rx_bulk_alloc_allowed && + rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) { + ad->rx_vec_allowed = true; +@@ -3044,16 +3052,16 @@ ice_set_rx_function(struct rte_eth_dev *dev) + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 && + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1) + #ifdef CC_AVX512_SUPPORT +- use_avx512 = true; ++ ad->rx_use_avx512 = true; + #else + PMD_DRV_LOG(NOTICE, + "AVX512 is not supported in build env"); + #endif +- if (!use_avx512 && ++ if (!ad->rx_use_avx512 && + (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 || + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) && + rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) +- use_avx2 = true; ++ ad->rx_use_avx2 = true; + + } else { + ad->rx_vec_allowed = false; +@@ -3062,7 +3070,7 @@ ice_set_rx_function(struct rte_eth_dev *dev) + + if (ad->rx_vec_allowed) { + if (dev->data->scattered_rx) { +- if (use_avx512) { ++ if (ad->rx_use_avx512) { + #ifdef CC_AVX512_SUPPORT + PMD_DRV_LOG(NOTICE, + "Using AVX512 Vector Scattered Rx (port %d).", +@@ -3073,14 +3081,14 @@ ice_set_rx_function(struct rte_eth_dev *dev) + } else { + PMD_DRV_LOG(DEBUG, + "Using %sVector Scattered Rx (port %d).", +- use_avx2 ? "avx2 " : "", ++ ad->rx_use_avx2 ? "avx2 " : "", + dev->data->port_id); +- dev->rx_pkt_burst = use_avx2 ? ++ dev->rx_pkt_burst = ad->rx_use_avx2 ? + ice_recv_scattered_pkts_vec_avx2 : + ice_recv_scattered_pkts_vec; + } + } else { +- if (use_avx512) { ++ if (ad->rx_use_avx512) { + #ifdef CC_AVX512_SUPPORT + PMD_DRV_LOG(NOTICE, + "Using AVX512 Vector Rx (port %d).", +@@ -3091,9 +3099,9 @@ ice_set_rx_function(struct rte_eth_dev *dev) + } else { + PMD_DRV_LOG(DEBUG, + "Using %sVector Rx (port %d).", +- use_avx2 ? "avx2 " : "", ++ ad->rx_use_avx2 ? "avx2 " : "", + dev->data->port_id); +- dev->rx_pkt_burst = use_avx2 ? ++ dev->rx_pkt_burst = ad->rx_use_avx2 ? 
+ ice_recv_pkts_vec_avx2 : + ice_recv_pkts_vec; + } +@@ -3241,10 +3249,10 @@ ice_set_tx_function(struct rte_eth_dev *dev) + #ifdef RTE_ARCH_X86 + struct ice_tx_queue *txq; + int i; +- bool use_avx512 = false; +- bool use_avx2 = false; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { ++ ad->tx_use_avx2 = false; ++ ad->tx_use_avx512 = false; + if (!ice_tx_vec_dev_check(dev) && + rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) { + ad->tx_vec_allowed = true; +@@ -3260,16 +3268,16 @@ ice_set_tx_function(struct rte_eth_dev *dev) + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 && + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1) + #ifdef CC_AVX512_SUPPORT +- use_avx512 = true; ++ ad->tx_use_avx512 = true; + #else + PMD_DRV_LOG(NOTICE, + "AVX512 is not supported in build env"); + #endif +- if (!use_avx512 && ++ if (!ad->tx_use_avx512 && + (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 || + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) && + rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) +- use_avx2 = true; ++ ad->tx_use_avx2 = true; + + } else { + ad->tx_vec_allowed = false; +@@ -3277,7 +3285,7 @@ ice_set_tx_function(struct rte_eth_dev *dev) + } + + if (ad->tx_vec_allowed) { +- if (use_avx512) { ++ if (ad->tx_use_avx512) { + #ifdef CC_AVX512_SUPPORT + PMD_DRV_LOG(NOTICE, "Using AVX512 Vector Tx (port %d).", + dev->data->port_id); +@@ -3285,9 +3293,9 @@ ice_set_tx_function(struct rte_eth_dev *dev) + #endif + } else { + PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).", +- use_avx2 ? "avx2 " : "", ++ ad->tx_use_avx2 ? "avx2 " : "", + dev->data->port_id); +- dev->tx_pkt_burst = use_avx2 ? ++ dev->tx_pkt_burst = ad->tx_use_avx2 ? + ice_xmit_pkts_vec_avx2 : + ice_xmit_pkts_vec; + } +diff --git a/dpdk/drivers/net/ice/ice_rxtx.h b/dpdk/drivers/net/ice/ice_rxtx.h +index adfae016a9..a74c4b3492 100644 +--- a/dpdk/drivers/net/ice/ice_rxtx.h ++++ b/dpdk/drivers/net/ice/ice_rxtx.h +@@ -86,8 +86,9 @@ struct ice_rx_queue { + bool rx_deferred_start; /* don't start this queue in dev start */ + uint8_t proto_xtr; /* Protocol extraction from flexible descriptor */ + uint64_t xtr_ol_flag; /* Protocol extraction offload flag */ +- ice_rxd_to_pkt_fields_t rxd_to_pkt_fields; /* handle FlexiMD by RXDID */ ++ uint32_t rxdid; /* Receive Flex Descriptor profile ID */ + ice_rx_release_mbufs_t rx_rel_mbufs; ++ const struct rte_memzone *mz; + }; + + struct ice_tx_entry { +@@ -132,6 +133,7 @@ struct ice_tx_queue { + bool tx_deferred_start; /* don't start this queue in dev start */ + bool q_set; /* indicate if tx queue has been configured */ + ice_tx_release_mbufs_t tx_rel_mbufs; ++ const struct rte_memzone *mz; + }; + + /* Offload features */ +@@ -266,8 +268,8 @@ int ice_tx_done_cleanup(void *txq, uint32_t free_cnt); + + #define FDIR_PARSING_ENABLE_PER_QUEUE(ad, on) do { \ + int i; \ +- for (i = 0; i < (ad)->eth_dev->data->nb_rx_queues; i++) { \ +- struct ice_rx_queue *rxq = (ad)->eth_dev->data->rx_queues[i]; \ ++ for (i = 0; i < (ad)->pf.dev_data->nb_rx_queues; i++) { \ ++ struct ice_rx_queue *rxq = (ad)->pf.dev_data->rx_queues[i]; \ + if (!rxq) \ + continue; \ + rxq->fdir_enabled = on; \ +diff --git a/dpdk/drivers/net/ice/ice_rxtx_common_avx.h b/dpdk/drivers/net/ice/ice_rxtx_common_avx.h +new file mode 100644 +index 0000000000..81e0db5dd3 +--- /dev/null ++++ b/dpdk/drivers/net/ice/ice_rxtx_common_avx.h +@@ -0,0 +1,213 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2019 Intel Corporation ++ */ ++ ++#ifndef _ICE_RXTX_COMMON_AVX_H_ ++#define _ICE_RXTX_COMMON_AVX_H_ ++ 
++#include "ice_rxtx.h" ++ ++#ifndef __INTEL_COMPILER ++#pragma GCC diagnostic ignored "-Wcast-qual" ++#endif ++ ++#ifdef __AVX2__ ++static __rte_always_inline void ++ice_rxq_rearm_common(struct ice_rx_queue *rxq, __rte_unused bool avx512) ++{ ++ int i; ++ uint16_t rx_id; ++ volatile union ice_rx_flex_desc *rxdp; ++ struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start]; ++ ++ rxdp = rxq->rx_ring + rxq->rxrearm_start; ++ ++ /* Pull 'n' more MBUFs into the software ring */ ++ if (rte_mempool_get_bulk(rxq->mp, ++ (void *)rxep, ++ ICE_RXQ_REARM_THRESH) < 0) { ++ if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >= ++ rxq->nb_rx_desc) { ++ __m128i dma_addr0; ++ ++ dma_addr0 = _mm_setzero_si128(); ++ for (i = 0; i < ICE_DESCS_PER_LOOP; i++) { ++ rxep[i].mbuf = &rxq->fake_mbuf; ++ _mm_store_si128((__m128i *)&rxdp[i].read, ++ dma_addr0); ++ } ++ } ++ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += ++ ICE_RXQ_REARM_THRESH; ++ return; ++ } ++ ++#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC ++ struct rte_mbuf *mb0, *mb1; ++ __m128i dma_addr0, dma_addr1; ++ __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, ++ RTE_PKTMBUF_HEADROOM); ++ /* Initialize the mbufs in vector, process 2 mbufs in one loop */ ++ for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) { ++ __m128i vaddr0, vaddr1; ++ ++ mb0 = rxep[0].mbuf; ++ mb1 = rxep[1].mbuf; ++ ++ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ ++ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != ++ offsetof(struct rte_mbuf, buf_addr) + 8); ++ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); ++ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); ++ ++ /* convert pa to dma_addr hdr/data */ ++ dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0); ++ dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1); ++ ++ /* add headroom to pa values */ ++ dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room); ++ dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room); ++ ++ /* flush desc with pa dma_addr */ ++ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0); ++ _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1); ++ } ++#else ++#ifdef __AVX512VL__ ++ if (avx512) { ++ struct rte_mbuf *mb0, *mb1, *mb2, *mb3; ++ struct rte_mbuf *mb4, *mb5, *mb6, *mb7; ++ __m512i dma_addr0_3, dma_addr4_7; ++ __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM); ++ /* Initialize the mbufs in vector, process 8 mbufs in one loop */ ++ for (i = 0; i < ICE_RXQ_REARM_THRESH; ++ i += 8, rxep += 8, rxdp += 8) { ++ __m128i vaddr0, vaddr1, vaddr2, vaddr3; ++ __m128i vaddr4, vaddr5, vaddr6, vaddr7; ++ __m256i vaddr0_1, vaddr2_3; ++ __m256i vaddr4_5, vaddr6_7; ++ __m512i vaddr0_3, vaddr4_7; ++ ++ mb0 = rxep[0].mbuf; ++ mb1 = rxep[1].mbuf; ++ mb2 = rxep[2].mbuf; ++ mb3 = rxep[3].mbuf; ++ mb4 = rxep[4].mbuf; ++ mb5 = rxep[5].mbuf; ++ mb6 = rxep[6].mbuf; ++ mb7 = rxep[7].mbuf; ++ ++ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ ++ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != ++ offsetof(struct rte_mbuf, buf_addr) + 8); ++ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); ++ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); ++ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr); ++ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr); ++ vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr); ++ vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr); ++ vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr); ++ vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr); ++ ++ /** ++ * merge 0 & 1, by casting 0 to 256-bit and inserting 1 ++ * into the high lanes. 
Similarly for 2 & 3, and so on. ++ */ ++ vaddr0_1 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), ++ vaddr1, 1); ++ vaddr2_3 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2), ++ vaddr3, 1); ++ vaddr4_5 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4), ++ vaddr5, 1); ++ vaddr6_7 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6), ++ vaddr7, 1); ++ vaddr0_3 = ++ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1), ++ vaddr2_3, 1); ++ vaddr4_7 = ++ _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5), ++ vaddr6_7, 1); ++ ++ /* convert pa to dma_addr hdr/data */ ++ dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3); ++ dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7); ++ ++ /* add headroom to pa values */ ++ dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room); ++ dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room); ++ ++ /* flush desc with pa dma_addr */ ++ _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3); ++ _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7); ++ } ++ } else ++#endif /* __AVX512VL__ */ ++ { ++ struct rte_mbuf *mb0, *mb1, *mb2, *mb3; ++ __m256i dma_addr0_1, dma_addr2_3; ++ __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM); ++ /* Initialize the mbufs in vector, process 4 mbufs in one loop */ ++ for (i = 0; i < ICE_RXQ_REARM_THRESH; ++ i += 4, rxep += 4, rxdp += 4) { ++ __m128i vaddr0, vaddr1, vaddr2, vaddr3; ++ __m256i vaddr0_1, vaddr2_3; ++ ++ mb0 = rxep[0].mbuf; ++ mb1 = rxep[1].mbuf; ++ mb2 = rxep[2].mbuf; ++ mb3 = rxep[3].mbuf; ++ ++ /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ ++ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != ++ offsetof(struct rte_mbuf, buf_addr) + 8); ++ vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); ++ vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); ++ vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr); ++ vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr); ++ ++ /** ++ * merge 0 & 1, by casting 0 to 256-bit and inserting 1 ++ * into the high lanes. Similarly for 2 & 3 ++ */ ++ vaddr0_1 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), ++ vaddr1, 1); ++ vaddr2_3 = ++ _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2), ++ vaddr3, 1); ++ ++ /* convert pa to dma_addr hdr/data */ ++ dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1); ++ dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3); ++ ++ /* add headroom to pa values */ ++ dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room); ++ dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room); ++ ++ /* flush desc with pa dma_addr */ ++ _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1); ++ _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3); ++ } ++ } ++ ++#endif ++ ++ rxq->rxrearm_start += ICE_RXQ_REARM_THRESH; ++ if (rxq->rxrearm_start >= rxq->nb_rx_desc) ++ rxq->rxrearm_start = 0; ++ ++ rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH; ++ ++ rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? 
++ (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); ++ ++ /* Update the tail pointer on the NIC */ ++ ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id); ++} ++#endif /* __AVX2__ */ ++ ++#endif /* _ICE_RXTX_COMMON_AVX_H_ */ +diff --git a/dpdk/drivers/net/ice/ice_rxtx_vec_avx2.c b/dpdk/drivers/net/ice/ice_rxtx_vec_avx2.c +index 7838e17787..581d3d348f 100644 +--- a/dpdk/drivers/net/ice/ice_rxtx_vec_avx2.c ++++ b/dpdk/drivers/net/ice/ice_rxtx_vec_avx2.c +@@ -3,6 +3,7 @@ + */ + + #include "ice_rxtx_vec_common.h" ++#include "ice_rxtx_common_avx.h" + + #include + +@@ -10,126 +11,10 @@ + #pragma GCC diagnostic ignored "-Wcast-qual" + #endif + +-static inline void ++static __rte_always_inline void + ice_rxq_rearm(struct ice_rx_queue *rxq) + { +- int i; +- uint16_t rx_id; +- volatile union ice_rx_flex_desc *rxdp; +- struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start]; +- +- rxdp = rxq->rx_ring + rxq->rxrearm_start; +- +- /* Pull 'n' more MBUFs into the software ring */ +- if (rte_mempool_get_bulk(rxq->mp, +- (void *)rxep, +- ICE_RXQ_REARM_THRESH) < 0) { +- if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >= +- rxq->nb_rx_desc) { +- __m128i dma_addr0; +- +- dma_addr0 = _mm_setzero_si128(); +- for (i = 0; i < ICE_DESCS_PER_LOOP; i++) { +- rxep[i].mbuf = &rxq->fake_mbuf; +- _mm_store_si128((__m128i *)&rxdp[i].read, +- dma_addr0); +- } +- } +- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += +- ICE_RXQ_REARM_THRESH; +- return; +- } +- +-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC +- struct rte_mbuf *mb0, *mb1; +- __m128i dma_addr0, dma_addr1; +- __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, +- RTE_PKTMBUF_HEADROOM); +- /* Initialize the mbufs in vector, process 2 mbufs in one loop */ +- for (i = 0; i < ICE_RXQ_REARM_THRESH; i += 2, rxep += 2) { +- __m128i vaddr0, vaddr1; +- +- mb0 = rxep[0].mbuf; +- mb1 = rxep[1].mbuf; +- +- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ +- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != +- offsetof(struct rte_mbuf, buf_addr) + 8); +- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); +- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); +- +- /* convert pa to dma_addr hdr/data */ +- dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0); +- dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1); +- +- /* add headroom to pa values */ +- dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room); +- dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room); +- +- /* flush desc with pa dma_addr */ +- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0); +- _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1); +- } +-#else +- struct rte_mbuf *mb0, *mb1, *mb2, *mb3; +- __m256i dma_addr0_1, dma_addr2_3; +- __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM); +- /* Initialize the mbufs in vector, process 4 mbufs in one loop */ +- for (i = 0; i < ICE_RXQ_REARM_THRESH; +- i += 4, rxep += 4, rxdp += 4) { +- __m128i vaddr0, vaddr1, vaddr2, vaddr3; +- __m256i vaddr0_1, vaddr2_3; +- +- mb0 = rxep[0].mbuf; +- mb1 = rxep[1].mbuf; +- mb2 = rxep[2].mbuf; +- mb3 = rxep[3].mbuf; +- +- /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */ +- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) != +- offsetof(struct rte_mbuf, buf_addr) + 8); +- vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); +- vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); +- vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr); +- vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr); +- +- /** +- * merge 0 & 1, by casting 0 to 256-bit and inserting 1 +- * into the high lanes. 
Similarly for 2 & 3 +- */ +- vaddr0_1 = +- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), +- vaddr1, 1); +- vaddr2_3 = +- _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2), +- vaddr3, 1); +- +- /* convert pa to dma_addr hdr/data */ +- dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1); +- dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3); +- +- /* add headroom to pa values */ +- dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room); +- dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room); +- +- /* flush desc with pa dma_addr */ +- _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1); +- _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3); +- } +- +-#endif +- +- rxq->rxrearm_start += ICE_RXQ_REARM_THRESH; +- if (rxq->rxrearm_start >= rxq->nb_rx_desc) +- rxq->rxrearm_start = 0; +- +- rxq->rxrearm_nb -= ICE_RXQ_REARM_THRESH; +- +- rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? +- (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); +- +- /* Update the tail pointer on the NIC */ +- ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id); ++ return ice_rxq_rearm_common(rxq, false); + } + + static inline __m256i +@@ -582,7 +467,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, + * needs to load 2nd 16B of each desc for RSS hash parsing, + * will cause performance drop to get into this context. + */ +- if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads & ++ if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_RSS_HASH) { + /* load bottom half of every 32B desc */ + const __m128i raw_desc_bh7 = +diff --git a/dpdk/drivers/net/ice/ice_rxtx_vec_avx512.c b/dpdk/drivers/net/ice/ice_rxtx_vec_avx512.c +index fd5d724329..2c0881a01e 100644 +--- a/dpdk/drivers/net/ice/ice_rxtx_vec_avx512.c ++++ b/dpdk/drivers/net/ice/ice_rxtx_vec_avx512.c +@@ -3,6 +3,7 @@ + */ + + #include "ice_rxtx_vec_common.h" ++#include "ice_rxtx_common_avx.h" + + #include + +@@ -12,7 +13,7 @@ + + #define ICE_DESCS_PER_LOOP_AVX 8 + +-static inline void ++static __rte_always_inline void + ice_rxq_rearm(struct ice_rx_queue *rxq) + { + int i; +@@ -24,6 +25,9 @@ ice_rxq_rearm(struct ice_rx_queue *rxq) + + rxdp = rxq->rx_ring + rxq->rxrearm_start; + ++ if (unlikely(!cache)) ++ return ice_rxq_rearm_common(rxq, true); ++ + /* We need to pull 'n' more MBUFs into the software ring */ + if (cache->len < ICE_RXQ_REARM_THRESH) { + uint32_t req = ICE_RXQ_REARM_THRESH + (cache->size - +@@ -564,7 +568,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq, + * needs to load 2nd 16B of each desc for RSS hash parsing, + * will cause performance drop to get into this context. 
+ */ +- if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads & ++ if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_RSS_HASH) { + /* load bottom half of every 32B desc */ + const __m128i raw_desc_bh7 = +diff --git a/dpdk/drivers/net/ice/ice_rxtx_vec_common.h b/dpdk/drivers/net/ice/ice_rxtx_vec_common.h +index c09ac7f667..bd2450ad5f 100644 +--- a/dpdk/drivers/net/ice/ice_rxtx_vec_common.h ++++ b/dpdk/drivers/net/ice/ice_rxtx_vec_common.h +@@ -7,6 +7,10 @@ + + #include "ice_rxtx.h" + ++#ifndef __INTEL_COMPILER ++#pragma GCC diagnostic ignored "-Wcast-qual" ++#endif ++ + static inline uint16_t + ice_rx_reassemble_packets(struct ice_rx_queue *rxq, struct rte_mbuf **rx_bufs, + uint16_t nb_bufs, uint8_t *split_flags) +@@ -190,8 +194,8 @@ _ice_tx_queue_release_mbufs_vec(struct ice_tx_queue *txq) + */ + i = txq->tx_next_dd - txq->tx_rs_thresh + 1; + +-#ifdef CC_AVX512_SUPPORT +- struct rte_eth_dev *dev = txq->vsi->adapter->eth_dev; ++#ifdef __AVX512VL__ ++ struct rte_eth_dev *dev = &rte_eth_devices[txq->vsi->adapter->pf.dev_data->port_id]; + + if (dev->tx_pkt_burst == ice_xmit_pkts_vec_avx512) { + struct ice_vec_tx_entry *swr = (void *)txq->sw_ring; +diff --git a/dpdk/drivers/net/ice/ice_rxtx_vec_sse.c b/dpdk/drivers/net/ice/ice_rxtx_vec_sse.c +index 87e0c3db2e..d1cc695c11 100644 +--- a/dpdk/drivers/net/ice/ice_rxtx_vec_sse.c ++++ b/dpdk/drivers/net/ice/ice_rxtx_vec_sse.c +@@ -202,7 +202,7 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4], + __m128i l3_l4_mask = _mm_set_epi32(~0x6, ~0x6, ~0x6, ~0x6); + __m128i l3_l4_flags = _mm_and_si128(flags, l3_l4_mask); + flags = _mm_or_si128(l3_l4_flags, l4_outer_flags); +- /* we need to mask out the reduntant bits introduced by RSS or ++ /* we need to mask out the redundant bits introduced by RSS or + * VLAN fields. + */ + flags = _mm_and_si128(flags, cksum_mask); +@@ -416,7 +416,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, + /* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */ + mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); + /* Read desc statuses backwards to avoid race condition */ +- /* A.1 load 4 pkts desc */ ++ /* A.1 load desc[3] */ + descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); + +@@ -428,9 +428,9 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, + mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]); + #endif + ++ /* A.1 load desc[2-0] */ + descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); +- /* B.1 load 2 mbuf point */ + descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); + descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); +@@ -478,7 +478,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, + * needs to load 2nd 16B of each desc for RSS hash parsing, + * will cause performance drop to get into this context. + */ +- if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads & ++ if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_RSS_HASH) { + /* load bottom half of every 32B desc */ + const __m128i raw_desc_bh3 = +@@ -545,7 +545,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, + /* and with mask to extract bits, flipping 1-0 */ + __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); + /* the staterr values are not in order, as the count +- * count of dd bits doesn't care. However, for end of ++ * of dd bits doesn't care. 
However, for end of + * packet tracking, we do care, so shuffle. This also + * compresses the 32-bit values to 8-bit + */ +@@ -566,7 +566,7 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, + _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, + pkt_mb0); + ice_rx_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl); +- /* C.4 calc avaialbe number of desc */ ++ /* C.4 calc available number of desc */ + var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); + nb_pkts_recd += var; + if (likely(var != ICE_DESCS_PER_LOOP)) +diff --git a/dpdk/drivers/net/ice/ice_switch_filter.c b/dpdk/drivers/net/ice/ice_switch_filter.c +index 8cba6eb7b1..ea5ced60a7 100644 +--- a/dpdk/drivers/net/ice/ice_switch_filter.c ++++ b/dpdk/drivers/net/ice/ice_switch_filter.c +@@ -142,6 +142,28 @@ static struct ice_flow_parser ice_switch_dist_parser_comms; + static struct ice_flow_parser ice_switch_perm_parser_os; + static struct ice_flow_parser ice_switch_perm_parser_comms; + ++enum ice_sw_fltr_status { ++ ICE_SW_FLTR_ADDED, ++ ICE_SW_FLTR_RMV_FAILED_ON_RIDRECT, ++ ICE_SW_FLTR_ADD_FAILED_ON_RIDRECT, ++}; ++ ++struct ice_switch_filter_conf { ++ enum ice_sw_fltr_status fltr_status; ++ ++ struct ice_rule_query_data sw_query_data; ++ ++ /* ++ * The lookup elements and rule info are saved here when filter creation ++ * succeeds. ++ */ ++ uint16_t vsi_num; ++ uint16_t lkups_num; ++ struct ice_adv_lkup_elem *lkups; ++ struct ice_adv_rule_info rule_info; ++}; ++ ++ + static struct + ice_pattern_match_item ice_switch_pattern_dist_os[] = { + {pattern_ethertype, +@@ -396,7 +418,7 @@ ice_switch_create(struct ice_adapter *ad, + struct ice_pf *pf = &ad->pf; + struct ice_hw *hw = ICE_PF_TO_HW(pf); + struct ice_rule_query_data rule_added = {0}; +- struct ice_rule_query_data *filter_ptr; ++ struct ice_switch_filter_conf *filter_conf_ptr; + struct ice_adv_lkup_elem *list = + ((struct sw_meta *)meta)->list; + uint16_t lkups_cnt = +@@ -416,28 +438,48 @@ ice_switch_create(struct ice_adapter *ad, + "lookup list should not be NULL"); + goto error; + } ++ ++ if (ice_dcf_adminq_need_retry(ad)) { ++ rte_flow_error_set(error, EAGAIN, ++ RTE_FLOW_ERROR_TYPE_ITEM, NULL, ++ "DCF is not on"); ++ goto error; ++ } ++ + ret = ice_add_adv_rule(hw, list, lkups_cnt, rule_info, &rule_added); + if (!ret) { +- filter_ptr = rte_zmalloc("ice_switch_filter", +- sizeof(struct ice_rule_query_data), 0); +- if (!filter_ptr) { ++ filter_conf_ptr = rte_zmalloc("ice_switch_filter", ++ sizeof(struct ice_switch_filter_conf), 0); ++ if (!filter_conf_ptr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "No memory for ice_switch_filter"); + goto error; + } +- flow->rule = filter_ptr; +- rte_memcpy(filter_ptr, +- &rule_added, +- sizeof(struct ice_rule_query_data)); ++ ++ filter_conf_ptr->sw_query_data = rule_added; ++ ++ filter_conf_ptr->vsi_num = ++ ice_get_hw_vsi_num(hw, rule_info->sw_act.vsi_handle); ++ filter_conf_ptr->lkups = list; ++ filter_conf_ptr->lkups_num = lkups_cnt; ++ filter_conf_ptr->rule_info = *rule_info; ++ ++ filter_conf_ptr->fltr_status = ICE_SW_FLTR_ADDED; ++ ++ flow->rule = filter_conf_ptr; + } else { +- rte_flow_error_set(error, EINVAL, ++ if (ice_dcf_adminq_need_retry(ad)) ++ ret = -EAGAIN; ++ else ++ ret = -EINVAL; ++ ++ rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "switch filter create flow fail"); + goto error; + } + +- rte_free(list); + rte_free(meta); + return 0; + +@@ -448,6 +490,18 @@ ice_switch_create(struct ice_adapter *ad, + return -rte_errno; + } + 
++static inline void ++ice_switch_filter_rule_free(struct rte_flow *flow) ++{ ++ struct ice_switch_filter_conf *filter_conf_ptr = ++ (struct ice_switch_filter_conf *)flow->rule; ++ ++ if (filter_conf_ptr) ++ rte_free(filter_conf_ptr->lkups); ++ ++ rte_free(filter_conf_ptr); ++} ++ + static int + ice_switch_destroy(struct ice_adapter *ad, + struct rte_flow *flow, +@@ -455,37 +509,47 @@ ice_switch_destroy(struct ice_adapter *ad, + { + struct ice_hw *hw = &ad->hw; + int ret; +- struct ice_rule_query_data *filter_ptr; ++ struct ice_switch_filter_conf *filter_conf_ptr; + +- filter_ptr = (struct ice_rule_query_data *) ++ filter_conf_ptr = (struct ice_switch_filter_conf *) + flow->rule; + +- if (!filter_ptr) { ++ if (!filter_conf_ptr || ++ filter_conf_ptr->fltr_status == ICE_SW_FLTR_ADD_FAILED_ON_RIDRECT) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "no such flow" + " create by switch filter"); ++ ++ ice_switch_filter_rule_free(flow); ++ + return -rte_errno; + } + +- ret = ice_rem_adv_rule_by_id(hw, filter_ptr); ++ if (ice_dcf_adminq_need_retry(ad)) { ++ rte_flow_error_set(error, EAGAIN, ++ RTE_FLOW_ERROR_TYPE_ITEM, NULL, ++ "DCF is not on"); ++ return -rte_errno; ++ } ++ ++ ret = ice_rem_adv_rule_by_id(hw, &filter_conf_ptr->sw_query_data); + if (ret) { +- rte_flow_error_set(error, EINVAL, ++ if (ice_dcf_adminq_need_retry(ad)) ++ ret = -EAGAIN; ++ else ++ ret = -EINVAL; ++ ++ rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "fail to destroy switch filter rule"); + return -rte_errno; + } + +- rte_free(filter_ptr); ++ ice_switch_filter_rule_free(flow); + return ret; + } + +-static void +-ice_switch_filter_rule_free(struct rte_flow *flow) +-{ +- rte_free(flow->rule); +-} +- + static uint64_t + ice_switch_inset_get(const struct rte_flow_item pattern[], + struct rte_flow_error *error, +@@ -1489,7 +1553,7 @@ ice_switch_parse_action(struct ice_pf *pf, + struct ice_adv_rule_info *rule_info) + { + struct ice_vsi *vsi = pf->main_vsi; +- struct rte_eth_dev *dev = pf->adapter->eth_dev; ++ struct rte_eth_dev_data *dev_data = pf->adapter->pf.dev_data; + const struct rte_flow_action_queue *act_q; + const struct rte_flow_action_rss *act_qgrop; + uint16_t base_queue, i; +@@ -1520,7 +1584,7 @@ ice_switch_parse_action(struct ice_pf *pf, + goto error; + if ((act_qgrop->queue[0] + + act_qgrop->queue_num) > +- dev->data->nb_rx_queues) ++ dev_data->nb_rx_queues) + goto error1; + for (i = 0; i < act_qgrop->queue_num - 1; i++) + if (act_qgrop->queue[i + 1] != +@@ -1531,7 +1595,7 @@ ice_switch_parse_action(struct ice_pf *pf, + break; + case RTE_FLOW_ACTION_TYPE_QUEUE: + act_q = action->conf; +- if (act_q->index >= dev->data->nb_rx_queues) ++ if (act_q->index >= dev_data->nb_rx_queues) + goto error; + rule_info->sw_act.fltr_act = + ICE_FWD_TO_Q; +@@ -1775,8 +1839,12 @@ ice_switch_redirect(struct ice_adapter *ad, + struct rte_flow *flow, + struct ice_flow_redirect *rd) + { +- struct ice_rule_query_data *rdata = flow->rule; ++ struct ice_rule_query_data *rdata; ++ struct ice_switch_filter_conf *filter_conf_ptr = ++ (struct ice_switch_filter_conf *)flow->rule; ++ struct ice_rule_query_data added_rdata = { 0 }; + struct ice_adv_fltr_mgmt_list_entry *list_itr; ++ struct ice_adv_lkup_elem *lkups_ref = NULL; + struct ice_adv_lkup_elem *lkups_dp = NULL; + struct LIST_HEAD_TYPE *list_head; + struct ice_adv_rule_info rinfo; +@@ -1785,6 +1853,8 @@ ice_switch_redirect(struct ice_adapter *ad, + uint16_t lkups_cnt; + int ret; + ++ rdata = &filter_conf_ptr->sw_query_data; ++ + if 
(rdata->vsi_handle != rd->vsi_handle) + return 0; + +@@ -1795,59 +1865,117 @@ ice_switch_redirect(struct ice_adapter *ad, + if (rd->type != ICE_FLOW_REDIRECT_VSI) + return -ENOTSUP; + +- list_head = &sw->recp_list[rdata->rid].filt_rules; +- LIST_FOR_EACH_ENTRY(list_itr, list_head, ice_adv_fltr_mgmt_list_entry, +- list_entry) { +- rinfo = list_itr->rule_info; +- if ((rinfo.fltr_rule_id == rdata->rule_id && +- rinfo.sw_act.fltr_act == ICE_FWD_TO_VSI && +- rinfo.sw_act.vsi_handle == rd->vsi_handle) || +- (rinfo.fltr_rule_id == rdata->rule_id && +- rinfo.sw_act.fltr_act == ICE_FWD_TO_VSI_LIST)){ +- lkups_cnt = list_itr->lkups_cnt; +- lkups_dp = (struct ice_adv_lkup_elem *) +- ice_memdup(hw, list_itr->lkups, +- sizeof(*list_itr->lkups) * +- lkups_cnt, ICE_NONDMA_TO_NONDMA); +- +- if (!lkups_dp) { +- PMD_DRV_LOG(ERR, "Failed to allocate memory."); +- return -EINVAL; +- } ++ switch (filter_conf_ptr->fltr_status) { ++ case ICE_SW_FLTR_ADDED: ++ list_head = &sw->recp_list[rdata->rid].filt_rules; ++ LIST_FOR_EACH_ENTRY(list_itr, list_head, ++ ice_adv_fltr_mgmt_list_entry, ++ list_entry) { ++ rinfo = list_itr->rule_info; ++ if ((rinfo.fltr_rule_id == rdata->rule_id && ++ rinfo.sw_act.fltr_act == ICE_FWD_TO_VSI && ++ rinfo.sw_act.vsi_handle == rd->vsi_handle) || ++ (rinfo.fltr_rule_id == rdata->rule_id && ++ rinfo.sw_act.fltr_act == ICE_FWD_TO_VSI_LIST)){ ++ lkups_cnt = list_itr->lkups_cnt; ++ ++ lkups_dp = (struct ice_adv_lkup_elem *) ++ ice_memdup(hw, list_itr->lkups, ++ sizeof(*list_itr->lkups) * ++ lkups_cnt, ++ ICE_NONDMA_TO_NONDMA); ++ if (!lkups_dp) { ++ PMD_DRV_LOG(ERR, ++ "Failed to allocate memory."); ++ return -EINVAL; ++ } ++ lkups_ref = lkups_dp; + +- if (rinfo.sw_act.fltr_act == ICE_FWD_TO_VSI_LIST) { +- rinfo.sw_act.vsi_handle = rd->vsi_handle; +- rinfo.sw_act.fltr_act = ICE_FWD_TO_VSI; ++ if (rinfo.sw_act.fltr_act == ++ ICE_FWD_TO_VSI_LIST) { ++ rinfo.sw_act.vsi_handle = ++ rd->vsi_handle; ++ rinfo.sw_act.fltr_act = ICE_FWD_TO_VSI; ++ } ++ break; + } +- break; + } +- } + +- if (!lkups_dp) ++ if (!lkups_ref) ++ return -EINVAL; ++ ++ goto rmv_rule; ++ case ICE_SW_FLTR_RMV_FAILED_ON_RIDRECT: ++ /* Recover VSI context */ ++ hw->vsi_ctx[rd->vsi_handle]->vsi_num = filter_conf_ptr->vsi_num; ++ rinfo = filter_conf_ptr->rule_info; ++ lkups_cnt = filter_conf_ptr->lkups_num; ++ lkups_ref = filter_conf_ptr->lkups; ++ ++ if (rinfo.sw_act.fltr_act == ICE_FWD_TO_VSI_LIST) { ++ rinfo.sw_act.vsi_handle = rd->vsi_handle; ++ rinfo.sw_act.fltr_act = ICE_FWD_TO_VSI; ++ } ++ ++ goto rmv_rule; ++ case ICE_SW_FLTR_ADD_FAILED_ON_RIDRECT: ++ rinfo = filter_conf_ptr->rule_info; ++ lkups_cnt = filter_conf_ptr->lkups_num; ++ lkups_ref = filter_conf_ptr->lkups; ++ ++ goto add_rule; ++ default: + return -EINVAL; ++ } ++ ++rmv_rule: ++ if (ice_dcf_adminq_need_retry(ad)) { ++ PMD_DRV_LOG(WARNING, "DCF is not on"); ++ ret = -EAGAIN; ++ goto out; ++ } + + /* Remove the old rule */ +- ret = ice_rem_adv_rule(hw, list_itr->lkups, +- lkups_cnt, &rinfo); ++ ret = ice_rem_adv_rule(hw, lkups_ref, lkups_cnt, &rinfo); + if (ret) { + PMD_DRV_LOG(ERR, "Failed to delete the old rule %d", + rdata->rule_id); ++ filter_conf_ptr->fltr_status = ++ ICE_SW_FLTR_RMV_FAILED_ON_RIDRECT; + ret = -EINVAL; + goto out; + } + ++add_rule: ++ if (ice_dcf_adminq_need_retry(ad)) { ++ PMD_DRV_LOG(WARNING, "DCF is not on"); ++ ret = -EAGAIN; ++ goto out; ++ } ++ + /* Update VSI context */ + hw->vsi_ctx[rd->vsi_handle]->vsi_num = rd->new_vsi_num; + + /* Replay the rule */ +- ret = ice_add_adv_rule(hw, lkups_dp, lkups_cnt, +- &rinfo, rdata); ++ ret = 
ice_add_adv_rule(hw, lkups_ref, lkups_cnt, ++ &rinfo, &added_rdata); + if (ret) { + PMD_DRV_LOG(ERR, "Failed to replay the rule"); ++ filter_conf_ptr->fltr_status = ++ ICE_SW_FLTR_ADD_FAILED_ON_RIDRECT; + ret = -EINVAL; ++ } else { ++ filter_conf_ptr->sw_query_data = added_rdata; ++ /* Save VSI number for failure recover */ ++ filter_conf_ptr->vsi_num = rd->new_vsi_num; ++ filter_conf_ptr->fltr_status = ICE_SW_FLTR_ADDED; + } + + out: ++ if (ret == -EINVAL) ++ if (ice_dcf_adminq_need_retry(ad)) ++ ret = -EAGAIN; ++ + ice_free(hw, lkups_dp); + return ret; + } +diff --git a/dpdk/drivers/net/ice/meson.build b/dpdk/drivers/net/ice/meson.build +index 7b291269dc..4638011cbc 100644 +--- a/dpdk/drivers/net/ice/meson.build ++++ b/dpdk/drivers/net/ice/meson.build +@@ -24,8 +24,10 @@ if arch_subdir == 'x86' + # a. we have AVX supported in minimum instruction set baseline + # b. it's not minimum instruction set, but supported by compiler + if cc.get_define('__AVX2__', args: machine_args) != '' ++ cflags += ['-DCC_AVX2_SUPPORT'] + sources += files('ice_rxtx_vec_avx2.c') + elif cc.has_argument('-mavx2') ++ cflags += ['-DCC_AVX2_SUPPORT'] + ice_avx2_lib = static_library('ice_avx2_lib', + 'ice_rxtx_vec_avx2.c', + dependencies: [static_rte_ethdev, +diff --git a/dpdk/drivers/net/igc/igc_ethdev.c b/dpdk/drivers/net/igc/igc_ethdev.c +index 802212fc57..706a847a93 100644 +--- a/dpdk/drivers/net/igc/igc_ethdev.c ++++ b/dpdk/drivers/net/igc/igc_ethdev.c +@@ -341,6 +341,9 @@ eth_igc_configure(struct rte_eth_dev *dev) + + PMD_INIT_FUNC_TRACE(); + ++ if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) ++ dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; ++ + ret = igc_check_mq_mode(dev); + if (ret != 0) + return ret; +@@ -986,15 +989,20 @@ eth_igc_start(struct rte_eth_dev *dev) + hw->mac.autoneg = 1; + } else { + int num_speeds = 0; +- bool autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0; + +- /* Reset */ ++ if (*speeds & ETH_LINK_SPEED_FIXED) { ++ PMD_DRV_LOG(ERR, ++ "Force speed mode currently not supported"); ++ igc_dev_clear_queues(dev); ++ return -EINVAL; ++ } ++ + hw->phy.autoneg_advertised = 0; ++ hw->mac.autoneg = 1; + + if (*speeds & ~(ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | + ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M | +- ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | +- ETH_LINK_SPEED_FIXED)) { ++ ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G)) { + num_speeds = -1; + goto error_invalid_config; + } +@@ -1022,19 +1030,8 @@ eth_igc_start(struct rte_eth_dev *dev) + hw->phy.autoneg_advertised |= ADVERTISE_2500_FULL; + num_speeds++; + } +- if (num_speeds == 0 || (!autoneg && num_speeds > 1)) ++ if (num_speeds == 0) + goto error_invalid_config; +- +- /* Set/reset the mac.autoneg based on the link speed, +- * fixed or not +- */ +- if (!autoneg) { +- hw->mac.autoneg = 0; +- hw->mac.forced_speed_duplex = +- hw->phy.autoneg_advertised; +- } else { +- hw->mac.autoneg = 1; +- } + } + + igc_setup_link(hw); +@@ -1240,8 +1237,15 @@ eth_igc_dev_init(struct rte_eth_dev *dev) + * has already done this work. Only check we don't need a different + * RX function. 
+ */ +- if (rte_eal_process_type() != RTE_PROC_PRIMARY) ++ if (rte_eal_process_type() != RTE_PROC_PRIMARY) { ++ dev->rx_pkt_burst = igc_recv_pkts; ++ if (dev->data->scattered_rx) ++ dev->rx_pkt_burst = igc_recv_scattered_pkts; ++ ++ dev->tx_pkt_burst = igc_xmit_pkts; ++ dev->tx_pkt_prepare = eth_igc_prep_pkts; + return 0; ++ } + + rte_eth_copy_pci_info(dev, pci_dev); + dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; +@@ -1473,9 +1477,11 @@ eth_igc_fw_version_get(struct rte_eth_dev *dev, char *fw_version, + fw.eep_build); + } + } ++ if (ret < 0) ++ return -EINVAL; + + ret += 1; /* add the size of '\0' */ +- if (fw_size < (u32)ret) ++ if (fw_size < (size_t)ret) + return ret; + else + return 0; +@@ -1590,12 +1596,14 @@ eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) + return -EINVAL; + + /* +- * refuse mtu that requires the support of scattered packets when +- * this feature has not been enabled before. ++ * If device is started, refuse mtu that requires the support of ++ * scattered packets when this feature has not been enabled before. + */ +- if (!dev->data->scattered_rx && +- frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM) ++ if (dev->data->dev_started && !dev->data->scattered_rx && ++ frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM) { ++ PMD_INIT_LOG(ERR, "Stop port first."); + return -EINVAL; ++ } + + rctl = IGC_READ_REG(hw, IGC_RCTL); + +@@ -1899,8 +1907,7 @@ eth_igc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats) + + /* Rx Errors */ + rte_stats->imissed = stats->mpc; +- rte_stats->ierrors = stats->crcerrs + +- stats->rlec + stats->ruc + stats->roc + ++ rte_stats->ierrors = stats->crcerrs + stats->rlec + + stats->rxerrc + stats->algnerrc; + + /* Tx Errors */ +diff --git a/dpdk/drivers/net/igc/igc_ethdev.h b/dpdk/drivers/net/igc/igc_ethdev.h +index a09debfb40..6f658a0e83 100644 +--- a/dpdk/drivers/net/igc/igc_ethdev.h ++++ b/dpdk/drivers/net/igc/igc_ethdev.h +@@ -67,7 +67,8 @@ extern "C" { + DEV_RX_OFFLOAD_SCTP_CKSUM | \ + DEV_RX_OFFLOAD_JUMBO_FRAME | \ + DEV_RX_OFFLOAD_KEEP_CRC | \ +- DEV_RX_OFFLOAD_SCATTER) ++ DEV_RX_OFFLOAD_SCATTER | \ ++ DEV_RX_OFFLOAD_RSS_HASH) + + #define IGC_TX_OFFLOAD_ALL ( \ + DEV_TX_OFFLOAD_VLAN_INSERT | \ +diff --git a/dpdk/drivers/net/igc/igc_filter.c b/dpdk/drivers/net/igc/igc_filter.c +index 836621d4c1..7b086e7e01 100644 +--- a/dpdk/drivers/net/igc/igc_filter.c ++++ b/dpdk/drivers/net/igc/igc_filter.c +@@ -167,7 +167,7 @@ igc_tuple_filter_lookup(const struct igc_adapter *igc, + /* search the filter array */ + for (; i < IGC_MAX_NTUPLE_FILTERS; i++) { + if (igc->ntuple_filters[i].hash_val) { +- /* compare the hase value */ ++ /* compare the hash value */ + if (ntuple->hash_val == + igc->ntuple_filters[i].hash_val) + /* filter be found, return index */ +diff --git a/dpdk/drivers/net/igc/igc_flow.c b/dpdk/drivers/net/igc/igc_flow.c +index 1bb64d323c..66053060af 100644 +--- a/dpdk/drivers/net/igc/igc_flow.c ++++ b/dpdk/drivers/net/igc/igc_flow.c +@@ -656,7 +656,7 @@ igc_parse_action_rss(struct rte_eth_dev *dev, + * Return the pointer of the flow, or NULL for failed + **/ + static inline struct rte_flow * +-igc_alloc_flow(const void *filter, enum igc_filter_type type, uint inbytes) ++igc_alloc_flow(const void *filter, enum igc_filter_type type, size_t inbytes) + { + /* allocate memory, 8 bytes boundary aligned */ + struct rte_flow *flow = rte_malloc("igc flow filter", +diff --git a/dpdk/drivers/net/igc/igc_txrx.c b/dpdk/drivers/net/igc/igc_txrx.c +index 4654ec41f0..6c3d207a69 100644 +--- 
a/dpdk/drivers/net/igc/igc_txrx.c ++++ b/dpdk/drivers/net/igc/igc_txrx.c +@@ -343,7 +343,7 @@ rx_desc_get_pkt_info(struct igc_rx_queue *rxq, struct rte_mbuf *rxm, + rxm->packet_type = rx_desc_pkt_info_to_pkt_type(pkt_info); + } + +-static uint16_t ++uint16_t + igc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + { + struct igc_rx_queue * const rxq = rx_queue; +@@ -486,7 +486,7 @@ igc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + return nb_rx; + } + +-static uint16_t ++uint16_t + igc_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) + { +@@ -1290,20 +1290,24 @@ igc_rx_init(struct rte_eth_dev *dev) + * This needs to be done after enable. + */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { ++ uint32_t dvmolr; ++ + rxq = dev->data->rx_queues[i]; + IGC_WRITE_REG(hw, IGC_RDH(rxq->reg_idx), 0); +- IGC_WRITE_REG(hw, IGC_RDT(rxq->reg_idx), +- rxq->nb_rx_desc - 1); ++ IGC_WRITE_REG(hw, IGC_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1); + +- /* strip queue vlan offload */ +- if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP) { +- uint32_t dvmolr; +- dvmolr = IGC_READ_REG(hw, IGC_DVMOLR(rxq->queue_id)); ++ dvmolr = IGC_READ_REG(hw, IGC_DVMOLR(rxq->reg_idx)); ++ if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP) ++ dvmolr |= IGC_DVMOLR_STRVLAN; ++ else ++ dvmolr &= ~IGC_DVMOLR_STRVLAN; + +- /* If vlan been stripped off, the CRC is meaningless. */ +- dvmolr |= IGC_DVMOLR_STRVLAN | IGC_DVMOLR_STRCRC; +- IGC_WRITE_REG(hw, IGC_DVMOLR(rxq->reg_idx), dvmolr); +- } ++ if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) ++ dvmolr &= ~IGC_DVMOLR_STRCRC; ++ else ++ dvmolr |= IGC_DVMOLR_STRCRC; ++ ++ IGC_WRITE_REG(hw, IGC_DVMOLR(rxq->reg_idx), dvmolr); + } + + return 0; +@@ -1410,7 +1414,7 @@ eth_igc_rx_queue_setup(struct rte_eth_dev *dev, + } + + /* prepare packets for transmit */ +-static uint16_t ++uint16_t + eth_igc_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) + { +@@ -1617,7 +1621,7 @@ tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags) + return tmp; + } + +-static uint16_t ++uint16_t + igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + { + struct igc_tx_queue * const txq = tx_queue; +@@ -2112,7 +2116,7 @@ eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt) + sw_ring[tx_id].mbuf = NULL; + sw_ring[tx_id].last_id = tx_id; + +- /* Move to next segemnt. */ ++ /* Move to next segment. */ + tx_id = sw_ring[tx_id].next_id; + } while (tx_id != tx_next); + +@@ -2146,7 +2150,7 @@ eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt) + * Walk the list and find the next mbuf, if any. + */ + do { +- /* Move to next segemnt. */ ++ /* Move to next segment. */ + tx_id = sw_ring[tx_id].next_id; + + if (sw_ring[tx_id].mbuf) +@@ -2266,12 +2270,10 @@ eth_igc_vlan_strip_queue_set(struct rte_eth_dev *dev, + + reg_val = IGC_READ_REG(hw, IGC_DVMOLR(rx_queue_id)); + if (on) { +- /* If vlan been stripped off, the CRC is meaningless. 
*/ +- reg_val |= IGC_DVMOLR_STRVLAN | IGC_DVMOLR_STRCRC; ++ reg_val |= IGC_DVMOLR_STRVLAN; + rxq->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; + } else { +- reg_val &= ~(IGC_DVMOLR_STRVLAN | IGC_DVMOLR_HIDVLAN | +- IGC_DVMOLR_STRCRC); ++ reg_val &= ~(IGC_DVMOLR_STRVLAN | IGC_DVMOLR_HIDVLAN); + rxq->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP; + } + +diff --git a/dpdk/drivers/net/igc/igc_txrx.h b/dpdk/drivers/net/igc/igc_txrx.h +index f2b2d75bbc..82d67e0b9d 100644 +--- a/dpdk/drivers/net/igc/igc_txrx.h ++++ b/dpdk/drivers/net/igc/igc_txrx.h +@@ -52,6 +52,12 @@ void eth_igc_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, + struct rte_eth_txq_info *qinfo); + void eth_igc_vlan_strip_queue_set(struct rte_eth_dev *dev, + uint16_t rx_queue_id, int on); ++uint16_t igc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); ++uint16_t igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); ++uint16_t eth_igc_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, ++ uint16_t nb_pkts); ++uint16_t igc_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, ++ uint16_t nb_pkts); + #ifdef __cplusplus + } + #endif +diff --git a/dpdk/drivers/net/ionic/ionic_ethdev.c b/dpdk/drivers/net/ionic/ionic_ethdev.c +index fe778043eb..8cf39f4742 100644 +--- a/dpdk/drivers/net/ionic/ionic_ethdev.c ++++ b/dpdk/drivers/net/ionic/ionic_ethdev.c +@@ -218,15 +218,18 @@ ionic_dev_fw_version_get(struct rte_eth_dev *eth_dev, + { + struct ionic_lif *lif = IONIC_ETH_DEV_TO_LIF(eth_dev); + struct ionic_adapter *adapter = lif->adapter; ++ int ret; + +- if (fw_version == NULL || fw_size <= 0) +- return -EINVAL; +- +- snprintf(fw_version, fw_size, "%s", ++ ret = snprintf(fw_version, fw_size, "%s", + adapter->fw_version); +- fw_version[fw_size - 1] = '\0'; ++ if (ret < 0) ++ return -EINVAL; + +- return 0; ++ ret += 1; /* add the size of '\0' */ ++ if (fw_size < (size_t)ret) ++ return ret; ++ else ++ return 0; + } + + /* +diff --git a/dpdk/drivers/net/ionic/ionic_if.h b/dpdk/drivers/net/ionic/ionic_if.h +index f83c8711b1..c94dea0c88 100644 +--- a/dpdk/drivers/net/ionic/ionic_if.h ++++ b/dpdk/drivers/net/ionic/ionic_if.h +@@ -604,7 +604,7 @@ enum ionic_txq_desc_opcode { + * + * Offload 16-bit checksum computation to hardware. + * If @csum_l3 is set then the packet's L3 checksum is +- * updated. Similarly, if @csum_l4 is set the the L4 ++ * updated. Similarly, if @csum_l4 is set the L4 + * checksum is updated. If @encap is set then encap header + * checksums are also updated. 
+ * +@@ -1788,7 +1788,7 @@ struct ionic_fw_control_cmd { + }; + + /** +- * struct ionic_fw_control_comp - Firmware control copletion ++ * struct ionic_fw_control_comp - Firmware control completion + * @opcode: opcode + * @slot: slot where the firmware was installed + */ +diff --git a/dpdk/drivers/net/ionic/ionic_lif.c b/dpdk/drivers/net/ionic/ionic_lif.c +index 5894f3505a..33a5977ced 100644 +--- a/dpdk/drivers/net/ionic/ionic_lif.c ++++ b/dpdk/drivers/net/ionic/ionic_lif.c +@@ -1470,17 +1470,18 @@ int + ionic_lif_init(struct ionic_lif *lif) + { + struct ionic_dev *idev = &lif->adapter->idev; +- struct ionic_q_init_comp comp; ++ struct ionic_lif_init_comp comp; + int err; + + memset(&lif->stats_base, 0, sizeof(lif->stats_base)); + + ionic_dev_cmd_lif_init(idev, lif->index, lif->info_pa); + err = ionic_dev_cmd_wait_check(idev, IONIC_DEVCMD_TIMEOUT); +- ionic_dev_cmd_comp(idev, &comp); + if (err) + return err; + ++ ionic_dev_cmd_comp(idev, &comp); ++ + lif->hw_index = comp.hw_index; + + err = ionic_lif_adminq_init(lif); +diff --git a/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.c b/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.c +index 4446d2af9e..1ab05f4fbd 100644 +--- a/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.c ++++ b/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.c +@@ -483,7 +483,7 @@ static int ipn3ke_vswitch_probe(struct rte_afu_device *afu_dev) + RTE_CACHE_LINE_SIZE, + afu_dev->device.numa_node); + if (!hw) { +- IPN3KE_AFU_PMD_ERR("failed to allocate hardwart data"); ++ IPN3KE_AFU_PMD_ERR("failed to allocate hardware data"); + retval = -ENOMEM; + return -ENOMEM; + } +diff --git a/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.h b/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.h +index a6815a9cca..06ca25a668 100644 +--- a/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.h ++++ b/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.h +@@ -223,7 +223,7 @@ struct ipn3ke_hw_cap { + }; + + /** +- * Strucute to store private data for each representor instance ++ * Structure to store private data for each representor instance + */ + struct ipn3ke_rpst { + TAILQ_ENTRY(ipn3ke_rpst) next; /**< Next in device list. 
*/ +@@ -237,7 +237,7 @@ struct ipn3ke_rpst { + uint16_t i40e_pf_eth_port_id; + struct rte_eth_link ori_linfo; + struct ipn3ke_tm_internals tm; +- /**< Private data store of assocaiated physical function */ ++ /**< Private data store of associated physical function */ + struct rte_ether_addr mac_addr; + }; + +diff --git a/dpdk/drivers/net/ipn3ke/ipn3ke_flow.c b/dpdk/drivers/net/ipn3ke/ipn3ke_flow.c +index f857e64afb..592dcbb23a 100644 +--- a/dpdk/drivers/net/ipn3ke/ipn3ke_flow.c ++++ b/dpdk/drivers/net/ipn3ke/ipn3ke_flow.c +@@ -1299,7 +1299,7 @@ int ipn3ke_flow_init(void *dev) + IPN3KE_AFU_PMD_DEBUG("IPN3KE_CLF_LKUP_ENABLE: %x\n", data); + + +- /* configure rx parse config, settings associatied with VxLAN */ ++ /* configure rx parse config, settings associated with VxLAN */ + IPN3KE_MASK_WRITE_REG(hw, + IPN3KE_CLF_RX_PARSE_CFG, + 0, +diff --git a/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c b/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c +index 9e15cce34f..603017f896 100644 +--- a/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c ++++ b/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c +@@ -2228,9 +2228,6 @@ ipn3ke_rpst_xstats_get + struct ipn3ke_rpst_hw_port_stats hw_stats; + struct rte_eth_stats stats; + +- if (!xstats) +- return 0; +- + if (!ethdev) { + IPN3KE_AFU_PMD_ERR("ethernet device to get statistics is NULL"); + return -EINVAL; +@@ -2292,7 +2289,7 @@ ipn3ke_rpst_xstats_get + count++; + } + +- /* Get individiual stats from ipn3ke_rpst_hw_port */ ++ /* Get individual stats from ipn3ke_rpst_hw_port */ + for (i = 0; i < IPN3KE_RPST_HW_PORT_XSTATS_CNT; i++) { + xstats[count].value = *(uint64_t *)(((char *)(&hw_stats)) + + ipn3ke_rpst_hw_port_strings[i].offset); +@@ -2300,7 +2297,7 @@ ipn3ke_rpst_xstats_get + count++; + } + +- /* Get individiual stats from ipn3ke_rpst_rxq_pri */ ++ /* Get individual stats from ipn3ke_rpst_rxq_pri */ + for (i = 0; i < IPN3KE_RPST_RXQ_PRIO_XSTATS_CNT; i++) { + for (prio = 0; prio < IPN3KE_RPST_PRIO_XSTATS_CNT; prio++) { + xstats[count].value = +@@ -2312,7 +2309,7 @@ ipn3ke_rpst_xstats_get + } + } + +- /* Get individiual stats from ipn3ke_rpst_txq_prio */ ++ /* Get individual stats from ipn3ke_rpst_txq_prio */ + for (i = 0; i < IPN3KE_RPST_TXQ_PRIO_XSTATS_CNT; i++) { + for (prio = 0; prio < IPN3KE_RPST_PRIO_XSTATS_CNT; prio++) { + xstats[count].value = +@@ -2350,7 +2347,7 @@ __rte_unused unsigned int limit) + count++; + } + +- /* Get individiual stats from ipn3ke_rpst_hw_port */ ++ /* Get individual stats from ipn3ke_rpst_hw_port */ + for (i = 0; i < IPN3KE_RPST_HW_PORT_XSTATS_CNT; i++) { + snprintf(xstats_names[count].name, + sizeof(xstats_names[count].name), +@@ -2359,7 +2356,7 @@ __rte_unused unsigned int limit) + count++; + } + +- /* Get individiual stats from ipn3ke_rpst_rxq_pri */ ++ /* Get individual stats from ipn3ke_rpst_rxq_pri */ + for (i = 0; i < IPN3KE_RPST_RXQ_PRIO_XSTATS_CNT; i++) { + for (prio = 0; prio < 8; prio++) { + snprintf(xstats_names[count].name, +@@ -2371,7 +2368,7 @@ __rte_unused unsigned int limit) + } + } + +- /* Get individiual stats from ipn3ke_rpst_txq_prio */ ++ /* Get individual stats from ipn3ke_rpst_txq_prio */ + for (i = 0; i < IPN3KE_RPST_TXQ_PRIO_XSTATS_CNT; i++) { + for (prio = 0; prio < 8; prio++) { + snprintf(xstats_names[count].name, +diff --git a/dpdk/drivers/net/ipn3ke/meson.build b/dpdk/drivers/net/ipn3ke/meson.build +index d5000d807c..3f77e34d5c 100644 +--- a/dpdk/drivers/net/ipn3ke/meson.build ++++ b/dpdk/drivers/net/ipn3ke/meson.build +@@ -2,7 +2,7 @@ + # Copyright(c) 2019 Intel Corporation + + # +-# Add the 
experimenatal APIs called from this PMD ++# Add the experimental APIs called from this PMD + # rte_eth_switch_domain_alloc() + # rte_eth_dev_create() + # rte_eth_dev_destroy() +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_bypass.c b/dpdk/drivers/net/ixgbe/ixgbe_bypass.c +index ae38ce3559..98785b1201 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_bypass.c ++++ b/dpdk/drivers/net/ixgbe/ixgbe_bypass.c +@@ -11,7 +11,7 @@ + + #define BYPASS_STATUS_OFF_MASK 3 + +-/* Macros to check for invlaid function pointers. */ ++/* Macros to check for invalid function pointers. */ + #define FUNC_PTR_OR_ERR_RET(func, retval) do { \ + if ((func) == NULL) { \ + PMD_DRV_LOG(ERR, "%s:%d function not supported", \ +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_bypass_api.h b/dpdk/drivers/net/ixgbe/ixgbe_bypass_api.h +index 8eb773391b..6ef965dbb6 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_bypass_api.h ++++ b/dpdk/drivers/net/ixgbe/ixgbe_bypass_api.h +@@ -135,7 +135,7 @@ static s32 ixgbe_bypass_rw_generic(struct ixgbe_hw *hw, u32 cmd, u32 *status) + * ixgbe_bypass_valid_rd_generic - Verify valid return from bit-bang. + * + * If we send a write we can't be sure it took until we can read back +- * that same register. It can be a problem as some of the feilds may ++ * that same register. It can be a problem as some of the fields may + * for valid reasons change between the time wrote the register and + * we read it again to verify. So this function check everything we + * can check and then assumes it worked. +@@ -189,7 +189,7 @@ static bool ixgbe_bypass_valid_rd_generic(u32 in_reg, u32 out_reg) + } + + /** +- * ixgbe_bypass_set_generic - Set a bypass field in the FW CTRL Regiter. ++ * ixgbe_bypass_set_generic - Set a bypass field in the FW CTRL Register. + * + * @hw: pointer to hardware structure + * @cmd: The control word we are setting. 
+diff --git a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c +index fa0f5afd03..3bd4a838e5 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c ++++ b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c +@@ -128,6 +128,13 @@ + #define IXGBE_EXVET_VET_EXT_SHIFT 16 + #define IXGBE_DMATXCTL_VT_MASK 0xFFFF0000 + ++#define IXGBE_DEVARG_FIBER_SDP3_NOT_TX_DISABLE "fiber_sdp3_no_tx_disable" ++ ++static const char * const ixgbe_valid_arguments[] = { ++ IXGBE_DEVARG_FIBER_SDP3_NOT_TX_DISABLE, ++ NULL ++}; ++ + #define IXGBEVF_DEVARG_PFLINK_FULLCHK "pflink_fullchk" + + static const char * const ixgbevf_valid_arguments[] = { +@@ -355,6 +362,8 @@ static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, + static int ixgbe_filter_restore(struct rte_eth_dev *dev); + static void ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev); + static int ixgbe_wait_for_link_up(struct ixgbe_hw *hw); ++static int devarg_handle_int(__rte_unused const char *key, const char *value, ++ void *extra_args); + + /* + * Define VF Stats MACRO for Non "cleared on read" register +@@ -788,6 +797,20 @@ ixgbe_is_sfp(struct ixgbe_hw *hw) + case ixgbe_phy_sfp_passive_unknown: + return 1; + default: ++ /* x550em devices may be SFP, check media type */ ++ switch (hw->mac.type) { ++ case ixgbe_mac_X550EM_x: ++ case ixgbe_mac_X550EM_a: ++ switch (ixgbe_get_media_type(hw)) { ++ case ixgbe_media_type_fiber: ++ case ixgbe_media_type_fiber_qsfp: ++ return 1; ++ default: ++ break; ++ } ++ default: ++ break; ++ } + return 0; + } + } +@@ -1025,6 +1048,29 @@ ixgbe_swfw_lock_reset(struct ixgbe_hw *hw) + ixgbe_release_swfw_semaphore(hw, mask); + } + ++static void ++ixgbe_parse_devargs(struct ixgbe_adapter *adapter, ++ struct rte_devargs *devargs) ++{ ++ struct rte_kvargs *kvlist; ++ uint16_t sdp3_no_tx_disable; ++ ++ if (devargs == NULL) ++ return; ++ ++ kvlist = rte_kvargs_parse(devargs->args, ixgbe_valid_arguments); ++ if (kvlist == NULL) ++ return; ++ ++ if (rte_kvargs_count(kvlist, IXGBE_DEVARG_FIBER_SDP3_NOT_TX_DISABLE) == 1 && ++ rte_kvargs_process(kvlist, IXGBE_DEVARG_FIBER_SDP3_NOT_TX_DISABLE, ++ devarg_handle_int, &sdp3_no_tx_disable) == 0 && ++ sdp3_no_tx_disable == 1) ++ adapter->sdp3_no_tx_disable = 1; ++ ++ rte_kvargs_free(kvlist); ++} ++ + /* + * This function is based on code in ixgbe_attach() in base/ixgbe.c. + * It returns 0 on success. 
+@@ -1089,6 +1135,8 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused) + } + + rte_atomic32_clear(&ad->link_thread_running); ++ ixgbe_parse_devargs(eth_dev->data->dev_private, ++ pci_dev->device.devargs); + rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + +@@ -1218,6 +1266,8 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused) + PMD_INIT_LOG(ERR, + "Failed to allocate %d bytes needed to store MAC addresses", + RTE_ETHER_ADDR_LEN * IXGBE_VMDQ_NUM_UC_MAC); ++ rte_free(eth_dev->data->mac_addrs); ++ eth_dev->data->mac_addrs = NULL; + return -ENOMEM; + } + +@@ -1229,13 +1279,8 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused) + + /* initialize PF if max_vfs not zero */ + ret = ixgbe_pf_host_init(eth_dev); +- if (ret) { +- rte_free(eth_dev->data->mac_addrs); +- eth_dev->data->mac_addrs = NULL; +- rte_free(eth_dev->data->hash_mac_addrs); +- eth_dev->data->hash_mac_addrs = NULL; +- return ret; +- } ++ if (ret) ++ goto err_pf_host_init; + + ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); + /* let hardware know driver is loaded */ +@@ -1274,10 +1319,14 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused) + TAILQ_INIT(&filter_info->fivetuple_list); + + /* initialize flow director filter list & hash */ +- ixgbe_fdir_filter_init(eth_dev); ++ ret = ixgbe_fdir_filter_init(eth_dev); ++ if (ret) ++ goto err_fdir_filter_init; + + /* initialize l2 tunnel filter list & hash */ +- ixgbe_l2_tn_filter_init(eth_dev); ++ ret = ixgbe_l2_tn_filter_init(eth_dev); ++ if (ret) ++ goto err_l2_tn_filter_init; + + /* initialize flow filter lists */ + ixgbe_filterlist_init(); +@@ -1289,6 +1338,21 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused) + ixgbe_tm_conf_init(eth_dev); + + return 0; ++ ++err_l2_tn_filter_init: ++ ixgbe_fdir_filter_uninit(eth_dev); ++err_fdir_filter_init: ++ ixgbe_disable_intr(hw); ++ rte_intr_disable(intr_handle); ++ rte_intr_callback_unregister(intr_handle, ++ ixgbe_dev_interrupt_handler, eth_dev); ++ ixgbe_pf_host_uninit(eth_dev); ++err_pf_host_init: ++ rte_free(eth_dev->data->mac_addrs); ++ eth_dev->data->mac_addrs = NULL; ++ rte_free(eth_dev->data->hash_mac_addrs); ++ eth_dev->data->hash_mac_addrs = NULL; ++ return ret; + } + + static int +@@ -1393,6 +1457,7 @@ static int ixgbe_fdir_filter_init(struct rte_eth_dev *eth_dev) + if (!fdir_info->hash_map) { + PMD_INIT_LOG(ERR, + "Failed to allocate memory for fdir hash map!"); ++ rte_hash_free(fdir_info->hash_handle); + return -ENOMEM; + } + fdir_info->mask_added = FALSE; +@@ -1429,6 +1494,7 @@ static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev) + if (!l2_tn_info->hash_map) { + PMD_INIT_LOG(ERR, + "Failed to allocate memory for L2 TN hash map!"); ++ rte_hash_free(l2_tn_info->hash_handle); + return -ENOMEM; + } + l2_tn_info->e_tag_en = FALSE; +@@ -1670,6 +1736,8 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev) + + default: + PMD_INIT_LOG(ERR, "VF Initialization Failure: %d", diag); ++ rte_free(eth_dev->data->mac_addrs); ++ eth_dev->data->mac_addrs = NULL; + return -EIO; + } + +@@ -2377,7 +2445,7 @@ ixgbe_dev_configure(struct rte_eth_dev *dev) + if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) + dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; + +- /* multipe queue mode checking */ ++ /* multiple queue mode checking */ + ret = ixgbe_check_mq_mode(dev); + if (ret != 0) { + PMD_DRV_LOG(ERR, 
"ixgbe_check_mq_mode fails with %d.", +@@ -2607,7 +2675,7 @@ ixgbe_dev_start(struct rte_eth_dev *dev) + } + } + +- /* confiugre msix for sleep until rx interrupt */ ++ /* configure MSI-X for sleep until Rx interrupt */ + ixgbe_configure_msix(dev); + + /* initialize transmission unit */ +@@ -2914,7 +2982,7 @@ ixgbe_dev_set_link_up(struct rte_eth_dev *dev) + if (hw->mac.type == ixgbe_mac_82599EB) { + #ifdef RTE_LIBRTE_IXGBE_BYPASS + if (hw->device_id == IXGBE_DEV_ID_82599_BYPASS) { +- /* Not suported in bypass mode */ ++ /* Not supported in bypass mode */ + PMD_INIT_LOG(ERR, "Set link up is not supported " + "by device id 0x%x", hw->device_id); + return -ENOTSUP; +@@ -2945,7 +3013,7 @@ ixgbe_dev_set_link_down(struct rte_eth_dev *dev) + if (hw->mac.type == ixgbe_mac_82599EB) { + #ifdef RTE_LIBRTE_IXGBE_BYPASS + if (hw->device_id == IXGBE_DEV_ID_82599_BYPASS) { +- /* Not suported in bypass mode */ ++ /* Not supported in bypass mode */ + PMD_INIT_LOG(ERR, "Set link down is not supported " + "by device id 0x%x", hw->device_id); + return -ENOTSUP; +@@ -3035,6 +3103,7 @@ ixgbe_dev_close(struct rte_eth_dev *dev) + + #ifdef RTE_LIB_SECURITY + rte_free(dev->security_ctx); ++ dev->security_ctx = NULL; + #endif + + return ret; +@@ -3338,6 +3407,13 @@ ixgbe_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + hw_stats->fccrc + + hw_stats->fclast; + ++ /* ++ * 82599 errata, UDP frames with a 0 checksum can be marked as checksum ++ * errors. ++ */ ++ if (hw->mac.type != ixgbe_mac_82599EB) ++ stats->ierrors += hw_stats->xec; ++ + /* Tx Errors */ + stats->oerrors = 0; + return 0; +@@ -3784,6 +3860,7 @@ ixgbevf_dev_stats_reset(struct rte_eth_dev *dev) + hw_stats->vfgorc = 0; + hw_stats->vfgptc = 0; + hw_stats->vfgotc = 0; ++ hw_stats->vfmprc = 0; + + return 0; + } +@@ -3801,9 +3878,11 @@ ixgbe_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) + + etrack_id = (eeprom_verh << 16) | eeprom_verl; + ret = snprintf(fw_version, fw_size, "0x%08x", etrack_id); ++ if (ret < 0) ++ return -EINVAL; + + ret += 1; /* add the size of '\0' */ +- if (fw_size < (u32)ret) ++ if (fw_size < (size_t)ret) + return ret; + else + return 0; +@@ -4233,7 +4312,8 @@ ixgbe_dev_link_update_share(struct rte_eth_dev *dev, + return rte_eth_linkstatus_set(dev, &link); + } + +- if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber) { ++ if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber && ++ !ad->sdp3_no_tx_disable) { + esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); + if ((esdp_reg & IXGBE_ESDP_SDP3)) + link_up = 0; +@@ -4600,7 +4680,7 @@ ixgbe_dev_interrupt_action(struct rte_eth_dev *dev) + * @param handle + * Pointer to interrupt handle. + * @param param +- * The address of parameter (struct rte_eth_dev *) regsitered before. ++ * The address of parameter (struct rte_eth_dev *) registered before. + * + * @return + * void +@@ -4656,7 +4736,7 @@ ixgbe_dev_interrupt_delayed_handler(void *param) + * @param handle + * Pointer to interrupt handle. + * @param param +- * The address of parameter (struct rte_eth_dev *) regsitered before. ++ * The address of parameter (struct rte_eth_dev *) registered before. 
+ * + * @return + * void +@@ -5007,11 +5087,19 @@ ixgbe_dev_rss_reta_update(struct rte_eth_dev *dev, + uint32_t reta, r; + uint16_t idx, shift; + struct ixgbe_adapter *adapter = dev->data->dev_private; ++ struct rte_eth_dev_data *dev_data = dev->data; + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint32_t reta_reg; + + PMD_INIT_FUNC_TRACE(); + ++ if (!dev_data->dev_started) { ++ PMD_DRV_LOG(ERR, ++ "port %d must be started before rss reta update", ++ dev_data->port_id); ++ return -EIO; ++ } ++ + if (!ixgbe_rss_update_sp(hw->mac.type)) { + PMD_DRV_LOG(ERR, "RSS reta update is not supported on this " + "NIC."); +@@ -5340,8 +5428,10 @@ ixgbevf_dev_start(struct rte_eth_dev *dev) + * now only one vector is used for Rx queue + */ + intr_vector = 1; +- if (rte_intr_efd_enable(intr_handle, intr_vector)) ++ if (rte_intr_efd_enable(intr_handle, intr_vector)) { ++ ixgbe_dev_clear_queues(dev); + return -1; ++ } + } + + if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) { +@@ -5351,6 +5441,7 @@ ixgbevf_dev_start(struct rte_eth_dev *dev) + if (intr_handle->intr_vec == NULL) { + PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" + " intr_vec", dev->data->nb_rx_queues); ++ ixgbe_dev_clear_queues(dev); + return -ENOMEM; + } + } +@@ -6106,7 +6197,7 @@ ixgbevf_configure_msix(struct rte_eth_dev *dev) + /* Configure all RX queues of VF */ + for (q_idx = 0; q_idx < dev->data->nb_rx_queues; q_idx++) { + /* Force all queue use vector 0, +- * as IXGBE_VF_MAXMSIVECOTR = 1 ++ * as IXGBE_VF_MAXMSIVECTOR = 1 + */ + ixgbevf_set_ivar_map(hw, 0, q_idx, vector_idx); + intr_handle->intr_vec[q_idx] = vector_idx; +@@ -6454,7 +6545,7 @@ ixgbe_inject_5tuple_filter(struct rte_eth_dev *dev, + * @param + * dev: Pointer to struct rte_eth_dev. + * index: the index the filter allocates. +- * filter: ponter to the filter that will be added. ++ * filter: pointer to the filter that will be added. + * rx_queue: the queue id the filter assigned to. + * + * @return +@@ -7089,7 +7180,7 @@ ixgbe_timesync_disable(struct rte_eth_dev *dev) + /* Disable L2 filtering of IEEE1588/802.1AS Ethernet frame types. */ + IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588), 0); + +- /* Stop incrementating the System Time registers. */ ++ /* Stop incrementing the System Time registers. */ + IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, 0); + + return 0; +@@ -7332,9 +7423,6 @@ ixgbe_get_module_eeprom(struct rte_eth_dev *dev, + uint8_t *data = info->data; + uint32_t i = 0; + +- if (info->length == 0) +- return -EINVAL; +- + for (i = info->offset; i < info->offset + info->length; i++) { + if (i < RTE_ETH_MODULE_SFF_8079_LEN) + status = hw->phy.ops.read_i2c_eeprom(hw, i, &databyte); +@@ -8445,6 +8533,8 @@ ixgbe_dev_macsec_register_disable(struct rte_eth_dev *dev) + RTE_PMD_REGISTER_PCI(net_ixgbe, rte_ixgbe_pmd); + RTE_PMD_REGISTER_PCI_TABLE(net_ixgbe, pci_id_ixgbe_map); + RTE_PMD_REGISTER_KMOD_DEP(net_ixgbe, "* igb_uio | uio_pci_generic | vfio-pci"); ++RTE_PMD_REGISTER_PARAM_STRING(net_ixgbe, ++ IXGBE_DEVARG_FIBER_SDP3_NOT_TX_DISABLE "=<0|1>"); + RTE_PMD_REGISTER_PCI(net_ixgbe_vf, rte_ixgbevf_pmd); + RTE_PMD_REGISTER_PCI_TABLE(net_ixgbe_vf, pci_id_ixgbevf_map); + RTE_PMD_REGISTER_KMOD_DEP(net_ixgbe_vf, "* igb_uio | vfio-pci"); +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h +index a0ce18ca24..319a98cebf 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h ++++ b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h +@@ -68,7 +68,7 @@ + #define IXGBE_LPBK_NONE 0x0 /* Default value. 
Loopback is disabled. */ + #define IXGBE_LPBK_TX_RX 0x1 /* Tx->Rx loopback operation is enabled. */ + /* X540-X550 specific loopback operations */ +-#define IXGBE_MII_AUTONEG_ENABLE 0x1000 /* Auto-negociation enable (default = 1) */ ++#define IXGBE_MII_AUTONEG_ENABLE 0x1000 /* Auto-negotiation enable (default = 1) */ + + #define IXGBE_MAX_JUMBO_FRAME_SIZE 0x2600 /* Maximum Jumbo frame size. */ + +@@ -510,6 +510,9 @@ struct ixgbe_adapter { + /* For RSS reta table update */ + uint8_t rss_reta_updated; + ++ /* Used for limiting SDP3 TX_DISABLE checks */ ++ uint8_t sdp3_no_tx_disable; ++ + /* Used for VF link sync with PF's physical and logical (by checking + * mailbox status) link status. + */ +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_fdir.c b/dpdk/drivers/net/ixgbe/ixgbe_fdir.c +index 11b9effeba..80b2d47464 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_fdir.c ++++ b/dpdk/drivers/net/ixgbe/ixgbe_fdir.c +@@ -390,7 +390,7 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev) + + switch (info->mask.tunnel_type_mask) { + case 0: +- /* Mask turnnel type */ ++ /* Mask tunnel type */ + fdiripv6m |= IXGBE_FDIRIP6M_TUNNEL_TYPE; + break; + case 1: +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_flow.c b/dpdk/drivers/net/ixgbe/ixgbe_flow.c +index 9aeb2e4a49..d444d8588c 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_flow.c ++++ b/dpdk/drivers/net/ixgbe/ixgbe_flow.c +@@ -135,7 +135,7 @@ const struct rte_flow_action *next_no_void_action( + } + + /** +- * Please aware there's an asumption for all the parsers. ++ * Please be aware there's an assumption for all the parsers. + * rte_flow_item is using big endian, rte_flow_attr and + * rte_flow_action are using CPU order. + * Because the pattern is used to describe the packets, +@@ -3261,7 +3261,7 @@ ixgbe_flow_create(struct rte_eth_dev *dev, + + /** + * Check if the flow rule is supported by ixgbe. +- * It only checkes the format. Don't guarantee the rule can be programmed into ++ * It only checks the format. Don't guarantee the rule can be programmed into + * the HW. Because there can be no enough room for the rule. 
+ */ + static int +@@ -3437,6 +3437,7 @@ ixgbe_flow_destroy(struct rte_eth_dev *dev, + TAILQ_REMOVE(&ixgbe_flow_list, + ixgbe_flow_mem_ptr, entries); + rte_free(ixgbe_flow_mem_ptr); ++ break; + } + } + rte_free(flow); +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_ipsec.c b/dpdk/drivers/net/ixgbe/ixgbe_ipsec.c +index 62f2a5f764..69e055c0be 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_ipsec.c ++++ b/dpdk/drivers/net/ixgbe/ixgbe_ipsec.c +@@ -310,7 +310,7 @@ ixgbe_crypto_remove_sa(struct rte_eth_dev *dev, + return -1; + } + +- /* Disable and clear Rx SPI and key table table entryes*/ ++ /* Disable and clear Rx SPI and key table table entries*/ + reg_val = IPSRXIDX_WRITE | IPSRXIDX_TABLE_SPI | (sa_index << 3); + IXGBE_WRITE_REG(hw, IXGBE_IPSRXSPI, 0); + IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPIDX, 0); +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_pf.c b/dpdk/drivers/net/ixgbe/ixgbe_pf.c +index 15982af8da..8ee83c1d71 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_pf.c ++++ b/dpdk/drivers/net/ixgbe/ixgbe_pf.c +@@ -245,7 +245,7 @@ int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev) + /* PFDMA Tx General Switch Control Enables VMDQ loopback */ + IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); + +- /* clear VMDq map to perment rar 0 */ ++ /* clear VMDq map to permanent rar 0 */ + hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL); + + /* clear VMDq map to scan rar 127 */ +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c +index 3b893b0df0..ab3e70d27e 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c ++++ b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c +@@ -1923,7 +1923,7 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + * register. + * Update the RDT with the value of the last processed RX descriptor + * minus 1, to guarantee that the RDT register is never equal to the +- * RDH register, which creates a "full" ring situtation from the ++ * RDH register, which creates a "full" ring situation from the + * hardware point of view... + */ + nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold); +@@ -2272,7 +2272,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts, + * register. + * Update the RDT with the value of the last processed RX descriptor + * minus 1, to guarantee that the RDT register is never equal to the +- * RDH register, which creates a "full" ring situtation from the ++ * RDH register, which creates a "full" ring situation from the + * hardware point of view... + */ + if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) { +@@ -2449,6 +2449,7 @@ ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq) + if (txq != NULL && txq->ops != NULL) { + txq->ops->release_mbufs(txq); + txq->ops->free_swring(txq); ++ rte_memzone_free(txq->mz); + rte_free(txq); + } + } +@@ -2634,7 +2635,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev, + */ + tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ? + tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH); +- /* force tx_rs_thresh to adapt an aggresive tx_free_thresh */ ++ /* force tx_rs_thresh to adapt an aggressive tx_free_thresh */ + tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ? 
+ nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH; + if (tx_conf->tx_rs_thresh > 0) +@@ -2730,6 +2731,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev, + return -ENOMEM; + } + ++ txq->mz = tz; + txq->nb_tx_desc = nb_desc; + txq->tx_rs_thresh = tx_rs_thresh; + txq->tx_free_thresh = tx_free_thresh; +@@ -2854,6 +2856,7 @@ ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq) + ixgbe_rx_queue_release_mbufs(rxq); + rte_free(rxq->sw_ring); + rte_free(rxq->sw_sc_ring); ++ rte_memzone_free(rxq->mz); + rte_free(rxq); + } + } +@@ -2948,6 +2951,10 @@ ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq) + rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1); + rxq->rx_tail = 0; + rxq->nb_rx_hold = 0; ++ ++ if (rxq->pkt_first_seg != NULL) ++ rte_pktmbuf_free(rxq->pkt_first_seg); ++ + rxq->pkt_first_seg = NULL; + rxq->pkt_last_seg = NULL; + +@@ -3125,6 +3132,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev, + return -ENOMEM; + } + ++ rxq->mz = rz; + /* + * Zero init all the descriptors in the ring. + */ +@@ -3396,14 +3404,12 @@ ixgbe_dev_free_queues(struct rte_eth_dev *dev) + for (i = 0; i < dev->data->nb_rx_queues; i++) { + ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]); + dev->data->rx_queues[i] = NULL; +- rte_eth_dma_zone_free(dev, "rx_ring", i); + } + dev->data->nb_rx_queues = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; +- rte_eth_dma_zone_free(dev, "tx_ring", i); + } + dev->data->nb_tx_queues = 0; + } +@@ -4814,7 +4820,7 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev) + dev->data->port_id); + dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc; + } else { +- PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, " ++ PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, " + "single allocation) " + "Scattered Rx callback " + "(port=%d).", +@@ -5153,7 +5159,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev) + /* + * Setup the Checksum Register. + * Disable Full-Packet Checksum which is mutually exclusive with RSS. +- * Enable IP/L4 checkum computation by hardware if requested to do so. ++ * Enable IP/L4 checksum computation by hardware if requested to do so. + */ + rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); + rxcsum |= IXGBE_RXCSUM_PCSD; +@@ -5653,11 +5659,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev) + * VF packets received can work in all cases. 
+ */ + if (ixgbevf_rlpml_set_vf(hw, +- (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len)) { ++ (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len)) + PMD_INIT_LOG(ERR, "Set max packet length to %d failed.", + dev->data->dev_conf.rxmode.max_rx_pkt_len); +- return -EINVAL; +- } + + /* + * Assume no header split and no VLAN strip support +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h +index bcadaf79ce..d18cac29bf 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h ++++ b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h +@@ -138,6 +138,7 @@ struct ixgbe_rx_queue { + struct rte_mbuf fake_mbuf; + /** hold packets to return to application */ + struct rte_mbuf *rx_stage[RTE_PMD_IXGBE_RX_MAX_BURST*2]; ++ const struct rte_memzone *mz; + }; + + /** +@@ -236,6 +237,7 @@ struct ixgbe_tx_queue { + uint8_t using_ipsec; + /**< indicates that IPsec TX feature is in use */ + #endif ++ const struct rte_memzone *mz; + }; + + struct ixgbe_txq_ops { +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h b/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h +index a97c27189b..e650feac82 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h ++++ b/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h +@@ -152,7 +152,7 @@ _ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq) + /* release the used mbufs in sw_ring */ + for (i = txq->tx_next_dd - (txq->tx_rs_thresh - 1); + i != txq->tx_tail; +- i = (i + 1) & max_desc) { ++ i = (i + 1) % txq->nb_tx_desc) { + txe = &txq->sw_ring_v[i]; + rte_pktmbuf_free_seg(txe->mbuf); + } +@@ -168,7 +168,6 @@ _ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq) + static inline void + _ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq) + { +- const unsigned int mask = rxq->nb_rx_desc - 1; + unsigned int i; + + if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc) +@@ -183,7 +182,7 @@ _ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq) + } else { + for (i = rxq->rx_tail; + i != rxq->rxrearm_start; +- i = (i + 1) & mask) { ++ i = (i + 1) % rxq->nb_rx_desc) { + if (rxq->sw_ring[i].mbuf != NULL) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c b/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c +index 52add17b5d..37a1718926 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c ++++ b/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c +@@ -364,6 +364,17 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, + uint8_t vlan_flags; + uint16_t udp_p_flag = 0; /* Rx Descriptor UDP header present */ + ++ /* ++ * Under the circumstance that `rx_tail` wrap back to zero ++ * and the advance speed of `rx_tail` is greater than `rxrearm_start`, ++ * `rx_tail` will catch up with `rxrearm_start` and surpass it. ++ * This may cause some mbufs be reused by application. ++ * ++ * So we need to make some restrictions to ensure that ++ * `rx_tail` will not exceed `rxrearm_start`. 
++ */ ++ nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_RXQ_REARM_THRESH); ++ + /* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */ + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP); + +@@ -454,7 +465,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, + mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); + + /* Read desc statuses backwards to avoid race condition */ +- /* A.1 load 4 pkts desc */ ++ /* A.1 load desc[3] */ + descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); + +@@ -466,9 +477,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, + mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]); + #endif + ++ /* A.1 load desc[2-0] */ + descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); +- /* B.1 load 2 mbuf point */ + descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); + descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); +@@ -540,7 +551,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, + /* and with mask to extract bits, flipping 1-0 */ + __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); + /* the staterr values are not in order, as the count +- * count of dd bits doesn't care. However, for end of ++ * of dd bits doesn't care. However, for end of + * packet tracking, we do care, so shuffle. This also + * compresses the 32-bit values to 8-bit + */ +@@ -562,7 +573,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, + + desc_to_ptype_v(descs, rxq->pkt_type_mask, &rx_pkts[pos]); + +- /* C.4 calc avaialbe number of desc */ ++ /* C.4 calc available number of desc */ + var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); + nb_pkts_recd += var; + if (likely(var != RTE_IXGBE_DESCS_PER_LOOP)) +diff --git a/dpdk/drivers/net/kni/rte_eth_kni.c b/dpdk/drivers/net/kni/rte_eth_kni.c +index 1696787a9b..5e8d0b722e 100644 +--- a/dpdk/drivers/net/kni/rte_eth_kni.c ++++ b/dpdk/drivers/net/kni/rte_eth_kni.c +@@ -124,7 +124,7 @@ eth_kni_start(struct rte_eth_dev *dev) + struct pmd_internals *internals = dev->data->dev_private; + uint16_t port_id = dev->data->port_id; + struct rte_mempool *mb_pool; +- struct rte_kni_conf conf; ++ struct rte_kni_conf conf = {{0}}; + const char *name = dev->device->name + 4; /* remove net_ */ + + mb_pool = internals->rx_queues[0].mb_pool; +@@ -211,6 +211,9 @@ eth_kni_close(struct rte_eth_dev *eth_dev) + return 0; + + ret = eth_kni_dev_stop(eth_dev); ++ if (ret) ++ PMD_LOG(WARNING, "Not able to stop kni for %s", ++ eth_dev->data->name); + + /* mac_addrs must not be freed alone because part of dev_private */ + eth_dev->data->mac_addrs = NULL; +@@ -406,8 +409,13 @@ eth_kni_create(struct rte_vdev_device *vdev, + static int + kni_init(void) + { +- if (is_kni_initialized == 0) +- rte_kni_init(MAX_KNI_PORTS); ++ int ret; ++ ++ if (is_kni_initialized == 0) { ++ ret = rte_kni_init(MAX_KNI_PORTS); ++ if (ret < 0) ++ return ret; ++ } + + is_kni_initialized++; + +diff --git a/dpdk/drivers/net/liquidio/lio_ethdev.c b/dpdk/drivers/net/liquidio/lio_ethdev.c +index eb0fdab45a..70b2c05c45 100644 +--- a/dpdk/drivers/net/liquidio/lio_ethdev.c ++++ b/dpdk/drivers/net/liquidio/lio_ethdev.c +@@ -2094,7 +2094,6 @@ lio_eth_dev_init(struct rte_eth_dev *eth_dev) + return 0; + + rte_eth_copy_pci_info(eth_dev, pdev); +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + + if (pdev->mem_resource[0].addr) { + lio_dev->hw_addr = pdev->mem_resource[0].addr; +diff --git 
a/dpdk/drivers/net/liquidio/lio_rxtx.c b/dpdk/drivers/net/liquidio/lio_rxtx.c +index 8d705bfe7f..f57b7712b5 100644 +--- a/dpdk/drivers/net/liquidio/lio_rxtx.c ++++ b/dpdk/drivers/net/liquidio/lio_rxtx.c +@@ -1050,7 +1050,6 @@ lio_update_read_index(struct lio_instr_queue *iq) + int + lio_flush_iq(struct lio_device *lio_dev, struct lio_instr_queue *iq) + { +- uint32_t tot_inst_processed = 0; + uint32_t inst_processed = 0; + int tx_done = 1; + +@@ -1073,7 +1072,6 @@ lio_flush_iq(struct lio_device *lio_dev, struct lio_instr_queue *iq) + iq->stats.instr_processed += inst_processed; + } + +- tot_inst_processed += inst_processed; + inst_processed = 0; + + } while (1); +diff --git a/dpdk/drivers/net/memif/memif_socket.c b/dpdk/drivers/net/memif/memif_socket.c +index c526f90778..1a4eccaf10 100644 +--- a/dpdk/drivers/net/memif/memif_socket.c ++++ b/dpdk/drivers/net/memif/memif_socket.c +@@ -396,11 +396,10 @@ memif_msg_enq_init(struct rte_eth_dev *dev) + { + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc); +- memif_msg_init_t *i = &e->msg.init; ++ memif_msg_init_t *i; + + if (e == NULL) + return -1; +- + i = &e->msg.init; + e->msg.type = MEMIF_MSG_TYPE_INIT; + i->version = MEMIF_VERSION; +@@ -712,7 +711,7 @@ memif_msg_receive(struct memif_control_channel *cc) + break; + case MEMIF_MSG_TYPE_INIT: + /* +- * This cc does not have an interface asociated with it. ++ * This cc does not have an interface associated with it. + * If suitable interface is found it will be assigned here. + */ + ret = memif_msg_receive_init(cc, &msg); +@@ -866,6 +865,7 @@ memif_socket_create(char *key, uint8_t listener, bool is_abstract) + { + struct memif_socket *sock; + struct sockaddr_un un = { 0 }; ++ uint32_t sunlen; + int sockfd; + int ret; + int on = 1; +@@ -890,7 +890,11 @@ memif_socket_create(char *key, uint8_t listener, bool is_abstract) + /* abstract address */ + un.sun_path[0] = '\0'; + strlcpy(un.sun_path + 1, sock->filename, MEMIF_SOCKET_UN_SIZE - 1); ++ sunlen = RTE_MIN(1 + strlen(sock->filename), ++ MEMIF_SOCKET_UN_SIZE) + ++ sizeof(un) - sizeof(un.sun_path); + } else { ++ sunlen = sizeof(un); + strlcpy(un.sun_path, sock->filename, MEMIF_SOCKET_UN_SIZE); + } + +@@ -899,7 +903,7 @@ memif_socket_create(char *key, uint8_t listener, bool is_abstract) + if (ret < 0) + goto error; + +- ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un)); ++ ret = bind(sockfd, (struct sockaddr *)&un, sunlen); + if (ret < 0) + goto error; + +@@ -1061,6 +1065,7 @@ memif_connect_client(struct rte_eth_dev *dev) + { + int sockfd; + int ret; ++ uint32_t sunlen; + struct sockaddr_un sun = { 0 }; + struct pmd_internals *pmd = dev->data->dev_private; + +@@ -1075,16 +1080,19 @@ memif_connect_client(struct rte_eth_dev *dev) + } + + sun.sun_family = AF_UNIX; ++ sunlen = sizeof(struct sockaddr_un); + if (pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT) { + /* abstract address */ + sun.sun_path[0] = '\0'; + strlcpy(sun.sun_path + 1, pmd->socket_filename, MEMIF_SOCKET_UN_SIZE - 1); ++ sunlen = RTE_MIN(strlen(pmd->socket_filename) + 1, ++ MEMIF_SOCKET_UN_SIZE) + ++ sizeof(sun) - sizeof(sun.sun_path); + } else { + strlcpy(sun.sun_path, pmd->socket_filename, MEMIF_SOCKET_UN_SIZE); + } + +- ret = connect(sockfd, (struct sockaddr *)&sun, +- sizeof(struct sockaddr_un)); ++ ret = connect(sockfd, (struct sockaddr *)&sun, sunlen); + if (ret < 0) { + MIF_LOG(ERR, "Failed to connect socket: %s.", pmd->socket_filename); + goto error; +diff --git a/dpdk/drivers/net/memif/rte_eth_memif.c 
b/dpdk/drivers/net/memif/rte_eth_memif.c +index 8ed9aebab7..642c44d4ac 100644 +--- a/dpdk/drivers/net/memif/rte_eth_memif.c ++++ b/dpdk/drivers/net/memif/rte_eth_memif.c +@@ -199,6 +199,7 @@ memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *de + dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS; + dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS; + dev_info->min_rx_bufsize = 0; ++ dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS; + + return 0; + } +@@ -348,13 +349,13 @@ eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + goto no_free_bufs; + mbuf = mbuf_head; + mbuf->port = mq->in_port; ++ dst_off = 0; + + next_slot: + s0 = cur_slot & mask; + d0 = &ring->desc[s0]; + + src_len = d0->length; +- dst_off = 0; + src_off = 0; + + do { +@@ -566,7 +567,7 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + rte_eth_devices[mq->in_port].process_private; + memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq); + uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0; +- uint16_t src_len, src_off, dst_len, dst_off, cp_len; ++ uint16_t src_len, src_off, dst_len, dst_off, cp_len, nb_segs; + memif_ring_type_t type = mq->type; + memif_desc_t *d0; + struct rte_mbuf *mbuf; +@@ -614,6 +615,7 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + + while (n_tx_pkts < nb_pkts && n_free) { + mbuf_head = *bufs++; ++ nb_segs = mbuf_head->nb_segs; + mbuf = mbuf_head; + + saved_slot = slot; +@@ -657,7 +659,7 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + d0->length = dst_off; + } + +- if (rte_pktmbuf_is_contiguous(mbuf) == 0) { ++ if (--nb_segs > 0) { + mbuf = mbuf->next; + goto next_in_chain; + } +@@ -694,6 +696,7 @@ memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq + uint16_t slot, uint16_t n_free) + { + memif_desc_t *d0; ++ uint16_t nb_segs = mbuf->nb_segs; + int used_slots = 1; + + next_in_chain: +@@ -706,6 +709,7 @@ memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq + /* populate descriptor */ + d0 = &ring->desc[slot & mask]; + d0->length = rte_pktmbuf_data_len(mbuf); ++ mq->n_bytes += rte_pktmbuf_data_len(mbuf); + /* FIXME: get region index */ + d0->region = 1; + d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) - +@@ -713,7 +717,7 @@ memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq + d0->flags = 0; + + /* check if buffer is chained */ +- if (rte_pktmbuf_is_contiguous(mbuf) == 0) { ++ if (--nb_segs > 0) { + if (n_free < 2) + return 0; + /* mark buffer as chained */ +@@ -1010,7 +1014,7 @@ memif_regions_init(struct rte_eth_dev *dev) + if (ret < 0) + return ret; + } else { +- /* create one memory region contaning rings and buffers */ ++ /* create one memory region containing rings and buffers */ + ret = memif_region_init_shm(dev, /* has buffers */ 1); + if (ret < 0) + return ret; +@@ -1241,6 +1245,13 @@ memif_dev_start(struct rte_eth_dev *dev) + return ret; + } + ++static int ++memif_dev_stop(struct rte_eth_dev *dev) ++{ ++ memif_disconnect(dev); ++ return 0; ++} ++ + static int + memif_dev_close(struct rte_eth_dev *dev) + { +@@ -1249,7 +1260,6 @@ memif_dev_close(struct rte_eth_dev *dev) + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + memif_msg_enq_disconnect(pmd->cc, "Device closed", 0); +- memif_disconnect(dev); + + for (i = 0; i < dev->data->nb_rx_queues; i++) + (*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]); +@@ -1257,8 +1267,6 @@ memif_dev_close(struct 
rte_eth_dev *dev) + (*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]); + + memif_socket_remove_device(dev); +- } else { +- memif_disconnect(dev); + } + + rte_free(dev->process_private); +@@ -1441,25 +1449,9 @@ memif_stats_reset(struct rte_eth_dev *dev) + return 0; + } + +-static int +-memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused, +- uint16_t qid __rte_unused) +-{ +- MIF_LOG(WARNING, "Interrupt mode not supported."); +- +- return -1; +-} +- +-static int +-memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused) +-{ +- struct pmd_internals *pmd __rte_unused = dev->data->dev_private; +- +- return 0; +-} +- + static const struct eth_dev_ops ops = { + .dev_start = memif_dev_start, ++ .dev_stop = memif_dev_stop, + .dev_close = memif_dev_close, + .dev_infos_get = memif_dev_info, + .dev_configure = memif_dev_configure, +@@ -1467,8 +1459,6 @@ static const struct eth_dev_ops ops = { + .rx_queue_setup = memif_rx_queue_setup, + .rx_queue_release = memif_queue_release, + .tx_queue_release = memif_queue_release, +- .rx_queue_intr_enable = memif_rx_queue_intr_enable, +- .rx_queue_intr_disable = memif_rx_queue_intr_disable, + .link_update = memif_link_update, + .stats_get = memif_stats_get, + .stats_reset = memif_stats_reset, +diff --git a/dpdk/drivers/net/memif/rte_eth_memif.h b/dpdk/drivers/net/memif/rte_eth_memif.h +index 765a7e5998..7e5c15341e 100644 +--- a/dpdk/drivers/net/memif/rte_eth_memif.h ++++ b/dpdk/drivers/net/memif/rte_eth_memif.h +@@ -5,10 +5,6 @@ + #ifndef _RTE_ETH_MEMIF_H_ + #define _RTE_ETH_MEMIF_H_ + +-#ifndef _GNU_SOURCE +-#define _GNU_SOURCE +-#endif /* GNU_SOURCE */ +- + #include + + #include +diff --git a/dpdk/drivers/net/mlx4/meson.build b/dpdk/drivers/net/mlx4/meson.build +index d7602b748e..467fa25a3c 100644 +--- a/dpdk/drivers/net/mlx4/meson.build ++++ b/dpdk/drivers/net/mlx4/meson.build +@@ -42,7 +42,7 @@ foreach libname:libnames + endforeach + if static_ibverbs or dlopen_ibverbs + # Build without adding shared libs to Requires.private +- ibv_cflags = run_command(pkgconf, '--cflags', 'libibverbs').stdout() ++ ibv_cflags = run_command(pkgconf, '--cflags', 'libibverbs', check:true).stdout() + ext_deps += declare_dependency(compile_args: ibv_cflags.split()) + endif + if static_ibverbs +diff --git a/dpdk/drivers/net/mlx4/mlx4.c b/dpdk/drivers/net/mlx4/mlx4.c +index 919a9347f9..b35372ed64 100644 +--- a/dpdk/drivers/net/mlx4/mlx4.c ++++ b/dpdk/drivers/net/mlx4/mlx4.c +@@ -201,6 +201,7 @@ mlx4_proc_priv_init(struct rte_eth_dev *dev) + struct mlx4_proc_priv *ppriv; + size_t ppriv_size; + ++ mlx4_proc_priv_uninit(dev); + /* + * UAR register table follows the process private structure. BlueFlame + * registers for Tx queues are stored in the table. +diff --git a/dpdk/drivers/net/mlx4/mlx4.h b/dpdk/drivers/net/mlx4/mlx4.h +index 87710d3996..48b5580f54 100644 +--- a/dpdk/drivers/net/mlx4/mlx4.h ++++ b/dpdk/drivers/net/mlx4/mlx4.h +@@ -74,7 +74,7 @@ enum mlx4_mp_req_type { + MLX4_MP_REQ_STOP_RXTX, + }; + +-/* Pameters for IPC. */ ++/* Parameters for IPC. */ + struct mlx4_mp_param { + enum mlx4_mp_req_type type; + int port_id; +diff --git a/dpdk/drivers/net/mlx4/mlx4_ethdev.c b/dpdk/drivers/net/mlx4/mlx4_ethdev.c +index 9ff05c6738..aa126bac3e 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_ethdev.c ++++ b/dpdk/drivers/net/mlx4/mlx4_ethdev.c +@@ -752,7 +752,7 @@ mlx4_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + * Pointer to Ethernet device structure. 
+ * + * @return +- * alwasy 0 on success ++ * always 0 on success + */ + int + mlx4_stats_reset(struct rte_eth_dev *dev) +diff --git a/dpdk/drivers/net/mlx4/mlx4_flow.c b/dpdk/drivers/net/mlx4/mlx4_flow.c +index 2a86382db7..cf231e5944 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_flow.c ++++ b/dpdk/drivers/net/mlx4/mlx4_flow.c +@@ -713,7 +713,8 @@ mlx4_flow_prepare(struct mlx4_priv *priv, + flow->internal = 1; + continue; + } +- if (flow->promisc || flow->allmulti) { ++ if ((item->type != RTE_FLOW_ITEM_TYPE_VLAN && flow->promisc) || ++ flow->allmulti) { + msg = "mlx4 does not support additional matching" + " criteria combined with indiscriminate" + " matching on Ethernet headers"; +@@ -791,7 +792,8 @@ mlx4_flow_prepare(struct mlx4_priv *priv, + rss = action->conf; + /* Default RSS configuration if none is provided. */ + if (rss->key_len) { +- rss_key = rss->key; ++ rss_key = rss->key ? ++ rss->key : mlx4_rss_hash_key_default; + rss_key_len = rss->key_len; + } else { + rss_key = mlx4_rss_hash_key_default; +diff --git a/dpdk/drivers/net/mlx4/mlx4_mp.c b/dpdk/drivers/net/mlx4/mlx4_mp.c +index 3622d61075..dd72755252 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_mp.c ++++ b/dpdk/drivers/net/mlx4/mlx4_mp.c +@@ -126,7 +126,6 @@ mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer) + switch (param->type) { + case MLX4_MP_REQ_START_RXTX: + INFO("port %u starting datapath", dev->data->port_id); +- rte_mb(); + dev->tx_pkt_burst = mlx4_tx_burst; + dev->rx_pkt_burst = mlx4_rx_burst; + #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET +@@ -144,6 +143,7 @@ mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer) + } + } + #endif ++ rte_mb(); + mp_init_msg(dev, &mp_res, param->type); + res->result = 0; + ret = rte_mp_reply(&mp_res, peer); +diff --git a/dpdk/drivers/net/mlx4/mlx4_rxtx.c b/dpdk/drivers/net/mlx4/mlx4_rxtx.c +index adc1c9bf81..ecf08f53cf 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_rxtx.c ++++ b/dpdk/drivers/net/mlx4/mlx4_rxtx.c +@@ -921,10 +921,6 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) + if (likely(elt->buf != NULL)) { + struct rte_mbuf *tmp = elt->buf; + +-#ifdef RTE_LIBRTE_MLX4_DEBUG +- /* Poisoning. */ +- memset(&elt->buf, 0x66, sizeof(struct rte_mbuf *)); +-#endif + /* Faster than rte_pktmbuf_free(). 
*/ + do { + struct rte_mbuf *next = tmp->next; +diff --git a/dpdk/drivers/net/mlx4/mlx4_txq.c b/dpdk/drivers/net/mlx4/mlx4_txq.c +index 60560d9545..cc5200a1e1 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_txq.c ++++ b/dpdk/drivers/net/mlx4/mlx4_txq.c +@@ -206,19 +206,18 @@ mlx4_tx_uar_uninit_secondary(struct rte_eth_dev *dev __rte_unused) + static void + mlx4_txq_free_elts(struct txq *txq) + { +- unsigned int elts_head = txq->elts_head; +- unsigned int elts_tail = txq->elts_tail; + struct txq_elt (*elts)[txq->elts_n] = txq->elts; +- unsigned int elts_m = txq->elts_n - 1; ++ unsigned int n = txq->elts_n; + +- DEBUG("%p: freeing WRs", (void *)txq); +- while (elts_tail != elts_head) { +- struct txq_elt *elt = &(*elts)[elts_tail++ & elts_m]; ++ DEBUG("%p: freeing WRs, %u", (void *)txq, n); ++ while (n--) { ++ struct txq_elt *elt = &(*elts)[n]; + +- MLX4_ASSERT(elt->buf != NULL); +- rte_pktmbuf_free(elt->buf); +- elt->buf = NULL; +- elt->wqe = NULL; ++ if (elt->buf) { ++ rte_pktmbuf_free(elt->buf); ++ elt->buf = NULL; ++ elt->wqe = NULL; ++ } + } + txq->elts_tail = txq->elts_head; + } +diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/dpdk/drivers/net/mlx5/linux/mlx5_ethdev_os.c +index e36a78091c..b15fc7e5e0 100644 +--- a/dpdk/drivers/net/mlx5/linux/mlx5_ethdev_os.c ++++ b/dpdk/drivers/net/mlx5/linux/mlx5_ethdev_os.c +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + #include "mlx5.h" + #include "mlx5_rxtx.h" +@@ -730,6 +731,57 @@ mlx5_dev_interrupt_device_fatal(struct mlx5_dev_ctx_shared *sh) + } + } + ++static void ++mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg) ++{ ++ struct mlx5_dev_ctx_shared *sh = cb_arg; ++ uint32_t i; ++ uint32_t if_index; ++ ++ if (mlx5_nl_parse_link_status_update(hdr, &if_index) < 0) ++ return; ++ for (i = 0; i < sh->max_port; i++) { ++ struct mlx5_dev_shared_port *port = &sh->port[i]; ++ struct rte_eth_dev *dev; ++ struct mlx5_priv *priv; ++ bool configured; ++ ++ if (port->nl_ih_port_id >= RTE_MAX_ETHPORTS) ++ continue; ++ dev = &rte_eth_devices[port->nl_ih_port_id]; ++ configured = dev->process_private != NULL; ++ /* Probing may initiate an LSC before configuration is done. */ ++ if (configured && !dev->data->dev_conf.intr_conf.lsc) ++ break; ++ priv = dev->data->dev_private; ++ if (priv->if_index == if_index) { ++ /* Block logical LSC events. */ ++ uint16_t prev_status = dev->data->dev_link.link_status; ++ ++ if (mlx5_link_update(dev, 0) < 0) ++ DRV_LOG(ERR, "Failed to update link status: %s", ++ rte_strerror(rte_errno)); ++ else if (prev_status != dev->data->dev_link.link_status) ++ rte_eth_dev_callback_process ++ (dev, RTE_ETH_EVENT_INTR_LSC, NULL); ++ break; ++ } ++ } ++} ++ ++void ++mlx5_dev_interrupt_handler_nl(void *arg) ++{ ++ struct mlx5_dev_ctx_shared *sh = arg; ++ int nlsk_fd = sh->intr_handle_nl.fd; ++ ++ if (nlsk_fd < 0) ++ return; ++ if (mlx5_nl_read_events(nlsk_fd, mlx5_dev_interrupt_nl_cb, sh) < 0) ++ DRV_LOG(ERR, "Failed to process Netlink events: %s", ++ rte_strerror(rte_errno)); ++} ++ + /** + * Handle shared asynchronous events the NIC (removal event + * and link status change). Supports multiport IB device. 
+@@ -793,18 +845,6 @@ mlx5_dev_interrupt_handler(void *cb_arg) + tmp = sh->port[tmp - 1].ih_port_id; + dev = &rte_eth_devices[tmp]; + MLX5_ASSERT(dev); +- if ((event.event_type == IBV_EVENT_PORT_ACTIVE || +- event.event_type == IBV_EVENT_PORT_ERR) && +- dev->data->dev_conf.intr_conf.lsc) { +- mlx5_glue->ack_async_event(&event); +- if (mlx5_link_update(dev, 0) == -EAGAIN) { +- usleep(0); +- continue; +- } +- rte_eth_dev_callback_process +- (dev, RTE_ETH_EVENT_INTR_LSC, NULL); +- continue; +- } + DRV_LOG(DEBUG, + "port %u cannot handle an unknown event (type %d)", + dev->data->port_id, event.event_type); +@@ -1013,6 +1053,9 @@ mlx5_sysfs_check_switch_info(bool device_dir, + /* New representors naming schema. */ + switch_info->representor = 1; + break; ++ default: ++ switch_info->master = device_dir; ++ break; + } + } + +@@ -1044,7 +1087,6 @@ mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) + bool port_switch_id_set = false; + bool device_dir = false; + char c; +- int ret; + + if (!if_indextoname(ifindex, ifname)) { + rte_errno = errno; +@@ -1060,10 +1102,9 @@ mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) + + file = fopen(phys_port_name, "rb"); + if (file != NULL) { +- ret = fscanf(file, "%" RTE_STR(IF_NAMESIZE) "s", port_name); +- fclose(file); +- if (ret == 1) ++ if (fgets(port_name, IF_NAMESIZE, file) != NULL) + mlx5_translate_port_name(port_name, &data); ++ fclose(file); + } + file = fopen(phys_switch_id, "rb"); + if (file == NULL) { +@@ -1169,7 +1210,7 @@ mlx5_get_module_info(struct rte_eth_dev *dev, + }; + int ret = 0; + +- if (!dev || !modinfo) { ++ if (!dev) { + DRV_LOG(WARNING, "missing argument, cannot get module info"); + rte_errno = EINVAL; + return -rte_errno; +@@ -1203,7 +1244,7 @@ int mlx5_get_module_eeprom(struct rte_eth_dev *dev, + struct ifreq ifr; + int ret = 0; + +- if (!dev || !info) { ++ if (!dev) { + DRV_LOG(WARNING, "missing argument, cannot get module eeprom"); + rte_errno = EINVAL; + return -rte_errno; +diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_mp_os.c b/dpdk/drivers/net/mlx5/linux/mlx5_mp_os.c +index 95372e2084..8567e43471 100644 +--- a/dpdk/drivers/net/mlx5/linux/mlx5_mp_os.c ++++ b/dpdk/drivers/net/mlx5/linux/mlx5_mp_os.c +@@ -130,7 +130,6 @@ struct rte_mp_msg mp_res; + switch (param->type) { + case MLX5_MP_REQ_START_RXTX: + DRV_LOG(INFO, "port %u starting datapath", dev->data->port_id); +- rte_mb(); + dev->rx_pkt_burst = mlx5_select_rx_function(dev); + dev->tx_pkt_burst = mlx5_select_tx_function(dev); + ppriv = (struct mlx5_proc_priv *)dev->process_private; +@@ -147,6 +146,7 @@ struct rte_mp_msg mp_res; + return -rte_errno; + } + } ++ rte_mb(); + mp_init_msg(&priv->mp_id, &mp_res, param->type); + res->result = 0; + ret = rte_mp_reply(&mp_res, peer); +diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_os.c b/dpdk/drivers/net/mlx5/linux/mlx5_os.c +index 91001473b0..8bd717e6a9 100644 +--- a/dpdk/drivers/net/mlx5/linux/mlx5_os.c ++++ b/dpdk/drivers/net/mlx5/linux/mlx5_os.c +@@ -74,7 +74,7 @@ static struct mlx5_local_data mlx5_local_data; + * Pointer to RQ channel object, which includes the channel fd + * + * @param[out] fd +- * The file descriptor (representing the intetrrupt) used in this channel. ++ * The file descriptor (representing the interrupt) used in this channel. + * + * @return + * 0 on successfully setting the fd to non-blocking, non-zero otherwise. 
+@@ -147,6 +147,8 @@ mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *device_attr) + #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT + device_attr->tunnel_offloads_caps = dv_attr.tunnel_offloads_caps; + #endif ++ strlcpy(device_attr->fw_ver, attr_ex.orig_attr.fw_ver, ++ sizeof(device_attr->fw_ver)); + + return err; + } +@@ -319,7 +321,17 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv) + goto error; + } + sh->fdb_domain = domain; +- sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop(); ++ } ++ /* ++ * The drop action is just some dummy placeholder in rdma-core. It ++ * does not belong to domains and has no any attributes, and, can be ++ * shared by the entire device. ++ */ ++ sh->dr_drop_action = mlx5_glue->dr_create_flow_action_drop(); ++ if (!sh->dr_drop_action) { ++ DRV_LOG(ERR, "FDB mlx5dv_dr_create_flow_action_drop"); ++ err = errno; ++ goto error; + } + #endif + if (!sh->tunnel_hub) +@@ -355,9 +367,9 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv) + mlx5_glue->dr_destroy_domain(sh->fdb_domain); + sh->fdb_domain = NULL; + } +- if (sh->esw_drop_action) { +- mlx5_glue->destroy_flow_action(sh->esw_drop_action); +- sh->esw_drop_action = NULL; ++ if (sh->dr_drop_action) { ++ mlx5_glue->destroy_flow_action(sh->dr_drop_action); ++ sh->dr_drop_action = NULL; + } + if (sh->pop_vlan_action) { + mlx5_glue->destroy_flow_action(sh->pop_vlan_action); +@@ -412,9 +424,9 @@ mlx5_os_free_shared_dr(struct mlx5_priv *priv) + mlx5_glue->dr_destroy_domain(sh->fdb_domain); + sh->fdb_domain = NULL; + } +- if (sh->esw_drop_action) { +- mlx5_glue->destroy_flow_action(sh->esw_drop_action); +- sh->esw_drop_action = NULL; ++ if (sh->dr_drop_action) { ++ mlx5_glue->destroy_flow_action(sh->dr_drop_action); ++ sh->dr_drop_action = NULL; + } + #endif + if (sh->pop_vlan_action) { +@@ -639,6 +651,79 @@ mlx5_flow_counter_mode_config(struct rte_eth_dev *dev __rte_unused) + #endif + } + ++/** ++ * DR flow drop action support detect. ++ * ++ * @param dev ++ * Pointer to rte_eth_dev structure. ++ * ++ */ ++static void ++mlx5_flow_drop_action_config(struct rte_eth_dev *dev __rte_unused) ++{ ++#ifdef HAVE_MLX5DV_DR ++ struct mlx5_priv *priv = dev->data->dev_private; ++ ++ if (!priv->config.dv_flow_en || !priv->sh->dr_drop_action) ++ return; ++ /** ++ * DR supports drop action placeholder when it is supported; ++ * otherwise, use the queue drop action. ++ */ ++ if (mlx5_flow_discover_dr_action_support(dev)) ++ priv->root_drop_action = priv->drop_queue.hrxq->action; ++ else ++ priv->root_drop_action = priv->sh->dr_drop_action; ++#endif ++} ++ ++static void ++mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev) ++{ ++ struct mlx5_priv *priv = dev->data->dev_private; ++ void *ctx = priv->sh->ctx; ++ ++ priv->q_counters = mlx5_devx_cmd_queue_counter_alloc(ctx); ++ if (!priv->q_counters) { ++ struct ibv_cq *cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0); ++ struct ibv_wq *wq; ++ ++ DRV_LOG(DEBUG, "Port %d queue counter object cannot be created " ++ "by DevX - fall-back to use the kernel driver global " ++ "queue counter.", dev->data->port_id); ++ /* Create WQ by kernel and query its queue counter ID. */ ++ if (cq) { ++ wq = mlx5_glue->create_wq(ctx, ++ &(struct ibv_wq_init_attr){ ++ .wq_type = IBV_WQT_RQ, ++ .max_wr = 1, ++ .max_sge = 1, ++ .pd = priv->sh->pd, ++ .cq = cq, ++ }); ++ if (wq) { ++ /* Counter is assigned only on RDY state. 
*/ ++ int ret = mlx5_glue->modify_wq(wq, ++ &(struct ibv_wq_attr){ ++ .attr_mask = IBV_WQ_ATTR_STATE, ++ .wq_state = IBV_WQS_RDY, ++ }); ++ ++ if (ret == 0) ++ mlx5_devx_cmd_wq_query(wq, ++ &priv->counter_set_id); ++ claim_zero(mlx5_glue->destroy_wq(wq)); ++ } ++ claim_zero(mlx5_glue->destroy_cq(cq)); ++ } ++ } else { ++ priv->counter_set_id = priv->q_counters->id; ++ } ++ if (priv->counter_set_id == 0) ++ DRV_LOG(INFO, "Part of the port %d statistics will not be " ++ "available.", dev->data->port_id); ++} ++ + /** + * Spawn an Ethernet device from Verbs information. + * +@@ -670,22 +755,15 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + int err = 0; + unsigned int hw_padding = 0; + unsigned int mps; +- unsigned int tunnel_en = 0; + unsigned int mpls_en = 0; + unsigned int swp = 0; + unsigned int mprq = 0; +- unsigned int mprq_min_stride_size_n = 0; +- unsigned int mprq_max_stride_size_n = 0; +- unsigned int mprq_min_stride_num_n = 0; +- unsigned int mprq_max_stride_num_n = 0; + struct rte_ether_addr mac; + char name[RTE_ETH_NAME_MAX_LEN]; + int own_domain_id = 0; + uint16_t port_id; + unsigned int i; +-#ifdef HAVE_MLX5DV_DR_DEVX_PORT +- struct mlx5dv_devx_port devx_port = { .comp_mask = 0 }; +-#endif ++ struct mlx5_port_info vport_info = { .query_flags = 0 }; + + /* Determine if this port representor is supposed to be spawned. */ + if (switch_info->representor && dpdk_dev->devargs) { +@@ -790,10 +868,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + strerror(rte_errno)); + goto error; + } +- if (config->dv_miss_info) { +- if (switch_info->master || switch_info->representor) +- config->dv_xmeta_en = MLX5_XMETA_MODE_META16; +- } + mlx5_malloc_mem_select(config->sys_mem_en); + sh = mlx5_alloc_shared_dev_ctx(spawn, config); + if (!sh) +@@ -833,7 +907,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + swp = dv_attr.sw_parsing_caps.sw_parsing_offloads; + DRV_LOG(DEBUG, "SWP support: %u", swp); + #endif +- config->swp = !!swp; ++ config->swp = swp & (MLX5_SW_PARSING_CAP | MLX5_SW_PARSING_CSUM_CAP | ++ MLX5_SW_PARSING_TSO_CAP); + #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT + if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_STRIDING_RQ) { + struct mlx5dv_striding_rq_caps mprq_caps = +@@ -851,13 +926,13 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + mprq_caps.supported_qpts); + DRV_LOG(DEBUG, "device supports Multi-Packet RQ"); + mprq = 1; +- mprq_min_stride_size_n = ++ config->mprq.log_min_stride_size = + mprq_caps.min_single_stride_log_num_of_bytes; +- mprq_max_stride_size_n = ++ config->mprq.log_max_stride_size = + mprq_caps.max_single_stride_log_num_of_bytes; +- mprq_min_stride_num_n = ++ config->mprq.log_min_stride_num = + mprq_caps.min_single_wqe_log_num_of_strides; +- mprq_max_stride_num_n = ++ config->mprq.log_max_stride_num = + mprq_caps.max_single_wqe_log_num_of_strides; + } + #endif +@@ -865,20 +940,27 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + config->cqe_comp = 1; + #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT + if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) { +- tunnel_en = ((dv_attr.tunnel_offloads_caps & +- MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_VXLAN) && +- (dv_attr.tunnel_offloads_caps & +- MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GRE) && +- (dv_attr.tunnel_offloads_caps & +- MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GENEVE)); +- } +- DRV_LOG(DEBUG, "tunnel offloading is %ssupported", +- tunnel_en ? 
"" : "not "); ++ config->tunnel_en = dv_attr.tunnel_offloads_caps & ++ (MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_VXLAN | ++ MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GRE | ++ MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GENEVE); ++ } ++ if (config->tunnel_en) { ++ DRV_LOG(DEBUG, "tunnel offloading is supported for %s%s%s", ++ config->tunnel_en & ++ MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_VXLAN ? "[VXLAN]" : "", ++ config->tunnel_en & ++ MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GRE ? "[GRE]" : "", ++ config->tunnel_en & ++ MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GENEVE ? "[GENEVE]" : "" ++ ); ++ } else { ++ DRV_LOG(DEBUG, "tunnel offloading is not supported"); ++ } + #else + DRV_LOG(WARNING, + "tunnel offloading disabled due to old OFED/rdma-core version"); + #endif +- config->tunnel_en = tunnel_en; + #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT + mpls_en = ((dv_attr.tunnel_offloads_caps & + MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE) && +@@ -920,37 +1002,35 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + priv->pci_dev = spawn->pci_dev; + priv->mtu = RTE_ETHER_MTU; + /* Some internal functions rely on Netlink sockets, open them now. */ +- priv->nl_socket_rdma = mlx5_nl_init(NETLINK_RDMA); +- priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE); ++ priv->nl_socket_rdma = mlx5_nl_init(NETLINK_RDMA, 0); ++ priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE, 0); + priv->representor = !!switch_info->representor; + priv->master = !!switch_info->master; + priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; + priv->vport_meta_tag = 0; + priv->vport_meta_mask = 0; + priv->pf_bond = spawn->pf_bond; +-#ifdef HAVE_MLX5DV_DR_DEVX_PORT + /* +- * The DevX port query API is implemented. E-Switch may use +- * either vport or reg_c[0] metadata register to match on +- * vport index. The engaged part of metadata register is +- * defined by mask. ++ * If we have E-Switch we should determine the vport attributes. ++ * E-Switch may use either source vport field or reg_c[0] metadata ++ * register to match on vport index. The engaged part of metadata ++ * register is defined by mask. 
+ */ + if (switch_info->representor || switch_info->master) { +- devx_port.comp_mask = MLX5DV_DEVX_PORT_VPORT | +- MLX5DV_DEVX_PORT_MATCH_REG_C_0; +- err = mlx5_glue->devx_port_query(sh->ctx, spawn->phys_port, +- &devx_port); ++ err = mlx5_glue->devx_port_query(sh->ctx, ++ spawn->phys_port, ++ &vport_info); + if (err) { + DRV_LOG(WARNING, + "can't query devx port %d on device %s", + spawn->phys_port, + mlx5_os_get_dev_device_name(spawn->phys_dev)); +- devx_port.comp_mask = 0; ++ vport_info.query_flags = 0; + } + } +- if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) { +- priv->vport_meta_tag = devx_port.reg_c_0.value; +- priv->vport_meta_mask = devx_port.reg_c_0.mask; ++ if (vport_info.query_flags & MLX5_PORT_QUERY_REG_C0) { ++ priv->vport_meta_tag = vport_info.vport_meta_tag; ++ priv->vport_meta_mask = vport_info.vport_meta_mask; + if (!priv->vport_meta_mask) { + DRV_LOG(ERR, "vport zero mask for port %d" + " on bonding device %s", +@@ -970,9 +1050,10 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + goto error; + } + } +- if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) { +- priv->vport_id = devx_port.vport_num; +- } else if (spawn->pf_bond >= 0) { ++ if (vport_info.query_flags & MLX5_PORT_QUERY_VPORT) { ++ priv->vport_id = vport_info.vport_id; ++ } else if (spawn->pf_bond >= 0 && ++ (switch_info->representor || switch_info->master)) { + DRV_LOG(ERR, "can't deduce vport index for port %d" + " on bonding device %s", + spawn->phys_port, +@@ -980,25 +1061,21 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + err = ENOTSUP; + goto error; + } else { +- /* Suppose vport index in compatible way. */ ++ /* ++ * Suppose vport index in compatible way. Kernel/rdma_core ++ * support single E-Switch per PF configurations only and ++ * vport_id field contains the vport index for associated VF, ++ * which is deduced from representor port name. ++ * For example, let's have the IB device port 10, it has ++ * attached network device eth0, which has port name attribute ++ * pf0vf2, we can deduce the VF number as 2, and set vport index ++ * as 3 (2+1). This assigning schema should be changed if the ++ * multiple E-Switch instances per PF configurations or/and PCI ++ * subfunctions are added. ++ */ + priv->vport_id = switch_info->representor ? + switch_info->port_name + 1 : -1; + } +-#else +- /* +- * Kernel/rdma_core support single E-Switch per PF configurations +- * only and vport_id field contains the vport index for +- * associated VF, which is deduced from representor port name. +- * For example, let's have the IB device port 10, it has +- * attached network device eth0, which has port name attribute +- * pf0vf2, we can deduce the VF number as 2, and set vport index +- * as 3 (2+1). This assigning schema should be changed if the +- * multiple E-Switch instances per PF configurations or/and PCI +- * subfunctions are added. +- */ +- priv->vport_id = switch_info->representor ? +- switch_info->port_name + 1 : -1; +-#endif + /* representor_id field keeps the unmodified VF index. */ + priv->representor_id = switch_info->representor ? + switch_info->port_name : -1; +@@ -1030,6 +1107,39 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + } + /* Override some values set by hardware configuration. */ + mlx5_args(config, dpdk_dev->devargs); ++ /* Update final values for devargs before check sibling config. 
*/ ++#if !defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_MLX5DV_DR) ++ if (config->dv_flow_en) { ++ DRV_LOG(WARNING, "DV flow is not supported."); ++ config->dv_flow_en = 0; ++ } ++#endif ++ if (config->devx) { ++ err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config->hca_attr); ++ if (err) { ++ err = -err; ++ goto error; ++ } ++ } ++#ifdef HAVE_MLX5DV_DR_ESWITCH ++ if (!(config->hca_attr.eswitch_manager && config->dv_flow_en && ++ (switch_info->representor || switch_info->master))) ++ config->dv_esw_en = 0; ++#else ++ config->dv_esw_en = 0; ++#endif ++ if (config->dv_miss_info) { ++ if (switch_info->master || switch_info->representor) ++ config->dv_xmeta_en = MLX5_XMETA_MODE_META16; ++ } ++ if (!priv->config.dv_esw_en && ++ priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) { ++ DRV_LOG(WARNING, ++ "Metadata mode %u is not supported (no E-Switch).", ++ priv->config.dv_xmeta_en); ++ priv->config.dv_xmeta_en = MLX5_XMETA_MODE_LEGACY; ++ } ++ /* Check sibling device configurations. */ + err = mlx5_dev_check_sibling_config(priv, config); + if (err) + goto error; +@@ -1040,12 +1150,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + #if !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) && \ + !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45) + DRV_LOG(DEBUG, "counters are not supported"); +-#endif +-#if !defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_MLX5DV_DR) +- if (config->dv_flow_en) { +- DRV_LOG(WARNING, "DV flow is not supported"); +- config->dv_flow_en = 0; +- } + #endif + config->ind_table_max_size = + sh->device_attr.max_rwq_indirection_table_size; +@@ -1093,22 +1197,15 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + config->mps == MLX5_MPW_ENHANCED ? "enhanced " : + config->mps == MLX5_MPW ? "legacy " : "", + config->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled"); ++ config->mprq.log_min_stride_wqe_size = ++ MLX5_MPRQ_LOG_MIN_STRIDE_WQE_SIZE; ++ config->mprq.log_stride_num = MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM; + if (config->devx) { +- err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config->hca_attr); +- if (err) { +- err = -err; +- goto error; +- } +- /* Check relax ordering support. */ +- if (!haswell_broadwell_cpu) { +- sh->cmng.relaxed_ordering_write = +- config->hca_attr.relaxed_ordering_write; +- sh->cmng.relaxed_ordering_read = +- config->hca_attr.relaxed_ordering_read; +- } else { +- sh->cmng.relaxed_ordering_read = 0; +- sh->cmng.relaxed_ordering_write = 0; +- } ++ config->mprq.log_min_stride_wqe_size = ++ config->hca_attr.log_min_stride_wqe_sz; ++ sh->rq_ts_format = config->hca_attr.rq_ts_format; ++ sh->sq_ts_format = config->hca_attr.sq_ts_format; ++ sh->qp_ts_format = config->hca_attr.qp_ts_format; + /* Check for LRO support. */ + if (config->dest_tir && config->hca_attr.lro_cap && + config->dv_flow_en) { +@@ -1304,36 +1401,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + config->hw_fcs_strip = 0; + DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported", + (config->hw_fcs_strip ? 
"" : "not ")); +- if (config->mprq.enabled && mprq) { +- if (config->mprq.stride_num_n && +- (config->mprq.stride_num_n > mprq_max_stride_num_n || +- config->mprq.stride_num_n < mprq_min_stride_num_n)) { +- config->mprq.stride_num_n = +- RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N, +- mprq_min_stride_num_n), +- mprq_max_stride_num_n); +- DRV_LOG(WARNING, +- "the number of strides" +- " for Multi-Packet RQ is out of range," +- " setting default value (%u)", +- 1 << config->mprq.stride_num_n); +- } +- if (config->mprq.stride_size_n && +- (config->mprq.stride_size_n > mprq_max_stride_size_n || +- config->mprq.stride_size_n < mprq_min_stride_size_n)) { +- config->mprq.stride_size_n = +- RTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_SIZE_N, +- mprq_min_stride_size_n), +- mprq_max_stride_size_n); +- DRV_LOG(WARNING, +- "the size of a stride" +- " for Multi-Packet RQ is out of range," +- " setting default value (%u)", +- 1 << config->mprq.stride_size_n); +- } +- config->mprq.min_stride_size_n = mprq_min_stride_size_n; +- config->mprq.max_stride_size_n = mprq_max_stride_size_n; +- } else if (config->mprq.enabled && !mprq) { ++ if (config->mprq.enabled && !mprq) { + DRV_LOG(WARNING, "Multi-Packet RQ isn't supported"); + config->mprq.enabled = 0; + } +@@ -1451,20 +1519,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + /* Bring Ethernet device up. */ + DRV_LOG(DEBUG, "port %u forcing Ethernet interface up", + eth_dev->data->port_id); +- mlx5_set_link_up(eth_dev); +- /* +- * Even though the interrupt handler is not installed yet, +- * interrupts will still trigger on the async_fd from +- * Verbs context returned by ibv_open_device(). +- */ + mlx5_link_update(eth_dev, 0); +-#ifdef HAVE_MLX5DV_DR_ESWITCH +- if (!(config->hca_attr.eswitch_manager && config->dv_flow_en && +- (switch_info->representor || switch_info->master))) +- config->dv_esw_en = 0; +-#else +- config->dv_esw_en = 0; +-#endif ++ /* Watch LSC interrupts between port probe and port start. */ ++ priv->sh->port[priv->dev_port - 1].nl_ih_port_id = ++ eth_dev->data->port_id; ++ mlx5_set_link_up(eth_dev); + /* Detect minimal data bytes to inline. */ + mlx5_set_min_inline(spawn, config); + /* Store device configuration on private structure. */ +@@ -1492,10 +1551,28 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + /* Use specific wrappers for Tx object. */ + priv->obj_ops.txq_obj_new = mlx5_os_txq_obj_new; + priv->obj_ops.txq_obj_release = mlx5_os_txq_obj_release; +- ++ priv->obj_ops.lb_dummy_queue_create = ++ mlx5_rxq_ibv_obj_dummy_lb_create; ++ priv->obj_ops.lb_dummy_queue_release = ++ mlx5_rxq_ibv_obj_dummy_lb_release; ++ mlx5_queue_counter_id_prepare(eth_dev); + } else { + priv->obj_ops = ibv_obj_ops; + } ++ if (config->tx_pp && ++ priv->obj_ops.txq_obj_new != mlx5_os_txq_obj_new) { ++ /* ++ * HAVE_MLX5DV_DEVX_UAR_OFFSET is required to support ++ * packet pacing and already checked above. ++ * Hence, we should only make sure the SQs will be created ++ * with DevX, not with Verbs. ++ * Verbs allocates the SQ UAR on its own and it can't be shared ++ * with Clock Queue UAR as required for Tx scheduling. 
++ */ ++ DRV_LOG(ERR, "Verbs SQs, UAR can't be shared as required for packet pacing"); ++ err = ENODEV; ++ goto error; ++ } + priv->drop_queue.hrxq = mlx5_drop_action_create(eth_dev); + if (!priv->drop_queue.hrxq) + goto error; +@@ -1506,12 +1583,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + goto error; + } + priv->config.flow_prio = err; +- if (!priv->config.dv_esw_en && +- priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) { +- DRV_LOG(WARNING, "metadata mode %u is not supported " +- "(no E-Switch)", priv->config.dv_xmeta_en); +- priv->config.dv_xmeta_en = MLX5_XMETA_MODE_LEGACY; +- } + mlx5_set_metadata_mask(eth_dev); + if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && + !priv->sh->dv_regc0_mask) { +@@ -1561,6 +1632,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + } + rte_spinlock_init(&priv->shared_act_sl); + mlx5_flow_counter_mode_config(eth_dev); ++ mlx5_flow_drop_action_config(eth_dev); + if (priv->config.dv_flow_en) + eth_dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE; + return eth_dev; +@@ -1803,8 +1875,8 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + * matching ones, gathering into the list. + */ + struct ibv_device *ibv_match[ret + 1]; +- int nl_route = mlx5_nl_init(NETLINK_ROUTE); +- int nl_rdma = mlx5_nl_init(NETLINK_RDMA); ++ int nl_route = mlx5_nl_init(NETLINK_ROUTE, 0); ++ int nl_rdma = mlx5_nl_init(NETLINK_RDMA, 0); + unsigned int i; + + while (ret-- > 0) { +@@ -1878,19 +1950,6 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + goto exit; + } + } +-#ifndef HAVE_MLX5DV_DR_DEVX_PORT +- if (bd >= 0) { +- /* +- * This may happen if there is VF LAG kernel support and +- * application is compiled with older rdma_core library. +- */ +- DRV_LOG(ERR, +- "No kernel/verbs support for VF LAG bonding found."); +- rte_errno = ENOTSUP; +- ret = -rte_errno; +- goto exit; +- } +-#endif + /* + * Now we can determine the maximal + * amount of devices to be spawned. +@@ -1953,10 +2012,18 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + (list[ns].ifindex, + &list[ns].info); + } +-#ifdef HAVE_MLX5DV_DR_DEVX_PORT + if (!ret && bd >= 0) { + switch (list[ns].info.name_type) { + case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: ++ if (np == 1) { ++ /* ++ * Force standalone bonding ++ * device for ROCE LAG ++ * configurations. ++ */ ++ list[ns].info.master = 0; ++ list[ns].info.representor = 0; ++ } + if (list[ns].info.port_name == bd) + ns++; + break; +@@ -1971,7 +2038,6 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + } + continue; + } +-#endif + if (!ret && (list[ns].info.representor ^ + list[ns].info.master)) + ns++; +@@ -2095,6 +2161,18 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + ret = -rte_errno; + goto exit; + } ++ /* ++ * New kernels may add the switch_id attribute for the case ++ * there is no E-Switch and we wrongly recognized the ++ * only device as master. Override this if there is the ++ * single device with single port and new device name ++ * format present. ++ */ ++ if (nd == 1 && ++ list[0].info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) { ++ list[0].info.master = 0; ++ list[0].info.representor = 0; ++ } + } + MLX5_ASSERT(ns); + /* +@@ -2149,6 +2227,31 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + } + restore = list[i].eth_dev->data->dev_flags; + rte_eth_copy_pci_info(list[i].eth_dev, pci_dev); ++ /** ++ * Each representor has a dedicated interrupts vector. 
++ * rte_eth_copy_pci_info() assigns PF interrupts handle to ++ * representor eth_dev object because representor and PF ++ * share the same PCI address. ++ * Override representor device with a dedicated ++ * interrupts handle here. ++ * Representor interrupts handle is released in mlx5_dev_stop(). ++ */ ++ if (list[i].info.representor) { ++ struct rte_intr_handle *intr_handle; ++ intr_handle = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, ++ sizeof(*intr_handle), 0, ++ SOCKET_ID_ANY); ++ if (!intr_handle) { ++ DRV_LOG(ERR, ++ "port %u failed to allocate memory for interrupt handler " ++ "Rx interrupts will not be supported", ++ i); ++ rte_errno = ENOMEM; ++ ret = -rte_errno; ++ goto exit; ++ } ++ list[i].eth_dev->intr_handle = intr_handle; ++ } + /* Restore non-PCI flags cleared by the above call. */ + list[i].eth_dev->data->dev_flags |= restore; + rte_eth_dev_probing_finish(list[i].eth_dev); +@@ -2324,6 +2427,49 @@ mlx5_os_open_device(const struct mlx5_dev_spawn_data *spawn, + return err; + } + ++/** ++ * Cleanup resources when the last device is closed. ++ */ ++void ++mlx5_os_net_cleanup(void) ++{ ++ mlx5_pmd_socket_uninit(); ++} ++ ++static int ++mlx5_os_dev_shared_handler_install_lsc(struct mlx5_dev_ctx_shared *sh) ++{ ++ int nlsk_fd, flags, ret; ++ ++ nlsk_fd = mlx5_nl_init(NETLINK_ROUTE, RTMGRP_LINK); ++ if (nlsk_fd < 0) { ++ DRV_LOG(ERR, "Failed to create a socket for Netlink events: %s", ++ rte_strerror(rte_errno)); ++ return -1; ++ } ++ flags = fcntl(nlsk_fd, F_GETFL); ++ ret = fcntl(nlsk_fd, F_SETFL, flags | O_NONBLOCK); ++ if (ret != 0) { ++ DRV_LOG(ERR, "Failed to make Netlink event socket non-blocking: %s", ++ strerror(errno)); ++ rte_errno = errno; ++ goto error; ++ } ++ sh->intr_handle_nl.type = RTE_INTR_HANDLE_EXT; ++ sh->intr_handle_nl.fd = nlsk_fd; ++ if (rte_intr_callback_register(&sh->intr_handle_nl, ++ mlx5_dev_interrupt_handler_nl, ++ sh) != 0) { ++ DRV_LOG(ERR, "Failed to register Netlink events interrupt"); ++ sh->intr_handle_nl.fd = -1; ++ goto error; ++ } ++ return 0; ++error: ++ close(nlsk_fd); ++ return -1; ++} ++ + /** + * Install shared asynchronous device events handler. 
+ * This function is implemented to support event sharing +@@ -2354,6 +2500,11 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh) + sh->intr_handle.fd = -1; + } + } ++ sh->intr_handle_nl.fd = -1; ++ if (mlx5_os_dev_shared_handler_install_lsc(sh) < 0) { ++ DRV_LOG(INFO, "Fail to install the shared Netlink event handler."); ++ sh->intr_handle_nl.fd = -1; ++ } + if (sh->devx) { + #ifdef HAVE_IBV_DEVX_ASYNC + sh->intr_handle_devx.fd = -1; +@@ -2394,9 +2545,18 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh) + void + mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh) + { ++ int nlsk_fd; ++ + if (sh->intr_handle.fd >= 0) + mlx5_intr_callback_unregister(&sh->intr_handle, + mlx5_dev_interrupt_handler, sh); ++ nlsk_fd = sh->intr_handle_nl.fd; ++ if (nlsk_fd >= 0) { ++ mlx5_intr_callback_unregister(&sh->intr_handle_nl, ++ mlx5_dev_interrupt_handler_nl, ++ sh); ++ close(nlsk_fd); ++ } + #ifdef HAVE_IBV_DEVX_ASYNC + if (sh->intr_handle_devx.fd >= 0) + rte_intr_callback_unregister(&sh->intr_handle_devx, +@@ -2427,6 +2587,10 @@ mlx5_os_read_dev_stat(struct mlx5_priv *priv, const char *ctr_name, + int fd; + + if (priv->sh) { ++ if (priv->q_counters != NULL && ++ strcmp(ctr_name, "out_of_buffer") == 0) ++ return mlx5_devx_cmd_queue_counter_query ++ (priv->q_counters, 0, (uint32_t *)stat); + MKSTR(path, "%s/ports/%d/hw_counters/%s", + priv->sh->ibdev_path, + priv->dev_port, +diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_os.h b/dpdk/drivers/net/mlx5/linux/mlx5_os.h +index 7dbacceabe..a524f71d2b 100644 +--- a/dpdk/drivers/net/mlx5/linux/mlx5_os.h ++++ b/dpdk/drivers/net/mlx5/linux/mlx5_os.h +@@ -19,4 +19,31 @@ enum { + #define PCI_DRV_FLAGS (RTE_PCI_DRV_INTR_LSC | \ + RTE_PCI_DRV_INTR_RMV | \ + RTE_PCI_DRV_PROBE_AGAIN) ++ ++enum mlx5_sw_parsing_offloads { ++#ifdef HAVE_IBV_MLX5_MOD_SWP ++ MLX5_SW_PARSING_CAP = MLX5DV_SW_PARSING, ++ MLX5_SW_PARSING_CSUM_CAP = MLX5DV_SW_PARSING_CSUM, ++ MLX5_SW_PARSING_TSO_CAP = MLX5DV_SW_PARSING_LSO, ++#else ++ MLX5_SW_PARSING_CAP = 0, ++ MLX5_SW_PARSING_CSUM_CAP = 0, ++ MLX5_SW_PARSING_TSO_CAP = 0, ++#endif ++}; ++ ++enum mlx5_tunnel_offloads { ++#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT ++ MLX5_TUNNELED_OFFLOADS_VXLAN_CAP = ++ MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_VXLAN, ++ MLX5_TUNNELED_OFFLOADS_GRE_CAP = ++ MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GRE, ++ MLX5_TUNNELED_OFFLOADS_GENEVE_CAP = ++ MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GENEVE, ++#else ++ MLX5_TUNNELED_OFFLOADS_VXLAN_CAP = 0, ++ MLX5_TUNNELED_OFFLOADS_GRE_CAP = 0, ++ MLX5_TUNNELED_OFFLOADS_GENEVE_CAP = 0, ++#endif ++}; + #endif /* RTE_PMD_MLX5_OS_H_ */ +diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_socket.c b/dpdk/drivers/net/mlx5/linux/mlx5_socket.c +index 1938453980..6a12a66669 100644 +--- a/dpdk/drivers/net/mlx5/linux/mlx5_socket.c ++++ b/dpdk/drivers/net/mlx5/linux/mlx5_socket.c +@@ -2,10 +2,6 @@ + * Copyright 2019 Mellanox Technologies, Ltd + */ + +-#ifndef _GNU_SOURCE +-#define _GNU_SOURCE +-#endif +- + #include + #include + #include +@@ -22,7 +18,7 @@ + + #define MLX5_SOCKET_PATH "/var/tmp/dpdk_net_mlx5_%d" + +-int server_socket; /* Unix socket for primary process. */ ++int server_socket = -1; /* Unix socket for primary process. */ + struct rte_intr_handle server_intr_handle; /* Interrupt handler. 
*/ + + /** +@@ -121,7 +117,7 @@ mlx5_pmd_socket_handle(void *cb __rte_unused) + static int + mlx5_pmd_interrupt_handler_install(void) + { +- MLX5_ASSERT(server_socket); ++ MLX5_ASSERT(server_socket != -1); + server_intr_handle.fd = server_socket; + server_intr_handle.type = RTE_INTR_HANDLE_EXT; + return rte_intr_callback_register(&server_intr_handle, +@@ -134,7 +130,7 @@ mlx5_pmd_interrupt_handler_install(void) + static void + mlx5_pmd_interrupt_handler_uninstall(void) + { +- if (server_socket) { ++ if (server_socket != -1) { + mlx5_intr_callback_unregister(&server_intr_handle, + mlx5_pmd_socket_handle, + NULL); +@@ -144,10 +140,7 @@ mlx5_pmd_interrupt_handler_uninstall(void) + } + + /** +- * Initialise the socket to communicate with the secondary process +- * +- * @param[in] dev +- * Pointer to Ethernet device. ++ * Initialise the socket to communicate with external tools. + * + * @return + * 0 on success, a negative value otherwise. +@@ -162,12 +155,8 @@ mlx5_pmd_socket_init(void) + int flags; + + MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); +- if (server_socket) ++ if (server_socket != -1) + return 0; +- /* +- * Initialize the socket to communicate with the secondary +- * process. +- */ + ret = socket(AF_UNIX, SOCK_STREAM, 0); + if (ret < 0) { + DRV_LOG(WARNING, "Failed to open mlx5 socket: %s", +@@ -177,10 +166,10 @@ mlx5_pmd_socket_init(void) + server_socket = ret; + flags = fcntl(server_socket, F_GETFL, 0); + if (flags == -1) +- goto error; ++ goto close; + ret = fcntl(server_socket, F_SETFL, flags | O_NONBLOCK); + if (ret < 0) +- goto error; ++ goto close; + snprintf(sun.sun_path, sizeof(sun.sun_path), MLX5_SOCKET_PATH, + getpid()); + remove(sun.sun_path); +@@ -188,25 +177,26 @@ mlx5_pmd_socket_init(void) + if (ret < 0) { + DRV_LOG(WARNING, + "cannot bind mlx5 socket: %s", strerror(errno)); +- goto close; ++ goto remove; + } + ret = listen(server_socket, 0); + if (ret < 0) { + DRV_LOG(WARNING, "cannot listen on mlx5 socket: %s", + strerror(errno)); +- goto close; ++ goto remove; + } + if (mlx5_pmd_interrupt_handler_install()) { + DRV_LOG(WARNING, "cannot register interrupt handler for mlx5 socket: %s", + strerror(errno)); +- goto close; ++ goto remove; + } + return 0; +-close: ++remove: + remove(sun.sun_path); +-error: ++close: + claim_zero(close(server_socket)); +- server_socket = 0; ++ server_socket = -1; ++error: + DRV_LOG(ERR, "Cannot initialize socket: %s", strerror(errno)); + return -errno; + } +@@ -214,13 +204,14 @@ mlx5_pmd_socket_init(void) + /** + * Un-Initialize the pmd socket + */ +-RTE_FINI(mlx5_pmd_socket_uninit) ++void ++mlx5_pmd_socket_uninit(void) + { +- if (!server_socket) ++ if (server_socket == -1) + return; + mlx5_pmd_interrupt_handler_uninstall(); + claim_zero(close(server_socket)); +- server_socket = 0; ++ server_socket = -1; + MKSTR(path, MLX5_SOCKET_PATH, getpid()); + claim_zero(remove(path)); + } +diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_verbs.c b/dpdk/drivers/net/mlx5/linux/mlx5_verbs.c +index 6b98a4c166..ccb6918394 100644 +--- a/dpdk/drivers/net/mlx5/linux/mlx5_verbs.c ++++ b/dpdk/drivers/net/mlx5/linux/mlx5_verbs.c +@@ -134,7 +134,6 @@ mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type, + .qp_state = IBV_QPS_RESET, + .port_num = dev_port, + }; +- int attr_mask = (IBV_QP_STATE | IBV_QP_PORT); + int ret; + + if (type != MLX5_TXQ_MOD_RST2RDY) { +@@ -148,10 +147,8 @@ mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type, + if (type == MLX5_TXQ_MOD_RDY2RST) + return 0; + } +- if (type == 
MLX5_TXQ_MOD_ERR2RDY) +- attr_mask = IBV_QP_STATE; + mod.qp_state = IBV_QPS_INIT; +- ret = mlx5_glue->modify_qp(obj->qp, &mod, attr_mask); ++ ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE | IBV_QP_PORT); + if (ret) { + DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s", + strerror(errno)); +@@ -317,8 +314,8 @@ mlx5_rxq_ibv_wq_create(struct rte_eth_dev *dev, uint16_t idx) + + wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ; + *mprq_attr = (struct mlx5dv_striding_rq_init_attr){ +- .single_stride_log_num_of_bytes = rxq_data->strd_sz_n, +- .single_wqe_log_num_of_strides = rxq_data->strd_num_n, ++ .single_stride_log_num_of_bytes = rxq_data->log_strd_sz, ++ .single_wqe_log_num_of_strides = rxq_data->log_strd_num, + .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT, + }; + } +@@ -1027,20 +1024,18 @@ mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) + } + } + #endif +- txq_ctrl->bf_reg = qp.bf.reg; + if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) { + txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset; + DRV_LOG(DEBUG, "Port %u: uar_mmap_offset 0x%" PRIx64 ".", + dev->data->port_id, txq_ctrl->uar_mmap_offset); + } else { + DRV_LOG(ERR, +- "Port %u failed to retrieve UAR info, invalid" +- " libmlx5.so", ++ "Port %u failed to retrieve UAR info, invalid libmlx5.so", + dev->data->port_id); + rte_errno = EINVAL; + goto error; + } +- txq_uar_init(txq_ctrl); ++ txq_uar_init(txq_ctrl, qp.bf.reg); + dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; + return 0; + error: +@@ -1053,6 +1048,125 @@ mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) + return -rte_errno; + } + ++/* ++ * Create the dummy QP with minimal resources for loopback. ++ * ++ * @param dev ++ * Pointer to Ethernet device. ++ * ++ * @return ++ * 0 on success, a negative errno value otherwise and rte_errno is set. ++ */ ++int ++mlx5_rxq_ibv_obj_dummy_lb_create(struct rte_eth_dev *dev) ++{ ++#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT) ++ struct mlx5_priv *priv = dev->data->dev_private; ++ struct mlx5_dev_ctx_shared *sh = priv->sh; ++ struct ibv_context *ctx = sh->ctx; ++ struct mlx5dv_qp_init_attr qp_init_attr = {0}; ++ struct { ++ struct ibv_cq_init_attr_ex ibv; ++ struct mlx5dv_cq_init_attr mlx5; ++ } cq_attr = {{0}}; ++ ++ if (dev->data->dev_conf.lpbk_mode) { ++ /* Allow packet sent from NIC loop back w/o source MAC check. */ ++ qp_init_attr.comp_mask |= ++ MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; ++ qp_init_attr.create_flags |= ++ MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC; ++ } else { ++ return 0; ++ } ++ /* Only need to check refcnt, 0 after "sh" is allocated. */ ++ if (!!(__atomic_fetch_add(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) { ++ MLX5_ASSERT(sh->self_lb.ibv_cq && sh->self_lb.qp); ++ priv->lb_used = 1; ++ return 0; ++ } ++ cq_attr.ibv = (struct ibv_cq_init_attr_ex){ ++ .cqe = 1, ++ .channel = NULL, ++ .comp_mask = 0, ++ }; ++ cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){ ++ .comp_mask = 0, ++ }; ++ /* Only CQ is needed, no WQ(RQ) is required in this case. 
*/ ++ sh->self_lb.ibv_cq = mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(ctx, ++ &cq_attr.ibv, ++ &cq_attr.mlx5)); ++ if (!sh->self_lb.ibv_cq) { ++ DRV_LOG(ERR, "Port %u cannot allocate CQ for loopback.", ++ dev->data->port_id); ++ rte_errno = errno; ++ goto error; ++ } ++ sh->self_lb.qp = mlx5_glue->dv_create_qp(ctx, ++ &(struct ibv_qp_init_attr_ex){ ++ .qp_type = IBV_QPT_RAW_PACKET, ++ .comp_mask = IBV_QP_INIT_ATTR_PD, ++ .pd = sh->pd, ++ .send_cq = sh->self_lb.ibv_cq, ++ .recv_cq = sh->self_lb.ibv_cq, ++ .cap.max_recv_wr = 1, ++ }, ++ &qp_init_attr); ++ if (!sh->self_lb.qp) { ++ DRV_LOG(DEBUG, "Port %u cannot allocate QP for loopback.", ++ dev->data->port_id); ++ rte_errno = errno; ++ goto error; ++ } ++ priv->lb_used = 1; ++ return 0; ++error: ++ if (sh->self_lb.ibv_cq) { ++ claim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq)); ++ sh->self_lb.ibv_cq = NULL; ++ } ++ (void)__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED); ++ return -rte_errno; ++#else ++ RTE_SET_USED(dev); ++ return 0; ++#endif ++} ++ ++/* ++ * Release the dummy queue resources for loopback. ++ * ++ * @param dev ++ * Pointer to Ethernet device. ++ */ ++void ++mlx5_rxq_ibv_obj_dummy_lb_release(struct rte_eth_dev *dev) ++{ ++#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT) ++ struct mlx5_priv *priv = dev->data->dev_private; ++ struct mlx5_dev_ctx_shared *sh = priv->sh; ++ ++ if (!priv->lb_used) ++ return; ++ MLX5_ASSERT(__atomic_load_n(&sh->self_lb.refcnt, __ATOMIC_RELAXED)); ++ if (!(__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) { ++ if (sh->self_lb.qp) { ++ claim_zero(mlx5_glue->destroy_qp(sh->self_lb.qp)); ++ sh->self_lb.qp = NULL; ++ } ++ if (sh->self_lb.ibv_cq) { ++ claim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq)); ++ sh->self_lb.ibv_cq = NULL; ++ } ++ } ++ priv->lb_used = 0; ++#else ++ RTE_SET_USED(dev); ++ return; ++#endif ++} ++ + /** + * Release an Tx verbs queue object. 
+ * +@@ -1082,4 +1196,6 @@ struct mlx5_obj_ops ibv_obj_ops = { + .txq_obj_new = mlx5_txq_ibv_obj_new, + .txq_obj_modify = mlx5_ibv_modify_qp, + .txq_obj_release = mlx5_txq_ibv_obj_release, ++ .lb_dummy_queue_create = NULL, ++ .lb_dummy_queue_release = NULL, + }; +diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_verbs.h b/dpdk/drivers/net/mlx5/linux/mlx5_verbs.h +index 0670f6c47e..e4975051d9 100644 +--- a/dpdk/drivers/net/mlx5/linux/mlx5_verbs.h ++++ b/dpdk/drivers/net/mlx5/linux/mlx5_verbs.h +@@ -14,6 +14,8 @@ struct mlx5_verbs_ops { + + int mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx); + void mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj); ++int mlx5_rxq_ibv_obj_dummy_lb_create(struct rte_eth_dev *dev); ++void mlx5_rxq_ibv_obj_dummy_lb_release(struct rte_eth_dev *dev); + + /* Verbs ops struct */ + extern const struct mlx5_verbs_ops mlx5_verbs_ops; +diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_vlan_os.c b/dpdk/drivers/net/mlx5/linux/mlx5_vlan_os.c +index 40e895e080..598026c414 100644 +--- a/dpdk/drivers/net/mlx5/linux/mlx5_vlan_os.c ++++ b/dpdk/drivers/net/mlx5/linux/mlx5_vlan_os.c +@@ -136,7 +136,7 @@ mlx5_vlan_vmwa_init(struct rte_eth_dev *dev, uint32_t ifindex) + return NULL; + } + rte_spinlock_init(&vmwa->sl); +- vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE); ++ vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE, 0); + if (vmwa->nl_socket < 0) { + DRV_LOG(WARNING, + "Can not create Netlink socket" +diff --git a/dpdk/drivers/net/mlx5/meson.build b/dpdk/drivers/net/mlx5/meson.build +index e7495a764a..701afeab19 100644 +--- a/dpdk/drivers/net/mlx5/meson.build ++++ b/dpdk/drivers/net/mlx5/meson.build +@@ -9,6 +9,7 @@ if not (is_linux or is_windows) + endif + + deps += ['hash', 'common_mlx5'] ++headers = files('rte_pmd_mlx5.h') + sources = files( + 'mlx5.c', + 'mlx5_ethdev.c', +diff --git a/dpdk/drivers/net/mlx5/mlx5.c b/dpdk/drivers/net/mlx5/mlx5.c +index bdb446d2d2..0b82969b4d 100644 +--- a/dpdk/drivers/net/mlx5/mlx5.c ++++ b/dpdk/drivers/net/mlx5/mlx5.c +@@ -477,8 +477,7 @@ mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng) + uint8_t *mem = (uint8_t *)(uintptr_t)mng->raws[0].data; + + LIST_REMOVE(mng, next); +- claim_zero(mlx5_devx_cmd_destroy(mng->dm)); +- claim_zero(mlx5_glue->devx_umem_dereg(mng->umem)); ++ mlx5_os_wrapped_mkey_destroy(&mng->wm); + mlx5_free(mem); + } + +@@ -549,11 +548,13 @@ mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh) + age_info = &sh->port[i].age_info; + if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW)) + continue; +- if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) ++ MLX5_AGE_UNSET(age_info, MLX5_AGE_EVENT_NEW); ++ if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) { ++ MLX5_AGE_UNSET(age_info, MLX5_AGE_TRIGGER); + rte_eth_dev_callback_process + (&rte_eth_devices[sh->port[i].devx_ih_port_id], + RTE_ETH_EVENT_FLOW_AGED, NULL); +- age_info->flags = 0; ++ } + } + } + +@@ -562,7 +563,7 @@ mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh) + * + * @param[in] sh + * Pointer to mlx5_dev_ctx_shared object. +- * @param[in] sh ++ * @param[in] config + * Pointer to user dev config. + */ + static void +@@ -767,7 +768,7 @@ mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh, + * the UAR mapping type into account on UAR setup + * on queue creation. 
+ */ +- DRV_LOG(WARNING, "Failed to allocate Tx DevX UAR (BF)"); ++ DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (BF)"); + uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; + sh->tx_uar = mlx5_glue->devx_alloc_uar + (sh->ctx, uar_mapping); +@@ -780,7 +781,7 @@ mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh, + * If Verbs/kernel does not support "Non-Cached" + * try the "Write-Combining". + */ +- DRV_LOG(WARNING, "Failed to allocate Tx DevX UAR (NC)"); ++ DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (NC)"); + uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF; + sh->tx_uar = mlx5_glue->devx_alloc_uar + (sh->ctx, uar_mapping); +@@ -799,7 +800,7 @@ mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh, + * IB device context, on context closure all UARs + * will be freed, should be no memory/object leakage. + */ +- DRV_LOG(WARNING, "Retrying to allocate Tx DevX UAR"); ++ DRV_LOG(DEBUG, "Retrying to allocate Tx DevX UAR"); + sh->tx_uar = NULL; + } + /* Check whether we finally succeeded with valid UAR allocation. */ +@@ -820,7 +821,7 @@ mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh, + * should be no datapath noticeable impact, + * can try "Non-Cached" mapping safely. + */ +- DRV_LOG(WARNING, "Failed to allocate Rx DevX UAR (BF)"); ++ DRV_LOG(DEBUG, "Failed to allocate Rx DevX UAR (BF)"); + uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC; + sh->devx_rx_uar = mlx5_glue->devx_alloc_uar + (sh->ctx, uar_mapping); +@@ -839,7 +840,7 @@ mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh, + * IB device context, on context closure all UARs + * will be freed, should be no memory/object leakage. + */ +- DRV_LOG(WARNING, "Retrying to allocate Rx DevX UAR"); ++ DRV_LOG(DEBUG, "Retrying to allocate Rx DevX UAR"); + sh->devx_rx_uar = NULL; + } + /* Check whether we finally succeeded with valid UAR allocation. */ +@@ -927,6 +928,7 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, + for (i = 0; i < sh->max_port; i++) { + sh->port[i].ih_port_id = RTE_MAX_ETHPORTS; + sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS; ++ sh->port[i].nl_ih_port_id = RTE_MAX_ETHPORTS; + } + sh->pd = mlx5_glue->alloc_pd(sh->ctx); + if (sh->pd == NULL) { +@@ -1019,6 +1021,8 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, + MLX5_ASSERT(sh); + if (sh->cnt_id_tbl) + mlx5_l3t_destroy(sh->cnt_id_tbl); ++ if (sh->share_cache.cache.table) ++ mlx5_mr_btree_free(&sh->share_cache.cache); + if (sh->tis) + claim_zero(mlx5_devx_cmd_destroy(sh->tis)); + if (sh->td) +@@ -1075,6 +1079,10 @@ mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh) + mlx5_mr_release_cache(&sh->share_cache); + /* Remove context from the global device list. */ + LIST_REMOVE(sh, next); ++ /* Release resources on the last device removal. */ ++ if (LIST_EMPTY(&mlx5_dev_ctx_list)) { ++ mlx5_os_net_cleanup(); ++ } + pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex); + /* + * Ensure there is no async event handler installed. +@@ -1126,6 +1134,7 @@ mlx5_free_table_hash_list(struct mlx5_priv *priv) + if (!sh->flow_tbls) + return; + mlx5_hlist_destroy(sh->flow_tbls); ++ sh->flow_tbls = NULL; + } + + /** +@@ -1241,6 +1250,7 @@ mlx5_proc_priv_init(struct rte_eth_dev *dev) + struct mlx5_proc_priv *ppriv; + size_t ppriv_size; + ++ mlx5_proc_priv_uninit(dev); + /* + * UAR register table follows the process private structure. BlueFlame + * registers for Tx queues are stored in the table. 
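/*
 * Editorial aside (not part of the patch): the loopback dummy-queue hunks in
 * mlx5_verbs.c above, and the "release on last device removal" hunk in
 * mlx5.c, both guard a lazily created, shared resource with a relaxed atomic
 * reference count: the first caller creates the object, later callers only
 * bump the count, and the object is destroyed when the count drops back to
 * zero. A minimal stand-alone sketch of that pattern follows; the names
 * shared_res, res_create and res_destroy are hypothetical stand-ins for the
 * real CQ/QP constructors, and the relaxed memory ordering presumes, as the
 * patch appears to, that create/release calls are already serialized by the
 * control path.
 */
#include <stdint.h>
#include <stdlib.h>

struct shared_res {
	uint16_t refcnt; /* number of current users of the shared object */
	void *obj;       /* lazily created shared object */
};

/* Hypothetical constructor/destructor standing in for the CQ/QP glue calls. */
static void *res_create(void) { return malloc(1); }
static void res_destroy(void *obj) { free(obj); }

static int
shared_res_acquire(struct shared_res *r)
{
	/* First caller sees 0 and creates the object; others only add a ref. */
	if (__atomic_fetch_add(&r->refcnt, 1, __ATOMIC_RELAXED) != 0)
		return 0;
	r->obj = res_create();
	if (r->obj == NULL) {
		/* Roll the reference back on failure, as the patch does. */
		__atomic_sub_fetch(&r->refcnt, 1, __ATOMIC_RELAXED);
		return -1;
	}
	return 0;
}

static void
shared_res_release(struct shared_res *r)
{
	/* Last reference out tears the shared object down. */
	if (__atomic_sub_fetch(&r->refcnt, 1, __ATOMIC_RELAXED) == 0) {
		res_destroy(r->obj);
		r->obj = NULL;
	}
}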
+@@ -1255,6 +1265,8 @@ mlx5_proc_priv_init(struct rte_eth_dev *dev) + } + ppriv->uar_table_sz = priv->txqs_n; + dev->process_private = ppriv; ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) ++ priv->sh->pppriv = ppriv; + return 0; + } + +@@ -1332,6 +1344,11 @@ mlx5_dev_close(struct rte_eth_dev *dev) + priv->rxqs_n = 0; + priv->rxqs = NULL; + } ++ if (priv->representor) { ++ /* Each representor has a dedicated interrupts handler */ ++ mlx5_free(dev->intr_handle); ++ dev->intr_handle = NULL; ++ } + if (priv->txqs != NULL) { + /* XXX race condition if mlx5_tx_burst() is still running. */ + usleep(1000); +@@ -1341,6 +1358,10 @@ mlx5_dev_close(struct rte_eth_dev *dev) + priv->txqs = NULL; + } + mlx5_proc_priv_uninit(dev); ++ if (priv->q_counters) { ++ mlx5_devx_cmd_destroy(priv->q_counters); ++ priv->q_counters = NULL; ++ } + if (priv->drop_queue.hrxq) + mlx5_drop_action_destroy(dev); + if (priv->mreg_cp_tbl) +@@ -1391,7 +1412,7 @@ mlx5_dev_close(struct rte_eth_dev *dev) + /* + * Free the shared context in last turn, because the cleanup + * routines above may use some shared fields, like +- * mlx5_os_mac_addr_flush() uses ibdev_path for retrieveing ++ * mlx5_os_mac_addr_flush() uses ibdev_path for retrieving + * ifindex if Netlink fails. + */ + mlx5_free_shared_dev_ctx(priv->sh); +@@ -1625,9 +1646,9 @@ mlx5_args_check(const char *key, const char *val, void *opaque) + } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) { + config->mprq.enabled = !!tmp; + } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) { +- config->mprq.stride_num_n = tmp; ++ config->mprq.log_stride_num = tmp; + } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_SIZE, key) == 0) { +- config->mprq.stride_size_n = tmp; ++ config->mprq.log_stride_size = tmp; + } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) { + config->mprq.max_memcpy_len = tmp; + } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) { +@@ -1715,7 +1736,7 @@ mlx5_args_check(const char *key, const char *val, void *opaque) + if (tmp != MLX5_RCM_NONE && + tmp != MLX5_RCM_LIGHT && + tmp != MLX5_RCM_AGGR) { +- DRV_LOG(ERR, "Unrecognize %s: \"%s\"", key, val); ++ DRV_LOG(ERR, "Unrecognized %s: \"%s\"", key, val); + rte_errno = EINVAL; + return -rte_errno; + } +@@ -1725,9 +1746,9 @@ mlx5_args_check(const char *key, const char *val, void *opaque) + } else if (strcmp(MLX5_DECAP_EN, key) == 0) { + config->decap_en = !!tmp; + } else { +- DRV_LOG(WARNING, "%s: unknown parameter", key); +- rte_errno = EINVAL; +- return -rte_errno; ++ DRV_LOG(WARNING, ++ "%s: unknown parameter, maybe it's for another class.", ++ key); + } + return 0; + } +@@ -1746,71 +1767,25 @@ mlx5_args_check(const char *key, const char *val, void *opaque) + int + mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) + { +- const char **params = (const char *[]){ +- MLX5_RXQ_CQE_COMP_EN, +- MLX5_RXQ_PKT_PAD_EN, +- MLX5_RX_MPRQ_EN, +- MLX5_RX_MPRQ_LOG_STRIDE_NUM, +- MLX5_RX_MPRQ_LOG_STRIDE_SIZE, +- MLX5_RX_MPRQ_MAX_MEMCPY_LEN, +- MLX5_RXQS_MIN_MPRQ, +- MLX5_TXQ_INLINE, +- MLX5_TXQ_INLINE_MIN, +- MLX5_TXQ_INLINE_MAX, +- MLX5_TXQ_INLINE_MPW, +- MLX5_TXQS_MIN_INLINE, +- MLX5_TXQS_MAX_VEC, +- MLX5_TXQ_MPW_EN, +- MLX5_TXQ_MPW_HDR_DSEG_EN, +- MLX5_TXQ_MAX_INLINE_LEN, +- MLX5_TX_DB_NC, +- MLX5_TX_PP, +- MLX5_TX_SKEW, +- MLX5_TX_VEC_EN, +- MLX5_RX_VEC_EN, +- MLX5_L3_VXLAN_EN, +- MLX5_VF_NL_EN, +- MLX5_DV_ESW_EN, +- MLX5_DV_FLOW_EN, +- MLX5_DV_XMETA_EN, +- MLX5_LACP_BY_USER, +- MLX5_MR_EXT_MEMSEG_EN, +- MLX5_REPRESENTOR, +- MLX5_MAX_DUMP_FILES_NUM, +- MLX5_LRO_TIMEOUT_USEC, +- MLX5_CLASS_ARG_NAME, +- 
MLX5_HP_BUF_SIZE, +- MLX5_RECLAIM_MEM, +- MLX5_SYS_MEM_EN, +- MLX5_DECAP_EN, +- NULL, +- }; + struct rte_kvargs *kvlist; + int ret = 0; +- int i; + + if (devargs == NULL) + return 0; + /* Following UGLY cast is done to pass checkpatch. */ +- kvlist = rte_kvargs_parse(devargs->args, params); ++ kvlist = rte_kvargs_parse(devargs->args, NULL); + if (kvlist == NULL) { + rte_errno = EINVAL; + return -rte_errno; + } + /* Process parameters. */ +- for (i = 0; (params[i] != NULL); ++i) { +- if (rte_kvargs_count(kvlist, params[i])) { +- ret = rte_kvargs_process(kvlist, params[i], +- mlx5_args_check, config); +- if (ret) { +- rte_errno = EINVAL; +- rte_kvargs_free(kvlist); +- return -rte_errno; +- } +- } ++ ret = rte_kvargs_process(kvlist, NULL, mlx5_args_check, config); ++ if (ret) { ++ rte_errno = EINVAL; ++ ret = -rte_errno; + } + rte_kvargs_free(kvlist); +- return 0; ++ return ret; + } + + /** +@@ -1967,17 +1942,17 @@ mlx5_set_metadata_mask(struct rte_eth_dev *dev) + break; + } + if (sh->dv_mark_mask && sh->dv_mark_mask != mark) +- DRV_LOG(WARNING, "metadata MARK mask mismatche %08X:%08X", ++ DRV_LOG(WARNING, "metadata MARK mask mismatch %08X:%08X", + sh->dv_mark_mask, mark); + else + sh->dv_mark_mask = mark; + if (sh->dv_meta_mask && sh->dv_meta_mask != meta) +- DRV_LOG(WARNING, "metadata META mask mismatche %08X:%08X", ++ DRV_LOG(WARNING, "metadata META mask mismatch %08X:%08X", + sh->dv_meta_mask, meta); + else + sh->dv_meta_mask = meta; + if (sh->dv_regc0_mask && sh->dv_regc0_mask != reg_c0) +- DRV_LOG(WARNING, "metadata reg_c0 mask mismatche %08X:%08X", ++ DRV_LOG(WARNING, "metadata reg_c0 mask mismatch %08X:%08X", + sh->dv_meta_mask, reg_c0); + else + sh->dv_regc0_mask = reg_c0; +diff --git a/dpdk/drivers/net/mlx5/mlx5.h b/dpdk/drivers/net/mlx5/mlx5.h +index 9bf1bf3146..6c85c4e2db 100644 +--- a/dpdk/drivers/net/mlx5/mlx5.h ++++ b/dpdk/drivers/net/mlx5/mlx5.h +@@ -202,7 +202,7 @@ struct mlx5_dev_config { + unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */ + unsigned int hw_padding:1; /* End alignment padding is supported. */ + unsigned int vf:1; /* This is a VF. */ +- unsigned int tunnel_en:1; ++ unsigned int tunnel_en:3; + /* Whether tunnel stateless offloads are supported. */ + unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */ + unsigned int cqe_comp:1; /* CQE compression is enabled. */ +@@ -218,7 +218,7 @@ struct mlx5_dev_config { + unsigned int dv_xmeta_en:2; /* Enable extensive flow metadata. */ + unsigned int lacp_by_user:1; + /* Enable user to manage LACP traffic. */ +- unsigned int swp:1; /* Tx generic tunnel checksum and TSO offload. */ ++ unsigned int swp:3; /* Tx generic tunnel checksum and TSO offload. */ + unsigned int devx:1; /* Whether devx interface is available or not. */ + unsigned int dest_tir:1; /* Whether advanced DR API is available. */ + unsigned int reclaim_mode:2; /* Memory reclaim mode. */ +@@ -228,10 +228,14 @@ struct mlx5_dev_config { + unsigned int dv_miss_info:1; /* restore packet after partial hw miss */ + struct { + unsigned int enabled:1; /* Whether MPRQ is enabled. */ +- unsigned int stride_num_n; /* Number of strides. */ +- unsigned int stride_size_n; /* Size of a stride. */ +- unsigned int min_stride_size_n; /* Min size of a stride. */ +- unsigned int max_stride_size_n; /* Max size of a stride. */ ++ unsigned int log_stride_num; /* Log number of strides. */ ++ unsigned int log_stride_size; /* Log size of a stride. 
*/ ++ unsigned int log_min_stride_size; /* Log min size of a stride.*/ ++ unsigned int log_max_stride_size; /* Log max size of a stride.*/ ++ unsigned int log_min_stride_num; /* Log min num of strides. */ ++ unsigned int log_max_stride_num; /* Log max num of strides. */ ++ unsigned int log_min_stride_wqe_size; ++ /* Log min WQE size, (size of single stride)*(num of strides).*/ + unsigned int max_memcpy_len; + /* Maximum packet size to memcpy Rx packets. */ + unsigned int min_rxqs_num; +@@ -269,6 +273,13 @@ struct mlx5_drop { + struct mlx5_rxq_obj *rxq; /* Rx queue object. */ + }; + ++/* Loopback dummy queue resources required due to Verbs API. */ ++struct mlx5_lb_ctx { ++ struct ibv_qp *qp; /* QP object. */ ++ void *ibv_cq; /* Completion queue. */ ++ uint16_t refcnt; /* Reference count for representors. */ ++}; ++ + #define MLX5_COUNTERS_PER_POOL 512 + #define MLX5_MAX_PENDING_QUERIES 4 + #define MLX5_CNT_CONTAINER_RESIZE 64 +@@ -416,8 +427,7 @@ struct mlx5_flow_counter_pool { + struct mlx5_counter_stats_mem_mng { + LIST_ENTRY(mlx5_counter_stats_mem_mng) next; + struct mlx5_counter_stats_raw *raws; +- struct mlx5_devx_obj *dm; +- void *umem; ++ struct mlx5_pmd_wrapped_mr wm; + }; + + /* Raw memory structure for the counter statistics values of a pool. */ +@@ -448,8 +458,6 @@ struct mlx5_flow_counter_mng { + uint8_t pending_queries; + uint16_t pool_index; + uint8_t query_thread_on; +- bool relaxed_ordering_read; +- bool relaxed_ordering_write; + bool counter_fallback; /* Use counter fallback management. */ + LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs; + LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws; +@@ -471,14 +479,6 @@ struct mlx5_aso_cq { + uint64_t errors; + }; + +-struct mlx5_aso_devx_mr { +- void *buf; +- uint64_t length; +- struct mlx5dv_devx_umem *umem; +- struct mlx5_devx_obj *mkey; +- bool is_indirect; +-}; +- + struct mlx5_aso_sq_elem { + struct mlx5_aso_age_pool *pool; + uint16_t burst_size; +@@ -495,7 +495,7 @@ struct mlx5_aso_sq { + }; + volatile uint32_t *db_rec; + volatile uint64_t *uar_addr; +- struct mlx5_aso_devx_mr mr; ++ struct mlx5_pmd_mr mr; + uint16_t pi; + uint32_t head; + uint32_t tail; +@@ -538,6 +538,8 @@ struct mlx5_aso_age_mng { + #define MLX5_AGE_TRIGGER 2 + #define MLX5_AGE_SET(age_info, BIT) \ + ((age_info)->flags |= (1 << (BIT))) ++#define MLX5_AGE_UNSET(age_info, BIT) \ ++ ((age_info)->flags &= ~(1 << (BIT))) + #define MLX5_AGE_GET(age_info, BIT) \ + ((age_info)->flags & (1 << (BIT))) + #define GET_PORT_AGE_INFO(priv) \ +@@ -557,6 +559,7 @@ struct mlx5_age_info { + struct mlx5_dev_shared_port { + uint32_t ih_port_id; + uint32_t devx_ih_port_id; ++ uint32_t nl_ih_port_id; + /* + * Interrupt handler port_id. Used by shared interrupt + * handler to find the corresponding rte_eth device +@@ -583,7 +586,6 @@ union mlx5_flow_tbl_key { + /* Table structure. */ + struct mlx5_flow_tbl_resource { + void *obj; /**< Pointer to DR table object. */ +- uint32_t refcnt; /**< Reference counter. */ + }; + + #define MLX5_MAX_TABLES UINT16_MAX +@@ -604,7 +606,7 @@ struct mlx5_flow_id_pool { + uint32_t base_index; + /**< The next index that can be used without any free elements. */ + uint32_t *curr; /**< Pointer to the index to pop. */ +- uint32_t *last; /**< Pointer to the last element in the empty arrray. */ ++ uint32_t *last; /**< Pointer to the last element in the empty array. */ + uint32_t max_id; /**< Maximum id can be allocated from the pool. */ + }; + +@@ -653,7 +655,7 @@ struct mlx5_dev_txpp { + void *pp; /* Packet pacing context. 
*/ + uint16_t pp_id; /* Packet pacing context index. */ + uint16_t ts_n; /* Number of captured timestamps. */ +- uint16_t ts_p; /* Pointer to statisticks timestamp. */ ++ uint16_t ts_p; /* Pointer to statistics timestamp. */ + struct mlx5_txpp_ts *tsa; /* Timestamps sliding window stats. */ + struct mlx5_txpp_ts ts; /* Cached completion id/timestamp. */ + uint32_t sync_lost:1; /* ci/timestamp synchronization lost. */ +@@ -689,6 +691,9 @@ struct mlx5_dev_ctx_shared { + uint16_t bond_dev; /* Bond primary device id. */ + uint32_t devx:1; /* Opened with DV. */ + uint32_t flow_hit_aso_en:1; /* Flow Hit ASO is supported. */ ++ uint32_t rq_ts_format:2; /* RQ timestamp formats supported. */ ++ uint32_t sq_ts_format:2; /* SQ timestamp formats supported. */ ++ uint32_t qp_ts_format:2; /* QP timestamp formats supported. */ + uint32_t eqn; /* Event Queue number. */ + uint32_t max_port; /* Maximal IB device port index. */ + void *ctx; /* Verbs/DV/DevX context. */ +@@ -707,7 +712,7 @@ struct mlx5_dev_ctx_shared { + /* Shared DV/DR flow data section. */ + uint32_t dv_meta_mask; /* flow META metadata supported mask. */ + uint32_t dv_mark_mask; /* flow MARK metadata supported mask. */ +- uint32_t dv_regc0_mask; /* available bits of metatada reg_c[0]. */ ++ uint32_t dv_regc0_mask; /* available bits of metadata reg_c[0]. */ + void *fdb_domain; /* FDB Direct Rules name space handle. */ + void *rx_domain; /* RX Direct Rules name space handle. */ + void *tx_domain; /* TX Direct Rules name space handle. */ +@@ -719,7 +724,7 @@ struct mlx5_dev_ctx_shared { + struct mlx5_hlist *flow_tbls; + struct mlx5_flow_tunnel_hub *tunnel_hub; + /* Direct Rules tables for FDB, NIC TX+RX */ +- void *esw_drop_action; /* Pointer to DR E-Switch drop action. */ ++ void *dr_drop_action; /* Pointer to DR drop action, any domain. */ + void *pop_vlan_action; /* Pointer to DR pop VLAN action. */ + struct mlx5_hlist *encaps_decaps; /* Encap/decap action hash list. */ + struct mlx5_hlist *modify_cmds; +@@ -737,15 +742,18 @@ struct mlx5_dev_ctx_shared { + /* Shared interrupt handler section. */ + struct rte_intr_handle intr_handle; /* Interrupt handler for device. */ + struct rte_intr_handle intr_handle_devx; /* DEVX interrupt handler. */ ++ struct rte_intr_handle intr_handle_nl; /* Netlink interrupt handler. */ + void *devx_comp; /* DEVX async comp obj. */ + struct mlx5_devx_obj *tis; /* TIS object. */ + struct mlx5_devx_obj *td; /* Transport domain. */ + void *tx_uar; /* Tx/packet pacing shared UAR. */ ++ struct mlx5_proc_priv *pppriv; /* Pointer to primary private process. */ + struct mlx5_flex_parser_profiles fp[MLX5_FLEX_PARSER_MAX]; + /* Flex parser profiles information. */ + void *devx_rx_uar; /* DevX UAR for Rx. */ + struct mlx5_aso_age_mng *aso_age_mng; + /* Management data for aging mechanism using ASO Flow Hit. */ ++ struct mlx5_lb_ctx self_lb; /* QP to enable self loopback for Devx. */ + struct mlx5_dev_shared_port port[]; /* per device port data array. */ + }; + +@@ -910,6 +918,8 @@ struct mlx5_obj_ops { + int (*txq_obj_modify)(struct mlx5_txq_obj *obj, + enum mlx5_txq_modify_type type, uint8_t dev_port); + void (*txq_obj_release)(struct mlx5_txq_obj *txq_obj); ++ int (*lb_dummy_queue_create)(struct rte_eth_dev *dev); ++ void (*lb_dummy_queue_release)(struct rte_eth_dev *dev); + }; + + #define MLX5_RSS_HASH_FIELDS_LEN RTE_DIM(mlx5_rss_hash_fields) +@@ -933,6 +943,8 @@ struct mlx5_priv { + unsigned int mtr_en:1; /* Whether support meter. */ + unsigned int mtr_reg_share:1; /* Whether support meter REG_C share. 
*/ + unsigned int sampler_en:1; /* Whether support sampler. */ ++ unsigned int lb_used:1; /* Loopback queue is referred to. */ ++ uint32_t mark_enabled:1; /* If mark action is enabled on rxqs. */ + uint16_t domain_id; /* Switch domain identifier. */ + uint16_t vport_id; /* Associated VF vport index (if any). */ + uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */ +@@ -953,6 +965,7 @@ struct mlx5_priv { + unsigned int reta_idx_n; /* RETA index size. */ + struct mlx5_drop drop_queue; /* Flow drop queues. */ + uint32_t flows; /* RTE Flow rules. */ ++ void *root_drop_action; /* Pointer to root drop action. */ + uint32_t ctrl_flows; /* Control flow rules. */ + rte_spinlock_t flow_list_lock; + struct mlx5_obj_ops obj_ops; /* HW objects operations. */ +@@ -990,6 +1003,8 @@ struct mlx5_priv { + LIST_HEAD(fdir, mlx5_fdir_flow) fdir_flows; /* fdir flows. */ + rte_spinlock_t shared_act_sl; /* Shared actions spinlock. */ + uint32_t rss_shared_actions; /* RSS shared actions. */ ++ struct mlx5_devx_obj *q_counters; /* DevX queue counter object. */ ++ uint32_t counter_set_id; /* Queue counter ID to set in DevX objects. */ + }; + + #define PORT_ID(priv) ((priv)->dev_data->port_id) +@@ -1031,7 +1046,6 @@ void mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn, + void mlx5_set_metadata_mask(struct rte_eth_dev *dev); + int mlx5_dev_check_sibling_config(struct mlx5_priv *priv, + struct mlx5_dev_config *config); +-int mlx5_dev_configure(struct rte_eth_dev *dev); + int mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info); + int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size); + int mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu); +@@ -1073,6 +1087,7 @@ int mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf); + void mlx5_dev_interrupt_handler(void *arg); + void mlx5_dev_interrupt_handler_devx(void *arg); ++void mlx5_dev_interrupt_handler_nl(void *arg); + int mlx5_set_link_down(struct rte_eth_dev *dev); + int mlx5_set_link_up(struct rte_eth_dev *dev); + int mlx5_is_removed(struct rte_eth_dev *dev); +@@ -1214,8 +1229,6 @@ int mlx5_ctrl_flow(struct rte_eth_dev *dev, + struct rte_flow_item_eth *eth_mask); + int mlx5_flow_lacp_miss(struct rte_eth_dev *dev); + struct rte_flow *mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev); +-int mlx5_flow_create_drop_queue(struct rte_eth_dev *dev); +-void mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev); + void mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh, + uint64_t async_id, int status); + void mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh); +@@ -1244,6 +1257,7 @@ int mlx5_mp_os_req_queue_control(struct rte_eth_dev *dev, uint16_t queue_id, + /* mlx5_socket.c */ + + int mlx5_pmd_socket_init(void); ++void mlx5_pmd_socket_uninit(void); + + /* mlx5_flow_meter.c */ + +@@ -1281,6 +1295,7 @@ int mlx5_os_set_promisc(struct rte_eth_dev *dev, int enable); + int mlx5_os_set_allmulti(struct rte_eth_dev *dev, int enable); + int mlx5_os_set_nonblock_channel_fd(int fd); + void mlx5_os_mac_addr_flush(struct rte_eth_dev *dev); ++void mlx5_os_net_cleanup(void); + + /* mlx5_txpp.c */ + +diff --git a/dpdk/drivers/net/mlx5/mlx5_defs.h b/dpdk/drivers/net/mlx5/mlx5_defs.h +index aa55db3750..ee5c61409c 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_defs.h ++++ b/dpdk/drivers/net/mlx5/mlx5_defs.h +@@ -59,7 +59,7 @@ + #define MLX5_MAX_XSTATS 32 + + /* Maximum Packet headers size (L2+L3+L4) for TSO. 
*/ +-#define MLX5_MAX_TSO_HEADER (128u + 34u) ++#define MLX5_MAX_TSO_HEADER 192U + + /* Inline data size required by NICs. */ + #define MLX5_INLINE_HSIZE_NONE 0 +@@ -138,10 +138,10 @@ + #endif + + /* Log 2 of the default number of strides per WQE for Multi-Packet RQ. */ +-#define MLX5_MPRQ_STRIDE_NUM_N 6U ++#define MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM 6U + + /* Log 2 of the default size of a stride per WQE for Multi-Packet RQ. */ +-#define MLX5_MPRQ_STRIDE_SIZE_N 11U ++#define MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE 11U + + /* Two-byte shift is disabled for Multi-Packet RQ. */ + #define MLX5_MPRQ_TWO_BYTE_SHIFT 0 +diff --git a/dpdk/drivers/net/mlx5/mlx5_devx.c b/dpdk/drivers/net/mlx5/mlx5_devx.c +index 9970a58156..b2c770f537 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_devx.c ++++ b/dpdk/drivers/net/mlx5/mlx5_devx.c +@@ -339,6 +339,7 @@ mlx5_rxq_create_devx_rq_resources(struct rte_eth_dev *dev, uint16_t idx) + rq_attr.mem_rq_type = MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE; + rq_attr.flush_in_error_en = 1; + mlx5_devx_create_rq_attr_fill(rxq_data, cqn, &rq_attr); ++ rq_attr.ts_format = mlx5_ts_format_conv(priv->sh->rq_ts_format); + /* Fill WQ attributes for this RQ. */ + if (mlx5_rxq_mprq_enabled(rxq_data)) { + rq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ; +@@ -347,11 +348,11 @@ mlx5_rxq_create_devx_rq_resources(struct rte_eth_dev *dev, uint16_t idx) + * 512*2^single_wqe_log_num_of_strides. + */ + rq_attr.wq_attr.single_wqe_log_num_of_strides = +- rxq_data->strd_num_n - ++ rxq_data->log_strd_num - + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + /* Stride size = (2^single_stride_log_num_of_bytes)*64B. */ + rq_attr.wq_attr.single_stride_log_num_of_bytes = +- rxq_data->strd_sz_n - ++ rxq_data->log_strd_sz - + MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES; + wqe_size = sizeof(struct mlx5_wqe_mprq); + } else { +@@ -391,6 +392,7 @@ mlx5_rxq_create_devx_rq_resources(struct rte_eth_dev *dev, uint16_t idx) + (uintptr_t)rxq_ctrl->rq_dbr_offset); + /* Create RQ using DevX API. 
*/ + mlx5_devx_wq_attr_fill(priv, rxq_ctrl, &rq_attr.wq_attr); ++ rq_attr.counter_set_id = priv->counter_set_id; + rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &rq_attr, rxq_ctrl->socket); + if (!rq) + goto error; +@@ -599,6 +601,7 @@ mlx5_rxq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx) + attr.wq_attr.log_hairpin_num_packets = + attr.wq_attr.log_hairpin_data_sz - + MLX5_HAIRPIN_QUEUE_STRIDE; ++ attr.counter_set_id = priv->counter_set_id; + tmpl->rq = mlx5_devx_cmd_create_rq(priv->sh->ctx, &attr, + rxq_ctrl->socket); + if (!tmpl->rq) { +@@ -1369,6 +1372,7 @@ mlx5_txq_create_devx_sq_resources(struct rte_eth_dev *dev, uint16_t idx, + sq_attr.allow_multi_pkt_send_wqe = !!priv->config.mps; + sq_attr.allow_swp = !!priv->config.swp; + sq_attr.min_wqe_inline_mode = priv->config.hca_attr.vport_inline_mode; ++ sq_attr.ts_format = mlx5_ts_format_conv(priv->sh->sq_ts_format); + sq_attr.wq_attr.uar_page = + mlx5_os_get_devx_uar_page_id(priv->sh->tx_uar); + sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC; +@@ -1428,7 +1432,6 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx) + #else + struct mlx5_dev_ctx_shared *sh = priv->sh; + struct mlx5_txq_obj *txq_obj = txq_ctrl->obj; +- void *reg_addr; + uint32_t cqe_n, log_desc_n; + uint32_t wqe_n, wqe_size; + int ret = 0; +@@ -1512,13 +1515,10 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx) + if (!priv->sh->tdn) + priv->sh->tdn = priv->sh->td->id; + #endif +- MLX5_ASSERT(sh->tx_uar); +- reg_addr = mlx5_os_get_devx_uar_reg_addr(sh->tx_uar); +- MLX5_ASSERT(reg_addr); +- txq_ctrl->bf_reg = reg_addr; ++ MLX5_ASSERT(sh->tx_uar && mlx5_os_get_devx_uar_reg_addr(sh->tx_uar)); + txq_ctrl->uar_mmap_offset = + mlx5_os_get_devx_uar_mmap_offset(sh->tx_uar); +- txq_uar_init(txq_ctrl); ++ txq_uar_init(txq_ctrl, mlx5_os_get_devx_uar_reg_addr(sh->tx_uar)); + dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; + return 0; + error: +@@ -1566,4 +1566,6 @@ struct mlx5_obj_ops devx_obj_ops = { + .txq_obj_new = mlx5_txq_devx_obj_new, + .txq_obj_modify = mlx5_devx_modify_sq, + .txq_obj_release = mlx5_txq_devx_obj_release, ++ .lb_dummy_queue_create = NULL, ++ .lb_dummy_queue_release = NULL, + }; +diff --git a/dpdk/drivers/net/mlx5/mlx5_flow.c b/dpdk/drivers/net/mlx5/mlx5_flow.c +index cda3ca557c..7a149ab761 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_flow.c ++++ b/dpdk/drivers/net/mlx5/mlx5_flow.c +@@ -50,6 +50,7 @@ flow_tunnel_add_default_miss(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_action *app_actions, + uint32_t flow_idx, ++ const struct mlx5_flow_tunnel *tunnel, + struct tunnel_default_miss_ctx *ctx, + struct rte_flow_error *error); + static struct mlx5_flow_tunnel * +@@ -99,8 +100,25 @@ struct mlx5_flow_expand_node { + * RSS types bit-field associated with this node + * (see ETH_RSS_* definitions). + */ ++ uint64_t node_flags; ++ /**< ++ * Bit-fields that define how the node is used in the expansion. ++ * (see MLX5_EXPANSION_NODE_* definitions). ++ */ + }; + ++/* Optional expand field. The expansion alg will not go deeper. */ ++#define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0) ++ ++/* The node is not added implicitly as expansion to the flow pattern. ++ * If the node type does not match the flow pattern item type, the ++ * expansion alg will go deeper to its next items. ++ * In the current implementation, the list of next nodes indexes can ++ * have up to one node with this flag set and it has to be the last ++ * node index (before the list terminator). 
++ */ ++#define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1) ++ + /** Object returned by mlx5_flow_expand_rss(). */ + struct mlx5_flow_expand_rss { + uint32_t entries; +@@ -111,108 +129,212 @@ struct mlx5_flow_expand_rss { + } entry[]; + }; + ++static void ++mlx5_dbg__print_pattern(const struct rte_flow_item *item); ++ ++static const struct mlx5_flow_expand_node * ++mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern, ++ unsigned int item_idx, ++ const struct mlx5_flow_expand_node graph[], ++ const struct mlx5_flow_expand_node *node); ++ ++static bool ++mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item) ++{ ++ switch (item->type) { ++ case RTE_FLOW_ITEM_TYPE_ETH: ++ case RTE_FLOW_ITEM_TYPE_VLAN: ++ case RTE_FLOW_ITEM_TYPE_IPV4: ++ case RTE_FLOW_ITEM_TYPE_IPV6: ++ case RTE_FLOW_ITEM_TYPE_UDP: ++ case RTE_FLOW_ITEM_TYPE_TCP: ++ case RTE_FLOW_ITEM_TYPE_ICMP: ++ case RTE_FLOW_ITEM_TYPE_ICMP6: ++ case RTE_FLOW_ITEM_TYPE_VXLAN: ++ case RTE_FLOW_ITEM_TYPE_NVGRE: ++ case RTE_FLOW_ITEM_TYPE_GRE: ++ case RTE_FLOW_ITEM_TYPE_GENEVE: ++ case RTE_FLOW_ITEM_TYPE_MPLS: ++ case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: ++ case RTE_FLOW_ITEM_TYPE_GRE_KEY: ++ case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT: ++ case RTE_FLOW_ITEM_TYPE_GTP: ++ return true; ++ default: ++ break; ++ } ++ return false; ++} ++ ++/** ++ * Network Service Header (NSH) and its next protocol values ++ * are described in RFC-8393. ++ */ ++static enum rte_flow_item_type ++mlx5_nsh_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask) ++{ ++ enum rte_flow_item_type type; ++ ++ switch (proto_mask & proto_spec) { ++ case 0: ++ type = RTE_FLOW_ITEM_TYPE_VOID; ++ break; ++ case RTE_VXLAN_GPE_TYPE_IPV4: ++ type = RTE_FLOW_ITEM_TYPE_IPV4; ++ break; ++ case RTE_VXLAN_GPE_TYPE_IPV6: ++ type = RTE_VXLAN_GPE_TYPE_IPV6; ++ break; ++ case RTE_VXLAN_GPE_TYPE_ETH: ++ type = RTE_FLOW_ITEM_TYPE_ETH; ++ break; ++ default: ++ type = RTE_FLOW_ITEM_TYPE_END; ++ } ++ return type; ++} ++ ++static enum rte_flow_item_type ++mlx5_inet_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask) ++{ ++ enum rte_flow_item_type type; ++ ++ switch (proto_mask & proto_spec) { ++ case 0: ++ type = RTE_FLOW_ITEM_TYPE_VOID; ++ break; ++ case IPPROTO_UDP: ++ type = RTE_FLOW_ITEM_TYPE_UDP; ++ break; ++ case IPPROTO_TCP: ++ type = RTE_FLOW_ITEM_TYPE_TCP; ++ break; ++ case IPPROTO_IPIP: ++ type = RTE_FLOW_ITEM_TYPE_IPV4; ++ break; ++ case IPPROTO_IPV6: ++ type = RTE_FLOW_ITEM_TYPE_IPV6; ++ break; ++ default: ++ type = RTE_FLOW_ITEM_TYPE_END; ++ } ++ return type; ++} ++ ++static enum rte_flow_item_type ++mlx5_ethertype_to_item_type(rte_be16_t type_spec, ++ rte_be16_t type_mask, bool is_tunnel) ++{ ++ enum rte_flow_item_type type; ++ ++ switch (rte_be_to_cpu_16(type_spec & type_mask)) { ++ case 0: ++ type = RTE_FLOW_ITEM_TYPE_VOID; ++ break; ++ case RTE_ETHER_TYPE_TEB: ++ type = is_tunnel ? ++ RTE_FLOW_ITEM_TYPE_ETH : RTE_FLOW_ITEM_TYPE_END; ++ break; ++ case RTE_ETHER_TYPE_VLAN: ++ type = !is_tunnel ? 
++ RTE_FLOW_ITEM_TYPE_VLAN : RTE_FLOW_ITEM_TYPE_END; ++ break; ++ case RTE_ETHER_TYPE_IPV4: ++ type = RTE_FLOW_ITEM_TYPE_IPV4; ++ break; ++ case RTE_ETHER_TYPE_IPV6: ++ type = RTE_FLOW_ITEM_TYPE_IPV6; ++ break; ++ default: ++ type = RTE_FLOW_ITEM_TYPE_END; ++ } ++ return type; ++} ++ + static enum rte_flow_item_type + mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item) + { +- enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID; +- uint16_t ether_type = 0; +- uint16_t ether_type_m; +- uint8_t ip_next_proto = 0; +- uint8_t ip_next_proto_m; ++#define MLX5_XSET_ITEM_MASK_SPEC(type, fld) \ ++ do { \ ++ const void *m = item->mask; \ ++ const void *s = item->spec; \ ++ mask = m ? \ ++ ((const struct rte_flow_item_##type *)m)->fld : \ ++ rte_flow_item_##type##_mask.fld; \ ++ spec = ((const struct rte_flow_item_##type *)s)->fld; \ ++ } while (0) ++ ++ enum rte_flow_item_type ret; ++ uint16_t spec, mask; + + if (item == NULL || item->spec == NULL) +- return ret; ++ return RTE_FLOW_ITEM_TYPE_VOID; + switch (item->type) { + case RTE_FLOW_ITEM_TYPE_ETH: +- if (item->mask) +- ether_type_m = ((const struct rte_flow_item_eth *) +- (item->mask))->type; +- else +- ether_type_m = rte_flow_item_eth_mask.type; +- if (ether_type_m != RTE_BE16(0xFFFF)) +- break; +- ether_type = ((const struct rte_flow_item_eth *) +- (item->spec))->type; +- if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) +- ret = RTE_FLOW_ITEM_TYPE_IPV4; +- else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) +- ret = RTE_FLOW_ITEM_TYPE_IPV6; +- else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) +- ret = RTE_FLOW_ITEM_TYPE_VLAN; +- else +- ret = RTE_FLOW_ITEM_TYPE_END; ++ MLX5_XSET_ITEM_MASK_SPEC(eth, type); ++ if (!mask) ++ return RTE_FLOW_ITEM_TYPE_VOID; ++ ret = mlx5_ethertype_to_item_type(spec, mask, false); + break; + case RTE_FLOW_ITEM_TYPE_VLAN: +- if (item->mask) +- ether_type_m = ((const struct rte_flow_item_vlan *) +- (item->mask))->inner_type; +- else +- ether_type_m = rte_flow_item_vlan_mask.inner_type; +- if (ether_type_m != RTE_BE16(0xFFFF)) +- break; +- ether_type = ((const struct rte_flow_item_vlan *) +- (item->spec))->inner_type; +- if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) +- ret = RTE_FLOW_ITEM_TYPE_IPV4; +- else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) +- ret = RTE_FLOW_ITEM_TYPE_IPV6; +- else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) +- ret = RTE_FLOW_ITEM_TYPE_VLAN; +- else +- ret = RTE_FLOW_ITEM_TYPE_END; ++ MLX5_XSET_ITEM_MASK_SPEC(vlan, inner_type); ++ if (!mask) ++ return RTE_FLOW_ITEM_TYPE_VOID; ++ ret = mlx5_ethertype_to_item_type(spec, mask, false); + break; + case RTE_FLOW_ITEM_TYPE_IPV4: +- if (item->mask) +- ip_next_proto_m = ((const struct rte_flow_item_ipv4 *) +- (item->mask))->hdr.next_proto_id; +- else +- ip_next_proto_m = +- rte_flow_item_ipv4_mask.hdr.next_proto_id; +- if (ip_next_proto_m != 0xFF) +- break; +- ip_next_proto = ((const struct rte_flow_item_ipv4 *) +- (item->spec))->hdr.next_proto_id; +- if (ip_next_proto == IPPROTO_UDP) +- ret = RTE_FLOW_ITEM_TYPE_UDP; +- else if (ip_next_proto == IPPROTO_TCP) +- ret = RTE_FLOW_ITEM_TYPE_TCP; +- else if (ip_next_proto == IPPROTO_IP) +- ret = RTE_FLOW_ITEM_TYPE_IPV4; +- else if (ip_next_proto == IPPROTO_IPV6) +- ret = RTE_FLOW_ITEM_TYPE_IPV6; +- else +- ret = RTE_FLOW_ITEM_TYPE_END; ++ MLX5_XSET_ITEM_MASK_SPEC(ipv4, hdr.next_proto_id); ++ if (!mask) ++ return RTE_FLOW_ITEM_TYPE_VOID; ++ ret = mlx5_inet_proto_to_item_type(spec, mask); + break; + case 
RTE_FLOW_ITEM_TYPE_IPV6: +- if (item->mask) +- ip_next_proto_m = ((const struct rte_flow_item_ipv6 *) +- (item->mask))->hdr.proto; +- else +- ip_next_proto_m = +- rte_flow_item_ipv6_mask.hdr.proto; +- if (ip_next_proto_m != 0xFF) +- break; +- ip_next_proto = ((const struct rte_flow_item_ipv6 *) +- (item->spec))->hdr.proto; +- if (ip_next_proto == IPPROTO_UDP) +- ret = RTE_FLOW_ITEM_TYPE_UDP; +- else if (ip_next_proto == IPPROTO_TCP) +- ret = RTE_FLOW_ITEM_TYPE_TCP; +- else if (ip_next_proto == IPPROTO_IP) +- ret = RTE_FLOW_ITEM_TYPE_IPV4; +- else if (ip_next_proto == IPPROTO_IPV6) +- ret = RTE_FLOW_ITEM_TYPE_IPV6; +- else +- ret = RTE_FLOW_ITEM_TYPE_END; ++ MLX5_XSET_ITEM_MASK_SPEC(ipv6, hdr.proto); ++ if (!mask) ++ return RTE_FLOW_ITEM_TYPE_VOID; ++ ret = mlx5_inet_proto_to_item_type(spec, mask); ++ break; ++ case RTE_FLOW_ITEM_TYPE_GENEVE: ++ MLX5_XSET_ITEM_MASK_SPEC(geneve, protocol); ++ ret = mlx5_ethertype_to_item_type(spec, mask, true); ++ break; ++ case RTE_FLOW_ITEM_TYPE_GRE: ++ MLX5_XSET_ITEM_MASK_SPEC(gre, protocol); ++ ret = mlx5_ethertype_to_item_type(spec, mask, true); ++ break; ++ case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: ++ MLX5_XSET_ITEM_MASK_SPEC(vxlan_gpe, protocol); ++ ret = mlx5_nsh_proto_to_item_type(spec, mask); + break; + default: + ret = RTE_FLOW_ITEM_TYPE_VOID; + break; + } + return ret; ++#undef MLX5_XSET_ITEM_MASK_SPEC ++} ++ ++static const int * ++mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[], ++ const int *next_node) ++{ ++ const struct mlx5_flow_expand_node *node = NULL; ++ const int *next = next_node; ++ ++ while (next && *next) { ++ /* ++ * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT ++ * flag set, because they were not found in the flow pattern. ++ */ ++ node = &graph[*next]; ++ if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT)) ++ break; ++ next = node->next; ++ } ++ return next; + } + +-#define MLX5_RSS_EXP_ELT_N 8 ++#define MLX5_RSS_EXP_ELT_N 16 + + /** + * Expand RSS flows into several possible flows according to the RSS hash +@@ -237,6 +359,7 @@ mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item) + * set, the following errors are defined: + * + * -E2BIG: graph-depth @p graph is too deep. ++ * -EINVAL: @p size has not enough space for expanded pattern. 
+ */ + static int + mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, +@@ -250,7 +373,7 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, + const int *stack[MLX5_RSS_EXP_ELT_N]; + int stack_pos = 0; + struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N]; +- unsigned int i; ++ unsigned int i, item_idx, last_expand_item_idx = 0; + size_t lsize; + size_t user_pattern_size = 0; + void *addr = NULL; +@@ -258,24 +381,37 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, + struct rte_flow_item missed_item; + int missed = 0; + int elt = 0; +- const struct rte_flow_item *last_item = NULL; ++ const struct rte_flow_item *last_expand_item = NULL; + + memset(&missed_item, 0, sizeof(missed_item)); + lsize = offsetof(struct mlx5_flow_expand_rss, entry) + + MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]); +- if (lsize <= size) { +- buf->entry[0].priority = 0; +- buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N]; +- buf->entries = 0; +- addr = buf->entry[0].pattern; +- } +- for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { +- if (item->type != RTE_FLOW_ITEM_TYPE_VOID) +- last_item = item; +- for (i = 0; node->next && node->next[i]; ++i) { ++ if (lsize > size) ++ return -EINVAL; ++ buf->entry[0].priority = 0; ++ buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N]; ++ buf->entries = 0; ++ addr = buf->entry[0].pattern; ++ for (item = pattern, item_idx = 0; ++ item->type != RTE_FLOW_ITEM_TYPE_END; ++ item++, item_idx++) { ++ if (!mlx5_flow_is_rss_expandable_item(item)) { ++ user_pattern_size += sizeof(*item); ++ continue; ++ } ++ last_expand_item = item; ++ last_expand_item_idx = item_idx; ++ i = 0; ++ while (node->next && node->next[i]) { + next = &graph[node->next[i]]; + if (next->type == item->type) + break; ++ if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) { ++ node = next; ++ i = 0; ++ } else { ++ ++i; ++ } + } + if (next) + node = next; +@@ -283,12 +419,12 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, + } + user_pattern_size += sizeof(*item); /* Handle END item. */ + lsize += user_pattern_size; ++ if (lsize > size) ++ return -EINVAL; + /* Copy the user pattern in the first entry of the buffer. */ +- if (lsize <= size) { +- rte_memcpy(addr, pattern, user_pattern_size); +- addr = (void *)(((uintptr_t)addr) + user_pattern_size); +- buf->entries = 1; +- } ++ rte_memcpy(addr, pattern, user_pattern_size); ++ addr = (void *)(((uintptr_t)addr) + user_pattern_size); ++ buf->entries = 1; + /* Start expanding. */ + memset(flow_items, 0, sizeof(flow_items)); + user_pattern_size -= sizeof(*item); +@@ -296,7 +432,7 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, + * Check if the last valid item has spec set, need complete pattern, + * and the pattern can be used for expansion. + */ +- missed_item.type = mlx5_flow_expand_rss_item_complete(last_item); ++ missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item); + if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) { + /* Item type END indicates expansion is not required. 
*/ + return lsize; +@@ -304,13 +440,20 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, + if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) { + next = NULL; + missed = 1; +- for (i = 0; node->next && node->next[i]; ++i) { ++ i = 0; ++ while (node->next && node->next[i]) { + next = &graph[node->next[i]]; + if (next->type == missed_item.type) { + flow_items[0].type = missed_item.type; + flow_items[1].type = RTE_FLOW_ITEM_TYPE_END; + break; + } ++ if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) { ++ node = next; ++ i = 0; ++ } else { ++ ++i; ++ } + next = NULL; + } + } +@@ -318,7 +461,9 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, + elt = 2; /* missed item + item end. */ + node = next; + lsize += elt * sizeof(*item) + user_pattern_size; +- if ((node->rss_types & types) && lsize <= size) { ++ if (lsize > size) ++ return -EINVAL; ++ if (node->rss_types & types) { + buf->entry[buf->entries].priority = 1; + buf->entry[buf->entries].pattern = addr; + buf->entries++; +@@ -329,14 +474,19 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, + addr = (void *)(((uintptr_t)addr) + + elt * sizeof(*item)); + } ++ } else if (last_expand_item != NULL) { ++ node = mlx5_flow_expand_rss_adjust_node(pattern, ++ last_expand_item_idx, graph, node); + } + memset(flow_items, 0, sizeof(flow_items)); +- next_node = node->next; ++ next_node = mlx5_flow_expand_rss_skip_explicit(graph, ++ node->next); + stack[stack_pos] = next_node; + node = next_node ? &graph[*next_node] : NULL; + while (node) { + flow_items[stack_pos].type = node->type; + if (node->rss_types & types) { ++ size_t n; + /* + * compute the number of items to copy from the + * expansion and copy it. +@@ -346,28 +496,29 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, + elt = stack_pos + 2; + flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END; + lsize += elt * sizeof(*item) + user_pattern_size; +- if (lsize <= size) { +- size_t n = elt * sizeof(*item); +- +- buf->entry[buf->entries].priority = +- stack_pos + 1 + missed; +- buf->entry[buf->entries].pattern = addr; +- buf->entries++; +- rte_memcpy(addr, buf->entry[0].pattern, +- user_pattern_size); +- addr = (void *)(((uintptr_t)addr) + +- user_pattern_size); +- rte_memcpy(addr, &missed_item, +- missed * sizeof(*item)); +- addr = (void *)(((uintptr_t)addr) + +- missed * sizeof(*item)); +- rte_memcpy(addr, flow_items, n); +- addr = (void *)(((uintptr_t)addr) + n); +- } ++ if (lsize > size) ++ return -EINVAL; ++ n = elt * sizeof(*item); ++ buf->entry[buf->entries].priority = ++ stack_pos + 1 + missed; ++ buf->entry[buf->entries].pattern = addr; ++ buf->entries++; ++ rte_memcpy(addr, buf->entry[0].pattern, ++ user_pattern_size); ++ addr = (void *)(((uintptr_t)addr) + ++ user_pattern_size); ++ rte_memcpy(addr, &missed_item, ++ missed * sizeof(*item)); ++ addr = (void *)(((uintptr_t)addr) + ++ missed * sizeof(*item)); ++ rte_memcpy(addr, flow_items, n); ++ addr = (void *)(((uintptr_t)addr) + n); + } + /* Go deeper. */ +- if (node->next) { +- next_node = node->next; ++ if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) && ++ node->next) { ++ next_node = mlx5_flow_expand_rss_skip_explicit(graph, ++ node->next); + if (stack_pos++ == MLX5_RSS_EXP_ELT_N) { + rte_errno = E2BIG; + return -rte_errno; +@@ -375,62 +526,65 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, + stack[stack_pos] = next_node; + } else if (*(next_node + 1)) { + /* Follow up with the next possibility. 
*/ ++ next_node = mlx5_flow_expand_rss_skip_explicit(graph, ++ ++next_node); ++ } else if (!stack_pos) { ++ /* ++ * Completing the traverse over the different paths. ++ * The next_node is advanced to the terminator. ++ */ + ++next_node; + } else { + /* Move to the next path. */ +- if (stack_pos) ++ while (stack_pos) { + next_node = stack[--stack_pos]; +- next_node++; ++ next_node++; ++ if (*next_node) ++ break; ++ } ++ next_node = mlx5_flow_expand_rss_skip_explicit(graph, ++ next_node); + stack[stack_pos] = next_node; + } +- node = *next_node ? &graph[*next_node] : NULL; ++ node = next_node && *next_node ? &graph[*next_node] : NULL; + }; +- /* no expanded flows but we have missed item, create one rule for it */ +- if (buf->entries == 1 && missed != 0) { +- elt = 2; +- lsize += elt * sizeof(*item) + user_pattern_size; +- if (lsize <= size) { +- buf->entry[buf->entries].priority = 1; +- buf->entry[buf->entries].pattern = addr; +- buf->entries++; +- flow_items[0].type = missed_item.type; +- flow_items[1].type = RTE_FLOW_ITEM_TYPE_END; +- rte_memcpy(addr, buf->entry[0].pattern, +- user_pattern_size); +- addr = (void *)(((uintptr_t)addr) + user_pattern_size); +- rte_memcpy(addr, flow_items, elt * sizeof(*item)); +- } +- } + return lsize; + } + + enum mlx5_expansion { + MLX5_EXPANSION_ROOT, + MLX5_EXPANSION_ROOT_OUTER, +- MLX5_EXPANSION_ROOT_ETH_VLAN, +- MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN, + MLX5_EXPANSION_OUTER_ETH, +- MLX5_EXPANSION_OUTER_ETH_VLAN, + MLX5_EXPANSION_OUTER_VLAN, + MLX5_EXPANSION_OUTER_IPV4, + MLX5_EXPANSION_OUTER_IPV4_UDP, + MLX5_EXPANSION_OUTER_IPV4_TCP, ++ MLX5_EXPANSION_OUTER_IPV4_ICMP, + MLX5_EXPANSION_OUTER_IPV6, + MLX5_EXPANSION_OUTER_IPV6_UDP, + MLX5_EXPANSION_OUTER_IPV6_TCP, ++ MLX5_EXPANSION_OUTER_IPV6_ICMP6, + MLX5_EXPANSION_VXLAN, ++ MLX5_EXPANSION_STD_VXLAN, ++ MLX5_EXPANSION_L3_VXLAN, + MLX5_EXPANSION_VXLAN_GPE, + MLX5_EXPANSION_GRE, ++ MLX5_EXPANSION_NVGRE, ++ MLX5_EXPANSION_GRE_KEY, + MLX5_EXPANSION_MPLS, + MLX5_EXPANSION_ETH, +- MLX5_EXPANSION_ETH_VLAN, + MLX5_EXPANSION_VLAN, + MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV4_UDP, + MLX5_EXPANSION_IPV4_TCP, ++ MLX5_EXPANSION_IPV4_ICMP, + MLX5_EXPANSION_IPV6, + MLX5_EXPANSION_IPV6_UDP, + MLX5_EXPANSION_IPV6_TCP, ++ MLX5_EXPANSION_IPV6_ICMP6, ++ MLX5_EXPANSION_IPV6_FRAG_EXT, ++ MLX5_EXPANSION_GTP, ++ MLX5_EXPANSION_GENEVE, + }; + + /** Supported expansion of items. 
*/ +@@ -447,23 +601,7 @@ static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { + MLX5_EXPANSION_OUTER_IPV6), + .type = RTE_FLOW_ITEM_TYPE_END, + }, +- [MLX5_EXPANSION_ROOT_ETH_VLAN] = { +- .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN), +- .type = RTE_FLOW_ITEM_TYPE_END, +- }, +- [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = { +- .next = MLX5_FLOW_EXPAND_RSS_NEXT +- (MLX5_EXPANSION_OUTER_ETH_VLAN), +- .type = RTE_FLOW_ITEM_TYPE_END, +- }, + [MLX5_EXPANSION_OUTER_ETH] = { +- .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, +- MLX5_EXPANSION_OUTER_IPV6, +- MLX5_EXPANSION_MPLS), +- .type = RTE_FLOW_ITEM_TYPE_ETH, +- .rss_types = 0, +- }, +- [MLX5_EXPANSION_OUTER_ETH_VLAN] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN), + .type = RTE_FLOW_ITEM_TYPE_ETH, + .rss_types = 0, +@@ -472,12 +610,15 @@ static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, + MLX5_EXPANSION_OUTER_IPV6), + .type = RTE_FLOW_ITEM_TYPE_VLAN, ++ .node_flags = MLX5_EXPANSION_NODE_EXPLICIT, + }, + [MLX5_EXPANSION_OUTER_IPV4] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT + (MLX5_EXPANSION_OUTER_IPV4_UDP, + MLX5_EXPANSION_OUTER_IPV4_TCP, ++ MLX5_EXPANSION_OUTER_IPV4_ICMP, + MLX5_EXPANSION_GRE, ++ MLX5_EXPANSION_NVGRE, + MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV6), + .type = RTE_FLOW_ITEM_TYPE_IPV4, +@@ -486,7 +627,10 @@ static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { + }, + [MLX5_EXPANSION_OUTER_IPV4_UDP] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, +- MLX5_EXPANSION_VXLAN_GPE), ++ MLX5_EXPANSION_VXLAN_GPE, ++ MLX5_EXPANSION_MPLS, ++ MLX5_EXPANSION_GENEVE, ++ MLX5_EXPANSION_GTP), + .type = RTE_FLOW_ITEM_TYPE_UDP, + .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, + }, +@@ -494,19 +638,28 @@ static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { + .type = RTE_FLOW_ITEM_TYPE_TCP, + .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, + }, ++ [MLX5_EXPANSION_OUTER_IPV4_ICMP] = { ++ .type = RTE_FLOW_ITEM_TYPE_ICMP, ++ }, + [MLX5_EXPANSION_OUTER_IPV6] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT + (MLX5_EXPANSION_OUTER_IPV6_UDP, + MLX5_EXPANSION_OUTER_IPV6_TCP, ++ MLX5_EXPANSION_OUTER_IPV6_ICMP6, + MLX5_EXPANSION_IPV4, +- MLX5_EXPANSION_IPV6), ++ MLX5_EXPANSION_IPV6, ++ MLX5_EXPANSION_GRE, ++ MLX5_EXPANSION_NVGRE), + .type = RTE_FLOW_ITEM_TYPE_IPV6, + .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | + ETH_RSS_NONFRAG_IPV6_OTHER, + }, + [MLX5_EXPANSION_OUTER_IPV6_UDP] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, +- MLX5_EXPANSION_VXLAN_GPE), ++ MLX5_EXPANSION_VXLAN_GPE, ++ MLX5_EXPANSION_MPLS, ++ MLX5_EXPANSION_GENEVE, ++ MLX5_EXPANSION_GTP), + .type = RTE_FLOW_ITEM_TYPE_UDP, + .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, + }, +@@ -514,12 +667,24 @@ static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { + .type = RTE_FLOW_ITEM_TYPE_TCP, + .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, + }, ++ [MLX5_EXPANSION_OUTER_IPV6_ICMP6] = { ++ .type = RTE_FLOW_ITEM_TYPE_ICMP6, ++ }, + [MLX5_EXPANSION_VXLAN] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, + MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV6), + .type = RTE_FLOW_ITEM_TYPE_VXLAN, + }, ++ [MLX5_EXPANSION_STD_VXLAN] = { ++ .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH), ++ .type = RTE_FLOW_ITEM_TYPE_VXLAN, ++ }, ++ [MLX5_EXPANSION_L3_VXLAN] = { ++ .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, ++ MLX5_EXPANSION_IPV6), ++ .type = RTE_FLOW_ITEM_TYPE_VXLAN, ++ }, + [MLX5_EXPANSION_VXLAN_GPE] = { + 
.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, + MLX5_EXPANSION_IPV4, +@@ -527,20 +692,32 @@ static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { + .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, + }, + [MLX5_EXPANSION_GRE] = { +- .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4), ++ .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, ++ MLX5_EXPANSION_IPV4, ++ MLX5_EXPANSION_IPV6, ++ MLX5_EXPANSION_GRE_KEY, ++ MLX5_EXPANSION_MPLS), + .type = RTE_FLOW_ITEM_TYPE_GRE, + }, ++ [MLX5_EXPANSION_GRE_KEY] = { ++ .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, ++ MLX5_EXPANSION_IPV6, ++ MLX5_EXPANSION_MPLS), ++ .type = RTE_FLOW_ITEM_TYPE_GRE_KEY, ++ .node_flags = MLX5_EXPANSION_NODE_OPTIONAL, ++ }, ++ [MLX5_EXPANSION_NVGRE] = { ++ .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH), ++ .type = RTE_FLOW_ITEM_TYPE_NVGRE, ++ }, + [MLX5_EXPANSION_MPLS] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, +- MLX5_EXPANSION_IPV6), ++ MLX5_EXPANSION_IPV6, ++ MLX5_EXPANSION_ETH), + .type = RTE_FLOW_ITEM_TYPE_MPLS, ++ .node_flags = MLX5_EXPANSION_NODE_OPTIONAL, + }, + [MLX5_EXPANSION_ETH] = { +- .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, +- MLX5_EXPANSION_IPV6), +- .type = RTE_FLOW_ITEM_TYPE_ETH, +- }, +- [MLX5_EXPANSION_ETH_VLAN] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN), + .type = RTE_FLOW_ITEM_TYPE_ETH, + }, +@@ -548,10 +725,12 @@ static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV6), + .type = RTE_FLOW_ITEM_TYPE_VLAN, ++ .node_flags = MLX5_EXPANSION_NODE_EXPLICIT, + }, + [MLX5_EXPANSION_IPV4] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP, +- MLX5_EXPANSION_IPV4_TCP), ++ MLX5_EXPANSION_IPV4_TCP, ++ MLX5_EXPANSION_IPV4_ICMP), + .type = RTE_FLOW_ITEM_TYPE_IPV4, + .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | + ETH_RSS_NONFRAG_IPV4_OTHER, +@@ -564,9 +743,14 @@ static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { + .type = RTE_FLOW_ITEM_TYPE_TCP, + .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, + }, ++ [MLX5_EXPANSION_IPV4_ICMP] = { ++ .type = RTE_FLOW_ITEM_TYPE_ICMP, ++ }, + [MLX5_EXPANSION_IPV6] = { + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP, +- MLX5_EXPANSION_IPV6_TCP), ++ MLX5_EXPANSION_IPV6_TCP, ++ MLX5_EXPANSION_IPV6_ICMP6, ++ MLX5_EXPANSION_IPV6_FRAG_EXT), + .type = RTE_FLOW_ITEM_TYPE_IPV6, + .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | + ETH_RSS_NONFRAG_IPV6_OTHER, +@@ -579,6 +763,23 @@ static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { + .type = RTE_FLOW_ITEM_TYPE_TCP, + .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, + }, ++ [MLX5_EXPANSION_IPV6_FRAG_EXT] = { ++ .type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT, ++ }, ++ [MLX5_EXPANSION_IPV6_ICMP6] = { ++ .type = RTE_FLOW_ITEM_TYPE_ICMP6, ++ }, ++ [MLX5_EXPANSION_GTP] = { ++ .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, ++ MLX5_EXPANSION_IPV6), ++ .type = RTE_FLOW_ITEM_TYPE_GTP ++ }, ++ [MLX5_EXPANSION_GENEVE] = { ++ .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, ++ MLX5_EXPANSION_IPV4, ++ MLX5_EXPANSION_IPV6), ++ .type = RTE_FLOW_ITEM_TYPE_GENEVE, ++ }, + }; + + static struct rte_flow_shared_action * +@@ -779,6 +980,7 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev, + REG_C_3; + case MLX5_MTR_COLOR: + case MLX5_ASO_FLOW_HIT: /* Both features use the same REG_C. 
*/ ++ case MLX5_SAMPLE_ID: + MLX5_ASSERT(priv->mtr_color_reg != REG_NON); + return priv->mtr_color_reg; + case MLX5_COPY_MARK: +@@ -987,7 +1189,7 @@ flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl) + } + + /** +- * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the devive ++ * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device + * flow. + * + * @param[in] dev +@@ -1000,7 +1202,6 @@ flow_drv_rxq_flags_set(struct rte_eth_dev *dev, + struct mlx5_flow_handle *dev_handle) + { + struct mlx5_priv *priv = dev->data->dev_private; +- const int mark = dev_handle->mark; + const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); + struct mlx5_ind_table_obj *ind_tbl = NULL; + unsigned int i; +@@ -1034,15 +1235,6 @@ flow_drv_rxq_flags_set(struct rte_eth_dev *dev, + * this must be always enabled (metadata may arive + * from other port - not from local flows only. + */ +- if (priv->config.dv_flow_en && +- priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && +- mlx5_flow_ext_mreg_supported(dev)) { +- rxq_ctrl->rxq.mark = 1; +- rxq_ctrl->flow_mark_n = 1; +- } else if (mark) { +- rxq_ctrl->rxq.mark = 1; +- rxq_ctrl->flow_mark_n++; +- } + if (tunnel) { + unsigned int j; + +@@ -1060,6 +1252,20 @@ flow_drv_rxq_flags_set(struct rte_eth_dev *dev, + } + } + ++static void ++flow_rxq_mark_flag_set(struct rte_eth_dev *dev) ++{ ++ struct mlx5_priv *priv = dev->data->dev_private; ++ struct mlx5_rxq_ctrl *rxq_ctrl; ++ ++ if (priv->mark_enabled) ++ return; ++ LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) { ++ rxq_ctrl->rxq.mark = 1; ++ } ++ priv->mark_enabled = 1; ++} ++ + /** + * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow + * +@@ -1074,7 +1280,11 @@ flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow) + struct mlx5_priv *priv = dev->data->dev_private; + uint32_t handle_idx; + struct mlx5_flow_handle *dev_handle; ++ struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace(); + ++ MLX5_ASSERT(wks); ++ if (wks->mark) ++ flow_rxq_mark_flag_set(dev); + SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, + handle_idx, dev_handle, next) + flow_drv_rxq_flags_set(dev, dev_handle); +@@ -1094,7 +1304,6 @@ flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, + struct mlx5_flow_handle *dev_handle) + { + struct mlx5_priv *priv = dev->data->dev_private; +- const int mark = dev_handle->mark; + const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); + struct mlx5_ind_table_obj *ind_tbl = NULL; + unsigned int i; +@@ -1124,15 +1333,6 @@ flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, + container_of((*priv->rxqs)[idx], + struct mlx5_rxq_ctrl, rxq); + +- if (priv->config.dv_flow_en && +- priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && +- mlx5_flow_ext_mreg_supported(dev)) { +- rxq_ctrl->rxq.mark = 1; +- rxq_ctrl->flow_mark_n = 1; +- } else if (mark) { +- rxq_ctrl->flow_mark_n--; +- rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n; +- } + if (tunnel) { + unsigned int j; + +@@ -1191,12 +1391,12 @@ flow_rxq_flags_clear(struct rte_eth_dev *dev) + continue; + rxq_ctrl = container_of((*priv->rxqs)[i], + struct mlx5_rxq_ctrl, rxq); +- rxq_ctrl->flow_mark_n = 0; + rxq_ctrl->rxq.mark = 0; + for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) + rxq_ctrl->flow_tunnels_n[j] = 0; + rxq_ctrl->rxq.tunnel = 0; + } ++ priv->mark_enabled = 0; + } + + /** +@@ -1220,10 +1420,14 @@ mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev) + data->dynf_meta = 0; + data->flow_meta_mask = 0; + data->flow_meta_offset = -1; ++ 
data->flow_meta_port_mask = 0; + } else { + data->dynf_meta = 1; + data->flow_meta_mask = rte_flow_dynf_metadata_mask; + data->flow_meta_offset = rte_flow_dynf_metadata_offs; ++ data->flow_meta_port_mask = (uint32_t)~0; ++ if (priv->config.dv_xmeta_en == MLX5_XMETA_MODE_META16) ++ data->flow_meta_port_mask >>= 16; + } + } + } +@@ -1854,6 +2058,10 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item, + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "L2 layer should not follow VLAN"); ++ if (item_flags & MLX5_FLOW_LAYER_GTP) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, item, ++ "L2 layer should not follow GTP"); + if (!mask) + mask = &rte_flow_item_eth_mask; + ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, +@@ -2009,7 +2217,7 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "IPv4 cannot follow L2/VLAN layer " + "which ether type is not IPv4"); +- if (item_flags & MLX5_FLOW_LAYER_IPIP) { ++ if (item_flags & MLX5_FLOW_LAYER_TUNNEL) { + if (mask && spec) + next_proto = mask->hdr.next_proto_id & + spec->hdr.next_proto_id; +@@ -2117,7 +2325,7 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, + "which ether type is not IPv6"); + if (mask && mask->hdr.proto == UINT8_MAX && spec) + next_proto = spec->hdr.proto; +- if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) { ++ if (item_flags & MLX5_FLOW_LAYER_TUNNEL) { + if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, +@@ -2662,9 +2870,8 @@ mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, + "MPLS not supported or" + " disabled in firmware" + " configuration."); +- /* MPLS over IP, UDP, GRE is allowed */ +- if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 | +- MLX5_FLOW_LAYER_OUTER_L4_UDP | ++ /* MPLS over UDP, GRE is allowed */ ++ if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP | + MLX5_FLOW_LAYER_GRE | + MLX5_FLOW_LAYER_GRE_KEY))) + return rte_flow_error_set(error, EINVAL, +@@ -3361,20 +3568,8 @@ flow_get_shared_rss_action(struct rte_eth_dev *dev, + } + + static unsigned int +-find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) ++find_graph_root(uint32_t rss_level) + { +- const struct rte_flow_item *item; +- unsigned int has_vlan = 0; +- +- for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { +- if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { +- has_vlan = 1; +- break; +- } +- } +- if (has_vlan) +- return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN : +- MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; + return rss_level < 2 ? MLX5_EXPANSION_ROOT : + MLX5_EXPANSION_ROOT_OUTER; + } +@@ -3390,7 +3585,7 @@ find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) + * subflow. + * + * @param[in] dev_flow +- * Pointer the created preifx subflow. ++ * Pointer the created prefix subflow. + * + * @return + * The layers get from prefix subflow. +@@ -3635,7 +3830,7 @@ flow_dv_mreg_create_cb(struct mlx5_hlist *list, uint64_t key, + [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, + }; + +- /* Fill the register fileds in the flow. */ ++ /* Fill the register fields in the flow. */ + ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); + if (ret < 0) + return NULL; +@@ -3703,7 +3898,7 @@ flow_dv_mreg_create_cb(struct mlx5_hlist *list, uint64_t key, + /* + * The copy Flows are not included in any list. 
There + * ones are referenced from other Flows and can not +- * be applied, removed, deleted in ardbitrary order ++ * be applied, removed, deleted in arbitrary order + * by list traversing. + */ + mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items, +@@ -4106,6 +4301,7 @@ flow_create_split_inner(struct rte_eth_dev *dev, + struct rte_flow_error *error) + { + struct mlx5_flow *dev_flow; ++ struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace(); + + dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, + flow_split_info->flow_idx, error); +@@ -4120,12 +4316,14 @@ flow_create_split_inner(struct rte_eth_dev *dev, + /* + * If dev_flow is as one of the suffix flow, some actions in suffix + * flow may need some user defined item layer flags, and pass the +- * Metadate rxq mark flag to suffix flow as well. ++ * Metadata rxq mark flag to suffix flow as well. + */ + if (flow_split_info->prefix_layers) + dev_flow->handle->layers = flow_split_info->prefix_layers; +- if (flow_split_info->prefix_mark) +- dev_flow->handle->mark = 1; ++ if (flow_split_info->prefix_mark) { ++ MLX5_ASSERT(wks); ++ wks->mark = 1; ++ } + if (sub_flow) + *sub_flow = dev_flow; + return flow_drv_translate(dev, dev_flow, attr, items, actions, error); +@@ -4318,6 +4516,8 @@ flow_meter_split_prep(struct rte_eth_dev *dev, + * Pointer to the Q/RSS action. + * @param[in] actions_n + * Number of original actions. ++ * @param[in] mtr_sfx ++ * Check if it is in meter suffix table. + * @param[out] error + * Perform verbose error reporting if not NULL. + * +@@ -4330,7 +4530,8 @@ flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, + struct rte_flow_action *split_actions, + const struct rte_flow_action *actions, + const struct rte_flow_action *qrss, +- int actions_n, struct rte_flow_error *error) ++ int actions_n, int mtr_sfx, ++ struct rte_flow_error *error) + { + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_rte_flow_action_set_tag *set_tag; +@@ -4345,15 +4546,15 @@ flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, + * - Add jump to mreg CP_TBL. + * As a result, there will be one more action. + */ +- ++actions_n; + memcpy(split_actions, actions, sizeof(*split_actions) * actions_n); ++ /* Count MLX5_RTE_FLOW_ACTION_TYPE_TAG. */ ++ ++actions_n; + set_tag = (void *)(split_actions + actions_n); + /* +- * If tag action is not set to void(it means we are not the meter +- * suffix flow), add the tag action. Since meter suffix flow already +- * has the tag added. ++ * If we are not the meter suffix flow, add the tag action. ++ * Since meter suffix flow already has the tag added. + */ +- if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) { ++ if (!mtr_sfx) { + /* + * Allocate the new subflow ID. This one is unique within + * device and not shared with representors. Otherwise, +@@ -4386,6 +4587,12 @@ flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, + MLX5_RTE_FLOW_ACTION_TYPE_TAG, + .conf = set_tag, + }; ++ } else { ++ /* ++ * If we are the suffix flow of meter, tag already exist. ++ * Set the QUEUE/RSS action to void. ++ */ ++ split_actions[qrss_idx].type = RTE_FLOW_ACTION_TYPE_VOID; + } + /* JUMP action to jump to mreg copy table (CP_TBL). */ + jump = (void *)(set_tag + 1); +@@ -4417,7 +4624,7 @@ flow_mreg_split_qrss_prep(struct rte_eth_dev *dev, + * @param[out] error + * Perform verbose error reporting if not NULL. + * @param[in] encap_idx +- * The encap action inndex. ++ * The encap action index. 
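The flow_mreg_split_qrss_prep() hunks above fold the meter-suffix special case into the helper itself: for a meter suffix flow the Q/RSS action is simply voided (the tag was already added by the meter prefix), otherwise it is replaced by a TAG action carrying a freshly allocated flow id, and a JUMP to the register copy table is appended either way. A minimal standalone sketch of that action-list rewrite follows; the types and names are toy stand-ins, not the rte_flow or mlx5 definitions.

#include <stdint.h>
#include <string.h>

enum toy_action_type { TOY_VOID, TOY_QUEUE, TOY_RSS, TOY_TAG, TOY_JUMP, TOY_END };

struct toy_action {
	enum toy_action_type type;
	uint32_t conf;        /* tag id or jump group, depending on type */
};

/*
 * Copy @n actions (including the END entry) from @src into @dst, which
 * must have room for n + 1 entries, and rewrite them for the Q/RSS split.
 */
static int
toy_split_qrss_prep(struct toy_action *dst, const struct toy_action *src,
		    int n, int qrss_idx, int mtr_sfx, uint32_t tag_id,
		    uint32_t cp_tbl_group)
{
	memcpy(dst, src, sizeof(*dst) * n);
	if (mtr_sfx) {
		/* Meter suffix flow: the tag already exists, drop Q/RSS. */
		dst[qrss_idx].type = TOY_VOID;
	} else {
		/* Carry the new flow id in place of the Q/RSS action. */
		dst[qrss_idx].type = TOY_TAG;
		dst[qrss_idx].conf = tag_id;
	}
	/* Append the JUMP to the copy table just before the END marker. */
	dst[n - 1].type = TOY_JUMP;
	dst[n - 1].conf = cp_tbl_group;
	dst[n].type = TOY_END;
	return n + 1;
}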
+ * + * @return + * 0 on success, negative value otherwise +@@ -4529,7 +4736,7 @@ flow_check_match_action(const struct rte_flow_action actions[], + return flag ? actions_n + 1 : 0; + } + +-#define SAMPLE_SUFFIX_ITEM 2 ++#define SAMPLE_SUFFIX_ITEM 3 + + /** + * Split the sample flow. +@@ -4568,6 +4775,7 @@ flow_check_match_action(const struct rte_flow_action actions[], + static int + flow_sample_split_prep(struct rte_eth_dev *dev, + uint32_t fdb_tx, ++ const struct rte_flow_item items[], + struct rte_flow_item sfx_items[], + const struct rte_flow_action actions[], + struct rte_flow_action actions_sfx[], +@@ -4593,7 +4801,13 @@ flow_sample_split_prep(struct rte_eth_dev *dev, + if (!fdb_tx) { + /* Prepare the prefix tag action. */ + set_tag = (void *)(actions_pre + actions_n + 1); +- ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error); ++ ret = mlx5_flow_get_reg_id(dev, MLX5_SAMPLE_ID, 0, error); ++ /* Trust VF/SF on CX5 not supported meter so that the reserved ++ * metadata regC is REG_NON, back to use application tag ++ * index 0. ++ */ ++ if (unlikely(ret == REG_NON)) ++ ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error); + if (ret < 0) + return ret; + set_tag->id = ret; +@@ -4601,6 +4815,12 @@ flow_sample_split_prep(struct rte_eth_dev *dev, + [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id); + set_tag->data = tag_id; + /* Prepare the suffix subflow items. */ ++ for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { ++ if (items->type == RTE_FLOW_ITEM_TYPE_PORT_ID) { ++ memcpy(sfx_items, items, sizeof(*sfx_items)); ++ sfx_items++; ++ } ++ } + tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM); + tag_spec->data = tag_id; + tag_spec->id = set_tag->id; +@@ -4758,17 +4978,6 @@ flow_create_split_metadata(struct rte_eth_dev *dev, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "no memory to split " + "metadata flow"); +- /* +- * If we are the suffix flow of meter, tag already exist. +- * Set the tag action to void. +- */ +- if (mtr_sfx) +- ext_actions[qrss - actions].type = +- RTE_FLOW_ACTION_TYPE_VOID; +- else +- ext_actions[qrss - actions].type = +- (enum rte_flow_action_type) +- MLX5_RTE_FLOW_ACTION_TYPE_TAG; + /* + * Create the new actions list with removed Q/RSS action + * and appended set tag and jump to register copy table +@@ -4776,7 +4985,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev, + * in advance, because it is needed for set tag action. + */ + qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions, +- qrss, actions_n, error); ++ qrss, actions_n, ++ mtr_sfx, error); + if (!mtr_sfx && !qrss_id) { + ret = -rte_errno; + goto exit; +@@ -4926,6 +5136,7 @@ flow_create_split_meter(struct rte_eth_dev *dev, + struct mlx5_flow_split_info *flow_split_info, + struct rte_flow_error *error) + { ++ struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace(); + struct mlx5_priv *priv = dev->data->dev_private; + struct rte_flow_action *sfx_actions = NULL; + struct rte_flow_action *pre_actions = NULL; +@@ -4967,7 +5178,6 @@ flow_create_split_meter(struct rte_eth_dev *dev, + goto exit; + } + /* Add the prefix subflow. */ +- flow_split_info->prefix_mark = 0; + ret = flow_create_split_inner(dev, flow, &dev_flow, + attr, items, pre_actions, + flow_split_info, error); +@@ -4982,7 +5192,7 @@ flow_create_split_meter(struct rte_eth_dev *dev, + MLX5_FLOW_TABLE_LEVEL_SUFFIX; + flow_split_info->prefix_layers = + flow_get_prefix_layer_flags(dev_flow); +- flow_split_info->prefix_mark = dev_flow->handle->mark; ++ flow_split_info->prefix_mark |= wks->mark; + } + /* Add the prefix subflow. 
*/ + ret = flow_create_split_metadata(dev, flow, +@@ -5048,6 +5258,7 @@ flow_create_split_sample(struct rte_eth_dev *dev, + struct mlx5_flow_tbl_data_entry *sfx_tbl_data; + struct mlx5_flow_tbl_resource *sfx_tbl; + union mlx5_flow_tbl_key sfx_table_key; ++ struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace(); + #endif + size_t act_size; + size_t item_size; +@@ -5081,7 +5292,7 @@ flow_create_split_sample(struct rte_eth_dev *dev, + sfx_items = (struct rte_flow_item *)((char *)sfx_actions + + act_size); + pre_actions = sfx_actions + actions_n; +- tag_id = flow_sample_split_prep(dev, fdb_tx, sfx_items, ++ tag_id = flow_sample_split_prep(dev, fdb_tx, items, sfx_items, + actions, sfx_actions, + pre_actions, actions_n, + sample_action_pos, +@@ -5113,7 +5324,8 @@ flow_create_split_sample(struct rte_eth_dev *dev, + sfx_table_key.table_id; + flow_split_info->prefix_layers = + flow_get_prefix_layer_flags(dev_flow); +- flow_split_info->prefix_mark = dev_flow->handle->mark; ++ MLX5_ASSERT(wks); ++ flow_split_info->prefix_mark |= wks->mark; + /* Suffix group level already be scaled with factor, set + * skip_scale to 1 to avoid scale again in translation. + */ +@@ -5183,22 +5395,14 @@ flow_create_split_outer(struct rte_eth_dev *dev, + return ret; + } + +-static struct mlx5_flow_tunnel * +-flow_tunnel_from_rule(struct rte_eth_dev *dev, +- const struct rte_flow_attr *attr, +- const struct rte_flow_item items[], +- const struct rte_flow_action actions[]) ++static inline struct mlx5_flow_tunnel * ++flow_tunnel_from_rule(const struct mlx5_flow *flow) + { + struct mlx5_flow_tunnel *tunnel; + + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wcast-qual" +- if (is_flow_tunnel_match_rule(dev, attr, items, actions)) +- tunnel = (struct mlx5_flow_tunnel *)items[0].spec; +- else if (is_flow_tunnel_steer_rule(dev, attr, items, actions)) +- tunnel = (struct mlx5_flow_tunnel *)actions[0].conf; +- else +- tunnel = NULL; ++ tunnel = (typeof(tunnel))flow->tunnel; + #pragma GCC diagnostic pop + + return tunnel; +@@ -5274,7 +5478,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, + int shared_actions_n = MLX5_MAX_SHARED_ACTIONS; + union { + struct mlx5_flow_expand_rss buf; +- uint8_t buffer[2048]; ++ uint8_t buffer[4096]; + } expand_buffer; + union { + struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS]; +@@ -5364,12 +5568,16 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, + if (rss && rss->types) { + unsigned int graph_root; + +- graph_root = find_graph_root(items, rss->level); ++ graph_root = find_graph_root(rss->level); + ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer), + items, rss->types, + mlx5_support_expansion, graph_root); + MLX5_ASSERT(ret > 0 && + (unsigned int)ret < sizeof(expand_buffer.buffer)); ++ if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) { ++ for (i = 0; i < buf->entries; ++i) ++ mlx5_dbg__print_pattern(buf->entry[i].pattern); ++ } + } else { + buf->entries = 1; + buf->entry[0].pattern = (void *)(uintptr_t)items; +@@ -5392,12 +5600,11 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, + error); + if (ret < 0) + goto error; +- if (is_flow_tunnel_steer_rule(dev, attr, +- buf->entry[i].pattern, +- p_actions_rx)) { ++ if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) { + ret = flow_tunnel_add_default_miss(dev, flow, attr, + p_actions_rx, + idx, ++ wks->flows[0].tunnel, + &default_miss_ctx, + error); + if (ret < 0) { +@@ -5461,7 +5668,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, + } + 
flow_rxq_flags_set(dev, flow); + rte_free(translated_actions); +- tunnel = flow_tunnel_from_rule(dev, attr, items, actions); ++ tunnel = flow_tunnel_from_rule(wks->flows); + if (tunnel) { + flow->tunnel = 1; + flow->tunnel_id = tunnel->tunnel_id; +@@ -5664,7 +5871,7 @@ flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, + * @param list + * Pointer to the Indexed flow list. + * @param active +- * If flushing is called avtively. ++ * If flushing is called actively. + */ + void + mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active) +@@ -6438,7 +6645,6 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt, + static int + mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh) + { +- struct mlx5_devx_mkey_attr mkey_attr; + struct mlx5_counter_stats_mem_mng *mem_mng; + volatile struct flow_counter_stats *raw_data; + int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES; +@@ -6448,6 +6654,7 @@ mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh) + sizeof(struct mlx5_counter_stats_mem_mng); + size_t pgsize = rte_mem_page_size(); + uint8_t *mem; ++ int ret; + int i; + + if (pgsize == (size_t)-1) { +@@ -6462,26 +6669,10 @@ mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh) + } + mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1; + size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n; +- mem_mng->umem = mlx5_glue->devx_umem_reg(sh->ctx, mem, size, +- IBV_ACCESS_LOCAL_WRITE); +- if (!mem_mng->umem) { +- rte_errno = errno; +- mlx5_free(mem); +- return -rte_errno; +- } +- mkey_attr.addr = (uintptr_t)mem; +- mkey_attr.size = size; +- mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem); +- mkey_attr.pd = sh->pdn; +- mkey_attr.log_entity_size = 0; +- mkey_attr.pg_access = 0; +- mkey_attr.klm_array = NULL; +- mkey_attr.klm_num = 0; +- mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write; +- mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read; +- mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr); +- if (!mem_mng->dm) { +- mlx5_glue->devx_umem_dereg(mem_mng->umem); ++ ret = mlx5_os_wrapped_mkey_create(sh->ctx, sh->pd, ++ sh->pdn, mem, size, ++ &mem_mng->wm); ++ if (ret) { + rte_errno = errno; + mlx5_free(mem); + return -rte_errno; +@@ -6600,7 +6791,7 @@ mlx5_flow_query_alarm(void *arg) + ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0, + MLX5_COUNTERS_PER_POOL, + NULL, NULL, +- pool->raw_hw->mem_mng->dm->id, ++ pool->raw_hw->mem_mng->wm.lkey, + (void *)(uintptr_t) + pool->raw_hw->data, + sh->devx_comp, +@@ -6906,7 +7097,7 @@ mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev) + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * @return +- * 0 on success, a nagative value otherwise. ++ * 0 on success, a negative value otherwise. 
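The counter-statistics hunk above drops the open-coded devx umem registration and mkey attribute setup in favor of one wrapped create call, and the query path then only needs the resulting lkey. A rough sketch of that wrap-and-roll-back pattern follows; every function and type below is an invented stand-in, not the mlx5 OS-layer API.

#include <errno.h>
#include <stddef.h>
#include <stdint.h>

struct toy_wrapped_mr {
	void    *umem;    /* registered user-memory handle */
	uint32_t lkey;    /* key the HW uses to access the buffer */
};

static void *toy_umem_reg(void *buf, size_t len)  { (void)len; return buf; }
static void  toy_umem_dereg(void *umem)           { (void)umem; }
static int   toy_mkey_create(void *umem, uint32_t *lkey)
{
	(void)umem;
	*lkey = 0x1234;
	return 0;
}

/* Register @buf and create its access key in one call; undo on failure. */
static int
toy_wrapped_mkey_create(void *buf, size_t len, struct toy_wrapped_mr *wm)
{
	wm->umem = toy_umem_reg(buf, len);
	if (wm->umem == NULL)
		return -ENOMEM;
	if (toy_mkey_create(wm->umem, &wm->lkey) != 0) {
		toy_umem_dereg(wm->umem);  /* roll back the registration */
		wm->umem = NULL;
		return -EINVAL;
	}
	return 0;
}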
+ */ + int + mlx5_flow_dev_dump(struct rte_eth_dev *dev, +@@ -7210,8 +7401,30 @@ int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains) + return ret; + } + ++const struct mlx5_flow_tunnel * ++mlx5_get_tof(const struct rte_flow_item *item, ++ const struct rte_flow_action *action, ++ enum mlx5_tof_rule_type *rule_type) ++{ ++ for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { ++ if (item->type == (typeof(item->type)) ++ MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) { ++ *rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE; ++ return flow_items_to_tunnel(item); ++ } ++ } ++ for (; action->conf != RTE_FLOW_ACTION_TYPE_END; action++) { ++ if (action->type == (typeof(action->type)) ++ MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) { ++ *rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE; ++ return flow_actions_to_tunnel(action); ++ } ++ } ++ return NULL; ++} ++ + /** +- * tunnel offload functionalilty is defined for DV environment only ++ * tunnel offload functionality is defined for DV environment only + */ + #ifdef HAVE_IBV_FLOW_DV_SUPPORT + __extension__ +@@ -7240,13 +7453,13 @@ flow_tunnel_add_default_miss(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_action *app_actions, + uint32_t flow_idx, ++ const struct mlx5_flow_tunnel *tunnel, + struct tunnel_default_miss_ctx *ctx, + struct rte_flow_error *error) + { + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_flow *dev_flow; + struct rte_flow_attr miss_attr = *attr; +- const struct mlx5_flow_tunnel *tunnel = app_actions[0].conf; + const struct rte_flow_item miss_items[2] = { + { + .type = RTE_FLOW_ITEM_TYPE_ETH, +@@ -7332,6 +7545,7 @@ flow_tunnel_add_default_miss(struct rte_eth_dev *dev, + dev_flow->flow = flow; + dev_flow->external = true; + dev_flow->tunnel = tunnel; ++ dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE; + /* Subflow object was created, we must include one in the list. 
*/ + SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, + dev_flow->handle, next); +@@ -7603,10 +7817,11 @@ static void get_tunnel_miss(struct rte_eth_dev *dev, void *x) + + rte_spinlock_unlock(&thub->sl); + ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel); +- ctx->tunnel->refctn = 1; + rte_spinlock_lock(&thub->sl); +- if (ctx->tunnel) ++ if (ctx->tunnel) { ++ ctx->tunnel->refctn = 1; + LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain); ++ } + } + + +@@ -7648,7 +7863,8 @@ int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh) + return -ENOMEM; + LIST_INIT(&thub->tunnels); + rte_spinlock_init(&thub->sl); +- thub->groups = mlx5_hlist_create("flow groups", MLX5_MAX_TABLES, 0, ++ thub->groups = mlx5_hlist_create("flow groups", ++ rte_align32pow2(MLX5_MAX_TABLES), 0, + 0, mlx5_flow_tunnel_grp2tbl_create_cb, + NULL, + mlx5_flow_tunnel_grp2tbl_remove_cb); +@@ -7669,30 +7885,37 @@ int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh) + return err; + } + +-static inline bool ++static inline int + mlx5_flow_tunnel_validate(struct rte_eth_dev *dev, + struct rte_flow_tunnel *tunnel, +- const char *err_msg) ++ struct rte_flow_error *error) + { +- err_msg = NULL; +- if (!is_tunnel_offload_active(dev)) { +- err_msg = "tunnel offload was not activated"; +- goto out; +- } else if (!tunnel) { +- err_msg = "no application tunnel"; +- goto out; +- } ++ struct mlx5_priv *priv = dev->data->dev_private; + ++ if (!priv->config.dv_flow_en) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, ++ "flow DV interface is off"); ++ if (!is_tunnel_offload_active(dev)) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, ++ "tunnel offload was not activated"); ++ if (!tunnel) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, ++ "no application tunnel"); + switch (tunnel->type) { + default: +- err_msg = "unsupported tunnel type"; +- goto out; ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, ++ "unsupported tunnel type"); + case RTE_FLOW_ITEM_TYPE_VXLAN: ++ case RTE_FLOW_ITEM_TYPE_GRE: ++ case RTE_FLOW_ITEM_TYPE_NVGRE: ++ case RTE_FLOW_ITEM_TYPE_GENEVE: + break; + } +- +-out: +- return !err_msg; ++ return 0; + } + + static int +@@ -7702,15 +7925,11 @@ mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev, + uint32_t *num_of_actions, + struct rte_flow_error *error) + { +- int ret; + struct mlx5_flow_tunnel *tunnel; +- const char *err_msg = NULL; +- bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, err_msg); ++ int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error); + +- if (!verdict) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, +- err_msg); ++ if (ret) ++ return ret; + ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel); + if (ret < 0) { + return rte_flow_error_set(error, ret, +@@ -7729,15 +7948,11 @@ mlx5_flow_tunnel_match(struct rte_eth_dev *dev, + uint32_t *num_of_items, + struct rte_flow_error *error) + { +- int ret; + struct mlx5_flow_tunnel *tunnel; +- const char *err_msg = NULL; +- bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, err_msg); ++ int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error); + +- if (!verdict) +- return rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_HANDLE, NULL, +- err_msg); ++ if (ret) ++ return ret; + ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel); + if (ret < 0) { + return rte_flow_error_set(error, ret, +@@ -7924,6 +8139,7 @@ 
flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev, + __rte_unused const struct rte_flow_attr *attr, + __rte_unused const struct rte_flow_action *actions, + __rte_unused uint32_t flow_idx, ++ __rte_unused const struct mlx5_flow_tunnel *tunnel, + __rte_unused struct tunnel_default_miss_ctx *ctx, + __rte_unused struct rte_flow_error *error) + { +@@ -7962,3 +8178,63 @@ mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh, + } + #endif /* HAVE_IBV_FLOW_DV_SUPPORT */ + ++static void ++mlx5_dbg__print_pattern(const struct rte_flow_item *item) ++{ ++ int ret; ++ struct rte_flow_error error; ++ ++ for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { ++ char *item_name; ++ ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name, ++ sizeof(item_name), ++ (void *)(uintptr_t)item->type, &error); ++ if (ret > 0) ++ printf("%s ", item_name); ++ else ++ printf("%d\n", (int)item->type); ++ } ++ printf("END\n"); ++} ++ ++static int ++mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item) ++{ ++ const struct rte_flow_item_udp *spec = udp_item->spec; ++ const struct rte_flow_item_udp *mask = udp_item->mask; ++ uint16_t udp_dport = 0; ++ ++ if (spec != NULL) { ++ if (!mask) ++ mask = &rte_flow_item_udp_mask; ++ udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port & ++ mask->hdr.dst_port); ++ } ++ return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN); ++} ++ ++static const struct mlx5_flow_expand_node * ++mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern, ++ unsigned int item_idx, ++ const struct mlx5_flow_expand_node graph[], ++ const struct mlx5_flow_expand_node *node) ++{ ++ const struct rte_flow_item *item = pattern + item_idx, *prev_item; ++ ++ if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN && ++ node != NULL && ++ node->type == RTE_FLOW_ITEM_TYPE_VXLAN) { ++ /* ++ * The expansion node is VXLAN and it is also the last ++ * expandable item in the pattern, so need to continue ++ * expansion of the inner tunnel. ++ */ ++ MLX5_ASSERT(item_idx > 0); ++ prev_item = pattern + item_idx - 1; ++ MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP); ++ if (mlx5_flow_is_std_vxlan_port(prev_item)) ++ return &graph[MLX5_EXPANSION_STD_VXLAN]; ++ return &graph[MLX5_EXPANSION_L3_VXLAN]; ++ } ++ return node; ++} +diff --git a/dpdk/drivers/net/mlx5/mlx5_flow.h b/dpdk/drivers/net/mlx5/mlx5_flow.h +index 91f48923c0..090cc7c77f 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_flow.h ++++ b/dpdk/drivers/net/mlx5/mlx5_flow.h +@@ -81,6 +81,7 @@ enum mlx5_feature_name { + MLX5_MTR_COLOR, + MLX5_MTR_SFX, + MLX5_ASO_FLOW_HIT, ++ MLX5_SAMPLE_ID, + }; + + /* Default queue number. */ +@@ -362,7 +363,7 @@ enum mlx5_feature_name { + #define MLX5_ACT_NUM_MDF_IPV6 4 + #define MLX5_ACT_NUM_MDF_MAC 2 + #define MLX5_ACT_NUM_MDF_VID 1 +-#define MLX5_ACT_NUM_MDF_PORT 2 ++#define MLX5_ACT_NUM_MDF_PORT 1 + #define MLX5_ACT_NUM_MDF_TTL 1 + #define MLX5_ACT_NUM_DEC_TTL MLX5_ACT_NUM_MDF_TTL + #define MLX5_ACT_NUM_MDF_TCPSEQ 1 +@@ -533,7 +534,7 @@ struct mlx5_flow_tbl_data_entry { + const struct mlx5_flow_tunnel *tunnel; + uint32_t group_id; + bool external; +- bool tunnel_offload; /* Tunnel offlod table or not. */ ++ bool tunnel_offload; /* Tunnel offload table or not. */ + bool is_egress; /**< Egress table. */ + }; + +@@ -631,7 +632,6 @@ struct mlx5_flow_handle { + /**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */ + void *drv_flow; /**< pointer to driver flow object. */ + uint32_t split_flow_id:28; /**< Sub flow unique match flow id. 
*/ +- uint32_t mark:1; /**< Metadate rxq mark flag. */ + uint32_t fate_action:3; /**< Fate action type. */ + union { + uint32_t rix_hrxq; /**< Hash Rx queue object index. */ +@@ -753,6 +753,16 @@ struct mlx5_flow_verbs_workspace { + /** Maximal number of device sub-flows supported. */ + #define MLX5_NUM_MAX_DEV_FLOWS 32 + ++/** ++ * tunnel offload rules type ++ */ ++enum mlx5_tof_rule_type { ++ MLX5_TUNNEL_OFFLOAD_NONE = 0, ++ MLX5_TUNNEL_OFFLOAD_SET_RULE, ++ MLX5_TUNNEL_OFFLOAD_MATCH_RULE, ++ MLX5_TUNNEL_OFFLOAD_MISS_RULE, ++}; ++ + /** Device flow structure. */ + __extension__ + struct mlx5_flow { +@@ -774,6 +784,7 @@ struct mlx5_flow { + struct mlx5_flow_handle *handle; + uint32_t handle_idx; /* Index of the mlx5 flow handle memory. */ + const struct mlx5_flow_tunnel *tunnel; ++ enum mlx5_tof_rule_type tof_type; + }; + + /* Flow meter state. */ +@@ -983,10 +994,10 @@ mlx5_tunnel_hub(struct rte_eth_dev *dev) + } + + static inline bool +-is_tunnel_offload_active(struct rte_eth_dev *dev) ++is_tunnel_offload_active(const struct rte_eth_dev *dev) + { + #ifdef HAVE_IBV_FLOW_DV_SUPPORT +- struct mlx5_priv *priv = dev->data->dev_private; ++ const struct mlx5_priv *priv = dev->data->dev_private; + return !!priv->config.dv_miss_info; + #else + RTE_SET_USED(dev); +@@ -995,23 +1006,15 @@ is_tunnel_offload_active(struct rte_eth_dev *dev) + } + + static inline bool +-is_flow_tunnel_match_rule(__rte_unused struct rte_eth_dev *dev, +- __rte_unused const struct rte_flow_attr *attr, +- __rte_unused const struct rte_flow_item items[], +- __rte_unused const struct rte_flow_action actions[]) ++is_flow_tunnel_match_rule(enum mlx5_tof_rule_type tof_rule_type) + { +- return (items[0].type == (typeof(items[0].type)) +- MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL); ++ return tof_rule_type == MLX5_TUNNEL_OFFLOAD_MATCH_RULE; + } + + static inline bool +-is_flow_tunnel_steer_rule(__rte_unused struct rte_eth_dev *dev, +- __rte_unused const struct rte_flow_attr *attr, +- __rte_unused const struct rte_flow_item items[], +- __rte_unused const struct rte_flow_action actions[]) ++is_flow_tunnel_steer_rule(enum mlx5_tof_rule_type tof_rule_type) + { +- return (actions[0].type == (typeof(actions[0].type)) +- MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET); ++ return tof_rule_type == MLX5_TUNNEL_OFFLOAD_SET_RULE; + } + + static inline const struct mlx5_flow_tunnel * +@@ -1059,6 +1062,26 @@ struct rte_flow { + #define MLX5_RSS_HASH_IPV6_UDP \ + (MLX5_RSS_HASH_IPV6 | \ + IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP) ++#define MLX5_RSS_HASH_IPV4_SRC_ONLY IBV_RX_HASH_SRC_IPV4 ++#define MLX5_RSS_HASH_IPV4_DST_ONLY IBV_RX_HASH_DST_IPV4 ++#define MLX5_RSS_HASH_IPV6_SRC_ONLY IBV_RX_HASH_SRC_IPV6 ++#define MLX5_RSS_HASH_IPV6_DST_ONLY IBV_RX_HASH_DST_IPV6 ++#define MLX5_RSS_HASH_IPV4_UDP_SRC_ONLY \ ++ (MLX5_RSS_HASH_IPV4 | IBV_RX_HASH_SRC_PORT_UDP) ++#define MLX5_RSS_HASH_IPV4_UDP_DST_ONLY \ ++ (MLX5_RSS_HASH_IPV4 | IBV_RX_HASH_DST_PORT_UDP) ++#define MLX5_RSS_HASH_IPV6_UDP_SRC_ONLY \ ++ (MLX5_RSS_HASH_IPV6 | IBV_RX_HASH_SRC_PORT_UDP) ++#define MLX5_RSS_HASH_IPV6_UDP_DST_ONLY \ ++ (MLX5_RSS_HASH_IPV6 | IBV_RX_HASH_DST_PORT_UDP) ++#define MLX5_RSS_HASH_IPV4_TCP_SRC_ONLY \ ++ (MLX5_RSS_HASH_IPV4 | IBV_RX_HASH_SRC_PORT_TCP) ++#define MLX5_RSS_HASH_IPV4_TCP_DST_ONLY \ ++ (MLX5_RSS_HASH_IPV4 | IBV_RX_HASH_DST_PORT_TCP) ++#define MLX5_RSS_HASH_IPV6_TCP_SRC_ONLY \ ++ (MLX5_RSS_HASH_IPV6 | IBV_RX_HASH_SRC_PORT_TCP) ++#define MLX5_RSS_HASH_IPV6_TCP_DST_ONLY \ ++ (MLX5_RSS_HASH_IPV6 | IBV_RX_HASH_DST_PORT_TCP) + #define MLX5_RSS_HASH_NONE 0ULL + + /* array of 
valid combinations of RX Hash fields for RSS */ +@@ -1082,8 +1105,6 @@ struct mlx5_shared_action_rss { + /**< Hash RX queues (hrxq, hrxq_tunnel fields) indirection table. */ + uint32_t hrxq[MLX5_RSS_HASH_FIELDS_LEN]; + /**< Hash RX queue indexes mapped to mlx5_rss_hash_fields */ +- uint32_t hrxq_tunnel[MLX5_RSS_HASH_FIELDS_LEN]; +- /**< Hash RX queue indexes for tunneled RSS */ + rte_spinlock_t action_rss_sl; /**< Shared RSS action spinlock. */ + }; + +@@ -1101,14 +1122,15 @@ struct mlx5_flow_workspace { + struct mlx5_flow_rss_desc rss_desc; + uint32_t rssq_num; /* Allocated queue num in rss_desc. */ + uint32_t flow_idx; /* Intermediate device flow index. */ ++ uint32_t mark:1; /* Indicates if flow contains mark action. */ + }; + + struct mlx5_flow_split_info { +- bool external; ++ uint32_t external:1; + /**< True if flow is created by request external to PMD. */ +- uint8_t skip_scale; /**< Skip the scale the table with factor. */ ++ uint32_t prefix_mark:1; /**< Prefix subflow mark flag. */ ++ uint32_t skip_scale:8; /**< Skip the scale the table with factor. */ + uint32_t flow_idx; /**< This memory pool index to the flow. */ +- uint32_t prefix_mark; /**< Prefix subflow mark flag. */ + uint64_t prefix_layers; /**< Prefix subflow layers. */ + }; + +@@ -1234,11 +1256,10 @@ struct flow_grp_info { + + static inline bool + tunnel_use_standard_attr_group_translate +- (struct rte_eth_dev *dev, +- const struct mlx5_flow_tunnel *tunnel, ++ (const struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, +- const struct rte_flow_item items[], +- const struct rte_flow_action actions[]) ++ const struct mlx5_flow_tunnel *tunnel, ++ enum mlx5_tof_rule_type tof_rule_type) + { + bool verdict; + +@@ -1254,7 +1275,7 @@ tunnel_use_standard_attr_group_translate + * method + */ + verdict = !attr->group && +- is_flow_tunnel_steer_rule(dev, attr, items, actions); ++ is_flow_tunnel_steer_rule(tof_rule_type); + } else { + /* + * non-tunnel group translation uses standard method for +@@ -1266,6 +1287,20 @@ tunnel_use_standard_attr_group_translate + return verdict; + } + ++static inline uint16_t ++mlx5_translate_tunnel_etypes(uint64_t pattern_flags) ++{ ++ if (pattern_flags & MLX5_FLOW_LAYER_INNER_L2) ++ return RTE_ETHER_TYPE_TEB; ++ else if (pattern_flags & MLX5_FLOW_LAYER_INNER_L3_IPV4) ++ return RTE_ETHER_TYPE_IPV4; ++ else if (pattern_flags & MLX5_FLOW_LAYER_INNER_L3_IPV6) ++ return RTE_ETHER_TYPE_IPV6; ++ else if (pattern_flags & MLX5_FLOW_LAYER_MPLS) ++ return RTE_ETHER_TYPE_MPLS; ++ return 0; ++} ++ + int mlx5_flow_group_to_table(struct rte_eth_dev *dev, + const struct mlx5_flow_tunnel *tunnel, + uint32_t group, uint32_t *table, +@@ -1409,6 +1444,7 @@ int mlx5_flow_meter_flush(struct rte_eth_dev *dev, + struct rte_mtr_error *error); + int mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev); + int mlx5_shared_action_flush(struct rte_eth_dev *dev); ++int mlx5_flow_discover_dr_action_support(struct rte_eth_dev *dev); + void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id); + int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh); + +@@ -1487,4 +1523,10 @@ void flow_dv_dest_array_remove_cb(struct mlx5_cache_list *list, + struct mlx5_cache_entry *entry); + struct mlx5_aso_age_action *flow_aso_age_get_by_idx(struct rte_eth_dev *dev, + uint32_t age_idx); ++const struct mlx5_flow_tunnel * ++mlx5_get_tof(const struct rte_flow_item *items, ++ const struct rte_flow_action *actions, ++ enum mlx5_tof_rule_type *rule_type); ++ ++ + #endif /* RTE_PMD_MLX5_FLOW_H_ */ +diff 
--git a/dpdk/drivers/net/mlx5/mlx5_flow_age.c b/dpdk/drivers/net/mlx5/mlx5_flow_age.c +index 0ea61be4eb..e110288c85 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_flow_age.c ++++ b/dpdk/drivers/net/mlx5/mlx5_flow_age.c +@@ -38,8 +38,6 @@ mlx5_aso_cq_destroy(struct mlx5_aso_cq *cq) + * Pointer to CQ to create. + * @param[in] log_desc_n + * Log of number of descriptors in queue. +- * @param[in] socket +- * Socket to use for allocation. + * @param[in] uar_page_id + * UAR page ID to use. + * @param[in] eqn +@@ -50,7 +48,7 @@ mlx5_aso_cq_destroy(struct mlx5_aso_cq *cq) + */ + static int + mlx5_aso_cq_create(void *ctx, struct mlx5_aso_cq *cq, uint16_t log_desc_n, +- int socket, int uar_page_id, uint32_t eqn) ++ int uar_page_id, uint32_t eqn) + { + struct mlx5_devx_cq_attr attr = { 0 }; + size_t pgsize = sysconf(_SC_PAGESIZE); +@@ -60,7 +58,7 @@ mlx5_aso_cq_create(void *ctx, struct mlx5_aso_cq *cq, uint16_t log_desc_n, + cq->log_desc_n = log_desc_n; + umem_size = sizeof(struct mlx5_cqe) * cq_size + sizeof(*cq->db_rec) * 2; + cq->umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size, +- 4096, socket); ++ 4096, SOCKET_ID_ANY); + if (!cq->umem_buf) { + DRV_LOG(ERR, "Failed to allocate memory for CQ."); + rte_errno = ENOMEM; +@@ -101,86 +99,65 @@ mlx5_aso_cq_create(void *ctx, struct mlx5_aso_cq *cq, uint16_t log_desc_n, + /** + * Free MR resources. + * ++ * @param[in] sh ++ * Pointer to shared device context. + * @param[in] mr + * MR to free. + */ + static void +-mlx5_aso_devx_dereg_mr(struct mlx5_aso_devx_mr *mr) ++mlx5_aso_dereg_mr(struct mlx5_dev_ctx_shared *sh, struct mlx5_pmd_mr *mr) + { +- claim_zero(mlx5_devx_cmd_destroy(mr->mkey)); +- if (!mr->is_indirect && mr->umem) +- claim_zero(mlx5_glue->devx_umem_dereg(mr->umem)); +- mlx5_free(mr->buf); ++ void *addr = mr->addr; ++ ++ sh->share_cache.dereg_mr_cb(mr); ++ mlx5_free(addr); + memset(mr, 0, sizeof(*mr)); + } + + /** + * Register Memory Region. + * +- * @param[in] ctx +- * Context returned from mlx5 open_device() glue function. ++ * @param[in] sh ++ * Pointer to shared device context. + * @param[in] length + * Size of MR buffer. + * @param[in/out] mr + * Pointer to MR to create. +- * @param[in] socket +- * Socket to use for allocation. +- * @param[in] pdn +- * Protection Domain number to use. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
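The ASO memory-region helpers above stop calling the verbs/devx glue directly and instead go through the registration callbacks kept in the shared cache, so allocation, registration and the error path live in one place regardless of the backend. A self-contained sketch of that callback indirection follows; the structures are hypothetical, not the real share_cache layout.

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct toy_mr {
	void    *addr;
	size_t   len;
	uint32_t lkey;
};

struct toy_share_cache {
	/* Backend-specific hooks (e.g. Verbs vs. DevX registration). */
	int  (*reg_mr_cb)(void *pd, void *addr, size_t len, struct toy_mr *mr);
	void (*dereg_mr_cb)(struct toy_mr *mr);
};

/* Allocate a zeroed buffer and register it through the cached callback. */
static int
toy_reg_mr(struct toy_share_cache *sc, void *pd, size_t len, struct toy_mr *mr)
{
	mr->addr = calloc(1, len);
	if (mr->addr == NULL)
		return -1;
	if (sc->reg_mr_cb(pd, mr->addr, len, mr) != 0) {
		free(mr->addr);        /* registration failed, release memory */
		mr->addr = NULL;
		return -1;
	}
	mr->len = len;
	return 0;
}

/* Mirror of the above: deregister first, then free the backing memory. */
static void
toy_dereg_mr(struct toy_share_cache *sc, struct toy_mr *mr)
{
	void *addr = mr->addr;

	sc->dereg_mr_cb(mr);
	free(addr);
	memset(mr, 0, sizeof(*mr));
}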
+ */ + static int +-mlx5_aso_devx_reg_mr(void *ctx, size_t length, struct mlx5_aso_devx_mr *mr, +- int socket, int pdn) ++mlx5_aso_reg_mr(struct mlx5_dev_ctx_shared *sh, size_t length, ++ struct mlx5_pmd_mr *mr) + { +- struct mlx5_devx_mkey_attr mkey_attr; ++ int ret; + +- mr->buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096, +- socket); +- if (!mr->buf) { +- DRV_LOG(ERR, "Failed to create ASO bits mem for MR by Devx."); ++ mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096, ++ SOCKET_ID_ANY); ++ if (!mr->addr) { ++ DRV_LOG(ERR, "Failed to create ASO bits mem for MR."); + return -1; + } +- mr->umem = mlx5_glue->devx_umem_reg(ctx, mr->buf, length, +- IBV_ACCESS_LOCAL_WRITE); +- if (!mr->umem) { +- DRV_LOG(ERR, "Failed to register Umem for MR by Devx."); +- goto error; +- } +- mkey_attr.addr = (uintptr_t)mr->buf; +- mkey_attr.size = length; +- mkey_attr.umem_id = mlx5_os_get_umem_id(mr->umem); +- mkey_attr.pd = pdn; +- mkey_attr.pg_access = 1; +- mkey_attr.klm_array = NULL; +- mkey_attr.klm_num = 0; +- mkey_attr.relaxed_ordering_read = 0; +- mkey_attr.relaxed_ordering_write = 0; +- mr->mkey = mlx5_devx_cmd_mkey_create(ctx, &mkey_attr); +- if (!mr->mkey) { ++ ret = sh->share_cache.reg_mr_cb(sh->pd, mr->addr, length, mr); ++ if (ret) { + DRV_LOG(ERR, "Failed to create direct Mkey."); +- goto error; ++ mlx5_free(mr->addr); ++ return -1; + } +- mr->length = length; +- mr->is_indirect = false; + return 0; +-error: +- if (mr->umem) +- claim_zero(mlx5_glue->devx_umem_dereg(mr->umem)); +- mlx5_free(mr->buf); +- return -1; + } + + /** + * Destroy Send Queue used for ASO access. + * ++ * @param[in] sh ++ * Pointer to shared device context. + * @param[in] sq + * ASO SQ to destroy. + */ + static void +-mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq) ++mlx5_aso_destroy_sq(struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_sq *sq) + { + if (sq->wqe_umem) { + mlx5_glue->devx_umem_dereg(sq->wqe_umem); +@@ -196,7 +173,7 @@ mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq) + } + if (sq->cq.cq) + mlx5_aso_cq_destroy(&sq->cq); +- mlx5_aso_devx_dereg_mr(&sq->mr); ++ mlx5_aso_dereg_mr(sh, &sq->mr); + memset(sq, 0, sizeof(*sq)); + } + +@@ -218,8 +195,8 @@ mlx5_aso_init_sq(struct mlx5_aso_sq *sq) + for (i = 0, wqe = &sq->wqes[0]; i < size; ++i, ++wqe) { + wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) | + (sizeof(*wqe) >> 4)); +- wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.mkey->id); +- addr = (uint64_t)((uint64_t *)sq->mr.buf + i * ++ wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey); ++ addr = (uint64_t)((uint64_t *)sq->mr.addr + i * + MLX5_ASO_AGE_ACTIONS_PER_POOL / 64); + wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32)); + wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u); +@@ -236,12 +213,10 @@ mlx5_aso_init_sq(struct mlx5_aso_sq *sq) + /** + * Create Send Queue used for ASO access. + * +- * @param[in] ctx +- * Context returned from mlx5 open_device() glue function. ++ * @param[in] sh ++ * Pointer to shared device context. + * @param[in/out] sq + * Pointer to SQ to create. +- * @param[in] socket +- * Socket to use for allocation. + * @param[in] uar + * User Access Region object. + * @param[in] pdn +@@ -250,14 +225,16 @@ mlx5_aso_init_sq(struct mlx5_aso_sq *sq) + * EQ number. + * @param[in] log_desc_n + * Log of number of descriptors in queue. ++ * @param[in] ts_format ++ * timestamp format supported by the queue. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
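The SQ initialization above gives every ASO WQE the same lkey and a virtual address that is a fixed-size slice of the single registered buffer, split into high/low halves. A compact sketch of that slicing follows, with invented field names.

#include <stddef.h>
#include <stdint.h>

struct toy_wqe {
	uint32_t lkey;     /* memory key of the one registered buffer       */
	uint32_t va_high;  /* upper 32 bits of this descriptor's slice      */
	uint32_t va_low;   /* lower 32 bits; low bit doubles as a flag here */
};

/* Point each descriptor at its own slice of a single registered buffer. */
static void
toy_init_sq(struct toy_wqe *wqes, unsigned int n_wqes,
	    uintptr_t mr_base, uint32_t mr_lkey, size_t slice_bytes)
{
	unsigned int i;

	for (i = 0; i < n_wqes; i++) {
		uint64_t addr = (uint64_t)(mr_base + i * slice_bytes);

		wqes[i].lkey = mr_lkey;
		wqes[i].va_high = (uint32_t)(addr >> 32);
		wqes[i].va_low = (uint32_t)addr | 1u;
	}
}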
+ */ + static int +-mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, ++mlx5_aso_sq_create(struct mlx5_dev_ctx_shared *sh, struct mlx5_aso_sq *sq, + struct mlx5dv_devx_uar *uar, uint32_t pdn, +- uint32_t eqn, uint16_t log_desc_n) ++ uint32_t eqn, uint16_t log_desc_n, uint32_t ts_format) + { + struct mlx5_devx_create_sq_attr attr = { 0 }; + struct mlx5_devx_modify_sq_attr modify_attr = { 0 }; +@@ -267,21 +244,22 @@ mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, + uint32_t wq_size = sizeof(struct mlx5_aso_wqe) * sq_desc_n; + int ret; + +- if (mlx5_aso_devx_reg_mr(ctx, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) * +- sq_desc_n, &sq->mr, socket, pdn)) ++ if (mlx5_aso_reg_mr(sh, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) * sq_desc_n, ++ &sq->mr)) + return -1; +- if (mlx5_aso_cq_create(ctx, &sq->cq, log_desc_n, socket, ++ if (mlx5_aso_cq_create(sh->ctx, &sq->cq, log_desc_n, + mlx5_os_get_devx_uar_page_id(uar), eqn)) + goto error; + sq->log_desc_n = log_desc_n; +- sq->umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, wq_size + +- sizeof(*sq->db_rec) * 2, 4096, socket); ++ sq->umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ++ wq_size + sizeof(*sq->db_rec) * 2, ++ 4096, SOCKET_ID_ANY); + if (!sq->umem_buf) { + DRV_LOG(ERR, "Can't allocate wqe buffer."); + rte_errno = ENOMEM; + goto error; + } +- sq->wqe_umem = mlx5_glue->devx_umem_reg(ctx, ++ sq->wqe_umem = mlx5_glue->devx_umem_reg(sh->ctx, + (void *)(uintptr_t)sq->umem_buf, + wq_size + + sizeof(*sq->db_rec) * 2, +@@ -296,6 +274,7 @@ mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, + attr.tis_num = 0; + attr.user_index = 0xFFFF; + attr.cqn = sq->cq.cq->id; ++ attr.ts_format = mlx5_ts_format_conv(ts_format); + wq_attr->uar_page = mlx5_os_get_devx_uar_page_id(uar); + wq_attr->pd = pdn; + wq_attr->wq_type = MLX5_WQ_TYPE_CYCLIC; +@@ -308,7 +287,7 @@ mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, + wq_attr->dbr_umem_id = wq_attr->wq_umem_id; + wq_attr->dbr_addr = wq_size; + wq_attr->dbr_umem_valid = 1; +- sq->sq = mlx5_devx_cmd_create_sq(ctx, &attr); ++ sq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &attr); + if (!sq->sq) { + DRV_LOG(ERR, "Can't create sq object."); + rte_errno = ENOMEM; +@@ -330,7 +309,7 @@ mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, + mlx5_aso_init_sq(sq); + return 0; + error: +- mlx5_aso_destroy_sq(sq); ++ mlx5_aso_destroy_sq(sh, sq); + return -1; + } + +@@ -346,9 +325,9 @@ mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, + int + mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh) + { +- return mlx5_aso_sq_create(sh->ctx, &sh->aso_age_mng->aso_sq, 0, ++ return mlx5_aso_sq_create(sh, &sh->aso_age_mng->aso_sq, + sh->tx_uar, sh->pdn, sh->eqn, +- MLX5_ASO_QUEUE_LOG_DESC); ++ MLX5_ASO_QUEUE_LOG_DESC, sh->sq_ts_format); + } + + /** +@@ -360,7 +339,7 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh) + void + mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh) + { +- mlx5_aso_destroy_sq(&sh->aso_age_mng->aso_sq); ++ mlx5_aso_destroy_sq(sh, &sh->aso_age_mng->aso_sq); + } + + /** +@@ -490,7 +469,7 @@ mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n) + uint16_t idx = (sq->tail + i) & mask; + struct mlx5_aso_age_pool *pool = sq->elts[idx].pool; + uint64_t diff = curr - pool->time_of_last_age_check; +- uint64_t *addr = sq->mr.buf; ++ uint64_t *addr = sq->mr.addr; + int j; + + addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64; +diff --git a/dpdk/drivers/net/mlx5/mlx5_flow_dv.c b/dpdk/drivers/net/mlx5/mlx5_flow_dv.c +index 
3fdc3ffe16..c1505b69e6 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_flow_dv.c ++++ b/dpdk/drivers/net/mlx5/mlx5_flow_dv.c +@@ -132,6 +132,7 @@ flow_dv_attr_init(const struct rte_flow_item *item, union flow_dv_attr *attr, + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: + case RTE_FLOW_ITEM_TYPE_GENEVE: + case RTE_FLOW_ITEM_TYPE_MPLS: ++ case RTE_FLOW_ITEM_TYPE_GTP: + if (tunnel_decap) + attr->attr = 0; + break; +@@ -1442,7 +1443,7 @@ flow_dv_validate_item_meta(struct rte_eth_dev *dev __rte_unused, + if (reg == REG_NON) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, +- "unavalable extended metadata register"); ++ "unavailable extended metadata register"); + if (reg == REG_B) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, +@@ -1450,13 +1451,20 @@ flow_dv_validate_item_meta(struct rte_eth_dev *dev __rte_unused, + "isn't supported"); + if (reg != REG_A) + nic_mask.data = priv->sh->dv_meta_mask; +- } else if (attr->transfer) { +- return rte_flow_error_set(error, ENOTSUP, ++ } else { ++ if (attr->transfer) ++ return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "extended metadata feature " + "should be enabled when " + "meta item is requested " + "with e-switch mode "); ++ if (attr->ingress) ++ return rte_flow_error_set(error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_ITEM, item, ++ "match on metadata for ingress " ++ "is not supported in legacy " ++ "metadata mode"); + } + if (!mask) + mask = &rte_flow_item_meta_mask; +@@ -2459,7 +2467,7 @@ flow_dv_validate_action_set_meta(struct rte_eth_dev *dev, + if (reg == REG_NON) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, action, +- "unavalable extended metadata register"); ++ "unavailable extended metadata register"); + if (reg != REG_A && reg != REG_B) { + struct mlx5_priv *priv = dev->data->dev_private; + +@@ -3032,6 +3040,7 @@ flow_dv_port_id_create_cb(struct mlx5_cache_list *list, + "cannot create action"); + return NULL; + } ++ cache->idx = idx; + return &cache->entry; + } + +@@ -3123,6 +3132,7 @@ flow_dv_push_vlan_create_cb(struct mlx5_cache_list *list, + "cannot create push vlan action"); + return NULL; + } ++ cache->idx = idx; + return &cache->entry; + } + +@@ -3968,7 +3978,7 @@ flow_dv_validate_action_jump(struct rte_eth_dev *dev, + const struct rte_flow_attr *attributes, + bool external, struct rte_flow_error *error) + { +- uint32_t target_group, table; ++ uint32_t target_group, table = 0; + int ret = 0; + struct flow_grp_info grp_info = { + .external = !!external, +@@ -4009,6 +4019,10 @@ flow_dv_validate_action_jump(struct rte_eth_dev *dev, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "target group must be other than" + " the current flow group"); ++ if (table == 0) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ACTION_CONF, ++ NULL, "root table shouldn't be destination"); + return 0; + } + +@@ -4122,7 +4136,7 @@ flow_dv_modify_hdr_action_max(struct rte_eth_dev *dev __rte_unused, + * Pointer to error structure. + * + * @return +- * 0 on success, a negative errno value otherwise and rte_ernno is set. ++ * 0 on success, a negative errno value otherwise and rte_errno is set. 
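The JUMP validation above now pre-initializes the translated table to 0 and rejects the root table as a destination, in addition to the existing check that the target differs from the current group. A small sketch of that check order follows; the group-to-table translation is a trivial placeholder, not the real tunnel-aware mapping.

#include <errno.h>
#include <stdint.h>

/* Placeholder translation; the real code consults tunnel and group state. */
static int
toy_group_to_table(uint32_t group, uint32_t *table)
{
	*table = group;
	return 0;
}

static int
toy_validate_jump(uint32_t cur_group, uint32_t target_group)
{
	uint32_t table = 0;

	if (toy_group_to_table(target_group, &table) != 0)
		return -EINVAL;
	if (target_group == cur_group)
		return -EINVAL;   /* jump to the current group is a loop */
	if (table == 0)
		return -EINVAL;   /* root table must not be a destination */
	return 0;
}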
+ */ + static int + mlx5_flow_validate_action_meter(struct rte_eth_dev *dev, +@@ -5314,32 +5328,39 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + int16_t rw_act_num = 0; + uint64_t is_root; + const struct mlx5_flow_tunnel *tunnel; ++ enum mlx5_tof_rule_type tof_rule_type; + struct flow_grp_info grp_info = { + .external = !!external, + .transfer = !!attr->transfer, + .fdb_def_rule = !!priv->fdb_def_rule, ++ .std_tbl_fix = true, + }; + const struct rte_eth_hairpin_conf *conf; ++ uint32_t tag_id = 0; + + if (items == NULL) + return -1; +- if (is_flow_tunnel_match_rule(dev, attr, items, actions)) { +- tunnel = flow_items_to_tunnel(items); +- action_flags |= MLX5_FLOW_ACTION_TUNNEL_MATCH | +- MLX5_FLOW_ACTION_DECAP; +- } else if (is_flow_tunnel_steer_rule(dev, attr, items, actions)) { +- tunnel = flow_actions_to_tunnel(actions); +- action_flags |= MLX5_FLOW_ACTION_TUNNEL_SET; +- } else { +- tunnel = NULL; ++ tunnel = is_tunnel_offload_active(dev) ? ++ mlx5_get_tof(items, actions, &tof_rule_type) : NULL; ++ if (tunnel) { ++ if (!priv->config.dv_flow_en) ++ return rte_flow_error_set ++ (error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, ++ NULL, "tunnel offload requires DV flow interface"); ++ if (priv->representor) ++ return rte_flow_error_set ++ (error, ENOTSUP, ++ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, ++ NULL, "decap not supported for VF representor"); ++ if (tof_rule_type == MLX5_TUNNEL_OFFLOAD_SET_RULE) ++ action_flags |= MLX5_FLOW_ACTION_TUNNEL_SET; ++ else if (tof_rule_type == MLX5_TUNNEL_OFFLOAD_MATCH_RULE) ++ action_flags |= MLX5_FLOW_ACTION_TUNNEL_MATCH | ++ MLX5_FLOW_ACTION_DECAP; ++ grp_info.std_tbl_fix = tunnel_use_standard_attr_group_translate ++ (dev, attr, tunnel, tof_rule_type); + } +- if (tunnel && priv->representor) +- return rte_flow_error_set(error, ENOTSUP, +- RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, +- "decap not supported " +- "for VF representor"); +- grp_info.std_tbl_fix = tunnel_use_standard_attr_group_translate +- (dev, tunnel, attr, items, actions); + ret = flow_dv_validate_attributes(dev, tunnel, attr, &grp_info, error); + if (ret < 0) + return ret; +@@ -5353,15 +5374,6 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "item not supported"); + switch (type) { +- case MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL: +- if (items[0].type != (typeof(items[0].type)) +- MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) +- return rte_flow_error_set +- (error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ITEM, +- NULL, "MLX5 private items " +- "must be the first"); +- break; + case RTE_FLOW_ITEM_TYPE_VOID: + break; + case RTE_FLOW_ITEM_TYPE_PORT_ID: +@@ -5601,8 +5613,10 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + return ret; + last_item = MLX5_FLOW_ITEM_TAG; + break; +- case MLX5_RTE_FLOW_ITEM_TYPE_TAG: + case MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE: ++ last_item = MLX5_FLOW_ITEM_TX_QUEUE; ++ break; ++ case MLX5_RTE_FLOW_ITEM_TYPE_TAG: + break; + case RTE_FLOW_ITEM_TYPE_GTP: + ret = flow_dv_validate_item_gtp(dev, items, item_flags, +@@ -5622,6 +5636,11 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + return ret; + last_item = MLX5_FLOW_LAYER_ECPRI; + break; ++ case MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL: ++ /* tunnel offload item was processed before ++ * list it here as a supported type ++ */ ++ break; + default: + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, +@@ -5713,6 +5732,8 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr 
*attr, + /* Count all modify-header actions as one action. */ + if (!(action_flags & MLX5_FLOW_MODIFY_HDR_ACTIONS)) + ++actions_n; ++ tag_id = ((const struct rte_flow_action_set_tag *) ++ actions->conf)->index; + action_flags |= MLX5_FLOW_ACTION_SET_TAG; + rw_act_num += MLX5_ACT_NUM_SET_TAG; + break; +@@ -6086,19 +6107,18 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + error); + if (ret < 0) + return ret; ++ if ((action_flags & MLX5_FLOW_ACTION_SET_TAG) && ++ tag_id == 0 && priv->mtr_color_reg == REG_NON) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ACTION, NULL, ++ "sample after tag action causes metadata tag index 0 corruption"); + action_flags |= MLX5_FLOW_ACTION_SAMPLE; + ++actions_n; + break; + case MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET: +- if (actions[0].type != (typeof(actions[0].type)) +- MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) +- return rte_flow_error_set +- (error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ACTION, +- NULL, "MLX5 private action " +- "must be the first"); +- +- action_flags |= MLX5_FLOW_ACTION_TUNNEL_SET; ++ /* tunnel offload action was processed before ++ * list it here as a supported type ++ */ + break; + default: + return rte_flow_error_set(error, ENOTSUP, +@@ -6112,7 +6132,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + * - Explicit decap action is prohibited by the tunnel offload API. + * - Drop action in tunnel steer rule is prohibited by the API. + * - Application cannot use MARK action because it's value can mask +- * tunnel default miss nitification. ++ * tunnel default miss notification. + * - JUMP in tunnel match rule has no support in current PMD + * implementation. + * - TAG & META are reserved for future uses. +@@ -6264,6 +6284,18 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + NULL, "too many header modify" + " actions to support"); + } ++ /* ++ * Validation the NIC Egress flow on representor, except implicit ++ * hairpin default egress flow with TX_QUEUE item, other flows not ++ * work due to metadata regC0 mismatch. ++ */ ++ if ((!attr->transfer && attr->egress) && priv->representor && ++ !(item_flags & MLX5_FLOW_ITEM_TX_QUEUE)) ++ return rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ITEM, ++ NULL, ++ "NIC egress rules on representors" ++ " is not supported"); + return 0; + } + +@@ -6982,18 +7014,19 @@ flow_dv_translate_item_gre_key(void *matcher, void *key, + * Flow matcher value. + * @param[in] item + * Flow pattern to translate. +- * @param[in] inner +- * Item is inner pattern. ++ * @param[in] pattern_flags ++ * Accumulated pattern flags. 
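The GRE translation above no longer takes an inner/outer flag; when the user leaves the GRE protocol unmasked, it derives the next protocol from the accumulated pattern flags (via the mlx5_translate_tunnel_etypes() helper added in the mlx5_flow.h hunk) and forces a full mask so matchers for different inner layers stay distinct. A standalone sketch of that derivation follows; the flag bits are toy values, the EtherType numbers are the standard ones.

#include <stdint.h>

/* Toy inner-layer bits accumulated while walking the pattern. */
#define TOY_INNER_L2    (1u << 0)
#define TOY_INNER_IPV4  (1u << 1)
#define TOY_INNER_IPV6  (1u << 2)
#define TOY_MPLS        (1u << 3)

/* Map the inner layers seen in the pattern to the tunnel's next EtherType. */
static uint16_t
toy_tunnel_etype(uint32_t pattern_flags)
{
	if (pattern_flags & TOY_INNER_L2)
		return 0x6558;    /* Transparent Ethernet Bridging */
	if (pattern_flags & TOY_INNER_IPV4)
		return 0x0800;
	if (pattern_flags & TOY_INNER_IPV6)
		return 0x86DD;
	if (pattern_flags & TOY_MPLS)
		return 0x8847;
	return 0;
}

/* If the protocol is unmasked, pin it to the derived inner EtherType. */
static void
toy_force_next_protocol(uint32_t pattern_flags,
			uint16_t *proto_mask, uint16_t *proto_val)
{
	if (*proto_mask != 0)
		return;                    /* user already constrained it */
	*proto_val = toy_tunnel_etype(pattern_flags);
	if (*proto_val != 0)
		*proto_mask = 0xFFFF;
	*proto_val &= *proto_mask;         /* program value & mask, as above */
}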
+ */ + static void + flow_dv_translate_item_gre(void *matcher, void *key, + const struct rte_flow_item *item, +- int inner) ++ uint64_t pattern_flags) + { ++ static const struct rte_flow_item_gre empty_gre = {0,}; + const struct rte_flow_item_gre *gre_m = item->mask; + const struct rte_flow_item_gre *gre_v = item->spec; +- void *headers_m; +- void *headers_v; ++ void *headers_m = MLX5_ADDR_OF(fte_match_param, matcher, outer_headers); ++ void *headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers); + void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters); + struct { +@@ -7010,26 +7043,17 @@ flow_dv_translate_item_gre(void *matcher, void *key, + uint16_t value; + }; + } gre_crks_rsvd0_ver_m, gre_crks_rsvd0_ver_v; ++ uint16_t protocol_m, protocol_v; + +- if (inner) { +- headers_m = MLX5_ADDR_OF(fte_match_param, matcher, +- inner_headers); +- headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers); +- } else { +- headers_m = MLX5_ADDR_OF(fte_match_param, matcher, +- outer_headers); +- headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers); +- } + MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE); +- if (!gre_v) +- return; +- if (!gre_m) +- gre_m = &rte_flow_item_gre_mask; +- MLX5_SET(fte_match_set_misc, misc_m, gre_protocol, +- rte_be_to_cpu_16(gre_m->protocol)); +- MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, +- rte_be_to_cpu_16(gre_v->protocol & gre_m->protocol)); ++ if (!gre_v) { ++ gre_v = &empty_gre; ++ gre_m = &empty_gre; ++ } else { ++ if (!gre_m) ++ gre_m = &rte_flow_item_gre_mask; ++ } + gre_crks_rsvd0_ver_m.value = rte_be_to_cpu_16(gre_m->c_rsvd0_ver); + gre_crks_rsvd0_ver_v.value = rte_be_to_cpu_16(gre_v->c_rsvd0_ver); + MLX5_SET(fte_match_set_misc, misc_m, gre_c_present, +@@ -7047,6 +7071,17 @@ flow_dv_translate_item_gre(void *matcher, void *key, + MLX5_SET(fte_match_set_misc, misc_v, gre_s_present, + gre_crks_rsvd0_ver_v.s_present & + gre_crks_rsvd0_ver_m.s_present); ++ protocol_m = rte_be_to_cpu_16(gre_m->protocol); ++ protocol_v = rte_be_to_cpu_16(gre_v->protocol); ++ if (!protocol_m) { ++ /* Force next protocol to prevent matchers duplication */ ++ protocol_v = mlx5_translate_tunnel_etypes(pattern_flags); ++ if (protocol_v) ++ protocol_m = 0xFFFF; ++ } ++ MLX5_SET(fte_match_set_misc, misc_m, gre_protocol, protocol_m); ++ MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ++ protocol_m & protocol_v); + } + + /** +@@ -7058,13 +7093,13 @@ flow_dv_translate_item_gre(void *matcher, void *key, + * Flow matcher value. + * @param[in] item + * Flow pattern to translate. +- * @param[in] inner +- * Item is inner pattern. ++ * @param[in] pattern_flags ++ * Accumulated pattern flags. 
+ */ + static void + flow_dv_translate_item_nvgre(void *matcher, void *key, + const struct rte_flow_item *item, +- int inner) ++ unsigned long pattern_flags) + { + const struct rte_flow_item_nvgre *nvgre_m = item->mask; + const struct rte_flow_item_nvgre *nvgre_v = item->spec; +@@ -7091,7 +7126,7 @@ flow_dv_translate_item_nvgre(void *matcher, void *key, + .mask = &gre_mask, + .last = NULL, + }; +- flow_dv_translate_item_gre(matcher, key, &gre_item, inner); ++ flow_dv_translate_item_gre(matcher, key, &gre_item, pattern_flags); + if (!nvgre_v) + return; + if (!nvgre_m) +@@ -7177,46 +7212,40 @@ flow_dv_translate_item_vxlan(void *matcher, void *key, + + static void + flow_dv_translate_item_vxlan_gpe(void *matcher, void *key, +- const struct rte_flow_item *item, int inner) ++ const struct rte_flow_item *item, ++ const uint64_t pattern_flags) + { ++ static const struct rte_flow_item_vxlan_gpe dummy_vxlan_gpe_hdr = {0, }; + const struct rte_flow_item_vxlan_gpe *vxlan_m = item->mask; + const struct rte_flow_item_vxlan_gpe *vxlan_v = item->spec; +- void *headers_m; +- void *headers_v; ++ /* The item was validated to be on the outer side */ ++ void *headers_m = MLX5_ADDR_OF(fte_match_param, matcher, outer_headers); ++ void *headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers); + void *misc_m = + MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters_3); + void *misc_v = + MLX5_ADDR_OF(fte_match_param, key, misc_parameters_3); +- char *vni_m; +- char *vni_v; +- uint16_t dport; +- int size; +- int i; ++ char *vni_m = ++ MLX5_ADDR_OF(fte_match_set_misc3, misc_m, outer_vxlan_gpe_vni); ++ char *vni_v = ++ MLX5_ADDR_OF(fte_match_set_misc3, misc_v, outer_vxlan_gpe_vni); ++ int i, size = sizeof(vxlan_m->vni); + uint8_t flags_m = 0xff; + uint8_t flags_v = 0xc; ++ uint8_t m_protocol, v_protocol; + +- if (inner) { +- headers_m = MLX5_ADDR_OF(fte_match_param, matcher, +- inner_headers); +- headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers); +- } else { +- headers_m = MLX5_ADDR_OF(fte_match_param, matcher, +- outer_headers); +- headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers); +- } +- dport = item->type == RTE_FLOW_ITEM_TYPE_VXLAN ? 
+- MLX5_UDP_PORT_VXLAN : MLX5_UDP_PORT_VXLAN_GPE; + if (!MLX5_GET16(fte_match_set_lyr_2_4, headers_v, udp_dport)) { + MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport, 0xFFFF); +- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, dport); ++ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ++ MLX5_UDP_PORT_VXLAN_GPE); ++ } ++ if (!vxlan_v) { ++ vxlan_v = &dummy_vxlan_gpe_hdr; ++ vxlan_m = &dummy_vxlan_gpe_hdr; ++ } else { ++ if (!vxlan_m) ++ vxlan_m = &rte_flow_item_vxlan_gpe_mask; + } +- if (!vxlan_v) +- return; +- if (!vxlan_m) +- vxlan_m = &rte_flow_item_vxlan_gpe_mask; +- size = sizeof(vxlan_m->vni); +- vni_m = MLX5_ADDR_OF(fte_match_set_misc3, misc_m, outer_vxlan_gpe_vni); +- vni_v = MLX5_ADDR_OF(fte_match_set_misc3, misc_v, outer_vxlan_gpe_vni); + memcpy(vni_m, vxlan_m->vni, size); + for (i = 0; i < size; ++i) + vni_v[i] = vni_m[i] & vxlan_v->vni[i]; +@@ -7226,10 +7255,23 @@ flow_dv_translate_item_vxlan_gpe(void *matcher, void *key, + } + MLX5_SET(fte_match_set_misc3, misc_m, outer_vxlan_gpe_flags, flags_m); + MLX5_SET(fte_match_set_misc3, misc_v, outer_vxlan_gpe_flags, flags_v); +- MLX5_SET(fte_match_set_misc3, misc_m, outer_vxlan_gpe_next_protocol, +- vxlan_m->protocol); +- MLX5_SET(fte_match_set_misc3, misc_v, outer_vxlan_gpe_next_protocol, +- vxlan_v->protocol); ++ m_protocol = vxlan_m->protocol; ++ v_protocol = vxlan_v->protocol; ++ if (!m_protocol) { ++ /* Force next protocol to ensure next headers parsing. */ ++ if (pattern_flags & MLX5_FLOW_LAYER_INNER_L2) ++ v_protocol = RTE_VXLAN_GPE_TYPE_ETH; ++ else if (pattern_flags & MLX5_FLOW_LAYER_INNER_L3_IPV4) ++ v_protocol = RTE_VXLAN_GPE_TYPE_IPV4; ++ else if (pattern_flags & MLX5_FLOW_LAYER_INNER_L3_IPV6) ++ v_protocol = RTE_VXLAN_GPE_TYPE_IPV6; ++ if (v_protocol) ++ m_protocol = 0xFF; ++ } ++ MLX5_SET(fte_match_set_misc3, misc_m, ++ outer_vxlan_gpe_next_protocol, m_protocol); ++ MLX5_SET(fte_match_set_misc3, misc_v, ++ outer_vxlan_gpe_next_protocol, m_protocol & v_protocol); + } + + /** +@@ -7247,49 +7289,39 @@ flow_dv_translate_item_vxlan_gpe(void *matcher, void *key, + + static void + flow_dv_translate_item_geneve(void *matcher, void *key, +- const struct rte_flow_item *item, int inner) ++ const struct rte_flow_item *item, ++ uint64_t pattern_flags) + { ++ static const struct rte_flow_item_geneve empty_geneve = {0,}; + const struct rte_flow_item_geneve *geneve_m = item->mask; + const struct rte_flow_item_geneve *geneve_v = item->spec; +- void *headers_m; +- void *headers_v; ++ /* GENEVE flow item validation allows single tunnel item */ ++ void *headers_m = MLX5_ADDR_OF(fte_match_param, matcher, outer_headers); ++ void *headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers); + void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters); +- uint16_t dport; + uint16_t gbhdr_m; + uint16_t gbhdr_v; +- char *vni_m; +- char *vni_v; +- size_t size, i; ++ char *vni_m = MLX5_ADDR_OF(fte_match_set_misc, misc_m, geneve_vni); ++ char *vni_v = MLX5_ADDR_OF(fte_match_set_misc, misc_v, geneve_vni); ++ size_t size = sizeof(geneve_m->vni), i; ++ uint16_t protocol_m, protocol_v; + +- if (inner) { +- headers_m = MLX5_ADDR_OF(fte_match_param, matcher, +- inner_headers); +- headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers); +- } else { +- headers_m = MLX5_ADDR_OF(fte_match_param, matcher, +- outer_headers); +- headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers); +- } +- dport = MLX5_UDP_PORT_GENEVE; + if 
(!MLX5_GET16(fte_match_set_lyr_2_4, headers_v, udp_dport)) { + MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport, 0xFFFF); +- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, dport); ++ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ++ MLX5_UDP_PORT_GENEVE); ++ } ++ if (!geneve_v) { ++ geneve_v = &empty_geneve; ++ geneve_m = &empty_geneve; ++ } else { ++ if (!geneve_m) ++ geneve_m = &rte_flow_item_geneve_mask; + } +- if (!geneve_v) +- return; +- if (!geneve_m) +- geneve_m = &rte_flow_item_geneve_mask; +- size = sizeof(geneve_m->vni); +- vni_m = MLX5_ADDR_OF(fte_match_set_misc, misc_m, geneve_vni); +- vni_v = MLX5_ADDR_OF(fte_match_set_misc, misc_v, geneve_vni); + memcpy(vni_m, geneve_m->vni, size); + for (i = 0; i < size; ++i) + vni_v[i] = vni_m[i] & geneve_v->vni[i]; +- MLX5_SET(fte_match_set_misc, misc_m, geneve_protocol_type, +- rte_be_to_cpu_16(geneve_m->protocol)); +- MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, +- rte_be_to_cpu_16(geneve_v->protocol & geneve_m->protocol)); + gbhdr_m = rte_be_to_cpu_16(geneve_m->ver_opt_len_o_c_rsvd0); + gbhdr_v = rte_be_to_cpu_16(geneve_v->ver_opt_len_o_c_rsvd0); + MLX5_SET(fte_match_set_misc, misc_m, geneve_oam, +@@ -7301,6 +7333,17 @@ flow_dv_translate_item_geneve(void *matcher, void *key, + MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, + MLX5_GENEVE_OPTLEN_VAL(gbhdr_v) & + MLX5_GENEVE_OPTLEN_VAL(gbhdr_m)); ++ protocol_m = rte_be_to_cpu_16(geneve_m->protocol); ++ protocol_v = rte_be_to_cpu_16(geneve_v->protocol); ++ if (!protocol_m) { ++ /* Force next protocol to prevent matchers duplication */ ++ protocol_v = mlx5_translate_tunnel_etypes(pattern_flags); ++ if (protocol_v) ++ protocol_m = 0xFFFF; ++ } ++ MLX5_SET(fte_match_set_misc, misc_m, geneve_protocol_type, protocol_m); ++ MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ++ protocol_m & protocol_v); + } + + /** +@@ -7337,19 +7380,24 @@ flow_dv_translate_item_mpls(void *matcher, void *key, + + switch (prev_layer) { + case MLX5_FLOW_LAYER_OUTER_L4_UDP: +- MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport, 0xffff); +- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, +- MLX5_UDP_PORT_MPLS); ++ if (!MLX5_GET16(fte_match_set_lyr_2_4, headers_v, udp_dport)) { ++ MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport, ++ 0xffff); ++ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ++ MLX5_UDP_PORT_MPLS); ++ } + break; + case MLX5_FLOW_LAYER_GRE: +- MLX5_SET(fte_match_set_misc, misc_m, gre_protocol, 0xffff); +- MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, +- RTE_ETHER_TYPE_MPLS); ++ /* Fall-through. */ ++ case MLX5_FLOW_LAYER_GRE_KEY: ++ if (!MLX5_GET16(fte_match_set_misc, misc_v, gre_protocol)) { ++ MLX5_SET(fte_match_set_misc, misc_m, gre_protocol, ++ 0xffff); ++ MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ++ RTE_ETHER_TYPE_MPLS); ++ } + break; + default: +- MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff); +- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, +- IPPROTO_MPLS); + break; + } + if (!in_mpls_v) +@@ -7928,12 +7976,13 @@ flow_dv_translate_item_gtp(void *matcher, void *key, + * Flow matcher value. + * @param[in] item + * Flow pattern to translate. +- * @param[in] samples +- * Sample IDs to be used in the matching. ++ * @param[in] last_item ++ * Last item flags. 
+ */ + static void + flow_dv_translate_item_ecpri(struct rte_eth_dev *dev, void *matcher, +- void *key, const struct rte_flow_item *item) ++ void *key, const struct rte_flow_item *item, ++ uint64_t last_item) + { + struct mlx5_priv *priv = dev->data->dev_private; + const struct rte_flow_item_ecpri *ecpri_m = item->mask; +@@ -7946,6 +7995,22 @@ flow_dv_translate_item_ecpri(struct rte_eth_dev *dev, void *matcher, + void *dw_m; + void *dw_v; + ++ /* ++ * In case of eCPRI over Ethernet, if EtherType is not specified, ++ * match on eCPRI EtherType implicitly. ++ */ ++ if (last_item & MLX5_FLOW_LAYER_OUTER_L2) { ++ void *hdrs_m, *hdrs_v, *l2m, *l2v; ++ ++ hdrs_m = MLX5_ADDR_OF(fte_match_param, matcher, outer_headers); ++ hdrs_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers); ++ l2m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, hdrs_m, ethertype); ++ l2v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, hdrs_v, ethertype); ++ if (*(uint16_t *)l2m == 0 && *(uint16_t *)l2v == 0) { ++ *(uint16_t *)l2m = UINT16_MAX; ++ *(uint16_t *)l2v = RTE_BE16(RTE_ETHER_TYPE_ECPRI); ++ } ++ } + if (!ecpri_v) + return; + if (!ecpri_m) +@@ -8499,7 +8564,7 @@ flow_dv_translate_action_port_id(struct rte_eth_dev *dev, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "No eswitch info was found for port"); +-#ifdef HAVE_MLX5DV_DR_DEVX_PORT ++#ifdef HAVE_MLX5DV_DR_CREATE_DEST_IB_PORT + /* + * This parameter is transferred to + * mlx5dv_dr_action_create_dest_ib_port(). +@@ -8617,10 +8682,8 @@ flow_dv_hashfields_set(struct mlx5_flow *dev_flow, + + dev_flow->hash_fields = 0; + #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT +- if (rss_desc->level >= 2) { +- dev_flow->hash_fields |= IBV_RX_HASH_INNER; ++ if (rss_desc->level >= 2) + rss_inner = 1; +- } + #endif + if ((rss_inner && (items & MLX5_FLOW_LAYER_INNER_L3_IPV4)) || + (!rss_inner && (items & MLX5_FLOW_LAYER_OUTER_L3_IPV4))) { +@@ -8643,6 +8706,12 @@ flow_dv_hashfields_set(struct mlx5_flow *dev_flow, + dev_flow->hash_fields |= MLX5_IPV6_IBV_RX_HASH; + } + } ++ if (dev_flow->hash_fields == 0) ++ /* ++ * There is no match between the RSS types and the ++ * L3 protocol (IPv4/IPv6) defined in the flow rule. 
++ */ ++ return; + if ((rss_inner && (items & MLX5_FLOW_LAYER_INNER_L4_UDP)) || + (!rss_inner && (items & MLX5_FLOW_LAYER_OUTER_L4_UDP))) { + if (rss_types & ETH_RSS_UDP) { +@@ -8668,6 +8737,8 @@ flow_dv_hashfields_set(struct mlx5_flow *dev_flow, + dev_flow->hash_fields |= MLX5_TCP_IBV_RX_HASH; + } + } ++ if (rss_inner) ++ dev_flow->hash_fields |= IBV_RX_HASH_INNER; + } + + /** +@@ -8693,6 +8764,7 @@ flow_dv_hrxq_prepare(struct rte_eth_dev *dev, + { + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_flow_handle *dh = dev_flow->handle; ++ uint32_t shared_rss = rss_desc->shared_rss; + struct mlx5_hrxq *hrxq; + + MLX5_ASSERT(rss_desc->queue_num); +@@ -8700,11 +8772,14 @@ flow_dv_hrxq_prepare(struct rte_eth_dev *dev, + rss_desc->hash_fields = dev_flow->hash_fields; + rss_desc->tunnel = !!(dh->layers & MLX5_FLOW_LAYER_TUNNEL); + rss_desc->shared_rss = 0; ++ if (rss_desc->hash_fields == 0) ++ rss_desc->queue_num = 1; + *hrxq_idx = mlx5_hrxq_get(dev, rss_desc); + if (!*hrxq_idx) + return NULL; + hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], + *hrxq_idx); ++ rss_desc->shared_rss = shared_rss; + return hrxq; + } + +@@ -8996,7 +9071,7 @@ flow_dv_dest_array_create_cb(struct mlx5_cache_list *list __rte_unused, + dest_attr[idx]->dest = sample_act->dr_port_id_action; + } + } +- /* create a dest array actioin */ ++ /* create a dest array action */ + cache_resource->action = mlx5_glue->dr_create_flow_action_dest_array + (domain, + cache_resource->num_of_dest, +@@ -9015,20 +9090,8 @@ flow_dv_dest_array_create_cb(struct mlx5_cache_list *list __rte_unused, + return &cache_resource->entry; + error: + for (idx = 0; idx < resource->num_of_dest; idx++) { +- struct mlx5_flow_sub_actions_idx *act_res = +- &cache_resource->sample_idx[idx]; +- if (act_res->rix_hrxq && +- !mlx5_hrxq_release(dev, +- act_res->rix_hrxq)) +- act_res->rix_hrxq = 0; +- if (act_res->rix_encap_decap && +- !flow_dv_encap_decap_resource_release(dev, +- act_res->rix_encap_decap)) +- act_res->rix_encap_decap = 0; +- if (act_res->rix_port_id_action && +- !flow_dv_port_id_action_resource_release(dev, +- act_res->rix_port_id_action)) +- act_res->rix_port_id_action = 0; ++ flow_dv_sample_sub_actions_release(dev, ++ &cache_resource->sample_idx[idx]); + if (dest_attr[idx]) + mlx5_free(dest_attr[idx]); + } +@@ -9166,7 +9229,7 @@ flow_dv_translate_action_sample(struct rte_eth_dev *dev, + (((const struct rte_flow_action_mark *) + (sub_actions->conf))->id); + +- dev_flow->handle->mark = 1; ++ wks->mark = 1; + pre_rix = dev_flow->handle->dvh.rix_tag; + /* Save the mark resource before sample */ + pre_r = dev_flow->dv.tag_resource; +@@ -9359,6 +9422,7 @@ flow_dv_create_action_sample(struct rte_eth_dev *dev, + dev_flow->handle->dvh.rix_encap_decap; + sample_act->dr_encap_action = + dev_flow->dv.encap_decap->action; ++ dev_flow->handle->dvh.rix_encap_decap = 0; + } + if (sample_act->action_flags & MLX5_FLOW_ACTION_PORT_ID) { + normal_idx++; +@@ -9366,6 +9430,7 @@ flow_dv_create_action_sample(struct rte_eth_dev *dev, + dev_flow->handle->rix_port_id_action; + sample_act->dr_port_id_action = + dev_flow->dv.port_id_action->action; ++ dev_flow->handle->rix_port_id_action = 0; + } + sample_act->actions_num = normal_idx; + /* update sample action resource into first index of array */ +@@ -9731,13 +9796,15 @@ flow_dv_translate(struct rte_eth_dev *dev, + int tmp_actions_n = 0; + uint32_t table; + int ret = 0; +- const struct mlx5_flow_tunnel *tunnel; ++ const struct mlx5_flow_tunnel *tunnel = NULL; + struct flow_grp_info grp_info = { + 
.external = !!dev_flow->external, + .transfer = !!attr->transfer, + .fdb_def_rule = !!priv->fdb_def_rule, + .skip_scale = !!dev_flow->skip_scale, ++ .std_tbl_fix = true, + }; ++ const struct rte_flow_item *tunnel_item = NULL; + + if (!wks) + return rte_flow_error_set(error, ENOMEM, +@@ -9751,15 +9818,21 @@ flow_dv_translate(struct rte_eth_dev *dev, + MLX5DV_FLOW_TABLE_TYPE_NIC_RX; + /* update normal path action resource into last index of array */ + sample_act = &mdest_res.sample_act[MLX5_MAX_DEST_NUM - 1]; +- tunnel = is_flow_tunnel_match_rule(dev, attr, items, actions) ? +- flow_items_to_tunnel(items) : +- is_flow_tunnel_steer_rule(dev, attr, items, actions) ? +- flow_actions_to_tunnel(actions) : +- dev_flow->tunnel ? dev_flow->tunnel : NULL; ++ if (is_tunnel_offload_active(dev)) { ++ if (dev_flow->tunnel) { ++ RTE_VERIFY(dev_flow->tof_type == ++ MLX5_TUNNEL_OFFLOAD_MISS_RULE); ++ tunnel = dev_flow->tunnel; ++ } else { ++ tunnel = mlx5_get_tof(items, actions, ++ &dev_flow->tof_type); ++ dev_flow->tunnel = tunnel; ++ } ++ grp_info.std_tbl_fix = tunnel_use_standard_attr_group_translate ++ (dev, attr, tunnel, dev_flow->tof_type); ++ } + mhdr_res->ft_type = attr->egress ? MLX5DV_FLOW_TABLE_TYPE_NIC_TX : + MLX5DV_FLOW_TABLE_TYPE_NIC_RX; +- grp_info.std_tbl_fix = tunnel_use_standard_attr_group_translate +- (dev, tunnel, attr, items, actions); + ret = mlx5_flow_group_to_table(dev, tunnel, attr->group, &table, + &grp_info, error); + if (ret) +@@ -9771,7 +9844,7 @@ flow_dv_translate(struct rte_eth_dev *dev, + priority = dev_conf->flow_prio - 1; + /* number of actions must be set to 0 in case of dirty stack. */ + mhdr_res->actions_num = 0; +- if (is_flow_tunnel_match_rule(dev, attr, items, actions)) { ++ if (is_flow_tunnel_match_rule(dev_flow->tof_type)) { + /* + * do not add decap action if match rule drops packet + * HW rejects rules with decap & drop +@@ -9808,6 +9881,7 @@ flow_dv_translate(struct rte_eth_dev *dev, + const struct rte_flow_action_meter *mtr; + struct mlx5_flow_tbl_resource *tbl; + struct mlx5_aso_age_action *age_act; ++ uint32_t owner_idx; + uint32_t port_id = 0; + struct mlx5_flow_dv_port_id_action_resource port_id_resource; + int action_type = actions->type; +@@ -9844,7 +9918,7 @@ flow_dv_translate(struct rte_eth_dev *dev, + break; + case RTE_FLOW_ACTION_TYPE_FLAG: + action_flags |= MLX5_FLOW_ACTION_FLAG; +- dev_flow->handle->mark = 1; ++ wks->mark = 1; + if (dev_conf->dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) { + struct rte_flow_action_mark mark = { + .id = MLX5_FLOW_MARK_DEFAULT, +@@ -9873,7 +9947,7 @@ flow_dv_translate(struct rte_eth_dev *dev, + break; + case RTE_FLOW_ACTION_TYPE_MARK: + action_flags |= MLX5_FLOW_ACTION_MARK; +- dev_flow->handle->mark = 1; ++ wks->mark = 1; + if (dev_conf->dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) { + const struct rte_flow_action_mark *mark = + (const struct rte_flow_action_mark *) +@@ -9947,10 +10021,13 @@ flow_dv_translate(struct rte_eth_dev *dev, + MLX5_FLOW_FATE_QUEUE; + break; + case MLX5_RTE_FLOW_ACTION_TYPE_AGE: +- flow->age = (uint32_t)(uintptr_t)(action->conf); +- age_act = flow_aso_age_get_by_idx(dev, flow->age); +- __atomic_fetch_add(&age_act->refcnt, 1, +- __ATOMIC_RELAXED); ++ owner_idx = (uint32_t)(uintptr_t)action->conf; ++ age_act = flow_aso_age_get_by_idx(dev, owner_idx); ++ if (flow->age == 0) { ++ flow->age = owner_idx; ++ __atomic_fetch_add(&age_act->refcnt, 1, ++ __ATOMIC_RELAXED); ++ } + dev_flow->dv.actions[actions_n++] = age_act->dr_action; + action_flags |= MLX5_FLOW_ACTION_AGE; + break; +@@ -10500,10 +10577,9 @@ 
flow_dv_translate(struct rte_eth_dev *dev, + MLX5_FLOW_LAYER_OUTER_L4_UDP; + break; + case RTE_FLOW_ITEM_TYPE_GRE: +- flow_dv_translate_item_gre(match_mask, match_value, +- items, tunnel); + matcher.priority = MLX5_TUNNEL_PRIO_GET(rss_desc); + last_item = MLX5_FLOW_LAYER_GRE; ++ tunnel_item = items; + break; + case RTE_FLOW_ITEM_TYPE_GRE_KEY: + flow_dv_translate_item_gre_key(match_mask, +@@ -10511,10 +10587,9 @@ flow_dv_translate(struct rte_eth_dev *dev, + last_item = MLX5_FLOW_LAYER_GRE_KEY; + break; + case RTE_FLOW_ITEM_TYPE_NVGRE: +- flow_dv_translate_item_nvgre(match_mask, match_value, +- items, tunnel); + matcher.priority = MLX5_TUNNEL_PRIO_GET(rss_desc); + last_item = MLX5_FLOW_LAYER_GRE; ++ tunnel_item = items; + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + flow_dv_translate_item_vxlan(match_mask, match_value, +@@ -10523,17 +10598,14 @@ flow_dv_translate(struct rte_eth_dev *dev, + last_item = MLX5_FLOW_LAYER_VXLAN; + break; + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: +- flow_dv_translate_item_vxlan_gpe(match_mask, +- match_value, items, +- tunnel); + matcher.priority = MLX5_TUNNEL_PRIO_GET(rss_desc); + last_item = MLX5_FLOW_LAYER_VXLAN_GPE; ++ tunnel_item = items; + break; + case RTE_FLOW_ITEM_TYPE_GENEVE: +- flow_dv_translate_item_geneve(match_mask, match_value, +- items, tunnel); + matcher.priority = MLX5_TUNNEL_PRIO_GET(rss_desc); + last_item = MLX5_FLOW_LAYER_GENEVE; ++ tunnel_item = items; + break; + case RTE_FLOW_ITEM_TYPE_MPLS: + flow_dv_translate_item_mpls(match_mask, match_value, +@@ -10554,11 +10626,13 @@ flow_dv_translate(struct rte_eth_dev *dev, + case RTE_FLOW_ITEM_TYPE_ICMP: + flow_dv_translate_item_icmp(match_mask, match_value, + items, tunnel); ++ matcher.priority = MLX5_PRIORITY_MAP_L4; + last_item = MLX5_FLOW_LAYER_ICMP; + break; + case RTE_FLOW_ITEM_TYPE_ICMP6: + flow_dv_translate_item_icmp6(match_mask, match_value, + items, tunnel); ++ matcher.priority = MLX5_PRIORITY_MAP_L4; + last_item = MLX5_FLOW_LAYER_ICMP6; + break; + case RTE_FLOW_ITEM_TYPE_TAG: +@@ -10599,7 +10673,8 @@ flow_dv_translate(struct rte_eth_dev *dev, + dev_flow->dv.value.size = + MLX5_ST_SZ_BYTES(fte_match_param); + flow_dv_translate_item_ecpri(dev, match_mask, +- match_value, items); ++ match_value, items, ++ last_item); + /* No other protocol should follow eCPRI layer. */ + last_item = MLX5_FLOW_LAYER_ECPRI; + break; +@@ -10611,16 +10686,34 @@ flow_dv_translate(struct rte_eth_dev *dev, + /* + * When E-Switch mode is enabled, we have two cases where we need to + * set the source port manually. +- * The first one, is in case of Nic steering rule, and the second is +- * E-Switch rule where no port_id item was found. In both cases +- * the source port is set according the current port in use. ++ * The first one, is in case of NIC ingress steering rule, and the ++ * second is E-Switch rule where no port_id item was found. ++ * In both cases the source port is set according the current port ++ * in use. 
+ */ + if (!(item_flags & MLX5_FLOW_ITEM_PORT_ID) && +- (priv->representor || priv->master)) { ++ (priv->representor || priv->master) && ++ !(attr->egress && !attr->transfer)) { + if (flow_dv_translate_item_port_id(dev, match_mask, + match_value, NULL, attr)) + return -rte_errno; + } ++ if (item_flags & MLX5_FLOW_LAYER_VXLAN_GPE) ++ flow_dv_translate_item_vxlan_gpe(match_mask, match_value, ++ tunnel_item, item_flags); ++ else if (item_flags & MLX5_FLOW_LAYER_GENEVE) ++ flow_dv_translate_item_geneve(match_mask, match_value, ++ tunnel_item, item_flags); ++ else if (item_flags & MLX5_FLOW_LAYER_GRE) { ++ if (tunnel_item->type == RTE_FLOW_ITEM_TYPE_GRE) ++ flow_dv_translate_item_gre(match_mask, match_value, ++ tunnel_item, item_flags); ++ else if (tunnel_item->type == RTE_FLOW_ITEM_TYPE_NVGRE) ++ flow_dv_translate_item_nvgre(match_mask, match_value, ++ tunnel_item, item_flags); ++ else ++ MLX5_ASSERT(false); ++ } + #ifdef RTE_LIBRTE_MLX5_DEBUG + MLX5_ASSERT(!flow_dv_check_valid_spec(matcher.mask.buf, + dev_flow->dv.value.buf)); +@@ -10666,28 +10759,51 @@ flow_dv_translate(struct rte_eth_dev *dev, + static int + __flow_dv_action_rss_hrxq_set(struct mlx5_shared_action_rss *action, + const uint64_t hash_fields, +- const int tunnel, + uint32_t hrxq_idx) + { +- uint32_t *hrxqs = tunnel ? action->hrxq : action->hrxq_tunnel; ++ uint32_t *hrxqs = action->hrxq; + + switch (hash_fields & ~IBV_RX_HASH_INNER) { + case MLX5_RSS_HASH_IPV4: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV4_DST_ONLY: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV4_SRC_ONLY: + hrxqs[0] = hrxq_idx; + return 0; + case MLX5_RSS_HASH_IPV4_TCP: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV4_TCP_DST_ONLY: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV4_TCP_SRC_ONLY: + hrxqs[1] = hrxq_idx; + return 0; + case MLX5_RSS_HASH_IPV4_UDP: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV4_UDP_DST_ONLY: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV4_UDP_SRC_ONLY: + hrxqs[2] = hrxq_idx; + return 0; + case MLX5_RSS_HASH_IPV6: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_DST_ONLY: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_SRC_ONLY: + hrxqs[3] = hrxq_idx; + return 0; + case MLX5_RSS_HASH_IPV6_TCP: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_TCP_DST_ONLY: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_TCP_SRC_ONLY: + hrxqs[4] = hrxq_idx; + return 0; + case MLX5_RSS_HASH_IPV6_UDP: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_UDP_DST_ONLY: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_UDP_SRC_ONLY: + hrxqs[5] = hrxq_idx; + return 0; + case MLX5_RSS_HASH_NONE: +@@ -10716,33 +10832,56 @@ __flow_dv_action_rss_hrxq_set(struct mlx5_shared_action_rss *action, + */ + static uint32_t + __flow_dv_action_rss_hrxq_lookup(struct rte_eth_dev *dev, uint32_t idx, +- const uint64_t hash_fields, +- const int tunnel) ++ const uint64_t hash_fields) + { + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_shared_action_rss *shared_rss = + mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx); +- const uint32_t *hrxqs = tunnel ? shared_rss->hrxq : +- shared_rss->hrxq_tunnel; ++ const uint32_t *hrxqs = shared_rss->hrxq; + + switch (hash_fields & ~IBV_RX_HASH_INNER) { + case MLX5_RSS_HASH_IPV4: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV4_DST_ONLY: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV4_SRC_ONLY: + return hrxqs[0]; + case MLX5_RSS_HASH_IPV4_TCP: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV4_TCP_DST_ONLY: ++ /* fall-through. 
*/ ++ case MLX5_RSS_HASH_IPV4_TCP_SRC_ONLY: + return hrxqs[1]; + case MLX5_RSS_HASH_IPV4_UDP: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV4_UDP_DST_ONLY: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV4_UDP_SRC_ONLY: + return hrxqs[2]; + case MLX5_RSS_HASH_IPV6: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_DST_ONLY: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_SRC_ONLY: + return hrxqs[3]; + case MLX5_RSS_HASH_IPV6_TCP: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_TCP_DST_ONLY: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_TCP_SRC_ONLY: + return hrxqs[4]; + case MLX5_RSS_HASH_IPV6_UDP: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_UDP_DST_ONLY: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_UDP_SRC_ONLY: + return hrxqs[5]; + case MLX5_RSS_HASH_NONE: + return hrxqs[6]; + default: + return 0; + } ++ + } + + /** +@@ -10784,11 +10923,21 @@ flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, + n = dv->actions_n; + if (dh->fate_action == MLX5_FLOW_FATE_DROP) { + if (dv->transfer) { +- dv->actions[n++] = priv->sh->esw_drop_action; ++ MLX5_ASSERT(priv->sh->dr_drop_action); ++ dv->actions[n++] = priv->sh->dr_drop_action; + } else { ++#ifdef HAVE_MLX5DV_DR ++ /* DR supports drop action placeholder. */ ++ MLX5_ASSERT(priv->sh->dr_drop_action); ++ dv->actions[n++] = dv->group ? ++ priv->sh->dr_drop_action : ++ priv->root_drop_action; ++#else ++ /* For DV we use the explicit drop queue. */ + MLX5_ASSERT(priv->drop_queue.hrxq); + dv->actions[n++] = + priv->drop_queue.hrxq->action; ++#endif + } + } else if ((dh->fate_action == MLX5_FLOW_FATE_QUEUE && + !dv_h->rix_sample && !dv_h->rix_dest_array)) { +@@ -10812,9 +10961,7 @@ flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, + + hrxq_idx = __flow_dv_action_rss_hrxq_lookup(dev, + rss_desc->shared_rss, +- dev_flow->hash_fields, +- !!(dh->layers & +- MLX5_FLOW_LAYER_TUNNEL)); ++ dev_flow->hash_fields); + if (hrxq_idx) + hrxq = mlx5_ipool_get + (priv->sh->ipool[MLX5_IPOOL_HRXQ], +@@ -11132,7 +11279,8 @@ flow_dv_fate_resource_release(struct rte_eth_dev *dev, + return; + switch (handle->fate_action) { + case MLX5_FLOW_FATE_QUEUE: +- mlx5_hrxq_release(dev, handle->rix_hrxq); ++ if (!handle->dvh.rix_sample && !handle->dvh.rix_dest_array) ++ mlx5_hrxq_release(dev, handle->rix_hrxq); + break; + case MLX5_FLOW_FATE_JUMP: + flow_dv_jump_tbl_resource_release(dev, handle); +@@ -11406,8 +11554,85 @@ static int + __flow_dv_action_rss_hrxqs_release(struct rte_eth_dev *dev, + struct mlx5_shared_action_rss *shared_rss) + { +- return __flow_dv_hrxqs_release(dev, &shared_rss->hrxq) + +- __flow_dv_hrxqs_release(dev, &shared_rss->hrxq_tunnel); ++ return __flow_dv_hrxqs_release(dev, &shared_rss->hrxq); ++} ++ ++/** ++ * Adjust L3/L4 hash value of pre-created shared RSS hrxq according to ++ * user input. ++ * ++ * Only one hash value is available for one L3+L4 combination: ++ * for example: ++ * MLX5_RSS_HASH_IPV4, MLX5_RSS_HASH_IPV4_SRC_ONLY, and ++ * MLX5_RSS_HASH_IPV4_DST_ONLY are mutually exclusive so they can share ++ * same slot in mlx5_rss_hash_fields. ++ * ++ * @param[in] orig_rss_types ++ * RSS type as provided in shared RSS action. ++ * @param[in, out] hash_field ++ * hash_field variable needed to be adjusted. 
++ * ++ * @return ++ * void ++ */ ++static void ++__flow_dv_action_rss_l34_hash_adjust(uint64_t orig_rss_types, ++ uint64_t *hash_field) ++{ ++ uint64_t rss_types = rte_eth_rss_hf_refine(orig_rss_types); ++ ++ switch (*hash_field & ~IBV_RX_HASH_INNER) { ++ case MLX5_RSS_HASH_IPV4: ++ if (rss_types & MLX5_IPV4_LAYER_TYPES) { ++ *hash_field &= ~MLX5_RSS_HASH_IPV4; ++ if (rss_types & ETH_RSS_L3_DST_ONLY) ++ *hash_field |= IBV_RX_HASH_DST_IPV4; ++ else if (rss_types & ETH_RSS_L3_SRC_ONLY) ++ *hash_field |= IBV_RX_HASH_SRC_IPV4; ++ else ++ *hash_field |= MLX5_RSS_HASH_IPV4; ++ } ++ return; ++ case MLX5_RSS_HASH_IPV6: ++ if (rss_types & MLX5_IPV6_LAYER_TYPES) { ++ *hash_field &= ~MLX5_RSS_HASH_IPV6; ++ if (rss_types & ETH_RSS_L3_DST_ONLY) ++ *hash_field |= IBV_RX_HASH_DST_IPV6; ++ else if (rss_types & ETH_RSS_L3_SRC_ONLY) ++ *hash_field |= IBV_RX_HASH_SRC_IPV6; ++ else ++ *hash_field |= MLX5_RSS_HASH_IPV6; ++ } ++ return; ++ case MLX5_RSS_HASH_IPV4_UDP: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_UDP: ++ if (rss_types & ETH_RSS_UDP) { ++ *hash_field &= ~MLX5_UDP_IBV_RX_HASH; ++ if (rss_types & ETH_RSS_L4_DST_ONLY) ++ *hash_field |= IBV_RX_HASH_DST_PORT_UDP; ++ else if (rss_types & ETH_RSS_L4_SRC_ONLY) ++ *hash_field |= IBV_RX_HASH_SRC_PORT_UDP; ++ else ++ *hash_field |= MLX5_UDP_IBV_RX_HASH; ++ } ++ return; ++ case MLX5_RSS_HASH_IPV4_TCP: ++ /* fall-through. */ ++ case MLX5_RSS_HASH_IPV6_TCP: ++ if (rss_types & ETH_RSS_TCP) { ++ *hash_field &= ~MLX5_TCP_IBV_RX_HASH; ++ if (rss_types & ETH_RSS_L4_DST_ONLY) ++ *hash_field |= IBV_RX_HASH_DST_PORT_TCP; ++ else if (rss_types & ETH_RSS_L4_SRC_ONLY) ++ *hash_field |= IBV_RX_HASH_SRC_PORT_TCP; ++ else ++ *hash_field |= MLX5_TCP_IBV_RX_HASH; ++ } ++ return; ++ default: ++ return; ++ } + } + + /** +@@ -11453,23 +11678,27 @@ __flow_dv_action_rss_setup(struct rte_eth_dev *dev, + for (i = 0; i < MLX5_RSS_HASH_FIELDS_LEN; i++) { + uint32_t hrxq_idx; + uint64_t hash_fields = mlx5_rss_hash_fields[i]; +- int tunnel; ++ int tunnel = 0; + +- for (tunnel = 0; tunnel < 2; tunnel++) { +- rss_desc.tunnel = tunnel; +- rss_desc.hash_fields = hash_fields; +- hrxq_idx = mlx5_hrxq_get(dev, &rss_desc); +- if (!hrxq_idx) { +- rte_flow_error_set +- (error, rte_errno, +- RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, +- "cannot get hash queue"); +- goto error_hrxq_new; +- } +- err = __flow_dv_action_rss_hrxq_set +- (shared_rss, hash_fields, tunnel, hrxq_idx); +- MLX5_ASSERT(!err); ++ __flow_dv_action_rss_l34_hash_adjust(shared_rss->origin.types, ++ &hash_fields); ++ if (shared_rss->origin.level > 1) { ++ hash_fields |= IBV_RX_HASH_INNER; ++ tunnel = 1; ++ } ++ rss_desc.tunnel = tunnel; ++ rss_desc.hash_fields = hash_fields; ++ hrxq_idx = mlx5_hrxq_get(dev, &rss_desc); ++ if (!hrxq_idx) { ++ rte_flow_error_set ++ (error, rte_errno, ++ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, ++ "cannot get hash queue"); ++ goto error_hrxq_new; + } ++ err = __flow_dv_action_rss_hrxq_set ++ (shared_rss, hash_fields, hrxq_idx); ++ MLX5_ASSERT(!err); + } + return 0; + error_hrxq_new: +@@ -11604,12 +11833,6 @@ __flow_dv_action_rss_release(struct rte_eth_dev *dev, uint32_t idx, + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "invalid shared action"); +- remaining = __flow_dv_action_rss_hrxqs_release(dev, shared_rss); +- if (remaining) +- return rte_flow_error_set(error, EBUSY, +- RTE_FLOW_ERROR_TYPE_ACTION, +- NULL, +- "shared rss hrxq has references"); + if (!__atomic_compare_exchange_n(&shared_rss->refcnt, &old_refcnt, + 0, 0, __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED)) +@@ 
-11617,6 +11840,12 @@ __flow_dv_action_rss_release(struct rte_eth_dev *dev, uint32_t idx, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "shared rss has references"); ++ remaining = __flow_dv_action_rss_hrxqs_release(dev, shared_rss); ++ if (remaining) ++ return rte_flow_error_set(error, EBUSY, ++ RTE_FLOW_ERROR_TYPE_ACTION, ++ NULL, ++ "shared rss hrxq has references"); + queue = shared_rss->ind_tbl->queues; + remaining = mlx5_ind_table_obj_release(dev, shared_rss->ind_tbl, true); + if (remaining) +@@ -12430,6 +12659,72 @@ flow_dv_create_policer_rules(struct rte_eth_dev *dev, + flow_dv_destroy_policer_rules(dev, fm, attr); + return -1; + } ++/** ++ * Check whether the DR drop action is supported on the root table or not. ++ * ++ * Create a simple flow with DR drop action on root table to validate ++ * if DR drop action on root table is supported or not. ++ * ++ * @param[in] dev ++ * Pointer to rte_eth_dev structure. ++ * ++ * @return ++ * 0 on success, a negative errno value otherwise and rte_errno is set. ++ */ ++int ++mlx5_flow_discover_dr_action_support(struct rte_eth_dev *dev) ++{ ++ struct mlx5_priv *priv = dev->data->dev_private; ++ struct mlx5_dev_ctx_shared *sh = priv->sh; ++ struct mlx5_flow_dv_match_params mask = { ++ .size = sizeof(mask.buf), ++ }; ++ struct mlx5_flow_dv_match_params value = { ++ .size = sizeof(value.buf), ++ }; ++ struct mlx5dv_flow_matcher_attr dv_attr = { ++ .type = IBV_FLOW_ATTR_NORMAL, ++ .priority = 0, ++ .match_criteria_enable = 0, ++ .match_mask = (void *)&mask, ++ }; ++ struct mlx5_flow_tbl_resource *tbl = NULL; ++ void *matcher = NULL; ++ void *flow = NULL; ++ int ret = -1; ++ ++ tbl = flow_dv_tbl_resource_get(dev, 0, 0, 0, false, NULL, ++ 0, 0, NULL); ++ if (!tbl) ++ goto err; ++ dv_attr.match_criteria_enable = flow_dv_matcher_enable(mask.buf); ++ ret = mlx5_flow_os_create_flow_matcher(sh->ctx, &dv_attr, tbl->obj, ++ &matcher); ++ if (ret) ++ goto err; ++ ret = mlx5_flow_os_create_flow(matcher, (void *)&value, 1, ++ &sh->dr_drop_action, &flow); ++err: ++ /* ++ * If DR drop action is not supported on root table, flow create will ++ * be failed with EOPNOTSUPP or EPROTONOSUPPORT. ++ */ ++ if (!flow) { ++ if (matcher && ++ (errno == EPROTONOSUPPORT || errno == EOPNOTSUPP)) ++ DRV_LOG(INFO, "DR drop action is not supported in root table."); ++ else ++ DRV_LOG(ERR, "Unexpected error in DR drop action support detection"); ++ ret = -1; ++ } else { ++ claim_zero(mlx5_flow_os_destroy_flow(flow)); ++ } ++ if (matcher) ++ claim_zero(mlx5_flow_os_destroy_flow_matcher(matcher)); ++ if (tbl) ++ flow_dv_tbl_resource_release(MLX5_SH(dev), tbl); ++ return ret; ++} + + /** + * Validate the batch counter support in root table. 
+@@ -12455,7 +12750,7 @@ mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev) + .size = sizeof(value.buf), + }; + struct mlx5dv_flow_matcher_attr dv_attr = { +- .type = IBV_FLOW_ATTR_NORMAL, ++ .type = IBV_FLOW_ATTR_NORMAL | IBV_FLOW_ATTR_FLAGS_EGRESS, + .priority = 0, + .match_criteria_enable = 0, + .match_mask = (void *)&mask, +@@ -12467,7 +12762,7 @@ mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev) + void *flow = NULL; + int ret = -1; + +- tbl = flow_dv_tbl_resource_get(dev, 0, 0, 0, false, NULL, 0, 0, NULL); ++ tbl = flow_dv_tbl_resource_get(dev, 0, 1, 0, false, NULL, 0, 0, NULL); + if (!tbl) + goto err; + dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0x4); +@@ -12477,13 +12772,12 @@ mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev) + &actions[0]); + if (ret) + goto err; +- actions[1] = priv->drop_queue.hrxq->action; + dv_attr.match_criteria_enable = flow_dv_matcher_enable(mask.buf); + ret = mlx5_flow_os_create_flow_matcher(sh->ctx, &dv_attr, tbl->obj, + &matcher); + if (ret) + goto err; +- ret = mlx5_flow_os_create_flow(matcher, (void *)&value, 2, ++ ret = mlx5_flow_os_create_flow(matcher, (void *)&value, 1, + actions, &flow); + err: + /* +@@ -12580,7 +12874,7 @@ flow_dv_counter_query(struct rte_eth_dev *dev, uint32_t counter, bool clear, + * @note: only stub for now + */ + static int +-flow_get_aged_flows(struct rte_eth_dev *dev, ++flow_dv_get_aged_flows(struct rte_eth_dev *dev, + void **context, + uint32_t nb_contexts, + struct rte_flow_error *error) +@@ -12728,7 +13022,7 @@ const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = { + .counter_alloc = flow_dv_counter_allocate, + .counter_free = flow_dv_counter_free, + .counter_query = flow_dv_counter_query, +- .get_aged_flows = flow_get_aged_flows, ++ .get_aged_flows = flow_dv_get_aged_flows, + .action_validate = flow_dv_action_validate, + .action_create = flow_dv_action_create, + .action_destroy = flow_dv_action_destroy, +diff --git a/dpdk/drivers/net/mlx5/mlx5_flow_meter.c b/dpdk/drivers/net/mlx5/mlx5_flow_meter.c +index 03a5e79eb8..058d94c9cf 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_flow_meter.c ++++ b/dpdk/drivers/net/mlx5/mlx5_flow_meter.c +@@ -208,7 +208,7 @@ mlx5_flow_meter_xbs_man_exp_calc(uint64_t xbs, uint8_t *man, uint8_t *exp) + int _exp; + double _man; + +- /* Special case xbs == 0 ? both exp and matissa are 0. */ ++ /* Special case xbs == 0 ? both exp and mantissa are 0. */ + if (xbs == 0) { + *man = 0; + *exp = 0; +@@ -216,8 +216,10 @@ mlx5_flow_meter_xbs_man_exp_calc(uint64_t xbs, uint8_t *man, uint8_t *exp) + } + /* xbs = xbs_mantissa * 2^xbs_exponent */ + _man = frexp(xbs, &_exp); +- _man = _man * pow(2, MLX5_MAN_WIDTH); +- _exp = _exp - MLX5_MAN_WIDTH; ++ if (_exp >= MLX5_MAN_WIDTH) { ++ _man = _man * pow(2, MLX5_MAN_WIDTH); ++ _exp = _exp - MLX5_MAN_WIDTH; ++ } + *man = (uint8_t)ceil(_man); + *exp = _exp; + } +@@ -226,7 +228,7 @@ mlx5_flow_meter_xbs_man_exp_calc(uint64_t xbs, uint8_t *man, uint8_t *exp) + * Fill the prm meter parameter. + * + * @param[in,out] fmp +- * Pointer to meter profie to be converted. ++ * Pointer to meter profile to be converted. + * @param[out] error + * Pointer to the error structure. 
+ * +@@ -948,7 +950,7 @@ mlx5_flow_meter_profile_update(struct rte_eth_dev *dev, + return -rte_mtr_error_set(error, -ret, + RTE_MTR_ERROR_TYPE_MTR_PARAMS, + NULL, "Failed to update meter" +- " parmeters in hardware."); ++ " parameters in hardware."); + } + old_fmp->ref_cnt--; + fmp->ref_cnt++; +diff --git a/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c b/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c +index bd060e9d44..6a755e7c36 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c ++++ b/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c +@@ -935,13 +935,48 @@ flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr, + } + } + ++/** ++ * Reserve space for GRE spec in spec buffer. ++ * ++ * @param[in,out] dev_flow ++ * Pointer to dev_flow structure. ++ * ++ * @return ++ * Pointer to reserved space in spec buffer. ++ */ ++static uint8_t * ++flow_verbs_reserve_gre(struct mlx5_flow *dev_flow) ++{ ++ uint8_t *buffer; ++ struct mlx5_flow_verbs_workspace *verbs = &dev_flow->verbs; ++#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT ++ unsigned int size = sizeof(struct ibv_flow_spec_tunnel); ++ struct ibv_flow_spec_tunnel tunnel = { ++ .type = IBV_FLOW_SPEC_VXLAN_TUNNEL, ++ .size = size, ++ }; ++#else ++ unsigned int size = sizeof(struct ibv_flow_spec_gre); ++ struct ibv_flow_spec_gre tunnel = { ++ .type = IBV_FLOW_SPEC_GRE, ++ .size = size, ++ }; ++#endif ++ ++ buffer = verbs->specs + verbs->size; ++ flow_verbs_spec_add(verbs, &tunnel, size); ++ return buffer; ++} ++ + /** + * Convert the @p item into a Verbs specification. This function assumes that +- * the input is valid and that there is space to insert the requested item +- * into the flow. ++ * the input is valid and that Verbs specification will be placed in ++ * the pre-reserved space. + * + * @param[in, out] dev_flow + * Pointer to dev_flow structure. ++ * @param[in, out] gre_spec ++ * Pointer to space reserved for GRE spec. + * @param[in] item + * Item specification. + * @param[in] item_flags +@@ -949,6 +984,7 @@ flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr, + */ + static void + flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow, ++ uint8_t *gre_spec, + const struct rte_flow_item *item __rte_unused, + uint64_t item_flags) + { +@@ -960,6 +996,7 @@ flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow, + .size = size, + }; + #else ++ static const struct rte_flow_item_gre empty_gre = {0,}; + const struct rte_flow_item_gre *spec = item->spec; + const struct rte_flow_item_gre *mask = item->mask; + unsigned int size = sizeof(struct ibv_flow_spec_gre); +@@ -968,17 +1005,29 @@ flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow, + .size = size, + }; + +- if (!mask) +- mask = &rte_flow_item_gre_mask; +- if (spec) { +- tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver; +- tunnel.val.protocol = spec->protocol; +- tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver; +- tunnel.mask.protocol = mask->protocol; +- /* Remove unwanted bits from values. */ +- tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver; ++ if (!spec) { ++ spec = &empty_gre; ++ mask = &empty_gre; ++ } else { ++ if (!mask) ++ mask = &rte_flow_item_gre_mask; ++ } ++ tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver; ++ tunnel.val.protocol = spec->protocol; ++ tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver; ++ tunnel.mask.protocol = mask->protocol; ++ /* Remove unwanted bits from values. 
*/ ++ tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver; ++ tunnel.val.key &= tunnel.mask.key; ++ if (tunnel.mask.protocol) { + tunnel.val.protocol &= tunnel.mask.protocol; +- tunnel.val.key &= tunnel.mask.key; ++ } else { ++ tunnel.val.protocol = mlx5_translate_tunnel_etypes(item_flags); ++ if (tunnel.val.protocol) { ++ tunnel.mask.protocol = 0xFFFF; ++ tunnel.val.protocol = ++ rte_cpu_to_be_16(tunnel.val.protocol); ++ } + } + #endif + if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) +@@ -989,7 +1038,8 @@ flow_verbs_translate_item_gre(struct mlx5_flow *dev_flow, + flow_verbs_item_gre_ip_protocol_update(&verbs->attr, + IBV_FLOW_SPEC_IPV6, + IPPROTO_GRE); +- flow_verbs_spec_add(verbs, &tunnel, size); ++ MLX5_ASSERT(gre_spec); ++ memcpy(gre_spec, &tunnel, size); + } + + /** +@@ -1709,6 +1759,8 @@ flow_verbs_translate(struct rte_eth_dev *dev, + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace(); + struct mlx5_flow_rss_desc *rss_desc; ++ const struct rte_flow_item *tunnel_item = NULL; ++ uint8_t *gre_spec = NULL; + + MLX5_ASSERT(wks); + rss_desc = &wks->rss_desc; +@@ -1723,12 +1775,12 @@ flow_verbs_translate(struct rte_eth_dev *dev, + case RTE_FLOW_ACTION_TYPE_FLAG: + flow_verbs_translate_action_flag(dev_flow, actions); + action_flags |= MLX5_FLOW_ACTION_FLAG; +- dev_flow->handle->mark = 1; ++ wks->mark = 1; + break; + case RTE_FLOW_ACTION_TYPE_MARK: + flow_verbs_translate_action_mark(dev_flow, actions); + action_flags |= MLX5_FLOW_ACTION_MARK; +- dev_flow->handle->mark = 1; ++ wks->mark = 1; + break; + case RTE_FLOW_ACTION_TYPE_DROP: + flow_verbs_translate_action_drop(dev_flow, actions); +@@ -1811,8 +1863,9 @@ flow_verbs_translate(struct rte_eth_dev *dev, + flow_verbs_translate_item_tcp(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L4; +- dev_flow->hash_fields |= +- mlx5_flow_hashfields_adjust ++ if (dev_flow->hash_fields != 0) ++ dev_flow->hash_fields |= ++ mlx5_flow_hashfields_adjust + (rss_desc, tunnel, ETH_RSS_TCP, + (IBV_RX_HASH_SRC_PORT_TCP | + IBV_RX_HASH_DST_PORT_TCP)); +@@ -1823,8 +1876,9 @@ flow_verbs_translate(struct rte_eth_dev *dev, + flow_verbs_translate_item_udp(dev_flow, items, + item_flags); + subpriority = MLX5_PRIORITY_MAP_L4; +- dev_flow->hash_fields |= +- mlx5_flow_hashfields_adjust ++ if (dev_flow->hash_fields != 0) ++ dev_flow->hash_fields |= ++ mlx5_flow_hashfields_adjust + (rss_desc, tunnel, ETH_RSS_UDP, + (IBV_RX_HASH_SRC_PORT_UDP | + IBV_RX_HASH_DST_PORT_UDP)); +@@ -1844,10 +1898,10 @@ flow_verbs_translate(struct rte_eth_dev *dev, + item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE; + break; + case RTE_FLOW_ITEM_TYPE_GRE: +- flow_verbs_translate_item_gre(dev_flow, items, +- item_flags); ++ gre_spec = flow_verbs_reserve_gre(dev_flow); + subpriority = MLX5_TUNNEL_PRIO_GET(rss_desc); + item_flags |= MLX5_FLOW_LAYER_GRE; ++ tunnel_item = items; + break; + case RTE_FLOW_ITEM_TYPE_MPLS: + flow_verbs_translate_item_mpls(dev_flow, items, +@@ -1861,6 +1915,9 @@ flow_verbs_translate(struct rte_eth_dev *dev, + NULL, "item not supported"); + } + } ++ if (item_flags & MLX5_FLOW_LAYER_GRE) ++ flow_verbs_translate_item_gre(dev_flow, gre_spec, ++ tunnel_item, item_flags); + dev_flow->handle->layers = item_flags; + /* Other members of attr will be ignored. 
*/ + dev_flow->verbs.attr.priority = +diff --git a/dpdk/drivers/net/mlx5/mlx5_mr.c b/dpdk/drivers/net/mlx5/mlx5_mr.c +index 8b20ee3f83..2a7fac8ad3 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_mr.c ++++ b/dpdk/drivers/net/mlx5/mlx5_mr.c +@@ -29,98 +29,6 @@ struct mr_update_mp_data { + int ret; + }; + +-/** +- * Callback for memory free event. Iterate freed memsegs and check whether it +- * belongs to an existing MR. If found, clear the bit from bitmap of MR. As a +- * result, the MR would be fragmented. If it becomes empty, the MR will be freed +- * later by mlx5_mr_garbage_collect(). Even if this callback is called from a +- * secondary process, the garbage collector will be called in primary process +- * as the secondary process can't call mlx5_mr_create(). +- * +- * The global cache must be rebuilt if there's any change and this event has to +- * be propagated to dataplane threads to flush the local caches. +- * +- * @param sh +- * Pointer to the Ethernet device shared context. +- * @param addr +- * Address of freed memory. +- * @param len +- * Size of freed memory. +- */ +-static void +-mlx5_mr_mem_event_free_cb(struct mlx5_dev_ctx_shared *sh, +- const void *addr, size_t len) +-{ +- const struct rte_memseg_list *msl; +- struct mlx5_mr *mr; +- int ms_n; +- int i; +- int rebuild = 0; +- +- DEBUG("device %s free callback: addr=%p, len=%zu", +- sh->ibdev_name, addr, len); +- msl = rte_mem_virt2memseg_list(addr); +- /* addr and len must be page-aligned. */ +- MLX5_ASSERT((uintptr_t)addr == +- RTE_ALIGN((uintptr_t)addr, msl->page_sz)); +- MLX5_ASSERT(len == RTE_ALIGN(len, msl->page_sz)); +- ms_n = len / msl->page_sz; +- rte_rwlock_write_lock(&sh->share_cache.rwlock); +- /* Clear bits of freed memsegs from MR. */ +- for (i = 0; i < ms_n; ++i) { +- const struct rte_memseg *ms; +- struct mr_cache_entry entry; +- uintptr_t start; +- int ms_idx; +- uint32_t pos; +- +- /* Find MR having this memseg. */ +- start = (uintptr_t)addr + i * msl->page_sz; +- mr = mlx5_mr_lookup_list(&sh->share_cache, &entry, start); +- if (mr == NULL) +- continue; +- MLX5_ASSERT(mr->msl); /* Can't be external memory. */ +- ms = rte_mem_virt2memseg((void *)start, msl); +- MLX5_ASSERT(ms != NULL); +- MLX5_ASSERT(msl->page_sz == ms->hugepage_sz); +- ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); +- pos = ms_idx - mr->ms_base_idx; +- MLX5_ASSERT(rte_bitmap_get(mr->ms_bmp, pos)); +- MLX5_ASSERT(pos < mr->ms_bmp_n); +- DEBUG("device %s MR(%p): clear bitmap[%u] for addr %p", +- sh->ibdev_name, (void *)mr, pos, (void *)start); +- rte_bitmap_clear(mr->ms_bmp, pos); +- if (--mr->ms_n == 0) { +- LIST_REMOVE(mr, mr); +- LIST_INSERT_HEAD(&sh->share_cache.mr_free_list, mr, mr); +- DEBUG("device %s remove MR(%p) from list", +- sh->ibdev_name, (void *)mr); +- } +- /* +- * MR is fragmented or will be freed. the global cache must be +- * rebuilt. +- */ +- rebuild = 1; +- } +- if (rebuild) { +- mlx5_mr_rebuild_cache(&sh->share_cache); +- /* +- * Flush local caches by propagating invalidation across cores. +- * rte_smp_wmb() is enough to synchronize this event. If one of +- * freed memsegs is seen by other core, that means the memseg +- * has been allocated by allocator, which will come after this +- * free call. Therefore, this store instruction (incrementing +- * generation below) will be guaranteed to be seen by other core +- * before the core sees the newly allocated memory. 
+- */ +- ++sh->share_cache.dev_gen; +- DEBUG("broadcasting local cache flush, gen=%d", +- sh->share_cache.dev_gen); +- rte_smp_wmb(); +- } +- rte_rwlock_write_unlock(&sh->share_cache.rwlock); +-} +- + /** + * Callback for memory event. This can be called from both primary and secondary + * process. +@@ -146,7 +54,8 @@ mlx5_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr, + rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock); + /* Iterate all the existing mlx5 devices. */ + LIST_FOREACH(sh, dev_list, mem_event_cb) +- mlx5_mr_mem_event_free_cb(sh, addr, len); ++ mlx5_free_mr_by_addr(&sh->share_cache, ++ sh->ibdev_name, addr, len); + rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock); + break; + case RTE_MEM_EVENT_ALLOC: +@@ -393,10 +302,10 @@ mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, + } + priv = dev->data->dev_private; + sh = priv->sh; +- rte_rwlock_read_lock(&sh->share_cache.rwlock); ++ rte_rwlock_write_lock(&sh->share_cache.rwlock); + mr = mlx5_mr_lookup_list(&sh->share_cache, &entry, (uintptr_t)addr); + if (!mr) { +- rte_rwlock_read_unlock(&sh->share_cache.rwlock); ++ rte_rwlock_write_unlock(&sh->share_cache.rwlock); + DRV_LOG(WARNING, "address 0x%" PRIxPTR " wasn't registered " + "to PCI device %p", (uintptr_t)addr, + (void *)pdev); +@@ -421,7 +330,7 @@ mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, + DEBUG("broadcasting local cache flush, gen=%d", + sh->share_cache.dev_gen); + rte_smp_wmb(); +- rte_rwlock_read_unlock(&sh->share_cache.rwlock); ++ rte_rwlock_write_unlock(&sh->share_cache.rwlock); + return 0; + } + +diff --git a/dpdk/drivers/net/mlx5/mlx5_rss.c b/dpdk/drivers/net/mlx5/mlx5_rss.c +index 845cebe2e8..c79cfcbba8 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rss.c ++++ b/dpdk/drivers/net/mlx5/mlx5_rss.c +@@ -210,17 +210,11 @@ mlx5_dev_rss_reta_update(struct rte_eth_dev *dev, + for (idx = 0, i = 0; (i != reta_size); ++i) { + idx = i / RTE_RETA_GROUP_SIZE; + pos = i % RTE_RETA_GROUP_SIZE; +- if (((reta_conf[idx].mask >> i) & 0x1) == 0) ++ if (((reta_conf[idx].mask >> pos) & 0x1) == 0) + continue; + MLX5_ASSERT(reta_conf[idx].reta[pos] < priv->rxqs_n); + (*priv->reta_idx)[i] = reta_conf[idx].reta[pos]; + } +- + priv->skip_default_rss_reta = 1; +- +- if (dev->data->dev_started) { +- mlx5_dev_stop(dev); +- return mlx5_dev_start(dev); +- } +- return 0; ++ return mlx5_traffic_restart(dev); + } +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxq.c b/dpdk/drivers/net/mlx5/mlx5_rxq.c +index 1a5cf99d51..cb79a8b772 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxq.c ++++ b/dpdk/drivers/net/mlx5/mlx5_rxq.c +@@ -80,7 +80,7 @@ mlx5_check_mprq_support(struct rte_eth_dev *dev) + inline int + mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq) + { +- return rxq->strd_num_n > 0; ++ return rxq->log_strd_num > 0; + } + + /** +@@ -135,7 +135,7 @@ mlx5_rxq_cqe_num(struct mlx5_rxq_data *rxq_data) + unsigned int wqe_n = 1 << rxq_data->elts_n; + + if (mlx5_rxq_mprq_enabled(rxq_data)) +- cqe_n = wqe_n * (1 << rxq_data->strd_num_n) - 1; ++ cqe_n = wqe_n * RTE_BIT32(rxq_data->log_strd_num) - 1; + else + cqe_n = wqe_n - 1; + return cqe_n; +@@ -198,15 +198,17 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl) + * Pointer to RX queue structure. + * + * @return +- * 0 on success, errno value on failure. ++ * 0 on success, negative errno value on failure. + */ + static int + rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) + { + const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n; + unsigned int elts_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 
+- (1 << rxq_ctrl->rxq.elts_n) * (1 << rxq_ctrl->rxq.strd_num_n) : +- (1 << rxq_ctrl->rxq.elts_n); ++ RTE_BIT32(rxq_ctrl->rxq.elts_n) * ++ RTE_BIT32(rxq_ctrl->rxq.log_strd_num) : ++ RTE_BIT32(rxq_ctrl->rxq.elts_n); ++ bool has_vec_support = mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0; + unsigned int i; + int err; + +@@ -222,8 +224,9 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) + rte_errno = ENOMEM; + goto error; + } +- /* Headroom is reserved by rte_pktmbuf_alloc(). */ +- MLX5_ASSERT(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM); ++ /* Only vectored Rx routines rely on headroom size. */ ++ MLX5_ASSERT(!has_vec_support || ++ DATA_OFF(buf) >= RTE_PKTMBUF_HEADROOM); + /* Buffer is supposed to be empty. */ + MLX5_ASSERT(rte_pktmbuf_data_len(buf) == 0); + MLX5_ASSERT(rte_pktmbuf_pkt_len(buf) == 0); +@@ -236,7 +239,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) + (*rxq_ctrl->rxq.elts)[i] = buf; + } + /* If Rx vector is activated. */ +- if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) { ++ if (has_vec_support) { + struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; + struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; + struct rte_pktmbuf_pool_private *priv = +@@ -289,7 +292,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) + * Pointer to RX queue structure. + * + * @return +- * 0 on success, errno value on failure. ++ * 0 on success, negative errno value on failure. + */ + int + rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) +@@ -302,7 +305,9 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) + */ + if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) + ret = rxq_alloc_elts_mprq(rxq_ctrl); +- return (ret || rxq_alloc_elts_sprq(rxq_ctrl)); ++ if (ret == 0) ++ ret = rxq_alloc_elts_sprq(rxq_ctrl); ++ return ret; + } + + /** +@@ -343,8 +348,8 @@ rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) + { + struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; + const uint16_t q_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? +- (1 << rxq->elts_n) * (1 << rxq->strd_num_n) : +- (1 << rxq->elts_n); ++ RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) : ++ RTE_BIT32(rxq->elts_n); + const uint16_t q_mask = q_n - 1; + uint16_t elts_ci = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? + rxq->elts_ci : rxq->rq_ci; +@@ -749,8 +754,19 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + (struct rte_eth_rxseg_split *)conf->rx_seg; + struct rte_eth_rxseg_split rx_single = {.mp = mp}; + uint16_t n_seg = conf->rx_nseg; ++ bool is_extmem = false; ++ uint64_t offloads = conf->offloads | ++ dev->data->dev_conf.rxmode.offloads; + int res; + ++ if ((offloads & DEV_RX_OFFLOAD_TCP_LRO) && ++ !priv->config.lro.supported) { ++ DRV_LOG(ERR, ++ "Port %u queue %u LRO is configured but not supported.", ++ dev->data->port_id, idx); ++ rte_errno = EINVAL; ++ return -rte_errno; ++ } + if (mp) { + /* + * The parameters should be checked on rte_eth_dev layer. +@@ -759,11 +775,10 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + */ + rx_seg = &rx_single; + n_seg = 1; ++ is_extmem = rte_pktmbuf_priv_flags(mp) & ++ RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF; + } + if (n_seg > 1) { +- uint64_t offloads = conf->offloads | +- dev->data->dev_conf.rxmode.offloads; +- + /* The offloads should be checked on rte_eth_dev layer. 
*/ + MLX5_ASSERT(offloads & DEV_RX_OFFLOAD_SCATTER); + if (!(offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)) { +@@ -778,7 +793,8 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + res = mlx5_rx_queue_pre_setup(dev, idx, &desc); + if (res) + return res; +- rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, conf, rx_seg, n_seg); ++ rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, ++ conf, rx_seg, n_seg, is_extmem); + if (!rxq_ctrl) { + DRV_LOG(ERR, "port %u unable to allocate queue index %u", + dev->data->port_id, idx); +@@ -904,9 +920,6 @@ mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev) + unsigned int count = 0; + struct rte_intr_handle *intr_handle = dev->intr_handle; + +- /* Representor shares dev->intr_handle with PF. */ +- if (priv->representor) +- return 0; + if (!dev->data->dev_conf.intr_conf.rxq) + return 0; + mlx5_rx_intr_vec_disable(dev); +@@ -987,9 +1000,6 @@ mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev) + unsigned int rxqs_n = priv->rxqs_n; + unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); + +- /* Representor shares dev->intr_handle with PF. */ +- if (priv->representor) +- return; + if (!dev->data->dev_conf.intr_conf.rxq) + return; + if (!intr_handle->intr_vec) +@@ -1237,8 +1247,8 @@ mlx5_mprq_alloc_mp(struct rte_eth_dev *dev) + unsigned int buf_len; + unsigned int obj_num; + unsigned int obj_size; +- unsigned int strd_num_n = 0; +- unsigned int strd_sz_n = 0; ++ unsigned int log_strd_num = 0; ++ unsigned int log_strd_sz = 0; + unsigned int i; + unsigned int n_ibv = 0; + +@@ -1255,16 +1265,18 @@ mlx5_mprq_alloc_mp(struct rte_eth_dev *dev) + n_ibv++; + desc += 1 << rxq->elts_n; + /* Get the max number of strides. */ +- if (strd_num_n < rxq->strd_num_n) +- strd_num_n = rxq->strd_num_n; ++ if (log_strd_num < rxq->log_strd_num) ++ log_strd_num = rxq->log_strd_num; + /* Get the max size of a stride. */ +- if (strd_sz_n < rxq->strd_sz_n) +- strd_sz_n = rxq->strd_sz_n; +- } +- MLX5_ASSERT(strd_num_n && strd_sz_n); +- buf_len = (1 << strd_num_n) * (1 << strd_sz_n); +- obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + (1 << strd_num_n) * +- sizeof(struct rte_mbuf_ext_shared_info) + RTE_PKTMBUF_HEADROOM; ++ if (log_strd_sz < rxq->log_strd_sz) ++ log_strd_sz = rxq->log_strd_sz; ++ } ++ MLX5_ASSERT(log_strd_num && log_strd_sz); ++ buf_len = RTE_BIT32(log_strd_num) * RTE_BIT32(log_strd_sz); ++ obj_size = sizeof(struct mlx5_mprq_buf) + buf_len + ++ RTE_BIT32(log_strd_num) * ++ sizeof(struct rte_mbuf_ext_shared_info) + ++ RTE_PKTMBUF_HEADROOM; + /* + * Received packets can be either memcpy'd or externally referenced. In + * case that the packet is attached to an mbuf as an external buffer, as +@@ -1310,7 +1322,7 @@ mlx5_mprq_alloc_mp(struct rte_eth_dev *dev) + snprintf(name, sizeof(name), "port-%u-mprq", dev->data->port_id); + mp = rte_mempool_create(name, obj_num, obj_size, MLX5_MPRQ_MP_CACHE_SZ, + 0, NULL, NULL, mlx5_mprq_buf_init, +- (void *)(uintptr_t)(1 << strd_num_n), ++ (void *)((uintptr_t)1 << log_strd_num), + dev->device->numa_node, 0); + if (mp == NULL) { + DRV_LOG(ERR, +@@ -1379,6 +1391,130 @@ mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint16_t idx, + priv->max_lro_msg_size * MLX5_LRO_SEG_CHUNK_SIZE); + } + ++/** ++ * Prepare both size and number of stride for Multi-Packet RQ. ++ * ++ * @param dev ++ * Pointer to Ethernet device. ++ * @param idx ++ * RX queue index. ++ * @param desc ++ * Number of descriptors to configure in queue. 
++ * @param rx_seg_en ++ * Indicator if Rx segment enables, if so Multi-Packet RQ doesn't enable. ++ * @param min_mbuf_size ++ * Non scatter min mbuf size, max_rx_pktlen plus overhead. ++ * @param actual_log_stride_num ++ * Log number of strides to configure for this queue. ++ * @param actual_log_stride_size ++ * Log stride size to configure for this queue. ++ * @param is_extmem ++ * Is external pinned memory pool used. ++ * ++ * @return ++ * 0 if Multi-Packet RQ is supported, otherwise -1. ++ */ ++static int ++mlx5_mprq_prepare(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, ++ bool rx_seg_en, uint32_t min_mbuf_size, ++ uint32_t *actual_log_stride_num, ++ uint32_t *actual_log_stride_size, ++ bool is_extmem) ++{ ++ struct mlx5_priv *priv = dev->data->dev_private; ++ struct mlx5_dev_config *config = &priv->config; ++ uint32_t log_min_stride_num = config->mprq.log_min_stride_num; ++ uint32_t log_max_stride_num = config->mprq.log_max_stride_num; ++ uint32_t log_def_stride_num = ++ RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM, ++ log_min_stride_num), ++ log_max_stride_num); ++ uint32_t log_min_stride_size = config->mprq.log_min_stride_size; ++ uint32_t log_max_stride_size = config->mprq.log_max_stride_size; ++ uint32_t log_def_stride_size = ++ RTE_MIN(RTE_MAX(MLX5_MPRQ_DEFAULT_LOG_STRIDE_SIZE, ++ log_min_stride_size), ++ log_max_stride_size); ++ uint32_t log_stride_wqe_size; ++ ++ if (mlx5_check_mprq_support(dev) != 1 || rx_seg_en || is_extmem) ++ goto unsupport; ++ /* Checks if chosen number of strides is in supported range. */ ++ if (config->mprq.log_stride_num > log_max_stride_num || ++ config->mprq.log_stride_num < log_min_stride_num) { ++ *actual_log_stride_num = log_def_stride_num; ++ DRV_LOG(WARNING, ++ "Port %u Rx queue %u number of strides for Multi-Packet RQ is out of range, setting default value (%u)", ++ dev->data->port_id, idx, RTE_BIT32(log_def_stride_num)); ++ } else { ++ *actual_log_stride_num = config->mprq.log_stride_num; ++ } ++ if (config->mprq.log_stride_size) { ++ /* Checks if chosen size of stride is in supported range. */ ++ if (config->mprq.log_stride_size > log_max_stride_size || ++ config->mprq.log_stride_size < log_min_stride_size) { ++ *actual_log_stride_size = log_def_stride_size; ++ DRV_LOG(WARNING, ++ "Port %u Rx queue %u size of a stride for Multi-Packet RQ is out of range, setting default value (%u)", ++ dev->data->port_id, idx, ++ RTE_BIT32(log_def_stride_size)); ++ } else { ++ *actual_log_stride_size = config->mprq.log_stride_size; ++ } ++ } else { ++ if (min_mbuf_size <= RTE_BIT32(log_max_stride_size)) ++ *actual_log_stride_size = log2above(min_mbuf_size); ++ else ++ goto unsupport; ++ } ++ log_stride_wqe_size = *actual_log_stride_num + *actual_log_stride_size; ++ /* Check if WQE buffer size is supported by hardware. 
*/ ++ if (log_stride_wqe_size < config->mprq.log_min_stride_wqe_size) { ++ *actual_log_stride_num = log_def_stride_num; ++ *actual_log_stride_size = log_def_stride_size; ++ DRV_LOG(WARNING, ++ "Port %u Rx queue %u size of WQE buffer for Multi-Packet RQ is too small, setting default values (stride_num_n=%u, stride_size_n=%u)", ++ dev->data->port_id, idx, RTE_BIT32(log_def_stride_num), ++ RTE_BIT32(log_def_stride_size)); ++ log_stride_wqe_size = log_def_stride_num + log_def_stride_size; ++ } ++ MLX5_ASSERT(log_stride_wqe_size >= ++ config->mprq.log_min_stride_wqe_size); ++ if (desc <= RTE_BIT32(*actual_log_stride_num)) ++ goto unsupport; ++ if (min_mbuf_size > RTE_BIT32(log_stride_wqe_size)) { ++ DRV_LOG(WARNING, "Port %u Rx queue %u " ++ "Multi-Packet RQ is unsupported, WQE buffer size (%u) " ++ "is smaller than min mbuf size (%u)", ++ dev->data->port_id, idx, RTE_BIT32(log_stride_wqe_size), ++ min_mbuf_size); ++ goto unsupport; ++ } ++ DRV_LOG(DEBUG, "Port %u Rx queue %u " ++ "Multi-Packet RQ is enabled strd_num_n = %u, strd_sz_n = %u", ++ dev->data->port_id, idx, RTE_BIT32(*actual_log_stride_num), ++ RTE_BIT32(*actual_log_stride_size)); ++ return 0; ++unsupport: ++ if (config->mprq.enabled) ++ DRV_LOG(WARNING, ++ "Port %u MPRQ is requested but cannot be enabled\n" ++ " (requested: pkt_sz = %u, desc_num = %u," ++ " rxq_num = %u, stride_sz = %u, stride_num = %u\n" ++ " supported: min_rxqs_num = %u, min_buf_wqe_sz = %u" ++ " min_stride_sz = %u, max_stride_sz = %u).\n" ++ "Rx segment is %senabled. External mempool is %sused.", ++ dev->data->port_id, min_mbuf_size, desc, priv->rxqs_n, ++ RTE_BIT32(config->mprq.log_stride_size), ++ RTE_BIT32(config->mprq.log_stride_num), ++ config->mprq.min_rxqs_num, ++ RTE_BIT32(config->mprq.log_min_stride_wqe_size), ++ RTE_BIT32(config->mprq.log_min_stride_size), ++ RTE_BIT32(config->mprq.log_max_stride_size), ++ rx_seg_en ? "" : "not ", is_extmem ? "" : "not "); ++ return -1; ++} ++ + /** + * Create a DPDK Rx queue. + * +@@ -1397,7 +1533,8 @@ mlx5_max_lro_msg_size_adjust(struct rte_eth_dev *dev, uint16_t idx, + struct mlx5_rxq_ctrl * + mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + unsigned int socket, const struct rte_eth_rxconf *conf, +- const struct rte_eth_rxseg_split *rx_seg, uint16_t n_seg) ++ const struct rte_eth_rxseg_split *rx_seg, uint16_t n_seg, ++ bool is_extmem) + { + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_rxq_ctrl *tmpl; +@@ -1413,30 +1550,29 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + RTE_PKTMBUF_HEADROOM; + unsigned int max_lro_size = 0; + unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM; +- const int mprq_en = mlx5_check_mprq_support(dev) > 0 && n_seg == 1 && +- !rx_seg[0].offset && !rx_seg[0].length; +- unsigned int mprq_stride_nums = config->mprq.stride_num_n ? +- config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N; +- unsigned int mprq_stride_size = non_scatter_min_mbuf_size <= +- (1U << config->mprq.max_stride_size_n) ? +- log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N; +- unsigned int mprq_stride_cap = (config->mprq.stride_num_n ? +- (1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) * +- (config->mprq.stride_size_n ? 
+- (1U << config->mprq.stride_size_n) : (1U << mprq_stride_size)); ++ uint32_t mprq_log_actual_stride_num = 0; ++ uint32_t mprq_log_actual_stride_size = 0; ++ bool rx_seg_en = n_seg != 1 || rx_seg[0].offset || rx_seg[0].length; ++ const int mprq_en = !mlx5_mprq_prepare(dev, idx, desc, rx_seg_en, ++ non_scatter_min_mbuf_size, ++ &mprq_log_actual_stride_num, ++ &mprq_log_actual_stride_size, ++ is_extmem); + /* + * Always allocate extra slots, even if eventually + * the vector Rx will not be used. + */ + uint16_t desc_n = desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; ++ size_t alloc_size = sizeof(*tmpl) + desc_n * sizeof(struct rte_mbuf *); + const struct rte_eth_rxseg_split *qs_seg = rx_seg; + unsigned int tail_len; + +- tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, +- sizeof(*tmpl) + desc_n * sizeof(struct rte_mbuf *) + +- (!!mprq_en) * +- (desc >> mprq_stride_nums) * sizeof(struct mlx5_mprq_buf *), +- 0, socket); ++ if (mprq_en) { ++ /* Trim the number of descs needed. */ ++ desc >>= mprq_log_actual_stride_num; ++ alloc_size += desc * sizeof(struct mlx5_mprq_buf *); ++ } ++ tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, alloc_size, 0, socket); + if (!tmpl) { + rte_errno = ENOMEM; + return NULL; +@@ -1525,43 +1661,19 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + tmpl->socket = socket; + if (dev->data->dev_conf.intr_conf.rxq) + tmpl->irq = 1; +- /* +- * This Rx queue can be configured as a Multi-Packet RQ if all of the +- * following conditions are met: +- * - MPRQ is enabled. +- * - The number of descs is more than the number of strides. +- * - max_rx_pkt_len plus overhead is less than the max size +- * of a stride or mprq_stride_size is specified by a user. +- * Need to make sure that there are enough strides to encap +- * the maximum packet size in case mprq_stride_size is set. +- * Otherwise, enable Rx scatter if necessary. +- */ +- if (mprq_en && desc > (1U << mprq_stride_nums) && +- (non_scatter_min_mbuf_size <= +- (1U << config->mprq.max_stride_size_n) || +- (config->mprq.stride_size_n && +- non_scatter_min_mbuf_size <= mprq_stride_cap))) { ++ if (mprq_en) { + /* TODO: Rx scatter isn't supported yet. */ + tmpl->rxq.sges_n = 0; +- /* Trim the number of descs needed. */ +- desc >>= mprq_stride_nums; +- tmpl->rxq.strd_num_n = config->mprq.stride_num_n ? +- config->mprq.stride_num_n : mprq_stride_nums; +- tmpl->rxq.strd_sz_n = config->mprq.stride_size_n ? 
+- config->mprq.stride_size_n : mprq_stride_size; ++ tmpl->rxq.log_strd_num = mprq_log_actual_stride_num; ++ tmpl->rxq.log_strd_sz = mprq_log_actual_stride_size; + tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT; + tmpl->rxq.strd_scatter_en = + !!(offloads & DEV_RX_OFFLOAD_SCATTER); + tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size, + config->mprq.max_memcpy_len); + max_lro_size = RTE_MIN(max_rx_pkt_len, +- (1u << tmpl->rxq.strd_num_n) * +- (1u << tmpl->rxq.strd_sz_n)); +- DRV_LOG(DEBUG, +- "port %u Rx queue %u: Multi-Packet RQ is enabled" +- " strd_num_n = %u, strd_sz_n = %u", +- dev->data->port_id, idx, +- tmpl->rxq.strd_num_n, tmpl->rxq.strd_sz_n); ++ RTE_BIT32(tmpl->rxq.log_strd_num) * ++ RTE_BIT32(tmpl->rxq.log_strd_sz)); + } else if (tmpl->rxq.rxseg_n == 1) { + MLX5_ASSERT(max_rx_pkt_len <= first_mb_free_size); + tmpl->rxq.sges_n = 0; +@@ -1595,24 +1707,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + tmpl->rxq.sges_n = sges_n; + max_lro_size = max_rx_pkt_len; + } +- if (config->mprq.enabled && !mlx5_rxq_mprq_enabled(&tmpl->rxq)) +- DRV_LOG(WARNING, +- "port %u MPRQ is requested but cannot be enabled\n" +- " (requested: pkt_sz = %u, desc_num = %u," +- " rxq_num = %u, stride_sz = %u, stride_num = %u\n" +- " supported: min_rxqs_num = %u," +- " min_stride_sz = %u, max_stride_sz = %u).", +- dev->data->port_id, non_scatter_min_mbuf_size, +- desc, priv->rxqs_n, +- config->mprq.stride_size_n ? +- (1U << config->mprq.stride_size_n) : +- (1U << mprq_stride_size), +- config->mprq.stride_num_n ? +- (1U << config->mprq.stride_num_n) : +- (1U << mprq_stride_nums), +- config->mprq.min_rxqs_num, +- (1U << config->mprq.min_stride_size_n), +- (1U << config->mprq.max_stride_size_n)); + DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", + dev->data->port_id, 1 << tmpl->rxq.sges_n); + if (desc % (1 << tmpl->rxq.sges_n)) { +@@ -1670,17 +1764,14 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + dev->data->port_id, + tmpl->rxq.crc_present ? "disabled" : "enabled", + tmpl->rxq.crc_present << 2); +- /* Save port ID. */ + tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf && + (!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS)); + tmpl->rxq.port_id = dev->data->port_id; + tmpl->priv = priv; + tmpl->rxq.mp = rx_seg[0].mp; + tmpl->rxq.elts_n = log2above(desc); +- tmpl->rxq.rq_repl_thresh = +- MLX5_VPMD_RXQ_RPLNSH_THRESH(desc_n); +- tmpl->rxq.elts = +- (struct rte_mbuf *(*)[desc_n])(tmpl + 1); ++ tmpl->rxq.rq_repl_thresh = MLX5_VPMD_RXQ_RPLNSH_THRESH(desc_n); ++ tmpl->rxq.elts = (struct rte_mbuf *(*)[desc_n])(tmpl + 1); + tmpl->rxq.mprq_bufs = + (struct mlx5_mprq_buf *(*)[desc])(*tmpl->rxq.elts + desc_n); + #ifndef RTE_ARCH_64 +@@ -1782,7 +1873,7 @@ mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx) + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_rxq_ctrl *rxq_ctrl; + +- if (!(*priv->rxqs)[idx]) ++ if (priv->rxqs == NULL || (*priv->rxqs)[idx] == NULL) + return 0; + rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); + if (__atomic_sub_fetch(&rxq_ctrl->refcnt, 1, __ATOMIC_RELAXED) > 1) +@@ -1896,7 +1987,7 @@ mlx5_rxq_get_hairpin_conf(struct rte_eth_dev *dev, uint16_t idx) + * Number of queues in the array. + * + * @return +- * 1 if all queues in indirection table match 0 othrwise. ++ * 1 if all queues in indirection table match 0 otherwise. 
+ */ + static int + mlx5_ind_table_obj_match_queues(const struct mlx5_ind_table_obj *ind_tbl, +@@ -2150,7 +2241,7 @@ mlx5_ind_table_obj_modify(struct rte_eth_dev *dev, + error: + err = rte_errno; + for (j = 0; j < i; j++) +- mlx5_rxq_release(dev, ind_tbl->queues[j]); ++ mlx5_rxq_release(dev, queues[j]); + rte_errno = err; + DEBUG("Port %u cannot setup indirection table.", dev->data->port_id); + return ret; +@@ -2244,7 +2335,7 @@ mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hrxq_idx, + if (hrxq->standalone) { + /* + * Replacement of indirection table unsupported for +- * stanalone hrxq objects (used by shared RSS). ++ * standalone hrxq objects (used by shared RSS). + */ + rte_errno = ENOTSUP; + return -rte_errno; +@@ -2472,7 +2563,7 @@ mlx5_drop_action_create(struct rte_eth_dev *dev) + + if (priv->drop_queue.hrxq) + return priv->drop_queue.hrxq; +- hrxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq), 0, SOCKET_ID_ANY); ++ hrxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*hrxq) + MLX5_RSS_HASH_KEY_LEN, 0, SOCKET_ID_ANY); + if (!hrxq) { + DRV_LOG(WARNING, + "Port %u cannot allocate memory for drop queue.", +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx.c b/dpdk/drivers/net/mlx5/mlx5_rxtx.c +index d12d746c2f..baacd7587a 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx.c ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx.c +@@ -78,6 +78,7 @@ static uint16_t mlx5_tx_burst_##func(void *txq, \ + } + + #define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx}, ++ struct mlx5_txq_stats stats_reset; /* stats on last reset. */ + + static __rte_always_inline uint32_t + rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, +@@ -465,7 +466,7 @@ rx_queue_count(struct mlx5_rxq_data *rxq) + const unsigned int cqe_n = (1 << rxq->cqe_n); + const unsigned int sges_n = (1 << rxq->sges_n); + const unsigned int elts_n = (1 << rxq->elts_n); +- const unsigned int strd_n = (1 << rxq->strd_num_n); ++ const unsigned int strd_n = RTE_BIT32(rxq->log_strd_num); + const unsigned int cqe_cnt = cqe_n - 1; + unsigned int cq_ci, used; + +@@ -566,8 +567,8 @@ mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id, + qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; + qinfo->scattered_rx = dev->data->scattered_rx; + qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ? +- (1 << rxq->elts_n) * (1 << rxq->strd_num_n) : +- (1 << rxq->elts_n); ++ RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) : ++ RTE_BIT32(rxq->elts_n); + } + + /** +@@ -872,10 +873,10 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) + + scat = &((volatile struct mlx5_wqe_mprq *) + rxq->wqes)[i].dseg; +- addr = (uintptr_t)mlx5_mprq_buf_addr(buf, +- 1 << rxq->strd_num_n); +- byte_count = (1 << rxq->strd_sz_n) * +- (1 << rxq->strd_num_n); ++ addr = (uintptr_t)mlx5_mprq_buf_addr ++ (buf, RTE_BIT32(rxq->log_strd_num)); ++ byte_count = RTE_BIT32(rxq->log_strd_sz) * ++ RTE_BIT32(rxq->log_strd_num); + } else { + struct rte_mbuf *buf = (*rxq->elts)[i]; + +@@ -899,7 +900,7 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) + .ai = 0, + }; + rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ? +- (wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0; ++ (wqe_n >> rxq->sges_n) * RTE_BIT32(rxq->log_strd_num) : 0; + /* Update doorbell counter. */ + rxq->rq_ci = wqe_n >> rxq->sges_n; + rte_io_wmb(); +@@ -982,6 +983,11 @@ mlx5_queue_state_modify(struct rte_eth_dev *dev, + return ret; + } + ++/* Must be negative. */ ++#define MLX5_ERROR_CQE_RET (-1) ++/* Must not be negative. */ ++#define MLX5_RECOVERY_ERROR_RET 0 ++ + /** + * Handle a Rx error. 
+ * The function inserts the RQ state to reset when the first error CQE is +@@ -996,7 +1002,7 @@ mlx5_queue_state_modify(struct rte_eth_dev *dev, + * 0 when called from non-vectorized Rx burst. + * + * @return +- * -1 in case of recovery error, otherwise the CQE status. ++ * MLX5_RECOVERY_ERROR_RET in case of recovery error, otherwise the CQE status. + */ + int + mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) +@@ -1004,7 +1010,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) + const uint16_t cqe_n = 1 << rxq->cqe_n; + const uint16_t cqe_mask = cqe_n - 1; + const uint16_t wqe_n = 1 << rxq->elts_n; +- const uint16_t strd_n = 1 << rxq->strd_num_n; ++ const uint16_t strd_n = RTE_BIT32(rxq->log_strd_num); + struct mlx5_rxq_ctrl *rxq_ctrl = + container_of(rxq, struct mlx5_rxq_ctrl, rxq); + union { +@@ -1025,7 +1031,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) + sm.queue_id = rxq->idx; + sm.state = IBV_WQS_RESET; + if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) +- return -1; ++ return MLX5_RECOVERY_ERROR_RET; + if (rxq_ctrl->dump_file_n < + rxq_ctrl->priv->config.max_dump_files_num) { + MKSTR(err_str, "Unexpected CQE error syndrome " +@@ -1066,7 +1072,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) + sm.state = IBV_WQS_RDY; + if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), + &sm)) +- return -1; ++ return MLX5_RECOVERY_ERROR_RET; + if (vec) { + const uint32_t elts_n = + mlx5_rxq_mprq_enabled(rxq) ? +@@ -1094,7 +1100,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) + rte_pktmbuf_free_seg + (*elt); + } +- return -1; ++ return MLX5_RECOVERY_ERROR_RET; + } + } + for (i = 0; i < (int)elts_n; ++i) { +@@ -1113,7 +1119,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) + } + return ret; + default: +- return -1; ++ return MLX5_RECOVERY_ERROR_RET; + } + } + +@@ -1131,7 +1137,9 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) + * written. + * + * @return +- * 0 in case of empty CQE, otherwise the packet size in bytes. ++ * 0 in case of empty CQE, MLX5_ERROR_CQE_RET in case of error CQE, ++ * otherwise the packet size in regular RxQ, and striding byte ++ * count format in mprq case. 
+ */ + static inline int + mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, +@@ -1198,8 +1206,8 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, + rxq->err_state)) { + ret = mlx5_rx_err_handle(rxq, 0); + if (ret == MLX5_CQE_STATUS_HW_OWN || +- ret == -1) +- return 0; ++ ret == MLX5_RECOVERY_ERROR_RET) ++ return MLX5_ERROR_CQE_RET; + } else { + return 0; + } +@@ -1338,10 +1346,15 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, + } + } + } +- if (rxq->dynf_meta && cqe->flow_table_metadata) { +- pkt->ol_flags |= rxq->flow_meta_mask; +- *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) = +- cqe->flow_table_metadata; ++ if (rxq->dynf_meta) { ++ uint32_t meta = cqe->flow_table_metadata & ++ rxq->flow_meta_port_mask; ++ ++ if (meta) { ++ pkt->ol_flags |= rxq->flow_meta_mask; ++ *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, ++ uint32_t *) = meta; ++ } + } + if (rxq->csum) + pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); +@@ -1430,13 +1443,18 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) + rte_mbuf_raw_free(pkt); + pkt = rep; + } ++ rq_ci >>= sges_n; ++ ++rq_ci; ++ rq_ci <<= sges_n; + break; + } + if (!pkt) { + cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; + len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); +- if (!len) { ++ if (len <= 0) { + rte_mbuf_raw_free(rep); ++ if (unlikely(len == MLX5_ERROR_CQE_RET)) ++ rq_ci = rxq->rq_ci << sges_n; + break; + } + pkt = seg; +@@ -1643,8 +1661,8 @@ uint16_t + mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) + { + struct mlx5_rxq_data *rxq = dpdk_rxq; +- const uint32_t strd_n = 1 << rxq->strd_num_n; +- const uint32_t strd_sz = 1 << rxq->strd_sz_n; ++ const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num); ++ const uint32_t strd_sz = RTE_BIT32(rxq->log_strd_sz); + const uint32_t cq_mask = (1 << rxq->cqe_n) - 1; + const uint32_t wq_mask = (1 << rxq->elts_n) - 1; + volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; +@@ -1673,8 +1691,13 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) + } + cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; + ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); +- if (!ret) ++ if (ret == 0) + break; ++ if (unlikely(ret == MLX5_ERROR_CQE_RET)) { ++ rq_ci = rxq->rq_ci; ++ consumed_strd = rxq->consumed_strd; ++ break; ++ } + byte_cnt = ret; + len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; + MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); +@@ -2576,7 +2599,6 @@ mlx5_tx_mseg_memcpy(uint8_t *pdst, + uint8_t *psrc; + + MLX5_ASSERT(len); +- MLX5_ASSERT(must <= len); + do { + /* Allow zero length packets, must check first. */ + dlen = rte_pktmbuf_data_len(loc->mbuf); +@@ -2603,9 +2625,11 @@ mlx5_tx_mseg_memcpy(uint8_t *pdst, + if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { + /* + * Copy only the minimal required +- * part of the data buffer. ++ * part of the data buffer. Limit amount ++ * of data to be copied to the length of ++ * available space. + */ +- len = diff; ++ len = RTE_MIN(len, diff); + } + } + continue; +@@ -2735,7 +2759,8 @@ mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, + * Copying may be interrupted inside the routine + * if run into no inline hint flag. + */ +- copy = tlen >= txq->inlen_mode ? 0 : (txq->inlen_mode - tlen); ++ copy = tso ? inlen : txq->inlen_mode; ++ copy = tlen >= copy ? 
0 : (copy - tlen); + copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); + tlen += copy; + if (likely(inlen <= tlen) || copy < part) { +@@ -3229,7 +3254,6 @@ mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, + inlen <= MLX5_ESEG_MIN_INLINE_SIZE || + inlen > (dlen + vlan))) + return MLX5_TXCMP_CODE_ERROR; +- MLX5_ASSERT(inlen >= txq->inlen_mode); + /* + * Check whether there are enough free WQEBBs: + * - Control Segment +@@ -3448,6 +3472,8 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, + unsigned int nxlen; + uintptr_t start; + ++ mbuf = loc->mbuf; ++ nxlen = rte_pktmbuf_data_len(mbuf); + /* + * Packet length exceeds the allowed inline + * data length, check whether the minimal +@@ -3457,29 +3483,26 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, + MLX5_ASSERT(txq->inlen_mode >= + MLX5_ESEG_MIN_INLINE_SIZE); + MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); +- inlen = txq->inlen_mode; +- } else { +- if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || +- !vlan || txq->vlan_en) { +- /* +- * VLAN insertion will be done inside by HW. +- * It is not utmost effective - VLAN flag is +- * checked twice, but we should proceed the +- * inlining length correctly and take into +- * account the VLAN header being inserted. +- */ +- return mlx5_tx_packet_multi_send +- (txq, loc, olx); +- } ++ inlen = RTE_MIN(txq->inlen_mode, inlen); ++ } else if (vlan && !txq->vlan_en) { ++ /* ++ * VLAN insertion is requested and hardware does not ++ * support the offload, will do with software inline. ++ */ + inlen = MLX5_ESEG_MIN_INLINE_SIZE; ++ } else if (mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || ++ nxlen > txq->inlen_send) { ++ return mlx5_tx_packet_multi_send(txq, loc, olx); ++ } else { ++ goto do_first; + } ++ if (mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) ++ goto do_build; + /* + * Now we know the minimal amount of data is requested + * to inline. Check whether we should inline the buffers + * from the chain beginning to eliminate some mbufs. + */ +- mbuf = loc->mbuf; +- nxlen = rte_pktmbuf_data_len(mbuf); + if (unlikely(nxlen <= txq->inlen_send)) { + /* We can inline first mbuf at least. */ + if (nxlen < inlen) { +@@ -3501,11 +3524,14 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, + goto do_align; + } + } ++do_first: + do { + inlen = nxlen; + mbuf = NEXT(mbuf); + /* There should be not end of packet. */ + MLX5_ASSERT(mbuf); ++ if (mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) ++ break; + nxlen = inlen + rte_pktmbuf_data_len(mbuf); + } while (unlikely(nxlen < txq->inlen_send)); + } +@@ -3533,6 +3559,7 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, + * Estimate the number of Data Segments conservatively, + * supposing no any mbufs is being freed during inlining. + */ ++do_build: + MLX5_ASSERT(inlen <= txq->inlen_send); + ds = NB_SEGS(loc->mbuf) + 2 + (inlen - + MLX5_ESEG_MIN_INLINE_SIZE + +@@ -3541,7 +3568,7 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, + if (unlikely(loc->wqe_free < ((ds + 3) / 4))) + return MLX5_TXCMP_CODE_EXIT; + /* Check for maximal WQE size. */ +- if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) ++ if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) + return MLX5_TXCMP_CODE_ERROR; + #ifdef MLX5_PMD_SOFT_COUNTERS + /* Update sent data bytes/packets counters. 
*/ +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx.h b/dpdk/drivers/net/mlx5/mlx5_rxtx.h +index c57ccc32ed..964ebaaaad 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx.h ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx.h +@@ -69,7 +69,6 @@ struct rxq_zip { + struct mlx5_mprq_buf { + struct rte_mempool *mp; + uint16_t refcnt; /* Atomically accessed refcnt. */ +- uint8_t pad[RTE_PKTMBUF_HEADROOM]; /* Headroom for the first packet. */ + struct rte_mbuf_ext_shared_info shinfos[]; + /* + * Shared information per stride. +@@ -119,8 +118,8 @@ struct mlx5_rxq_data { + unsigned int elts_n:4; /* Log 2 of Mbufs. */ + unsigned int rss_hash:1; /* RSS hash result is enabled. */ + unsigned int mark:1; /* Marked flow available on the queue. */ +- unsigned int strd_num_n:5; /* Log 2 of the number of stride. */ +- unsigned int strd_sz_n:4; /* Log 2 of stride size. */ ++ unsigned int log_strd_num:5; /* Log 2 of the number of stride. */ ++ unsigned int log_strd_sz:4; /* Log 2 of stride size. */ + unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */ + unsigned int err_state:2; /* enum mlx5_rxq_err_state. */ + unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */ +@@ -154,6 +153,7 @@ struct mlx5_rxq_data { + struct mlx5_dev_ctx_shared *sh; /* Shared context. */ + uint16_t idx; /* Queue index. */ + struct mlx5_rxq_stats stats; ++ struct mlx5_rxq_stats stats_reset; /* stats on last reset. */ + rte_xmm_t mbuf_initializer; /* Default rearm/flags for vectorized Rx. */ + struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */ + void *cq_uar; /* Verbs CQ user access region. */ +@@ -168,6 +168,7 @@ struct mlx5_rxq_data { + uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */ + uint64_t flow_meta_mask; + int32_t flow_meta_offset; ++ uint32_t flow_meta_port_mask; + uint32_t rxseg_n; /* Number of split segment descriptions. */ + struct mlx5_eth_rxseg rxseg[MLX5_MAX_RXQ_NSEG]; + /* Buffer split segment descriptions - sizes, offsets, pools. */ +@@ -189,7 +190,6 @@ struct mlx5_rxq_ctrl { + enum mlx5_rxq_type type; /* Rxq type. */ + unsigned int socket; /* CPU socket ID for allocations. */ + unsigned int irq:1; /* Whether IRQ is enabled. */ +- uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */ + uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */ + uint32_t wqn; /* WQ number. */ + uint16_t dump_file_n; /* Number of dump files. */ +@@ -272,6 +272,7 @@ struct mlx5_txq_data { + int32_t ts_offset; /* Timestamp field dynamic offset. */ + struct mlx5_dev_ctx_shared *sh; /* Shared context. */ + struct mlx5_txq_stats stats; /* TX queue counters. */ ++ struct mlx5_txq_stats stats_reset; /* stats on last reset. 
*/ + #ifndef RTE_ARCH_64 + rte_spinlock_t *uar_lock; + /* UAR access lock required for 32bit implementations */ +@@ -337,7 +338,7 @@ struct mlx5_rxq_ctrl *mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, + uint16_t desc, unsigned int socket, + const struct rte_eth_rxconf *conf, + const struct rte_eth_rxseg_split *rx_seg, +- uint16_t n_seg); ++ uint16_t n_seg, bool is_extmem); + struct mlx5_rxq_ctrl *mlx5_rxq_hairpin_new + (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + const struct rte_eth_hairpin_conf *hairpin_conf); +@@ -396,7 +397,7 @@ int mlx5_tx_hairpin_queue_setup + (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + const struct rte_eth_hairpin_conf *hairpin_conf); + void mlx5_tx_queue_release(void *dpdk_txq); +-void txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl); ++void txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl, void *bf_reg); + int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); + void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev); + int mlx5_txq_obj_verify(struct rte_eth_dev *dev); +@@ -748,7 +749,7 @@ mlx5_timestamp_set(struct rte_mbuf *mbuf, int offset, + static __rte_always_inline void + mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx) + { +- const uint32_t strd_n = 1 << rxq->strd_num_n; ++ const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num); + struct mlx5_mprq_buf *rep = rxq->mprq_repl; + volatile struct mlx5_wqe_data_seg *wqe = + &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg; +@@ -806,8 +807,8 @@ static __rte_always_inline enum mlx5_rqx_code + mprq_buf_to_pkt(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, uint32_t len, + struct mlx5_mprq_buf *buf, uint16_t strd_idx, uint16_t strd_cnt) + { +- const uint32_t strd_n = 1 << rxq->strd_num_n; +- const uint16_t strd_sz = 1 << rxq->strd_sz_n; ++ const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num); ++ const uint16_t strd_sz = RTE_BIT32(rxq->log_strd_sz); + const uint16_t strd_shift = + MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en; + const int32_t hdrm_overlap = +@@ -890,7 +891,7 @@ mprq_buf_to_pkt(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, uint32_t len, + buf_len, shinfo); + /* Set mbuf head-room. */ + SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM); +- MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF); ++ MLX5_ASSERT(pkt->ol_flags & EXT_ATTACHED_MBUF); + MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >= + len - (hdrm_overlap > 0 ? 
hdrm_overlap : 0)); + DATA_LEN(pkt) = len; +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec.c b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec.c +index 028e0f6121..d156de4ec1 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec.c ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec.c +@@ -142,7 +142,7 @@ static inline void + mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq) + { + const uint16_t wqe_n = 1 << rxq->elts_n; +- const uint32_t strd_n = 1 << rxq->strd_num_n; ++ const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num); + const uint32_t elts_n = wqe_n * strd_n; + const uint32_t wqe_mask = elts_n - 1; + uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi); +@@ -151,7 +151,8 @@ mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq) + unsigned int i; + + if (n >= rxq->rq_repl_thresh && +- rxq->elts_ci - rxq->rq_pi <= rxq->rq_repl_thresh) { ++ rxq->elts_ci - rxq->rq_pi <= ++ rxq->rq_repl_thresh + MLX5_VPMD_RX_MAX_BURST) { + MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n)); + MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) > + MLX5_VPMD_DESCS_PER_LOOP); +@@ -190,8 +191,8 @@ rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq, + { + const uint16_t wqe_n = 1 << rxq->elts_n; + const uint16_t wqe_mask = wqe_n - 1; +- const uint16_t strd_sz = 1 << rxq->strd_sz_n; +- const uint32_t strd_n = 1 << rxq->strd_num_n; ++ const uint16_t strd_sz = RTE_BIT32(rxq->log_strd_sz); ++ const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num); + const uint32_t elts_n = wqe_n * strd_n; + const uint32_t elts_mask = elts_n - 1; + uint32_t elts_idx = rxq->rq_pi & elts_mask; +@@ -421,7 +422,7 @@ rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, + const uint16_t q_n = 1 << rxq->cqe_n; + const uint16_t q_mask = q_n - 1; + const uint16_t wqe_n = 1 << rxq->elts_n; +- const uint32_t strd_n = 1 << rxq->strd_num_n; ++ const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num); + const uint32_t elts_n = wqe_n * strd_n; + const uint32_t elts_mask = elts_n - 1; + volatile struct mlx5_cqe *cq; +@@ -441,6 +442,8 @@ rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, + rte_prefetch0(cq + 3); + pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST); + mlx5_rx_mprq_replenish_bulk_mbuf(rxq); ++ /* Not to move past the allocated mbufs. */ ++ pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi); + /* See if there're unreturned mbufs from compressed CQE. */ + rcvd_pkt = rxq->decompressed; + if (rcvd_pkt > 0) { +@@ -456,8 +459,6 @@ rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, + /* Not to cross queue end. */ + pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx); + pkts_n = RTE_MIN(pkts_n, q_n - cq_idx); +- /* Not to move past the allocated mbufs. */ +- pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi); + if (!pkts_n) { + *no_cq = !cp_pkt; + return cp_pkt; +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h +index 48b677e40d..418e6ae23b 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h +@@ -840,7 +840,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + + /* + * A. load first Qword (8bytes) in one loop. +- * B. copy 4 mbuf pointers from elts ring to returing pkts. ++ * B. copy 4 mbuf pointers from elts ring to returning pkts. + * C. load remaining CQE data and extract necessary fields. 
+ * Final 16bytes cqes[] extracted from original 64bytes CQE has the + * following structure: +@@ -974,10 +974,10 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + (vector unsigned short)cqe_tmp1, cqe_sel_mask1); + cqe_tmp2 = (vector unsigned char)(vector unsigned long){ + *(__rte_aligned(8) unsigned long *) +- &cq[pos + p3].rsvd3[9], 0LL}; ++ &cq[pos + p3].rsvd4[2], 0LL}; + cqe_tmp1 = (vector unsigned char)(vector unsigned long){ + *(__rte_aligned(8) unsigned long *) +- &cq[pos + p2].rsvd3[9], 0LL}; ++ &cq[pos + p2].rsvd4[2], 0LL}; + cqes[3] = (vector unsigned char) + vec_sel((vector unsigned short)cqes[3], + (vector unsigned short)cqe_tmp2, +@@ -1037,10 +1037,10 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + (vector unsigned short)cqe_tmp1, cqe_sel_mask1); + cqe_tmp2 = (vector unsigned char)(vector unsigned long){ + *(__rte_aligned(8) unsigned long *) +- &cq[pos + p1].rsvd3[9], 0LL}; ++ &cq[pos + p1].rsvd4[2], 0LL}; + cqe_tmp1 = (vector unsigned char)(vector unsigned long){ + *(__rte_aligned(8) unsigned long *) +- &cq[pos].rsvd3[9], 0LL}; ++ &cq[pos].rsvd4[2], 0LL}; + cqes[1] = (vector unsigned char) + vec_sel((vector unsigned short)cqes[1], + (vector unsigned short)cqe_tmp2, cqe_sel_mask2); +@@ -1221,22 +1221,23 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + if (rxq->dynf_meta) { + uint64_t flag = rxq->flow_meta_mask; + int32_t offs = rxq->flow_meta_offset; +- uint32_t metadata; ++ uint32_t metadata, mask; + +- /* This code is subject for futher optimization. */ +- metadata = cq[pos].flow_table_metadata; ++ mask = rxq->flow_meta_port_mask; ++ /* This code is subject for further optimization. */ ++ metadata = cq[pos].flow_table_metadata & mask; + *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = + metadata; + pkts[pos]->ol_flags |= metadata ? flag : 0ULL; +- metadata = cq[pos + 1].flow_table_metadata; ++ metadata = cq[pos + 1].flow_table_metadata & mask; + *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) = + metadata; + pkts[pos + 1]->ol_flags |= metadata ? flag : 0ULL; +- metadata = cq[pos + 2].flow_table_metadata; ++ metadata = cq[pos + 2].flow_table_metadata & mask; + *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) = + metadata; + pkts[pos + 2]->ol_flags |= metadata ? flag : 0ULL; +- metadata = cq[pos + 3].flow_table_metadata; ++ metadata = cq[pos + 3].flow_table_metadata & mask; + *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) = + metadata; + pkts[pos + 3]->ol_flags |= metadata ? flag : 0ULL; +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +index 4c067d8801..aa60ee8b92 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_neon.h ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +@@ -593,7 +593,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + * there's no instruction to count trailing zeros. __builtin_clzl() is + * used instead. + * +- * A. copy 4 mbuf pointers from elts ring to returing pkts. ++ * A. copy 4 mbuf pointers from elts ring to returning pkts. + * B. load 64B CQE and extract necessary fields + * Final 16bytes cqes[] extracted from original 64bytes CQE has the + * following structure: +@@ -767,16 +767,15 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + comp_idx = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16( + comp_mask), 0)) / + (sizeof(uint16_t) * 8); +- /* D.6 mask out entries after the compressed CQE. 
*/ +- mask = vcreate_u16(comp_idx < MLX5_VPMD_DESCS_PER_LOOP ? +- -1UL >> (comp_idx * sizeof(uint16_t) * 8) : +- 0); +- invalid_mask = vorr_u16(invalid_mask, mask); ++ invalid_mask = vorr_u16(invalid_mask, comp_mask); + /* D.7 count non-compressed valid CQEs. */ + n = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16( + invalid_mask), 0)) / (sizeof(uint16_t) * 8); + nocmp_n += n; +- /* D.2 get the final invalid mask. */ ++ /* ++ * D.2 mask out entries after the compressed CQE. ++ * get the final invalid mask. ++ */ + mask = vcreate_u16(n < MLX5_VPMD_DESCS_PER_LOOP ? + -1UL >> (n * sizeof(uint16_t) * 8) : 0); + invalid_mask = vorr_u16(invalid_mask, mask); +@@ -830,21 +829,26 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + } + } + if (rxq->dynf_meta) { +- /* This code is subject for futher optimization. */ ++ /* This code is subject for further optimization. */ + int32_t offs = rxq->flow_meta_offset; ++ uint32_t mask = rxq->flow_meta_port_mask; + + *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = + container_of(p0, struct mlx5_cqe, +- pkt_info)->flow_table_metadata; ++ pkt_info)->flow_table_metadata & ++ mask; + *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) = + container_of(p1, struct mlx5_cqe, +- pkt_info)->flow_table_metadata; ++ pkt_info)->flow_table_metadata & ++ mask; + *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) = + container_of(p2, struct mlx5_cqe, +- pkt_info)->flow_table_metadata; ++ pkt_info)->flow_table_metadata & ++ mask; + *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) = + container_of(p3, struct mlx5_cqe, +- pkt_info)->flow_table_metadata; ++ pkt_info)->flow_table_metadata & ++ mask; + if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *)) + elts[pos]->ol_flags |= rxq->flow_meta_mask; + if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *)) +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +index 0b3f240e10..9b812e3844 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +@@ -565,7 +565,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + const __m128i flow_mark_adj = _mm_set_epi32(rxq->mark * (-1), 0, 0, 0); + /* + * A. load first Qword (8bytes) in one loop. +- * B. copy 4 mbuf pointers from elts ring to returing pkts. ++ * B. copy 4 mbuf pointers from elts ring to returning pkts. + * C. load remained CQE data and extract necessary fields. + * Final 16bytes cqes[] extracted from original 64bytes CQE has the + * following structure: +@@ -766,17 +766,18 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + } + } + if (rxq->dynf_meta) { +- /* This code is subject for futher optimization. */ ++ /* This code is subject for further optimization. 
*/ + int32_t offs = rxq->flow_meta_offset; ++ uint32_t mask = rxq->flow_meta_port_mask; + + *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = +- cq[pos].flow_table_metadata; ++ cq[pos].flow_table_metadata & mask; + *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) = +- cq[pos + p1].flow_table_metadata; ++ cq[pos + p1].flow_table_metadata & mask; + *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) = +- cq[pos + p2].flow_table_metadata; ++ cq[pos + p2].flow_table_metadata & mask; + *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) = +- cq[pos + p3].flow_table_metadata; ++ cq[pos + p3].flow_table_metadata & mask; + if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *)) + pkts[pos]->ol_flags |= rxq->flow_meta_mask; + if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *)) +diff --git a/dpdk/drivers/net/mlx5/mlx5_stats.c b/dpdk/drivers/net/mlx5/mlx5_stats.c +index 82d4d4a745..450037aea0 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_stats.c ++++ b/dpdk/drivers/net/mlx5/mlx5_stats.c +@@ -113,18 +113,23 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + idx = rxq->idx; + if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) { + #ifdef MLX5_PMD_SOFT_COUNTERS +- tmp.q_ipackets[idx] += rxq->stats.ipackets; +- tmp.q_ibytes[idx] += rxq->stats.ibytes; ++ tmp.q_ipackets[idx] += rxq->stats.ipackets - ++ rxq->stats_reset.ipackets; ++ tmp.q_ibytes[idx] += rxq->stats.ibytes - ++ rxq->stats_reset.ibytes; + #endif + tmp.q_errors[idx] += (rxq->stats.idropped + +- rxq->stats.rx_nombuf); ++ rxq->stats.rx_nombuf) - ++ (rxq->stats_reset.idropped + ++ rxq->stats_reset.rx_nombuf); + } + #ifdef MLX5_PMD_SOFT_COUNTERS +- tmp.ipackets += rxq->stats.ipackets; +- tmp.ibytes += rxq->stats.ibytes; ++ tmp.ipackets += rxq->stats.ipackets - rxq->stats_reset.ipackets; ++ tmp.ibytes += rxq->stats.ibytes - rxq->stats_reset.ibytes; + #endif +- tmp.ierrors += rxq->stats.idropped; +- tmp.rx_nombuf += rxq->stats.rx_nombuf; ++ tmp.ierrors += rxq->stats.idropped - rxq->stats_reset.idropped; ++ tmp.rx_nombuf += rxq->stats.rx_nombuf - ++ rxq->stats_reset.rx_nombuf; + } + for (i = 0; (i != priv->txqs_n); ++i) { + struct mlx5_txq_data *txq = (*priv->txqs)[i]; +@@ -134,15 +139,17 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + idx = txq->idx; + if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) { + #ifdef MLX5_PMD_SOFT_COUNTERS +- tmp.q_opackets[idx] += txq->stats.opackets; +- tmp.q_obytes[idx] += txq->stats.obytes; ++ tmp.q_opackets[idx] += txq->stats.opackets - ++ txq->stats_reset.opackets; ++ tmp.q_obytes[idx] += txq->stats.obytes - ++ txq->stats_reset.obytes; + #endif + } + #ifdef MLX5_PMD_SOFT_COUNTERS +- tmp.opackets += txq->stats.opackets; +- tmp.obytes += txq->stats.obytes; ++ tmp.opackets += txq->stats.opackets - txq->stats_reset.opackets; ++ tmp.obytes += txq->stats.obytes - txq->stats_reset.obytes; + #endif +- tmp.oerrors += txq->stats.oerrors; ++ tmp.oerrors += txq->stats.oerrors - txq->stats_reset.oerrors; + } + ret = mlx5_os_read_dev_stat(priv, "out_of_buffer", &tmp.imissed); + if (ret == 0) { +@@ -182,14 +189,14 @@ mlx5_stats_reset(struct rte_eth_dev *dev) + for (i = 0; (i != priv->rxqs_n); ++i) { + if ((*priv->rxqs)[i] == NULL) + continue; +- memset(&(*priv->rxqs)[i]->stats, 0, +- sizeof(struct mlx5_rxq_stats)); ++ (*priv->rxqs)[i]->stats_reset = (*priv->rxqs)[i]->stats; + } + for (i = 0; (i != priv->txqs_n); ++i) { +- if ((*priv->txqs)[i] == NULL) ++ struct mlx5_txq_data *txq_data = (*priv->txqs)[i]; ++ ++ if (txq_data == NULL) + continue; +- memset(&(*priv->txqs)[i]->stats, 0, +- sizeof(struct 
mlx5_txq_stats)); ++ txq_data->stats_reset = txq_data->stats; + } + mlx5_os_read_dev_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base); + stats_ctrl->imissed = 0; +diff --git a/dpdk/drivers/net/mlx5/mlx5_trigger.c b/dpdk/drivers/net/mlx5/mlx5_trigger.c +index bd029154f8..9b82ee40fd 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_trigger.c ++++ b/dpdk/drivers/net/mlx5/mlx5_trigger.c +@@ -13,6 +13,7 @@ + #include + + #include "mlx5.h" ++#include "mlx5_flow.h" + #include "mlx5_mr.h" + #include "mlx5_rxtx.h" + #include "mlx5_utils.h" +@@ -155,12 +156,29 @@ mlx5_rxq_start(struct rte_eth_dev *dev) + mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, + rxq_ctrl->rxq.mprq_mp); + } else { ++ struct rte_mempool *mp; ++ uint32_t flags; + uint32_t s; + +- for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) ++ /* ++ * The pinned external buffer should be ++ * registered for DMA operations by application. ++ * The mem_list of the pool contains ++ * the list of chunks with mbuf structures ++ * w/o built-in data buffers ++ * and DMA actually does not happen there, ++ * no need to create MR for these chunks. ++ */ ++ for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) { ++ mp = rxq_ctrl->rxq.rxseg[s].mp; ++ flags = rte_pktmbuf_priv_flags(mp); ++ if (flags & ++ RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) ++ continue; + mlx5_mr_update_mp + (dev, &rxq_ctrl->rxq.mr_ctrl, +- rxq_ctrl->rxq.rxseg[s].mp); ++ mp); ++ } + } + ret = rxq_alloc_elts(rxq_ctrl); + if (ret) +@@ -180,6 +198,7 @@ mlx5_rxq_start(struct rte_eth_dev *dev) + ret = priv->obj_ops.rxq_obj_new(dev, i); + if (ret) { + mlx5_free(rxq_ctrl->obj); ++ rxq_ctrl->obj = NULL; + goto error; + } + DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", +@@ -226,12 +245,11 @@ mlx5_hairpin_auto_bind(struct rte_eth_dev *dev) + txq_ctrl = mlx5_txq_get(dev, i); + if (!txq_ctrl) + continue; +- if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) { ++ if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN || ++ txq_ctrl->hairpin_conf.peers[0].port != self_port) { + mlx5_txq_release(dev, i); + continue; + } +- if (txq_ctrl->hairpin_conf.peers[0].port != self_port) +- continue; + if (txq_ctrl->hairpin_conf.manual_bind) { + mlx5_txq_release(dev, i); + return 0; +@@ -245,13 +263,12 @@ mlx5_hairpin_auto_bind(struct rte_eth_dev *dev) + txq_ctrl = mlx5_txq_get(dev, i); + if (!txq_ctrl) + continue; +- if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) { ++ /* Skip hairpin queues with other peer ports. */ ++ if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN || ++ txq_ctrl->hairpin_conf.peers[0].port != self_port) { + mlx5_txq_release(dev, i); + continue; + } +- /* Skip hairpin queues with other peer ports. */ +- if (txq_ctrl->hairpin_conf.peers[0].port != self_port) +- continue; + if (!txq_ctrl->obj) { + rte_errno = ENOMEM; + DRV_LOG(ERR, "port %u no txq object found: %d", +@@ -813,7 +830,7 @@ mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port) + + /* + * Unbind the hairpin port pair, HW configuration of both devices will be clear +- * and status will be reset for all the queues used between the them. ++ * and status will be reset for all the queues used between them. + * This function only supports to unbind the Tx from one Rx. 
+ * + * @param dev +@@ -1066,6 +1083,12 @@ mlx5_dev_start(struct rte_eth_dev *dev) + dev->data->port_id, strerror(rte_errno)); + goto error; + } ++ if ((priv->config.devx && priv->config.dv_flow_en && ++ priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) { ++ ret = priv->obj_ops.lb_dummy_queue_create(dev); ++ if (ret) ++ goto error; ++ } + ret = mlx5_txq_start(dev); + if (ret) { + DRV_LOG(ERR, "port %u Tx queue allocation failed: %s", +@@ -1129,11 +1152,18 @@ mlx5_dev_start(struct rte_eth_dev *dev) + priv->sh->port[priv->dev_port - 1].ih_port_id = + (uint32_t)dev->data->port_id; + } else { +- DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.", ++ DRV_LOG(INFO, "port %u starts without RMV interrupts.", + dev->data->port_id); +- dev->data->dev_conf.intr_conf.lsc = 0; + dev->data->dev_conf.intr_conf.rmv = 0; + } ++ if (priv->sh->intr_handle_nl.fd >= 0) { ++ priv->sh->port[priv->dev_port - 1].nl_ih_port_id = ++ (uint32_t)dev->data->port_id; ++ } else { ++ DRV_LOG(INFO, "port %u starts without LSC interrupts.", ++ dev->data->port_id); ++ dev->data->dev_conf.intr_conf.lsc = 0; ++ } + if (priv->sh->intr_handle_devx.fd >= 0) + priv->sh->port[priv->dev_port - 1].devx_ih_port_id = + (uint32_t)dev->data->port_id; +@@ -1146,6 +1176,8 @@ mlx5_dev_start(struct rte_eth_dev *dev) + mlx5_traffic_disable(dev); + mlx5_txq_stop(dev); + mlx5_rxq_stop(dev); ++ if (priv->obj_ops.lb_dummy_queue_release) ++ priv->obj_ops.lb_dummy_queue_release(dev); + mlx5_txpp_stop(dev); /* Stop last. */ + rte_errno = ret; /* Restore rte_errno. */ + return -rte_errno; +@@ -1178,11 +1210,15 @@ mlx5_dev_stop(struct rte_eth_dev *dev) + mlx5_traffic_disable(dev); + /* All RX queue flags will be cleared in the flush interface. */ + mlx5_flow_list_flush(dev, &priv->flows, true); ++ mlx5_shared_action_flush(dev); + mlx5_rx_intr_vec_disable(dev); + priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS; + priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS; ++ priv->sh->port[priv->dev_port - 1].nl_ih_port_id = RTE_MAX_ETHPORTS; + mlx5_txq_stop(dev); + mlx5_rxq_stop(dev); ++ if (priv->obj_ops.lb_dummy_queue_release) ++ priv->obj_ops.lb_dummy_queue_release(dev); + mlx5_txpp_stop(dev); + + return 0; +diff --git a/dpdk/drivers/net/mlx5/mlx5_txpp.c b/dpdk/drivers/net/mlx5/mlx5_txpp.c +index 28afda28cb..1f4c1081f5 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_txpp.c ++++ b/dpdk/drivers/net/mlx5/mlx5_txpp.c +@@ -328,6 +328,7 @@ mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh) + sq_attr.tis_num = sh->tis->id; + sq_attr.cqn = wq->cq->id; + sq_attr.cd_master = 1; ++ sq_attr.ts_format = mlx5_ts_format_conv(sh->sq_ts_format); + sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar); + sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC; + sq_attr.wq_attr.pd = sh->pdn; +@@ -577,6 +578,7 @@ mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh) + sq_attr.state = MLX5_SQC_STATE_RST; + sq_attr.cqn = wq->cq->id; + sq_attr.packet_pacing_rate_limit_index = sh->txpp.pp_id; ++ sq_attr.ts_format = mlx5_ts_format_conv(sh->sq_ts_format); + sq_attr.wq_attr.cd_slave = 1; + sq_attr.wq_attr.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar); + sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC; +@@ -674,8 +676,8 @@ mlx5_atomic_read_cqe(rte_int128_t *from, rte_int128_t *ts) + { + /* + * The only CQE of Clock Queue is being continuously +- * update by hardware with soecified rate. We have to +- * read timestump and WQE completion index atomically. ++ * updated by hardware with specified rate. 
We must ++ * read timestamp and WQE completion index atomically. + */ + #if defined(RTE_ARCH_X86_64) + rte_int128_t src; +@@ -736,15 +738,24 @@ mlx5_txpp_update_timestamp(struct mlx5_dev_ctx_shared *sh) + } to; + uint64_t ts; + uint16_t ci; ++ uint8_t opcode; + + static_assert(sizeof(struct mlx5_cqe_ts) == sizeof(rte_int128_t), + "Wrong timestamp CQE part size"); + mlx5_atomic_read_cqe((rte_int128_t *)&cqe->timestamp, &to.u128); +- if (to.cts.op_own >> 4) { +- DRV_LOG(DEBUG, "Clock Queue error sync lost."); +- __atomic_fetch_add(&sh->txpp.err_clock_queue, ++ opcode = MLX5_CQE_OPCODE(to.cts.op_own); ++ if (opcode) { ++ if (opcode != MLX5_CQE_INVALID) { ++ /* ++ * Commit the error state if and only if ++ * we have got at least one actual completion. ++ */ ++ DRV_LOG(DEBUG, ++ "Clock Queue error sync lost (%X).", opcode); ++ __atomic_fetch_add(&sh->txpp.err_clock_queue, + 1, __ATOMIC_RELAXED); +- sh->txpp.sync_lost = 1; ++ sh->txpp.sync_lost = 1; ++ } + return; + } + ci = rte_be_to_cpu_16(to.cts.wqe_counter); +@@ -1038,7 +1049,6 @@ mlx5_txpp_start(struct rte_eth_dev *dev) + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_dev_ctx_shared *sh = priv->sh; + int err = 0; +- int ret; + + if (!priv->config.tx_pp) { + /* Packet pacing is not requested for the device. */ +@@ -1051,14 +1061,14 @@ mlx5_txpp_start(struct rte_eth_dev *dev) + return 0; + } + if (priv->config.tx_pp > 0) { +- ret = rte_mbuf_dynflag_lookup +- (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL); +- if (ret < 0) ++ err = rte_mbuf_dynflag_lookup ++ (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL); ++ /* No flag registered means no service needed. */ ++ if (err < 0) + return 0; ++ err = 0; + } +- ret = pthread_mutex_lock(&sh->txpp.mutex); +- MLX5_ASSERT(!ret); +- RTE_SET_USED(ret); ++ claim_zero(pthread_mutex_lock(&sh->txpp.mutex)); + if (sh->txpp.refcnt) { + priv->txpp_en = 1; + ++sh->txpp.refcnt; +@@ -1072,9 +1082,7 @@ mlx5_txpp_start(struct rte_eth_dev *dev) + rte_errno = -err; + } + } +- ret = pthread_mutex_unlock(&sh->txpp.mutex); +- MLX5_ASSERT(!ret); +- RTE_SET_USED(ret); ++ claim_zero(pthread_mutex_unlock(&sh->txpp.mutex)); + return err; + } + +@@ -1092,24 +1100,21 @@ mlx5_txpp_stop(struct rte_eth_dev *dev) + { + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_dev_ctx_shared *sh = priv->sh; +- int ret; + + if (!priv->txpp_en) { + /* Packet pacing is already disabled for the device. */ + return; + } + priv->txpp_en = 0; +- ret = pthread_mutex_lock(&sh->txpp.mutex); +- MLX5_ASSERT(!ret); +- RTE_SET_USED(ret); ++ claim_zero(pthread_mutex_lock(&sh->txpp.mutex)); + MLX5_ASSERT(sh->txpp.refcnt); +- if (!sh->txpp.refcnt || --sh->txpp.refcnt) ++ if (!sh->txpp.refcnt || --sh->txpp.refcnt) { ++ claim_zero(pthread_mutex_unlock(&sh->txpp.mutex)); + return; ++ } + /* No references any more, do actual destroy. 
*/ + mlx5_txpp_destroy(sh); +- ret = pthread_mutex_unlock(&sh->txpp.mutex); +- MLX5_ASSERT(!ret); +- RTE_SET_USED(ret); ++ claim_zero(pthread_mutex_unlock(&sh->txpp.mutex)); + } + + /* +diff --git a/dpdk/drivers/net/mlx5/mlx5_txq.c b/dpdk/drivers/net/mlx5/mlx5_txq.c +index c53af10d58..f3516a8a5a 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_txq.c ++++ b/dpdk/drivers/net/mlx5/mlx5_txq.c +@@ -109,19 +109,26 @@ mlx5_get_tx_port_offloads(struct rte_eth_dev *dev) + if (config->tx_pp) + offloads |= DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP; + if (config->swp) { +- if (config->hw_csum) ++ if (config->swp & MLX5_SW_PARSING_CSUM_CAP) + offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; +- if (config->tso) ++ if (config->swp & MLX5_SW_PARSING_TSO_CAP) + offloads |= (DEV_TX_OFFLOAD_IP_TNL_TSO | + DEV_TX_OFFLOAD_UDP_TNL_TSO); + } + if (config->tunnel_en) { + if (config->hw_csum) + offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; +- if (config->tso) +- offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO | +- DEV_TX_OFFLOAD_GRE_TNL_TSO | +- DEV_TX_OFFLOAD_GENEVE_TNL_TSO); ++ if (config->tso) { ++ if (config->tunnel_en & ++ MLX5_TUNNELED_OFFLOADS_VXLAN_CAP) ++ offloads |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO; ++ if (config->tunnel_en & ++ MLX5_TUNNELED_OFFLOADS_GRE_CAP) ++ offloads |= DEV_TX_OFFLOAD_GRE_TNL_TSO; ++ if (config->tunnel_en & ++ MLX5_TUNNELED_OFFLOADS_GENEVE_CAP) ++ offloads |= DEV_TX_OFFLOAD_GENEVE_TNL_TSO; ++ } + } + return offloads; + } +@@ -519,9 +526,11 @@ txq_uar_ncattr_init(struct mlx5_txq_ctrl *txq_ctrl, size_t page_size) + * + * @param txq_ctrl + * Pointer to Tx queue control structure. ++ * @param bf_reg ++ * BlueFlame register from Verbs UAR. + */ + void +-txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl) ++txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl, void *bf_reg) + { + struct mlx5_priv *priv = txq_ctrl->priv; + struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv)); +@@ -538,7 +547,7 @@ txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl) + return; + MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); + MLX5_ASSERT(ppriv); +- ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg; ++ ppriv->uar_table[txq_ctrl->txq.idx] = bf_reg; + txq_uar_ncattr_init(txq_ctrl, page_size); + #ifndef RTE_ARCH_64 + /* Assign an UAR lock according to UAR page number */ +@@ -567,6 +576,7 @@ txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd) + { + struct mlx5_priv *priv = txq_ctrl->priv; + struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv)); ++ struct mlx5_proc_priv *primary_ppriv = priv->sh->pppriv; + struct mlx5_txq_data *txq = &txq_ctrl->txq; + void *addr; + uintptr_t uar_va; +@@ -585,20 +595,18 @@ txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd) + * As rdma-core, UARs are mapped in size of OS page + * size. Ref to libmlx5 function: mlx5_init_context() + */ +- uar_va = (uintptr_t)txq_ctrl->bf_reg; ++ uar_va = (uintptr_t)primary_ppriv->uar_table[txq->idx]; + offset = uar_va & (page_size - 1); /* Offset in page. 
*/ + addr = rte_mem_map(NULL, page_size, RTE_PROT_WRITE, RTE_MAP_SHARED, +- fd, txq_ctrl->uar_mmap_offset); ++ fd, txq_ctrl->uar_mmap_offset); + if (!addr) { +- DRV_LOG(ERR, +- "port %u mmap failed for BF reg of txq %u", ++ DRV_LOG(ERR, "Port %u mmap failed for BF reg of txq %u.", + txq->port_id, txq->idx); + rte_errno = ENXIO; + return -rte_errno; + } + addr = RTE_PTR_ADD(addr, offset); + ppriv->uar_table[txq->idx] = addr; +- txq_uar_ncattr_init(txq_ctrl, page_size); + return 0; + } + +@@ -970,11 +978,21 @@ txq_set_params(struct mlx5_txq_ctrl *txq_ctrl) + MLX5_MAX_TSO_HEADER); + txq_ctrl->txq.tso_en = 1; + } +- txq_ctrl->txq.tunnel_en = config->tunnel_en | config->swp; +- txq_ctrl->txq.swp_en = ((DEV_TX_OFFLOAD_IP_TNL_TSO | +- DEV_TX_OFFLOAD_UDP_TNL_TSO | +- DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) & +- txq_ctrl->txq.offloads) && config->swp; ++ if (((DEV_TX_OFFLOAD_VXLAN_TNL_TSO & txq_ctrl->txq.offloads) && ++ (config->tunnel_en & MLX5_TUNNELED_OFFLOADS_VXLAN_CAP)) | ++ ((DEV_TX_OFFLOAD_GRE_TNL_TSO & txq_ctrl->txq.offloads) && ++ (config->tunnel_en & MLX5_TUNNELED_OFFLOADS_GRE_CAP)) | ++ ((DEV_TX_OFFLOAD_GENEVE_TNL_TSO & txq_ctrl->txq.offloads) && ++ (config->tunnel_en & MLX5_TUNNELED_OFFLOADS_GENEVE_CAP)) | ++ (config->swp & MLX5_SW_PARSING_TSO_CAP)) ++ txq_ctrl->txq.tunnel_en = 1; ++ txq_ctrl->txq.swp_en = (((DEV_TX_OFFLOAD_IP_TNL_TSO | ++ DEV_TX_OFFLOAD_UDP_TNL_TSO) & ++ txq_ctrl->txq.offloads) && (config->swp & ++ MLX5_SW_PARSING_TSO_CAP)) | ++ ((DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM & ++ txq_ctrl->txq.offloads) && (config->swp & ++ MLX5_SW_PARSING_CSUM_CAP)); + } + + /** +@@ -1238,7 +1256,7 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx) + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_txq_ctrl *txq_ctrl; + +- if (!(*priv->txqs)[idx]) ++ if (priv->txqs == NULL || (*priv->txqs)[idx] == NULL) + return 0; + txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq); + if (__atomic_sub_fetch(&txq_ctrl->refcnt, 1, __ATOMIC_RELAXED) > 1) +diff --git a/dpdk/drivers/net/mlx5/mlx5_utils.h b/dpdk/drivers/net/mlx5/mlx5_utils.h +index be6e5f67aa..4bb6320e62 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_utils.h ++++ b/dpdk/drivers/net/mlx5/mlx5_utils.h +@@ -52,7 +52,7 @@ extern int mlx5_logtype; + + /* + * For the case which data is linked with sequence increased index, the +- * array table will be more efficiect than hash table once need to serarch ++ * array table will be more efficient than hash table once need to search + * one data entry in large numbers of entries. Since the traditional hash + * tables has fixed table size, when huge numbers of data saved to the hash + * table, it also comes lots of hash conflict. +diff --git a/dpdk/drivers/net/mvneta/mvneta_ethdev.c b/dpdk/drivers/net/mvneta/mvneta_ethdev.c +index 2cd73919ce..e92b90fcc4 100644 +--- a/dpdk/drivers/net/mvneta/mvneta_ethdev.c ++++ b/dpdk/drivers/net/mvneta/mvneta_ethdev.c +@@ -251,7 +251,7 @@ mvneta_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) + (mru + MRVL_NETA_PKT_OFFS > mbuf_data_size)) { + mru = mbuf_data_size - MRVL_NETA_PKT_OFFS; + mtu = MRVL_NETA_MRU_TO_MTU(mru); +- MVNETA_LOG(WARNING, "MTU too big, max MTU possible limitted by" ++ MVNETA_LOG(WARNING, "MTU too big, max MTU possible limited by" + " current mbuf size: %u. 
Set MTU to %u, MRU to %u", + mbuf_data_size, mtu, mru); + } +@@ -840,7 +840,6 @@ mvneta_eth_dev_create(struct rte_vdev_device *vdev, const char *name) + eth_dev->rx_pkt_burst = mvneta_rx_pkt_burst; + mvneta_set_tx_function(eth_dev); + eth_dev->dev_ops = &mvneta_ops; +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + + rte_eth_dev_probing_finish(eth_dev); + return 0; +diff --git a/dpdk/drivers/net/mvpp2/mrvl_ethdev.c b/dpdk/drivers/net/mvpp2/mrvl_ethdev.c +index 6cd5acd337..09af452abe 100644 +--- a/dpdk/drivers/net/mvpp2/mrvl_ethdev.c ++++ b/dpdk/drivers/net/mvpp2/mrvl_ethdev.c +@@ -398,12 +398,18 @@ mrvl_dev_configure(struct rte_eth_dev *dev) + dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) { + MRVL_LOG(WARNING, "Disabling hash for 1 rx queue"); + priv->ppio_params.inqs_params.hash_type = PP2_PPIO_HASH_T_NONE; +- ++ priv->configured = 1; + return 0; + } + +- return mrvl_configure_rss(priv, +- &dev->data->dev_conf.rx_adv_conf.rss_conf); ++ ret = mrvl_configure_rss(priv, ++ &dev->data->dev_conf.rx_adv_conf.rss_conf); ++ if (ret < 0) ++ return ret; ++ ++ priv->configured = 1; ++ ++ return 0; + } + + /** +@@ -444,7 +450,7 @@ mrvl_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) + if (mru - RTE_ETHER_CRC_LEN + MRVL_PKT_OFFS > mbuf_data_size) { + mru = mbuf_data_size + RTE_ETHER_CRC_LEN - MRVL_PKT_OFFS; + mtu = MRVL_PP2_MRU_TO_MTU(mru); +- MRVL_LOG(WARNING, "MTU too big, max MTU possible limitted " ++ MRVL_LOG(WARNING, "MTU too big, max MTU possible limited " + "by current mbuf size: %u. Set MTU to %u, MRU to %u", + mbuf_data_size, mtu, mru); + } +@@ -1370,13 +1376,14 @@ mrvl_xstats_get(struct rte_eth_dev *dev, + { + struct mrvl_priv *priv = dev->data->dev_private; + struct pp2_ppio_statistics ppio_stats; +- unsigned int i; ++ unsigned int i, count; + +- if (!stats) +- return 0; ++ count = RTE_DIM(mrvl_xstats_tbl); ++ if (n < count) ++ return count; + + pp2_ppio_get_statistics(priv->ppio, &ppio_stats, 0); +- for (i = 0; i < n && i < RTE_DIM(mrvl_xstats_tbl); i++) { ++ for (i = 0; i < count; i++) { + uint64_t val; + + if (mrvl_xstats_tbl[i].size == sizeof(uint32_t)) +@@ -1392,7 +1399,7 @@ mrvl_xstats_get(struct rte_eth_dev *dev, + stats[i].value = val; + } + +- return n; ++ return count; + } + + /** +diff --git a/dpdk/drivers/net/mvpp2/mrvl_ethdev.h b/dpdk/drivers/net/mvpp2/mrvl_ethdev.h +index eee5182ce8..c7ab0e4932 100644 +--- a/dpdk/drivers/net/mvpp2/mrvl_ethdev.h ++++ b/dpdk/drivers/net/mvpp2/mrvl_ethdev.h +@@ -208,9 +208,10 @@ struct mrvl_priv { + LIST_HEAD(shaper_profiles, mrvl_tm_shaper_profile) shaper_profiles; + LIST_HEAD(nodes, mrvl_tm_node) nodes; + uint64_t rate_max; ++ ++ uint8_t configured; /** indicates if device has been configured */ + }; + +-/** Flow operations forward declaration. */ + extern const struct rte_flow_ops mrvl_flow_ops; + + /** Meter operations forward declaration. */ +diff --git a/dpdk/drivers/net/mvpp2/mrvl_qos.c b/dpdk/drivers/net/mvpp2/mrvl_qos.c +index 7fd970309e..202978dfc8 100644 +--- a/dpdk/drivers/net/mvpp2/mrvl_qos.c ++++ b/dpdk/drivers/net/mvpp2/mrvl_qos.c +@@ -301,7 +301,7 @@ get_entry_values(const char *entry, uint8_t *tab, + } + + /** +- * Parse Traffic Class'es mapping configuration. ++ * Parse Traffic Classes mapping configuration. + * + * @param file Config file handle. + * @param port Which port to look for. 
+diff --git a/dpdk/drivers/net/mvpp2/mrvl_tm.c b/dpdk/drivers/net/mvpp2/mrvl_tm.c +index e98f576cfa..9fac80b867 100644 +--- a/dpdk/drivers/net/mvpp2/mrvl_tm.c ++++ b/dpdk/drivers/net/mvpp2/mrvl_tm.c +@@ -57,7 +57,7 @@ mrvl_get_max_rate(struct rte_eth_dev *dev, uint64_t *rate) + + close(fd); + +- *rate = ethtool_cmd_speed(&edata) * 1000 * 1000 / 8; ++ *rate = (uint64_t)ethtool_cmd_speed(&edata) * 1000 * 1000 / 8; + + return 0; + } +@@ -146,6 +146,11 @@ mrvl_node_type_get(struct rte_eth_dev *dev, uint32_t node_id, int *is_leaf, + struct mrvl_priv *priv = dev->data->dev_private; + struct mrvl_tm_node *node; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + if (!is_leaf) + return -rte_tm_error_set(error, EINVAL, + RTE_TM_ERROR_TYPE_UNSPECIFIED, +@@ -177,6 +182,11 @@ mrvl_capabilities_get(struct rte_eth_dev *dev, + { + struct mrvl_priv *priv = dev->data->dev_private; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + if (!cap) + return -rte_tm_error_set(error, EINVAL, + RTE_TM_ERROR_TYPE_UNSPECIFIED, +@@ -228,6 +238,11 @@ mrvl_level_capabilities_get(struct rte_eth_dev *dev, + { + struct mrvl_priv *priv = dev->data->dev_private; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + if (!cap) + return -rte_tm_error_set(error, EINVAL, + RTE_TM_ERROR_TYPE_UNSPECIFIED, +@@ -294,6 +309,11 @@ mrvl_node_capabilities_get(struct rte_eth_dev *dev, uint32_t node_id, + struct mrvl_priv *priv = dev->data->dev_private; + struct mrvl_tm_node *node; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + if (!cap) + return -rte_tm_error_set(error, EINVAL, + RTE_TM_ERROR_TYPE_UNSPECIFIED, +@@ -366,6 +386,11 @@ mrvl_shaper_profile_add(struct rte_eth_dev *dev, uint32_t shaper_profile_id, + struct mrvl_priv *priv = dev->data->dev_private; + struct mrvl_tm_shaper_profile *profile; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + if (!params) + return -rte_tm_error_set(error, EINVAL, + RTE_TM_ERROR_TYPE_UNSPECIFIED, +@@ -434,6 +459,11 @@ mrvl_shaper_profile_delete(struct rte_eth_dev *dev, uint32_t shaper_profile_id, + struct mrvl_priv *priv = dev->data->dev_private; + struct mrvl_tm_shaper_profile *profile; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + profile = mrvl_shaper_profile_from_id(priv, shaper_profile_id); + if (!profile) + return -rte_tm_error_set(error, ENODEV, +@@ -580,6 +610,11 @@ mrvl_node_add(struct rte_eth_dev *dev, uint32_t node_id, + struct mrvl_tm_node *node, *parent = NULL; + int ret; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + if (priv->ppio) + return -rte_tm_error_set(error, EPERM, + RTE_TM_ERROR_TYPE_UNSPECIFIED, +@@ -665,6 +700,11 @@ mrvl_node_delete(struct rte_eth_dev *dev, uint32_t node_id, + struct mrvl_priv *priv = dev->data->dev_private; + struct mrvl_tm_node *node; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + if 
(priv->ppio) { + return -rte_tm_error_set(error, EPERM, + RTE_TM_ERROR_TYPE_UNSPECIFIED, +@@ -729,6 +769,11 @@ mrvl_node_suspend(struct rte_eth_dev *dev, uint32_t node_id, + struct mrvl_tm_node *node, *tmp; + int ret; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + node = mrvl_node_from_id(priv, node_id); + if (!node) + return -rte_tm_error_set(error, ENODEV, +@@ -770,6 +815,11 @@ mrvl_node_resume(struct rte_eth_dev *dev, uint32_t node_id, + struct mrvl_tm_node *node; + int ret; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + node = mrvl_node_from_id(priv, node_id); + if (!node) + return -rte_tm_error_set(error, ENODEV, +@@ -806,6 +856,11 @@ mrvl_hierarchy_commit(struct rte_eth_dev *dev, int clear_on_fail, + struct mrvl_tm_node *node; + int ret; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + if (priv->ppio) { + ret = -rte_tm_error_set(error, EPERM, + RTE_TM_ERROR_TYPE_UNSPECIFIED, +@@ -912,6 +967,11 @@ mrvl_node_stats_read(struct rte_eth_dev *dev, uint32_t node_id, + struct mrvl_tm_node *node; + int ret; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + if (!priv->ppio) { + return -rte_tm_error_set(error, EPERM, + RTE_TM_ERROR_TYPE_UNSPECIFIED, +@@ -981,6 +1041,11 @@ mrvl_node_stats_update(struct rte_eth_dev *dev, uint32_t node_id, + struct mrvl_priv *priv = dev->data->dev_private; + struct mrvl_tm_node *node; + ++ if (!priv->configured) ++ return -rte_tm_error_set(error, ENODEV, ++ RTE_TM_ERROR_TYPE_UNSPECIFIED, ++ NULL, "Port didn't configured\n"); ++ + node = mrvl_node_from_id(priv, node_id); + if (!node) + return -rte_tm_error_set(error, ENODEV, +diff --git a/dpdk/drivers/net/netvsc/hn_ethdev.c b/dpdk/drivers/net/netvsc/hn_ethdev.c +index 49f954305d..3826d66b7e 100644 +--- a/dpdk/drivers/net/netvsc/hn_ethdev.c ++++ b/dpdk/drivers/net/netvsc/hn_ethdev.c +@@ -980,8 +980,8 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) + } + + hv->vmbus = vmbus; +- hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP]; +- hv->chim_res = &vmbus->resource[HV_SEND_BUF_MAP]; ++ hv->rxbuf_res = vmbus->resource[HV_RECV_BUF_MAP]; ++ hv->chim_res = vmbus->resource[HV_SEND_BUF_MAP]; + hv->port_id = eth_dev->data->port_id; + hv->latency = HN_CHAN_LATENCY_NS; + hv->rx_copybreak = HN_RXCOPY_THRESHOLD; +diff --git a/dpdk/drivers/net/netvsc/hn_nvs.c b/dpdk/drivers/net/netvsc/hn_nvs.c +index 03b6cc1551..4a2797bf8e 100644 +--- a/dpdk/drivers/net/netvsc/hn_nvs.c ++++ b/dpdk/drivers/net/netvsc/hn_nvs.c +@@ -193,11 +193,11 @@ hn_nvs_conn_rxbuf(struct hn_data *hv) + * Connect RXBUF to NVS. + */ + conn.type = NVS_TYPE_RXBUF_CONN; +- conn.gpadl = hv->rxbuf_res->phys_addr; ++ conn.gpadl = hv->rxbuf_res.phys_addr; + conn.sig = NVS_RXBUF_SIG; + PMD_DRV_LOG(DEBUG, "connect rxbuff va=%p gpad=%#" PRIx64, +- hv->rxbuf_res->addr, +- hv->rxbuf_res->phys_addr); ++ hv->rxbuf_res.addr, ++ hv->rxbuf_res.phys_addr); + + error = hn_nvs_execute(hv, &conn, sizeof(conn), + &resp, sizeof(resp), +@@ -229,7 +229,7 @@ hn_nvs_conn_rxbuf(struct hn_data *hv) + hv->rxbuf_section_cnt = resp.nvs_sect[0].slotcnt; + + /* +- * Pimary queue's rxbuf_info is not allocated at creation time. ++ * Primary queue's rxbuf_info is not allocated at creation time. 
+ * Now we can allocate it after we figure out the slotcnt. + */ + hv->primary->rxbuf_info = rte_calloc("HN_RXBUF_INFO", +@@ -308,17 +308,17 @@ hn_nvs_conn_chim(struct hn_data *hv) + struct hn_nvs_chim_conn chim; + struct hn_nvs_chim_connresp resp; + uint32_t sectsz; +- unsigned long len = hv->chim_res->len; ++ unsigned long len = hv->chim_res.len; + int error; + + /* Connect chimney sending buffer to NVS */ + memset(&chim, 0, sizeof(chim)); + chim.type = NVS_TYPE_CHIM_CONN; +- chim.gpadl = hv->chim_res->phys_addr; ++ chim.gpadl = hv->chim_res.phys_addr; + chim.sig = NVS_CHIM_SIG; + PMD_DRV_LOG(DEBUG, "connect send buf va=%p gpad=%#" PRIx64, +- hv->chim_res->addr, +- hv->chim_res->phys_addr); ++ hv->chim_res.addr, ++ hv->chim_res.phys_addr); + + error = hn_nvs_execute(hv, &chim, sizeof(chim), + &resp, sizeof(resp), +diff --git a/dpdk/drivers/net/netvsc/hn_rxtx.c b/dpdk/drivers/net/netvsc/hn_rxtx.c +index 015662fdb4..fe4ccd1c3e 100644 +--- a/dpdk/drivers/net/netvsc/hn_rxtx.c ++++ b/dpdk/drivers/net/netvsc/hn_rxtx.c +@@ -578,11 +578,11 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, + rte_iova_t iova; + + /* +- * Build an external mbuf that points to recveive area. ++ * Build an external mbuf that points to receive area. + * Use refcount to handle multiple packets in same + * receive buffer section. + */ +- rxbuf = hv->rxbuf_res->addr; ++ rxbuf = hv->rxbuf_res.addr; + iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf); + shinfo = &rxb->shinfo; + +@@ -765,8 +765,8 @@ hn_nvs_handle_rxbuf(struct rte_eth_dev *dev, + { + const struct vmbus_chanpkt_rxbuf *pkt; + const struct hn_nvs_hdr *nvs_hdr = buf; +- uint32_t rxbuf_sz = hv->rxbuf_res->len; +- char *rxbuf = hv->rxbuf_res->addr; ++ uint32_t rxbuf_sz = hv->rxbuf_res.len; ++ char *rxbuf = hv->rxbuf_res.addr; + unsigned int i, hlen, count; + struct hn_rx_bufinfo *rxb; + +@@ -1031,7 +1031,7 @@ hn_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_id) + * returns: + * - -EINVAL - offset outside of ring + * - RTE_ETH_RX_DESC_AVAIL - no data available yet +- * - RTE_ETH_RX_DESC_DONE - data is waiting in stagin ring ++ * - RTE_ETH_RX_DESC_DONE - data is waiting in staging ring + */ + int hn_dev_rx_queue_status(void *arg, uint16_t offset) + { +@@ -1266,7 +1266,7 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, + if (txd->chim_index == NVS_CHIM_IDX_INVALID) + return NULL; + +- chim = (uint8_t *)hv->chim_res->addr ++ chim = (uint8_t *)hv->chim_res.addr + + txd->chim_index * hv->chim_szmax; + + txq->agg_txd = txd; +@@ -1348,8 +1348,11 @@ static void hn_encap(struct rndis_packet_msg *pkt, + *pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen, + m->tso_segsz); + } +- } else if (m->ol_flags & +- (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM)) { ++ } else if ((m->ol_flags & PKT_TX_L4_MASK) == ++ PKT_TX_TCP_CKSUM || ++ (m->ol_flags & PKT_TX_L4_MASK) == ++ PKT_TX_UDP_CKSUM || ++ (m->ol_flags & PKT_TX_IP_CKSUM)) { + pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE, + NDIS_PKTINFO_TYPE_CSUM); + *pi_data = 0; +@@ -1363,9 +1366,11 @@ static void hn_encap(struct rndis_packet_msg *pkt, + *pi_data |= NDIS_TXCSUM_INFO_IPCS; + } + +- if (m->ol_flags & PKT_TX_TCP_CKSUM) ++ if ((m->ol_flags & PKT_TX_L4_MASK) == ++ PKT_TX_TCP_CKSUM) + *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen); +- else if (m->ol_flags & PKT_TX_UDP_CKSUM) ++ else if ((m->ol_flags & PKT_TX_L4_MASK) == ++ PKT_TX_UDP_CKSUM) + *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen); + } + +diff --git a/dpdk/drivers/net/netvsc/hn_var.h 
b/dpdk/drivers/net/netvsc/hn_var.h +index bd874c6b4d..3d3429c1a2 100644 +--- a/dpdk/drivers/net/netvsc/hn_var.h ++++ b/dpdk/drivers/net/netvsc/hn_var.h +@@ -119,7 +119,7 @@ struct hn_data { + uint32_t link_status; + uint32_t link_speed; + +- struct rte_mem_resource *rxbuf_res; /* UIO resource for Rx */ ++ struct rte_mem_resource rxbuf_res; /* UIO resource for Rx */ + uint32_t rxbuf_section_cnt; /* # of Rx sections */ + uint32_t rx_copybreak; + uint32_t rx_extmbuf_enable; +@@ -128,7 +128,7 @@ struct hn_data { + uint64_t rss_offloads; + + rte_spinlock_t chim_lock; +- struct rte_mem_resource *chim_res; /* UIO resource for Tx */ ++ struct rte_mem_resource chim_res; /* UIO resource for Tx */ + struct rte_bitmap *chim_bmap; /* Send buffer map */ + void *chim_bmem; + uint32_t tx_copybreak; +diff --git a/dpdk/drivers/net/nfb/nfb.h b/dpdk/drivers/net/nfb/nfb.h +index 59d3ab4986..96c44c3a45 100644 +--- a/dpdk/drivers/net/nfb/nfb.h ++++ b/dpdk/drivers/net/nfb/nfb.h +@@ -48,10 +48,6 @@ struct pmd_internals { + + char nfb_dev[PATH_MAX]; + struct nfb_device *nfb; +- /* Place to remember if filter was promiscuous or filtering by table, +- * when disabling allmulticast +- */ +- enum nc_rxmac_mac_filter rx_filter_original; + }; + + #endif /* _NFB_H_ */ +diff --git a/dpdk/drivers/net/nfb/nfb_ethdev.c b/dpdk/drivers/net/nfb/nfb_ethdev.c +index c55bcdf1ef..6740bc65c5 100644 +--- a/dpdk/drivers/net/nfb/nfb_ethdev.c ++++ b/dpdk/drivers/net/nfb/nfb_ethdev.c +@@ -77,9 +77,10 @@ static void + nfb_nc_rxmac_deinit(struct nc_rxmac *rxmac[RTE_MAX_NC_RXMAC], + uint16_t max_rxmac) + { +- for (; max_rxmac > 0; --max_rxmac) { +- nc_rxmac_close(rxmac[max_rxmac]); +- rxmac[max_rxmac] = NULL; ++ uint16_t i; ++ for (i = 0; i < max_rxmac; i++) { ++ nc_rxmac_close(rxmac[i]); ++ rxmac[i] = NULL; + } + } + +@@ -95,9 +96,10 @@ static void + nfb_nc_txmac_deinit(struct nc_txmac *txmac[RTE_MAX_NC_TXMAC], + uint16_t max_txmac) + { +- for (; max_txmac > 0; --max_txmac) { +- nc_txmac_close(txmac[max_txmac]); +- txmac[max_txmac] = NULL; ++ uint16_t i; ++ for (i = 0; i < max_txmac; i++) { ++ nc_txmac_close(txmac[i]); ++ txmac[i] = NULL; + } + } + +@@ -514,7 +516,6 @@ nfb_eth_dev_init(struct rte_eth_dev *dev) + + data->promiscuous = nfb_eth_promiscuous_get(dev); + data->all_multicast = nfb_eth_allmulticast_get(dev); +- internals->rx_filter_original = data->promiscuous; + + dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + +diff --git a/dpdk/drivers/net/nfb/nfb_rxmode.c b/dpdk/drivers/net/nfb/nfb_rxmode.c +index 2d0b613d21..ca6e4d5578 100644 +--- a/dpdk/drivers/net/nfb/nfb_rxmode.c ++++ b/dpdk/drivers/net/nfb/nfb_rxmode.c +@@ -14,8 +14,6 @@ nfb_eth_promiscuous_enable(struct rte_eth_dev *dev) + dev->data->dev_private; + uint16_t i; + +- internals->rx_filter_original = RXMAC_MAC_FILTER_PROMISCUOUS; +- + for (i = 0; i < internals->max_rxmac; ++i) { + nc_rxmac_mac_filter_enable(internals->rxmac[i], + RXMAC_MAC_FILTER_PROMISCUOUS); +@@ -30,16 +28,13 @@ nfb_eth_promiscuous_disable(struct rte_eth_dev *dev) + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; + uint16_t i; ++ enum nc_rxmac_mac_filter filter = RXMAC_MAC_FILTER_TABLE_BCAST; + +- internals->rx_filter_original = RXMAC_MAC_FILTER_TABLE; +- +- /* if promisc is not enabled, do nothing */ +- if (!nfb_eth_promiscuous_get(dev)) +- return 0; ++ if (dev->data->all_multicast) ++ filter = RXMAC_MAC_FILTER_TABLE_BCAST_MCAST; + + for (i = 0; i < internals->max_rxmac; ++i) { +- nc_rxmac_mac_filter_enable(internals->rxmac[i], +- RXMAC_MAC_FILTER_TABLE); 
++ nc_rxmac_mac_filter_enable(internals->rxmac[i], filter); + } + + return 0; +@@ -67,6 +62,8 @@ nfb_eth_allmulticast_enable(struct rte_eth_dev *dev) + dev->data->dev_private; + + uint16_t i; ++ if (dev->data->promiscuous) ++ return 0; + for (i = 0; i < internals->max_rxmac; ++i) { + nc_rxmac_mac_filter_enable(internals->rxmac[i], + RXMAC_MAC_FILTER_TABLE_BCAST_MCAST); +@@ -83,13 +80,12 @@ nfb_eth_allmulticast_disable(struct rte_eth_dev *dev) + + uint16_t i; + +- /* if multicast is not enabled do nothing */ +- if (!nfb_eth_allmulticast_get(dev)) ++ if (dev->data->promiscuous) + return 0; + + for (i = 0; i < internals->max_rxmac; ++i) { + nc_rxmac_mac_filter_enable(internals->rxmac[i], +- internals->rx_filter_original); ++ RXMAC_MAC_FILTER_TABLE_BCAST); + } + + return 0; +diff --git a/dpdk/drivers/net/nfp/nfp_net.c b/dpdk/drivers/net/nfp/nfp_net.c +index 9ea24e5bda..4fa0bcc9e7 100644 +--- a/dpdk/drivers/net/nfp/nfp_net.c ++++ b/dpdk/drivers/net/nfp/nfp_net.c +@@ -223,6 +223,7 @@ nfp_net_rx_queue_release(void *rx_queue) + + if (rxq) { + nfp_net_rx_queue_release_mbufs(rxq); ++ rte_memzone_free(rxq->tz); + rte_free(rxq->rxbufs); + rte_free(rxq); + } +@@ -259,6 +260,7 @@ nfp_net_tx_queue_release(void *tx_queue) + + if (txq) { + nfp_net_tx_queue_release_mbufs(txq); ++ rte_memzone_free(txq->tz); + rte_free(txq->txbufs); + rte_free(txq); + } +@@ -543,10 +545,6 @@ nfp_set_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr) + return -EBUSY; + } + +- if ((hw->ctrl & NFP_NET_CFG_CTRL_ENABLE) && +- !(hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR)) +- return -EBUSY; +- + /* Writing new MAC to the specific port BAR address */ + nfp_net_write_mac(hw, (uint8_t *)mac_addr); + +@@ -892,11 +890,15 @@ nfp_net_close(struct rte_eth_dev *dev) + for (i = 0; i < dev->data->nb_tx_queues; i++) { + nfp_net_reset_tx_queue( + (struct nfp_net_txq *)dev->data->tx_queues[i]); ++ nfp_net_tx_queue_release( ++ (struct nfp_net_txq *)dev->data->tx_queues[i]); + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + nfp_net_reset_rx_queue( + (struct nfp_net_rxq *)dev->data->rx_queues[i]); ++ nfp_net_rx_queue_release( ++ (struct nfp_net_rxq *)dev->data->rx_queues[i]); + } + + rte_intr_disable(&pci_dev->intr_handle); +@@ -907,8 +909,12 @@ nfp_net_close(struct rte_eth_dev *dev) + nfp_net_dev_interrupt_handler, + (void *)dev); + ++ /* Cancel possible impending LSC work here before releasing the port*/ ++ rte_eal_alarm_cancel(nfp_net_dev_interrupt_delayed_handler, ++ (void *)dev); ++ + /* +- * The ixgbe PMD driver disables the pcie master on the ++ * The ixgbe PMD disables the pcie master on the + * device. The i40e does not... 
+ */ + +@@ -1220,9 +1226,6 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + DEV_RX_OFFLOAD_UDP_CKSUM | + DEV_RX_OFFLOAD_TCP_CKSUM; + +- dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME | +- DEV_RX_OFFLOAD_RSS_HASH; +- + if (hw->cap & NFP_NET_CFG_CTRL_TXVLAN) + dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; + +@@ -1271,15 +1274,22 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + .nb_mtu_seg_max = NFP_TX_MAX_MTU_SEG, + }; + +- dev_info->flow_type_rss_offloads = ETH_RSS_IPV4 | +- ETH_RSS_NONFRAG_IPV4_TCP | +- ETH_RSS_NONFRAG_IPV4_UDP | +- ETH_RSS_IPV6 | +- ETH_RSS_NONFRAG_IPV6_TCP | +- ETH_RSS_NONFRAG_IPV6_UDP; ++ /* All NFP devices support jumbo frames */ ++ dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME; ++ ++ if (hw->cap & NFP_NET_CFG_CTRL_RSS) { ++ dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_RSS_HASH; + +- dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ; +- dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ; ++ dev_info->flow_type_rss_offloads = ETH_RSS_IPV4 | ++ ETH_RSS_NONFRAG_IPV4_TCP | ++ ETH_RSS_NONFRAG_IPV4_UDP | ++ ETH_RSS_IPV6 | ++ ETH_RSS_NONFRAG_IPV6_TCP | ++ ETH_RSS_NONFRAG_IPV6_UDP; ++ ++ dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ; ++ dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ; ++ } + + dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G | + ETH_LINK_SPEED_25G | ETH_LINK_SPEED_40G | +@@ -1605,6 +1615,11 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev, + rxq->dma = (uint64_t)tz->iova; + rxq->rxds = (struct nfp_net_rx_desc *)tz->addr; + ++ /* Also save the pointer to the memzone struct so it can be freed ++ * if needed ++ */ ++ rxq->tz = tz; ++ + /* mbuf pointers array for referencing mbufs linked to RX descriptors */ + rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs", + sizeof(*rxq->rxbufs) * nb_desc, +@@ -1745,6 +1760,11 @@ nfp_net_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + return -ENOMEM; + } + ++ /* Save the pointer to the memzone struct so it can be freed ++ * if needed ++ */ ++ txq->tz = tz; ++ + txq->tx_count = nb_desc; + txq->tx_free_thresh = tx_free_thresh; + txq->tx_pthresh = tx_conf->tx_thresh.pthresh; +@@ -2373,22 +2393,25 @@ nfp_net_vlan_offload_set(struct rte_eth_dev *dev, int mask) + { + uint32_t new_ctrl, update; + struct nfp_net_hw *hw; ++ struct rte_eth_conf *dev_conf; + int ret; + + hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private); +- new_ctrl = 0; +- +- /* Enable vlan strip if it is not configured yet */ +- if ((mask & ETH_VLAN_STRIP_OFFLOAD) && +- !(hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) +- new_ctrl = hw->ctrl | NFP_NET_CFG_CTRL_RXVLAN; ++ dev_conf = &dev->data->dev_conf; ++ new_ctrl = hw->ctrl; + +- /* Disable vlan strip just if it is configured */ +- if (!(mask & ETH_VLAN_STRIP_OFFLOAD) && +- (hw->ctrl & NFP_NET_CFG_CTRL_RXVLAN)) +- new_ctrl = hw->ctrl & ~NFP_NET_CFG_CTRL_RXVLAN; ++ /* ++ * Vlan stripping setting ++ * Enable or disable VLAN stripping ++ */ ++ if (mask & ETH_VLAN_STRIP_MASK) { ++ if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_VLAN_STRIP) ++ new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN; ++ else ++ new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN; ++ } + +- if (new_ctrl == 0) ++ if (new_ctrl == hw->ctrl) + return 0; + + update = NFP_NET_CFG_UPDATE_GEN; +@@ -3036,7 +3059,7 @@ nfp_cpp_bridge_serve_write(int sockfd, struct nfp_cpp *cpp) + off_t offset, nfp_offset; + uint32_t cpp_id, pos, len; + uint32_t tmpbuf[16]; +- size_t count, curlen, totlen = 0; ++ size_t count, curlen; + int err = 0; + + PMD_CPP_LOG(DEBUG, "%s: offset size %zu, 
count_size: %zu\n", __func__, +@@ -3113,7 +3136,6 @@ nfp_cpp_bridge_serve_write(int sockfd, struct nfp_cpp *cpp) + } + + nfp_offset += pos; +- totlen += pos; + nfp_cpp_area_release(area); + nfp_cpp_area_free(area); + +@@ -3138,7 +3160,7 @@ nfp_cpp_bridge_serve_read(int sockfd, struct nfp_cpp *cpp) + off_t offset, nfp_offset; + uint32_t cpp_id, pos, len; + uint32_t tmpbuf[16]; +- size_t count, curlen, totlen = 0; ++ size_t count, curlen; + int err = 0; + + PMD_CPP_LOG(DEBUG, "%s: offset size %zu, count_size: %zu\n", __func__, +@@ -3214,7 +3236,6 @@ nfp_cpp_bridge_serve_read(int sockfd, struct nfp_cpp *cpp) + } + + nfp_offset += pos; +- totlen += pos; + nfp_cpp_area_release(area); + nfp_cpp_area_free(area); + +diff --git a/dpdk/drivers/net/nfp/nfp_net_pmd.h b/dpdk/drivers/net/nfp/nfp_net_pmd.h +index 1295c5959e..67393f611e 100644 +--- a/dpdk/drivers/net/nfp/nfp_net_pmd.h ++++ b/dpdk/drivers/net/nfp/nfp_net_pmd.h +@@ -8,7 +8,7 @@ + * + * @file dpdk/pmd/nfp_net_pmd.h + * +- * Netronome NFP_NET PMD driver ++ * Netronome NFP_NET PMD + */ + + #ifndef _NFP_NET_PMD_H_ +@@ -28,10 +28,10 @@ struct nfp_net_adapter; + * DPDK uses uint16_t variables for these values + */ + #define NFP_NET_MAX_TX_DESC (32 * 1024) +-#define NFP_NET_MIN_TX_DESC 64 ++#define NFP_NET_MIN_TX_DESC 256 + + #define NFP_NET_MAX_RX_DESC (32 * 1024) +-#define NFP_NET_MIN_RX_DESC 64 ++#define NFP_NET_MIN_RX_DESC 256 + + /* Descriptor alignment */ + #define NFP_ALIGN_RING_DESC 128 +@@ -235,6 +235,9 @@ struct nfp_net_txq { + */ + struct nfp_net_tx_desc *txds; + ++ /* Pointer to the memzone for the ring */ ++ const struct rte_memzone *tz; ++ + /* + * At this point 48 bytes have been used for all the fields in the + * TX critical path. We have room for 8 bytes and still all placed +@@ -370,6 +373,9 @@ struct nfp_net_rxq { + /* DMA address of the queue */ + __le64 dma; + ++ /* Pointer to the memzone for the ring */ ++ const struct rte_memzone *tz; ++ + /* + * Queue information: @qidx is the queue index from Linux's + * perspective. @fl_qcidx is the index of the Queue +diff --git a/dpdk/drivers/net/nfp/nfpcore/nfp-common/nfp_resid.h b/dpdk/drivers/net/nfp/nfpcore/nfp-common/nfp_resid.h +index 0e03948ec7..394a7628e0 100644 +--- a/dpdk/drivers/net/nfp/nfpcore/nfp-common/nfp_resid.h ++++ b/dpdk/drivers/net/nfp/nfpcore/nfp-common/nfp_resid.h +@@ -63,7 +63,7 @@ + * Wildcard indicating a CPP read or write action + * + * The action used will be either read or write depending on whether a read or +- * write instruction/call is performed on the NFP_CPP_ID. It is recomended that ++ * write instruction/call is performed on the NFP_CPP_ID. It is recommended that + * the RW action is used even if all actions to be performed on a NFP_CPP_ID are + * known to be only reads or writes. Doing so will in many cases save NFP CPP + * internal software resources. +@@ -405,7 +405,7 @@ int nfp_idstr2meid(int chip_family, const char *s, const char **endptr); + * @param chip_family Chip family ID + * @param s A string of format "iX.anything" or "iX" + * @param endptr If non-NULL, *endptr will point to the trailing +- * striong after the ME ID part of the string, which ++ * string after the ME ID part of the string, which + * is either an empty string or the first character + * after the separating period. + * @return The island ID on succes, -1 on error. 
+@@ -425,7 +425,7 @@ int nfp_idstr2island(int chip_family, const char *s, const char **endptr); + * @param chip_family Chip family ID + * @param s A string of format "meX.anything" or "meX" + * @param endptr If non-NULL, *endptr will point to the trailing +- * striong after the ME ID part of the string, which ++ * string after the ME ID part of the string, which + * is either an empty string or the first character + * after the separating period. + * @return The ME number on succes, -1 on error. +diff --git a/dpdk/drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c b/dpdk/drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c +index 36725d69ab..69f6b165da 100644 +--- a/dpdk/drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c ++++ b/dpdk/drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c +@@ -16,9 +16,6 @@ + + #include + #include +-#if defined(RTE_BACKTRACE) +-#include +-#endif + #include + #include + #include +diff --git a/dpdk/drivers/net/nfp/nfpcore/nfp_cppcore.c b/dpdk/drivers/net/nfp/nfpcore/nfp_cppcore.c +index 6d629430d4..40d70b9625 100644 +--- a/dpdk/drivers/net/nfp/nfpcore/nfp_cppcore.c ++++ b/dpdk/drivers/net/nfp/nfpcore/nfp_cppcore.c +@@ -202,7 +202,7 @@ nfp_cpp_area_alloc(struct nfp_cpp *cpp, uint32_t dest, + * @address: start address on CPP target + * @size: size of area + * +- * Allocate and initilizae a CPP area structure, and lock it down so ++ * Allocate and initialize a CPP area structure, and lock it down so + * that it can be accessed directly. + * + * NOTE: @address and @size must be 32-bit aligned values. +diff --git a/dpdk/drivers/net/nfp/nfpcore/nfp_nsp.h b/dpdk/drivers/net/nfp/nfpcore/nfp_nsp.h +index c9c7b0d0fb..e74cdeb191 100644 +--- a/dpdk/drivers/net/nfp/nfpcore/nfp_nsp.h ++++ b/dpdk/drivers/net/nfp/nfpcore/nfp_nsp.h +@@ -272,7 +272,7 @@ int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes); + * @br_primary: branch id of primary bootloader + * @br_secondary: branch id of secondary bootloader + * @br_nsp: branch id of NSP +- * @primary: version of primarary bootloader ++ * @primary: version of primary bootloader + * @secondary: version id of secondary bootloader + * @nsp: version id of NSP + * @sensor_mask: mask of present sensors available on NIC +diff --git a/dpdk/drivers/net/nfp/nfpcore/nfp_resource.c b/dpdk/drivers/net/nfp/nfpcore/nfp_resource.c +index dd41fa4de4..7b5630fd86 100644 +--- a/dpdk/drivers/net/nfp/nfpcore/nfp_resource.c ++++ b/dpdk/drivers/net/nfp/nfpcore/nfp_resource.c +@@ -207,7 +207,7 @@ nfp_resource_acquire(struct nfp_cpp *cpp, const char *name) + * nfp_resource_release() - Release a NFP Resource handle + * @res: NFP Resource handle + * +- * NOTE: This function implictly unlocks the resource handle ++ * NOTE: This function implicitly unlocks the resource handle + */ + void + nfp_resource_release(struct nfp_resource *res) +diff --git a/dpdk/drivers/net/nfp/nfpcore/nfp_rtsym.c b/dpdk/drivers/net/nfp/nfpcore/nfp_rtsym.c +index cb7d83db51..2feca2ed81 100644 +--- a/dpdk/drivers/net/nfp/nfpcore/nfp_rtsym.c ++++ b/dpdk/drivers/net/nfp/nfpcore/nfp_rtsym.c +@@ -236,7 +236,7 @@ nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name) + * nfp_rtsym_read_le() - Read a simple unsigned scalar value from symbol + * @rtbl: NFP RTsym table + * @name: Symbol name +- * @error: Poniter to error code (optional) ++ * @error: Pointer to error code (optional) + * + * Lookup a symbol, map, read it and return it's value. Value of the symbol + * will be interpreted as a simple little-endian unsigned value. 
Symbol can +diff --git a/dpdk/drivers/net/nfp/nfpcore/nfp_target.h b/dpdk/drivers/net/nfp/nfpcore/nfp_target.h +index 2884a0034f..e8dcc9ad1e 100644 +--- a/dpdk/drivers/net/nfp/nfpcore/nfp_target.h ++++ b/dpdk/drivers/net/nfp/nfpcore/nfp_target.h +@@ -37,7 +37,7 @@ pushpull_width(int pp) + static inline int + target_rw(uint32_t cpp_id, int pp, int start, int len) + { +- int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ uint8_t island = NFP_CPP_ID_ISLAND_of(cpp_id); + + if (island && (island < start || island > (start + len))) + return NFP_ERRNO(EINVAL); +@@ -117,7 +117,7 @@ nfp6000_nbi_ppc(uint32_t cpp_id) + static inline int + nfp6000_nbi(uint32_t cpp_id, uint64_t address) + { +- int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ uint8_t island = NFP_CPP_ID_ISLAND_of(cpp_id); + uint64_t rel_addr = address & 0x3fFFFF; + + if (island && (island < 8 || island > 9)) +@@ -281,7 +281,7 @@ static inline int + nfp6000_mu(uint32_t cpp_id, uint64_t address) + { + int pp; +- int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ uint8_t island = NFP_CPP_ID_ISLAND_of(cpp_id); + + if (island == 0) { + if (address < 0x2000000000ULL) +@@ -316,7 +316,7 @@ nfp6000_mu(uint32_t cpp_id, uint64_t address) + static inline int + nfp6000_ila(uint32_t cpp_id) + { +- int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ uint8_t island = NFP_CPP_ID_ISLAND_of(cpp_id); + + if (island && (island < 48 || island > 51)) + return NFP_ERRNO(EINVAL); +@@ -336,7 +336,7 @@ nfp6000_ila(uint32_t cpp_id) + static inline int + nfp6000_pci(uint32_t cpp_id) + { +- int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ uint8_t island = NFP_CPP_ID_ISLAND_of(cpp_id); + + if (island && (island < 4 || island > 7)) + return NFP_ERRNO(EINVAL); +@@ -354,7 +354,7 @@ nfp6000_pci(uint32_t cpp_id) + static inline int + nfp6000_crypto(uint32_t cpp_id) + { +- int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ uint8_t island = NFP_CPP_ID_ISLAND_of(cpp_id); + + if (island && (island < 12 || island > 15)) + return NFP_ERRNO(EINVAL); +@@ -370,9 +370,9 @@ nfp6000_crypto(uint32_t cpp_id) + static inline int + nfp6000_cap_xpb(uint32_t cpp_id) + { +- int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ uint8_t island = NFP_CPP_ID_ISLAND_of(cpp_id); + +- if (island && (island < 1 || island > 63)) ++ if (island > 63) + return NFP_ERRNO(EINVAL); + + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { +@@ -410,9 +410,9 @@ nfp6000_cap_xpb(uint32_t cpp_id) + static inline int + nfp6000_cls(uint32_t cpp_id) + { +- int island = NFP_CPP_ID_ISLAND_of(cpp_id); ++ uint8_t island = NFP_CPP_ID_ISLAND_of(cpp_id); + +- if (island && (island < 1 || island > 63)) ++ if (island > 63) + return NFP_ERRNO(EINVAL); + + switch (cpp_id & NFP_CPP_ID(0, ~0, ~0)) { +@@ -540,11 +540,11 @@ nfp_target_cpp(uint32_t cpp_island_id, uint64_t cpp_island_address, + const uint32_t *imb_table) + { + int err; +- int island = NFP_CPP_ID_ISLAND_of(cpp_island_id); +- int target = NFP_CPP_ID_TARGET_of(cpp_island_id); ++ uint8_t island = NFP_CPP_ID_ISLAND_of(cpp_island_id); ++ uint8_t target = NFP_CPP_ID_TARGET_of(cpp_island_id); + uint32_t imb; + +- if (target < 0 || target >= 16) ++ if (target >= 16) + return NFP_ERRNO(EINVAL); + + if (island == 0) { +diff --git a/dpdk/drivers/net/octeontx/base/octeontx_pkivf.h b/dpdk/drivers/net/octeontx/base/octeontx_pkivf.h +index d41eaa57ed..8c86841ea6 100644 +--- a/dpdk/drivers/net/octeontx/base/octeontx_pkivf.h ++++ b/dpdk/drivers/net/octeontx/base/octeontx_pkivf.h +@@ -362,7 +362,6 @@ int octeontx_pki_port_open(int port); + int octeontx_pki_port_hash_config(int port, pki_hash_cfg_t *hash_cfg); + int 
octeontx_pki_port_pktbuf_config(int port, pki_pktbuf_cfg_t *buf_cfg); + int octeontx_pki_port_create_qos(int port, pki_qos_cfg_t *qos_cfg); +-int octeontx_pki_port_close(int port); + int octeontx_pki_port_errchk_config(int port, pki_errchk_cfg_t *cfg); + int octeontx_pki_port_vlan_fltr_config(int port, + pki_port_vlan_filter_config_t *fltr_cfg); +diff --git a/dpdk/drivers/net/octeontx/octeontx_ethdev.c b/dpdk/drivers/net/octeontx/octeontx_ethdev.c +index 5836dbe09e..cf01155c82 100644 +--- a/dpdk/drivers/net/octeontx/octeontx_ethdev.c ++++ b/dpdk/drivers/net/octeontx/octeontx_ethdev.c +@@ -25,6 +25,11 @@ + #include "octeontx_rxtx.h" + #include "octeontx_logs.h" + ++/* Useful in stopping/closing event device if no of ++ * eth ports are using it. ++ */ ++uint16_t evdev_refcnt; ++ + struct evdev_priv_data { + OFFLOAD_FLAGS; /*Sequence should not be changed */ + } __rte_cache_aligned; +@@ -490,7 +495,11 @@ octeontx_dev_close(struct rte_eth_dev *dev) + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + +- rte_event_dev_close(nic->evdev); ++ /* Stopping/closing event device once all eth ports are closed. */ ++ if (__atomic_sub_fetch(&evdev_refcnt, 1, __ATOMIC_ACQUIRE) == 0) { ++ rte_event_dev_stop(nic->evdev); ++ rte_event_dev_close(nic->evdev); ++ } + + octeontx_dev_flow_ctrl_fini(dev); + +@@ -681,8 +690,6 @@ octeontx_dev_stop(struct rte_eth_dev *dev) + + PMD_INIT_FUNC_TRACE(); + +- rte_event_dev_stop(nic->evdev); +- + ret = octeontx_port_stop(nic); + if (ret < 0) { + octeontx_log_err("failed to req stop port %d res=%d", +@@ -1102,7 +1109,7 @@ octeontx_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx, + + /* Verify queue index */ + if (qidx >= dev->data->nb_rx_queues) { +- octeontx_log_err("QID %d not supporteded (0 - %d available)\n", ++ octeontx_log_err("QID %d not supported (0 - %d available)\n", + qidx, (dev->data->nb_rx_queues - 1)); + return -ENOTSUP; + } +@@ -1346,6 +1353,7 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev, + nic->pko_vfid = pko_vfid; + nic->port_id = port; + nic->evdev = evdev; ++ __atomic_add_fetch(&evdev_refcnt, 1, __ATOMIC_ACQUIRE); + + res = octeontx_port_open(nic); + if (res < 0) +@@ -1374,7 +1382,6 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev, + data->promiscuous = 0; + data->all_multicast = 0; + data->scattered_rx = 0; +- data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + + /* Get maximum number of supported MAC entries */ + max_entries = octeontx_bgx_port_mac_entries_get(nic->port_id); +@@ -1596,6 +1603,7 @@ octeontx_probe(struct rte_vdev_device *dev) + } + } + ++ __atomic_store_n(&evdev_refcnt, 0, __ATOMIC_RELEASE); + /* + * Do 1:1 links for ports & queues. All queues would be mapped to + * one port. 
If there are more ports than queues, then some ports +diff --git a/dpdk/drivers/net/octeontx/octeontx_rxtx.h b/dpdk/drivers/net/octeontx/octeontx_rxtx.h +index 7c24d8b4fd..1447f2617e 100644 +--- a/dpdk/drivers/net/octeontx/octeontx_rxtx.h ++++ b/dpdk/drivers/net/octeontx/octeontx_rxtx.h +@@ -161,7 +161,7 @@ ptype_table[PTYPE_SIZE][PTYPE_SIZE][PTYPE_SIZE] = { + + + static __rte_always_inline uint64_t +-octeontx_pktmbuf_detach(struct rte_mbuf *m) ++octeontx_pktmbuf_detach(struct rte_mbuf *m, struct rte_mbuf **m_tofree) + { + struct rte_mempool *mp = m->pool; + uint32_t mbuf_size, buf_len; +@@ -171,6 +171,8 @@ octeontx_pktmbuf_detach(struct rte_mbuf *m) + + /* Update refcount of direct mbuf */ + md = rte_mbuf_from_indirect(m); ++ /* The real data will be in the direct buffer, inform callers this */ ++ *m_tofree = md; + refcount = rte_mbuf_refcnt_update(md, -1); + + priv_size = rte_pktmbuf_priv_size(mp); +@@ -203,18 +205,18 @@ octeontx_pktmbuf_detach(struct rte_mbuf *m) + } + + static __rte_always_inline uint64_t +-octeontx_prefree_seg(struct rte_mbuf *m) ++octeontx_prefree_seg(struct rte_mbuf *m, struct rte_mbuf **m_tofree) + { + if (likely(rte_mbuf_refcnt_read(m) == 1)) { + if (!RTE_MBUF_DIRECT(m)) +- return octeontx_pktmbuf_detach(m); ++ return octeontx_pktmbuf_detach(m, m_tofree); + + m->next = NULL; + m->nb_segs = 1; + return 0; + } else if (rte_mbuf_refcnt_update(m, -1) == 0) { + if (!RTE_MBUF_DIRECT(m)) +- return octeontx_pktmbuf_detach(m); ++ return octeontx_pktmbuf_detach(m, m_tofree); + + rte_mbuf_refcnt_set(m, 1); + m->next = NULL; +@@ -315,6 +317,14 @@ __octeontx_xmit_prepare(struct rte_mbuf *tx_pkt, uint64_t *cmd_buf, + const uint16_t flag) + { + uint16_t gaura_id, nb_desc = 0; ++ struct rte_mbuf *m_tofree; ++ rte_iova_t iova; ++ uint16_t data_len; ++ ++ m_tofree = tx_pkt; ++ ++ data_len = tx_pkt->data_len; ++ iova = rte_mbuf_data_iova(tx_pkt); + + /* Setup PKO_SEND_HDR_S */ + cmd_buf[nb_desc++] = tx_pkt->data_len & 0xffff; +@@ -329,22 +339,23 @@ __octeontx_xmit_prepare(struct rte_mbuf *tx_pkt, uint64_t *cmd_buf, + * not, as SG_DESC[I] and SEND_HDR[II] are clear. 
+ */ + if (flag & OCCTX_TX_OFFLOAD_MBUF_NOFF_F) +- cmd_buf[0] |= (octeontx_prefree_seg(tx_pkt) << ++ cmd_buf[0] |= (octeontx_prefree_seg(tx_pkt, &m_tofree) << + 58); + + /* Mark mempool object as "put" since it is freed by PKO */ + if (!(cmd_buf[0] & (1ULL << 58))) +- __mempool_check_cookies(tx_pkt->pool, (void **)&tx_pkt, ++ __mempool_check_cookies(m_tofree->pool, (void **)&m_tofree, + 1, 0); + /* Get the gaura Id */ +- gaura_id = octeontx_fpa_bufpool_gaura((uintptr_t)tx_pkt->pool->pool_id); ++ gaura_id = ++ octeontx_fpa_bufpool_gaura((uintptr_t)m_tofree->pool->pool_id); + + /* Setup PKO_SEND_BUFLINK_S */ + cmd_buf[nb_desc++] = PKO_SEND_BUFLINK_SUBDC | + PKO_SEND_BUFLINK_LDTYPE(0x1ull) | + PKO_SEND_BUFLINK_GAUAR((long)gaura_id) | +- tx_pkt->data_len; +- cmd_buf[nb_desc++] = rte_mbuf_data_iova(tx_pkt); ++ data_len; ++ cmd_buf[nb_desc++] = iova; + + return nb_desc; + } +@@ -354,8 +365,10 @@ __octeontx_xmit_mseg_prepare(struct rte_mbuf *tx_pkt, uint64_t *cmd_buf, + const uint16_t flag) + { + uint16_t nb_segs, nb_desc = 0; +- uint16_t gaura_id, len = 0; +- struct rte_mbuf *m_next = NULL; ++ uint16_t gaura_id; ++ struct rte_mbuf *m_next = NULL, *m_tofree; ++ rte_iova_t iova; ++ uint16_t data_len; + + nb_segs = tx_pkt->nb_segs; + /* Setup PKO_SEND_HDR_S */ +@@ -369,40 +382,49 @@ __octeontx_xmit_mseg_prepare(struct rte_mbuf *tx_pkt, uint64_t *cmd_buf, + + do { + m_next = tx_pkt->next; +- /* To handle case where mbufs belong to diff pools, like +- * fragmentation ++ /* Get TX parameters up front, octeontx_prefree_seg might change ++ * them + */ +- gaura_id = octeontx_fpa_bufpool_gaura((uintptr_t) +- tx_pkt->pool->pool_id); ++ m_tofree = tx_pkt; ++ data_len = tx_pkt->data_len; ++ iova = rte_mbuf_data_iova(tx_pkt); + + /* Setup PKO_SEND_GATHER_S */ +- cmd_buf[nb_desc] = PKO_SEND_GATHER_SUBDC | +- PKO_SEND_GATHER_LDTYPE(0x1ull) | +- PKO_SEND_GATHER_GAUAR((long)gaura_id) | +- tx_pkt->data_len; ++ cmd_buf[nb_desc] = 0; + + /* SG_DESC[I] bit controls if buffer is to be freed or + * not, as SEND_HDR[DF] and SEND_HDR[II] are clear. + */ + if (flag & OCCTX_TX_OFFLOAD_MBUF_NOFF_F) { + cmd_buf[nb_desc] |= +- (octeontx_prefree_seg(tx_pkt) << 57); ++ (octeontx_prefree_seg(tx_pkt, &m_tofree) << 57); + } + ++ /* To handle case where mbufs belong to diff pools, like ++ * fragmentation ++ */ ++ gaura_id = octeontx_fpa_bufpool_gaura((uintptr_t) ++ m_tofree->pool->pool_id); ++ ++ /* Setup PKO_SEND_GATHER_S */ ++ cmd_buf[nb_desc] |= PKO_SEND_GATHER_SUBDC | ++ PKO_SEND_GATHER_LDTYPE(0x1ull) | ++ PKO_SEND_GATHER_GAUAR((long)gaura_id) | ++ data_len; ++ + /* Mark mempool object as "put" since it is freed by + * PKO. 
+ */ + if (!(cmd_buf[nb_desc] & (1ULL << 57))) { + tx_pkt->next = NULL; +- __mempool_check_cookies(tx_pkt->pool, +- (void **)&tx_pkt, 1, 0); ++ __mempool_check_cookies(m_tofree->pool, ++ (void **)&m_tofree, 1, 0); + } + nb_desc++; + +- cmd_buf[nb_desc++] = rte_mbuf_data_iova(tx_pkt); ++ cmd_buf[nb_desc++] = iova; + + nb_segs--; +- len += tx_pkt->data_len; + tx_pkt = m_next; + } while (nb_segs); + +diff --git a/dpdk/drivers/net/octeontx2/otx2_ethdev.c b/dpdk/drivers/net/octeontx2/otx2_ethdev.c +index 6cebbe677d..95f85f874c 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_ethdev.c ++++ b/dpdk/drivers/net/octeontx2/otx2_ethdev.c +@@ -1311,6 +1311,7 @@ otx2_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t sq, + txq->qconf.nb_desc = nb_desc; + memcpy(&txq->qconf.conf.tx, tx_conf, sizeof(struct rte_eth_txconf)); + ++ txq->lso_tun_fmt = dev->lso_tun_fmt; + otx2_nix_form_default_desc(txq); + + otx2_nix_dbg("sq=%d fc=%p offload=0x%" PRIx64 " sqb=0x%" PRIx64 "" +@@ -1661,7 +1662,7 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev) + struct otx2_mbox *mbox = dev->mbox; + struct nix_lso_format_cfg_rsp *rsp; + struct nix_lso_format_cfg *req; +- uint8_t base; ++ uint8_t *fmt; + int rc; + + /* Skip if TSO was not requested */ +@@ -1676,11 +1677,9 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev) + if (rc) + return rc; + +- base = rsp->lso_format_idx; +- if (base != NIX_LSO_FORMAT_IDX_TSOV4) ++ if (rsp->lso_format_idx != NIX_LSO_FORMAT_IDX_TSOV4) + return -EFAULT; +- dev->lso_base_idx = base; +- otx2_nix_dbg("tcpv4 lso fmt=%u", base); ++ otx2_nix_dbg("tcpv4 lso fmt=%u", rsp->lso_format_idx); + + + /* +@@ -1692,9 +1691,9 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev) + if (rc) + return rc; + +- if (rsp->lso_format_idx != base + 1) ++ if (rsp->lso_format_idx != NIX_LSO_FORMAT_IDX_TSOV6) + return -EFAULT; +- otx2_nix_dbg("tcpv6 lso fmt=%u\n", base + 1); ++ otx2_nix_dbg("tcpv6 lso fmt=%u\n", rsp->lso_format_idx); + + /* + * IPv4/UDP/TUN HDR/IPv4/TCP LSO +@@ -1705,9 +1704,8 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev) + if (rc) + return rc; + +- if (rsp->lso_format_idx != base + 2) +- return -EFAULT; +- otx2_nix_dbg("udp tun v4v4 fmt=%u\n", base + 2); ++ dev->lso_udp_tun_idx[NIX_LSO_TUN_V4V4] = rsp->lso_format_idx; ++ otx2_nix_dbg("udp tun v4v4 fmt=%u\n", rsp->lso_format_idx); + + /* + * IPv4/UDP/TUN HDR/IPv6/TCP LSO +@@ -1718,9 +1716,8 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev) + if (rc) + return rc; + +- if (rsp->lso_format_idx != base + 3) +- return -EFAULT; +- otx2_nix_dbg("udp tun v4v6 fmt=%u\n", base + 3); ++ dev->lso_udp_tun_idx[NIX_LSO_TUN_V4V6] = rsp->lso_format_idx; ++ otx2_nix_dbg("udp tun v4v6 fmt=%u\n", rsp->lso_format_idx); + + /* + * IPv6/UDP/TUN HDR/IPv4/TCP LSO +@@ -1731,9 +1728,8 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev) + if (rc) + return rc; + +- if (rsp->lso_format_idx != base + 4) +- return -EFAULT; +- otx2_nix_dbg("udp tun v6v4 fmt=%u\n", base + 4); ++ dev->lso_udp_tun_idx[NIX_LSO_TUN_V6V4] = rsp->lso_format_idx; ++ otx2_nix_dbg("udp tun v6v4 fmt=%u\n", rsp->lso_format_idx); + + /* + * IPv6/UDP/TUN HDR/IPv6/TCP LSO +@@ -1743,9 +1739,9 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev) + rc = otx2_mbox_process_msg(mbox, (void *)&rsp); + if (rc) + return rc; +- if (rsp->lso_format_idx != base + 5) +- return -EFAULT; +- otx2_nix_dbg("udp tun v6v6 fmt=%u\n", base + 5); ++ ++ dev->lso_udp_tun_idx[NIX_LSO_TUN_V6V6] = rsp->lso_format_idx; ++ otx2_nix_dbg("udp tun v6v6 fmt=%u\n", rsp->lso_format_idx); + + /* + * IPv4/TUN HDR/IPv4/TCP LSO +@@ -1756,9 +1752,8 @@ 
nix_setup_lso_formats(struct otx2_eth_dev *dev) + if (rc) + return rc; + +- if (rsp->lso_format_idx != base + 6) +- return -EFAULT; +- otx2_nix_dbg("tun v4v4 fmt=%u\n", base + 6); ++ dev->lso_tun_idx[NIX_LSO_TUN_V4V4] = rsp->lso_format_idx; ++ otx2_nix_dbg("tun v4v4 fmt=%u\n", rsp->lso_format_idx); + + /* + * IPv4/TUN HDR/IPv6/TCP LSO +@@ -1769,9 +1764,8 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev) + if (rc) + return rc; + +- if (rsp->lso_format_idx != base + 7) +- return -EFAULT; +- otx2_nix_dbg("tun v4v6 fmt=%u\n", base + 7); ++ dev->lso_tun_idx[NIX_LSO_TUN_V4V6] = rsp->lso_format_idx; ++ otx2_nix_dbg("tun v4v6 fmt=%u\n", rsp->lso_format_idx); + + /* + * IPv6/TUN HDR/IPv4/TCP LSO +@@ -1782,9 +1776,8 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev) + if (rc) + return rc; + +- if (rsp->lso_format_idx != base + 8) +- return -EFAULT; +- otx2_nix_dbg("tun v6v4 fmt=%u\n", base + 8); ++ dev->lso_tun_idx[NIX_LSO_TUN_V6V4] = rsp->lso_format_idx; ++ otx2_nix_dbg("tun v6v4 fmt=%u\n", rsp->lso_format_idx); + + /* + * IPv6/TUN HDR/IPv6/TCP LSO +@@ -1794,9 +1787,26 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev) + rc = otx2_mbox_process_msg(mbox, (void *)&rsp); + if (rc) + return rc; +- if (rsp->lso_format_idx != base + 9) +- return -EFAULT; +- otx2_nix_dbg("tun v6v6 fmt=%u\n", base + 9); ++ ++ dev->lso_tun_idx[NIX_LSO_TUN_V6V6] = rsp->lso_format_idx; ++ otx2_nix_dbg("tun v6v6 fmt=%u\n", rsp->lso_format_idx); ++ ++ /* Save all tun formats into u64 for fast path. ++ * Lower 32bit has non-udp tunnel formats. ++ * Upper 32bit has udp tunnel formats. ++ */ ++ fmt = dev->lso_tun_idx; ++ dev->lso_tun_fmt = ((uint64_t)fmt[NIX_LSO_TUN_V4V4] | ++ (uint64_t)fmt[NIX_LSO_TUN_V4V6] << 8 | ++ (uint64_t)fmt[NIX_LSO_TUN_V6V4] << 16 | ++ (uint64_t)fmt[NIX_LSO_TUN_V6V6] << 24); ++ ++ fmt = dev->lso_udp_tun_idx; ++ dev->lso_tun_fmt |= ((uint64_t)fmt[NIX_LSO_TUN_V4V4] << 32 | ++ (uint64_t)fmt[NIX_LSO_TUN_V4V6] << 40 | ++ (uint64_t)fmt[NIX_LSO_TUN_V6V4] << 48 | ++ (uint64_t)fmt[NIX_LSO_TUN_V6V6] << 56); ++ + return 0; + } + +diff --git a/dpdk/drivers/net/octeontx2/otx2_ethdev.h b/dpdk/drivers/net/octeontx2/otx2_ethdev.h +index 99f0469d89..f33e9fcb09 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_ethdev.h ++++ b/dpdk/drivers/net/octeontx2/otx2_ethdev.h +@@ -177,6 +177,14 @@ enum nix_q_size_e { + nix_q_size_max + }; + ++enum nix_lso_tun_type { ++ NIX_LSO_TUN_V4V4, ++ NIX_LSO_TUN_V4V6, ++ NIX_LSO_TUN_V6V4, ++ NIX_LSO_TUN_V6V6, ++ NIX_LSO_TUN_MAX, ++}; ++ + struct otx2_qint { + struct rte_eth_dev *eth_dev; + uint8_t qintx; +@@ -271,7 +279,9 @@ struct otx2_eth_dev { + uint8_t tx_chan_cnt; + uint8_t lso_tsov4_idx; + uint8_t lso_tsov6_idx; +- uint8_t lso_base_idx; ++ uint8_t lso_udp_tun_idx[NIX_LSO_TUN_MAX]; ++ uint8_t lso_tun_idx[NIX_LSO_TUN_MAX]; ++ uint64_t lso_tun_fmt; + uint8_t mac_addr[RTE_ETHER_ADDR_LEN]; + uint8_t mkex_pfl_name[MKEX_NAME_LEN]; + uint8_t max_mac_entries; +@@ -354,6 +364,7 @@ struct otx2_eth_txq { + rte_iova_t fc_iova; + uint16_t sqes_per_sqb_log2; + int16_t nb_sqb_bufs_adj; ++ uint64_t lso_tun_fmt; + RTE_MARKER slow_path_start; + uint16_t nb_sqb_bufs; + uint16_t sq; +diff --git a/dpdk/drivers/net/octeontx2/otx2_ethdev_devargs.c b/dpdk/drivers/net/octeontx2/otx2_ethdev_devargs.c +index d4a85bf55e..3f9542331f 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_ethdev_devargs.c ++++ b/dpdk/drivers/net/octeontx2/otx2_ethdev_devargs.c +@@ -134,7 +134,7 @@ otx2_ethdev_parse_devargs(struct rte_devargs *devargs, struct otx2_eth_dev *dev) + { + uint16_t rss_size = NIX_RSS_RETA_SIZE; + uint16_t sqb_count = 
NIX_MAX_SQB; +- uint16_t flow_prealloc_size = 8; ++ uint16_t flow_prealloc_size = 1; + uint16_t switch_header_type = 0; + uint16_t flow_max_priority = 3; + uint16_t ipsec_in_max_spi = 1; +diff --git a/dpdk/drivers/net/octeontx2/otx2_ethdev_irq.c b/dpdk/drivers/net/octeontx2/otx2_ethdev_irq.c +index b121488faf..f69b16e848 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_ethdev_irq.c ++++ b/dpdk/drivers/net/octeontx2/otx2_ethdev_irq.c +@@ -372,7 +372,7 @@ oxt2_nix_register_cq_irqs(struct rte_eth_dev *eth_dev) + return -ENOMEM; + } + } +- /* VFIO vector zero is resereved for misc interrupt so ++ /* VFIO vector zero is reserved for misc interrupt so + * doing required adjustment. (b13bfab4cd) + */ + handle->intr_vec[q] = RTE_INTR_VEC_RXTX_OFFSET + vec; +diff --git a/dpdk/drivers/net/octeontx2/otx2_ethdev_ops.c b/dpdk/drivers/net/octeontx2/otx2_ethdev_ops.c +index 963cc285ed..99ddcb0170 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_ethdev_ops.c ++++ b/dpdk/drivers/net/octeontx2/otx2_ethdev_ops.c +@@ -17,7 +17,8 @@ otx2_nix_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu) + struct nix_frs_cfg *req; + int rc; + +- frame_size += NIX_TIMESYNC_RX_OFFSET * otx2_ethdev_is_ptp_en(dev); ++ if (dev->configured && otx2_ethdev_is_ptp_en(dev)) ++ frame_size += NIX_TIMESYNC_RX_OFFSET; + + /* Check if MTU is within the allowed range */ + if (frame_size < NIX_MIN_FRS || frame_size > NIX_MAX_FRS) +@@ -453,7 +454,7 @@ otx2_nix_fw_version_get(struct rte_eth_dev *eth_dev, char *fw_version, + rc = strlcpy(fw_version, (char *)dev->mkex_pfl_name, rc); + + rc += 1; /* Add the size of '\0' */ +- if (fw_size < (uint32_t)rc) ++ if (fw_size < (size_t)rc) + return rc; + + return 0; +@@ -533,8 +534,7 @@ otx2_nix_get_module_eeprom(struct rte_eth_dev *eth_dev, + struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev); + struct cgx_fw_data *rsp; + +- if (!info->data || !info->length || +- (info->offset + info->length > SFP_EEPROM_SIZE)) ++ if (info->offset + info->length > SFP_EEPROM_SIZE) + return -EINVAL; + + rsp = nix_get_fwdata(dev); +@@ -561,6 +561,11 @@ otx2_nix_info_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *devinfo) + devinfo->max_vfs = pci_dev->max_vfs; + devinfo->max_mtu = devinfo->max_rx_pktlen - NIX_L2_OVERHEAD; + devinfo->min_mtu = devinfo->min_rx_bufsize - NIX_L2_OVERHEAD; ++ if (dev->configured && otx2_ethdev_is_ptp_en(dev)) { ++ devinfo->max_mtu -= NIX_TIMESYNC_RX_OFFSET; ++ devinfo->min_mtu -= NIX_TIMESYNC_RX_OFFSET; ++ devinfo->max_rx_pktlen -= NIX_TIMESYNC_RX_OFFSET; ++ } + + devinfo->rx_offload_capa = dev->rx_offload_capa; + devinfo->tx_offload_capa = dev->tx_offload_capa; +diff --git a/dpdk/drivers/net/octeontx2/otx2_flow.c b/dpdk/drivers/net/octeontx2/otx2_flow.c +index a5900f349b..54dfff8f14 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_flow.c ++++ b/dpdk/drivers/net/octeontx2/otx2_flow.c +@@ -13,38 +13,27 @@ otx2_flow_free_all_resources(struct otx2_eth_dev *hw) + { + struct otx2_npc_flow_info *npc = &hw->npc_flow; + struct otx2_mbox *mbox = hw->mbox; +- struct otx2_mcam_ents_info *info; +- struct rte_bitmap *bmap; + struct rte_flow *flow; +- int entry_count = 0; + int rc, idx; + +- for (idx = 0; idx < npc->flow_max_priority; idx++) { +- info = &npc->flow_entry_info[idx]; +- entry_count += info->live_ent; +- } +- +- if (entry_count == 0) +- return 0; +- + /* Free all MCAM entries allocated */ + rc = otx2_flow_mcam_free_all_entries(mbox); + + /* Free any MCAM counters and delete flow list */ + for (idx = 0; idx < npc->flow_max_priority; idx++) { + while ((flow = 
TAILQ_FIRST(&npc->flow_list[idx])) != NULL) { +- if (flow->ctr_id != NPC_COUNTER_NONE) ++ if (flow->ctr_id != NPC_COUNTER_NONE) { ++ rc |= otx2_flow_mcam_clear_counter(mbox, ++ flow->ctr_id); + rc |= otx2_flow_mcam_free_counter(mbox, + flow->ctr_id); ++ } ++ ++ otx2_delete_prio_list_entry(npc, flow); + + TAILQ_REMOVE(&npc->flow_list[idx], flow, next); + rte_free(flow); +- bmap = npc->live_entries[flow->priority]; +- rte_bitmap_clear(bmap, flow->mcam_id); + } +- info = &npc->flow_entry_info[idx]; +- info->free_ent = 0; +- info->live_ent = 0; + } + return rc; + } +@@ -661,7 +650,6 @@ otx2_flow_destroy(struct rte_eth_dev *dev, + struct otx2_eth_dev *hw = dev->data->dev_private; + struct otx2_npc_flow_info *npc = &hw->npc_flow; + struct otx2_mbox *mbox = hw->mbox; +- struct rte_bitmap *bmap; + uint16_t match_id; + int rc; + +@@ -708,8 +696,7 @@ otx2_flow_destroy(struct rte_eth_dev *dev, + + TAILQ_REMOVE(&npc->flow_list[flow->priority], flow, next); + +- bmap = npc->live_entries[flow->priority]; +- rte_bitmap_clear(bmap, flow->mcam_id); ++ otx2_delete_prio_list_entry(npc, flow); + + rte_free(flow); + return 0; +@@ -963,12 +950,23 @@ flow_fetch_kex_cfg(struct otx2_eth_dev *dev) + return rc; + } + ++#define OTX2_MCAM_TOT_ENTRIES_96XX (4096) ++#define OTX2_MCAM_TOT_ENTRIES_98XX (16384) ++ ++static int otx2_mcam_tot_entries(struct otx2_eth_dev *dev) ++{ ++ if (otx2_dev_is_98xx(dev)) ++ return OTX2_MCAM_TOT_ENTRIES_98XX; ++ else ++ return OTX2_MCAM_TOT_ENTRIES_96XX; ++} ++ + int + otx2_flow_init(struct otx2_eth_dev *hw) + { +- uint8_t *mem = NULL, *nix_mem = NULL, *npc_mem = NULL; + struct otx2_npc_flow_info *npc = &hw->npc_flow; +- uint32_t bmap_sz; ++ uint32_t bmap_sz, tot_mcam_entries = 0, sz = 0; ++ uint8_t *nix_mem = NULL; + int rc = 0, idx; + + rc = flow_fetch_kex_cfg(hw); +@@ -980,61 +978,8 @@ otx2_flow_init(struct otx2_eth_dev *hw) + rte_atomic32_init(&npc->mark_actions); + npc->vtag_actions = 0; + +- npc->mcam_entries = NPC_MCAM_TOT_ENTRIES >> npc->keyw[NPC_MCAM_RX]; +- /* Free, free_rev, live and live_rev entries */ +- bmap_sz = rte_bitmap_get_memory_footprint(npc->mcam_entries); +- mem = rte_zmalloc(NULL, 4 * bmap_sz * npc->flow_max_priority, +- RTE_CACHE_LINE_SIZE); +- if (mem == NULL) { +- otx2_err("Bmap alloc failed"); +- rc = -ENOMEM; +- return rc; +- } +- +- npc->flow_entry_info = rte_zmalloc(NULL, npc->flow_max_priority +- * sizeof(struct otx2_mcam_ents_info), +- 0); +- if (npc->flow_entry_info == NULL) { +- otx2_err("flow_entry_info alloc failed"); +- rc = -ENOMEM; +- goto err; +- } +- +- npc->free_entries = rte_zmalloc(NULL, npc->flow_max_priority +- * sizeof(struct rte_bitmap *), +- 0); +- if (npc->free_entries == NULL) { +- otx2_err("free_entries alloc failed"); +- rc = -ENOMEM; +- goto err; +- } +- +- npc->free_entries_rev = rte_zmalloc(NULL, npc->flow_max_priority +- * sizeof(struct rte_bitmap *), +- 0); +- if (npc->free_entries_rev == NULL) { +- otx2_err("free_entries_rev alloc failed"); +- rc = -ENOMEM; +- goto err; +- } +- +- npc->live_entries = rte_zmalloc(NULL, npc->flow_max_priority +- * sizeof(struct rte_bitmap *), +- 0); +- if (npc->live_entries == NULL) { +- otx2_err("live_entries alloc failed"); +- rc = -ENOMEM; +- goto err; +- } +- +- npc->live_entries_rev = rte_zmalloc(NULL, npc->flow_max_priority +- * sizeof(struct rte_bitmap *), +- 0); +- if (npc->live_entries_rev == NULL) { +- otx2_err("live_entries_rev alloc failed"); +- rc = -ENOMEM; +- goto err; +- } ++ tot_mcam_entries = otx2_mcam_tot_entries(hw); ++ npc->mcam_entries = tot_mcam_entries >> 
npc->keyw[NPC_MCAM_RX]; + + npc->flow_list = rte_zmalloc(NULL, npc->flow_max_priority + * sizeof(struct otx2_flow_list), +@@ -1045,30 +990,17 @@ otx2_flow_init(struct otx2_eth_dev *hw) + goto err; + } + +- npc_mem = mem; ++ sz = npc->flow_max_priority * sizeof(struct otx2_prio_flow_list_head); ++ npc->prio_flow_list = rte_zmalloc(NULL, sz, 0); ++ if (npc->prio_flow_list == NULL) { ++ otx2_err("prio_flow_list alloc failed"); ++ rc = -ENOMEM; ++ goto err; ++ } ++ + for (idx = 0; idx < npc->flow_max_priority; idx++) { + TAILQ_INIT(&npc->flow_list[idx]); +- +- npc->free_entries[idx] = +- rte_bitmap_init(npc->mcam_entries, mem, bmap_sz); +- mem += bmap_sz; +- +- npc->free_entries_rev[idx] = +- rte_bitmap_init(npc->mcam_entries, mem, bmap_sz); +- mem += bmap_sz; +- +- npc->live_entries[idx] = +- rte_bitmap_init(npc->mcam_entries, mem, bmap_sz); +- mem += bmap_sz; +- +- npc->live_entries_rev[idx] = +- rte_bitmap_init(npc->mcam_entries, mem, bmap_sz); +- mem += bmap_sz; +- +- npc->flow_entry_info[idx].free_ent = 0; +- npc->flow_entry_info[idx].live_ent = 0; +- npc->flow_entry_info[idx].max_id = 0; +- npc->flow_entry_info[idx].min_id = ~(0); ++ TAILQ_INIT(&npc->prio_flow_list[idx]); + } + + npc->rss_grps = NIX_RSS_GRPS; +@@ -1093,18 +1025,8 @@ otx2_flow_init(struct otx2_eth_dev *hw) + err: + if (npc->flow_list) + rte_free(npc->flow_list); +- if (npc->live_entries_rev) +- rte_free(npc->live_entries_rev); +- if (npc->live_entries) +- rte_free(npc->live_entries); +- if (npc->free_entries_rev) +- rte_free(npc->free_entries_rev); +- if (npc->free_entries) +- rte_free(npc->free_entries); +- if (npc->flow_entry_info) +- rte_free(npc->flow_entry_info); +- if (npc_mem) +- rte_free(npc_mem); ++ if (npc->prio_flow_list) ++ rte_free(npc->prio_flow_list); + return rc; + } + +@@ -1122,16 +1044,11 @@ otx2_flow_fini(struct otx2_eth_dev *hw) + + if (npc->flow_list) + rte_free(npc->flow_list); +- if (npc->live_entries_rev) +- rte_free(npc->live_entries_rev); +- if (npc->live_entries) +- rte_free(npc->live_entries); +- if (npc->free_entries_rev) +- rte_free(npc->free_entries_rev); +- if (npc->free_entries) +- rte_free(npc->free_entries); +- if (npc->flow_entry_info) +- rte_free(npc->flow_entry_info); ++ ++ if (npc->prio_flow_list) { ++ rte_free(npc->prio_flow_list); ++ npc->prio_flow_list = NULL; ++ } + + return 0; + } +diff --git a/dpdk/drivers/net/octeontx2/otx2_flow.h b/dpdk/drivers/net/octeontx2/otx2_flow.h +index 30a823c8a7..7a62f4469f 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_flow.h ++++ b/dpdk/drivers/net/octeontx2/otx2_flow.h +@@ -34,7 +34,6 @@ enum { + /* 32 bytes from LDATA_CFG & 32 bytes from FLAGS_CFG */ + #define NPC_MAX_EXTRACT_DATA_LEN (64) + #define NPC_LDATA_LFLAG_LEN (16) +-#define NPC_MCAM_TOT_ENTRIES (4096) + #define NPC_MAX_KEY_NIBBLES (31) + /* Nibble offsets */ + #define NPC_LAYER_KEYX_SZ (3) +@@ -141,14 +140,6 @@ struct npc_get_datax_cfg { + struct npc_xtract_info flag_xtract[NPC_MAX_LD][NPC_MAX_LT]; + }; + +-struct otx2_mcam_ents_info { +- /* Current max & min values of mcam index */ +- uint32_t max_id; +- uint32_t min_id; +- uint32_t free_ent; +- uint32_t live_ent; +-}; +- + struct rte_flow { + uint8_t nix_intf; + uint32_t mcam_id; +@@ -164,6 +155,13 @@ struct rte_flow { + + TAILQ_HEAD(otx2_flow_list, rte_flow); + ++struct otx2_prio_flow_entry { ++ struct rte_flow *flow; ++ TAILQ_ENTRY(otx2_prio_flow_entry) next; ++}; ++ ++TAILQ_HEAD(otx2_prio_flow_list_head, otx2_prio_flow_entry); ++ + /* Accessed from ethdev private - otx2_eth_dev */ + struct otx2_npc_flow_info { + rte_atomic32_t 
mark_actions; +@@ -176,22 +174,9 @@ struct otx2_npc_flow_info { + otx2_dxcfg_t prx_dxcfg; /* intf, lid, lt, extract */ + otx2_fxcfg_t prx_fxcfg; /* Flag extract */ + otx2_ld_flags_t prx_lfcfg; /* KEX LD_Flags CFG */ +- /* mcam entry info per priority level: both free & in-use */ +- struct otx2_mcam_ents_info *flow_entry_info; +- /* Bitmap of free preallocated entries in ascending index & +- * descending priority +- */ +- struct rte_bitmap **free_entries; +- /* Bitmap of free preallocated entries in descending index & +- * ascending priority +- */ +- struct rte_bitmap **free_entries_rev; +- /* Bitmap of live entries in ascending index & descending priority */ +- struct rte_bitmap **live_entries; +- /* Bitmap of live entries in descending index & ascending priority */ +- struct rte_bitmap **live_entries_rev; + /* Priority bucket wise tail queue of all rte_flow resources */ + struct otx2_flow_list *flow_list; ++ struct otx2_prio_flow_list_head *prio_flow_list; + uint32_t rss_grps; /* rss groups supported */ + struct rte_bitmap *rss_grp_entries; + uint16_t channel; /*rx channel */ +@@ -401,4 +386,7 @@ int otx2_flow_parse_actions(struct rte_eth_dev *dev, + int otx2_flow_free_all_resources(struct otx2_eth_dev *hw); + + int otx2_flow_parse_mpls(struct otx2_parse_state *pst, int lid); ++ ++void otx2_delete_prio_list_entry(struct otx2_npc_flow_info *flow_info, ++ struct rte_flow *flow); + #endif /* __OTX2_FLOW_H__ */ +diff --git a/dpdk/drivers/net/octeontx2/otx2_flow_utils.c b/dpdk/drivers/net/octeontx2/otx2_flow_utils.c +index 7ed86ba742..4f4be7d69f 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_flow_utils.c ++++ b/dpdk/drivers/net/octeontx2/otx2_flow_utils.c +@@ -451,435 +451,455 @@ otx2_flow_keyx_compress(uint64_t *data, uint32_t nibble_mask) + } + + static int +-flow_first_set_bit(uint64_t slab) ++otx2_initialise_mcam_entry(struct otx2_mbox *mbox, ++ struct otx2_npc_flow_info *flow_info, ++ struct rte_flow *flow, int mcam_id) + { +- int num = 0; ++ struct npc_mcam_write_entry_req *req; ++ struct npc_mcam_write_entry_rsq *rsp; ++ int rc = 0, idx; + +- if ((slab & 0xffffffff) == 0) { +- num += 32; +- slab >>= 32; +- } +- if ((slab & 0xffff) == 0) { +- num += 16; +- slab >>= 16; +- } +- if ((slab & 0xff) == 0) { +- num += 8; +- slab >>= 8; +- } +- if ((slab & 0xf) == 0) { +- num += 4; +- slab >>= 4; ++ req = otx2_mbox_alloc_msg_npc_mcam_write_entry(mbox); ++ if (req == NULL) ++ return -ENOSPC; ++ req->set_cntr = 0; ++ req->cntr = 0; ++ req->entry = mcam_id; ++ ++ req->intf = (flow->nix_intf == NIX_INTF_RX) ? 
NPC_MCAM_RX : NPC_MCAM_TX; ++ req->enable_entry = 1; ++ req->entry_data.action = flow->npc_action; ++ req->entry_data.vtag_action = flow->vtag_action; ++ ++ for (idx = 0; idx < OTX2_MAX_MCAM_WIDTH_DWORDS; idx++) { ++ req->entry_data.kw[idx] = 0x0; ++ req->entry_data.kw_mask[idx] = 0x0; + } +- if ((slab & 0x3) == 0) { +- num += 2; +- slab >>= 2; ++ ++ if (flow->nix_intf == NIX_INTF_RX) { ++ req->entry_data.kw[0] |= (uint64_t)flow_info->channel; ++ req->entry_data.kw_mask[0] |= (BIT_ULL(12) - 1); ++ } else { ++ uint16_t pf_func = (flow->npc_action >> 4) & 0xffff; ++ ++ pf_func = rte_cpu_to_be_16(pf_func); ++ req->entry_data.kw[0] |= ((uint64_t)pf_func << 32); ++ req->entry_data.kw_mask[0] |= ((uint64_t)0xffff << 32); + } +- if ((slab & 0x1) == 0) +- num += 1; + +- return num; ++ rc = otx2_mbox_process_msg(mbox, (void *)&rsp); ++ if (rc != 0) { ++ otx2_err("npc: mcam initialisation write failed"); ++ return rc; ++ } ++ return 0; + } + + static int +-flow_shift_lv_ent(struct otx2_mbox *mbox, struct rte_flow *flow, +- struct otx2_npc_flow_info *flow_info, +- uint32_t old_ent, uint32_t new_ent) ++otx2_shift_mcam_entry(struct otx2_mbox *mbox, uint16_t old_ent, ++ uint16_t new_ent) + { + struct npc_mcam_shift_entry_req *req; + struct npc_mcam_shift_entry_rsp *rsp; +- struct otx2_flow_list *list; +- struct rte_flow *flow_iter; +- int rc = 0; +- +- otx2_npc_dbg("Old ent:%u new ent:%u priority:%u", old_ent, new_ent, +- flow->priority); +- +- list = &flow_info->flow_list[flow->priority]; ++ int rc = -ENOSPC; + + /* Old entry is disabled & it's contents are moved to new_entry, + * new entry is enabled finally. + */ + req = otx2_mbox_alloc_msg_npc_mcam_shift_entry(mbox); ++ if (req == NULL) ++ return rc; + req->curr_entry[0] = old_ent; + req->new_entry[0] = new_ent; + req->shift_count = 1; + +- otx2_mbox_msg_send(mbox, 0); +- rc = otx2_mbox_get_rsp(mbox, 0, (void *)&rsp); ++ rc = otx2_mbox_process_msg(mbox, (void *)&rsp); + if (rc) + return rc; + +- /* Remove old node from list */ +- TAILQ_FOREACH(flow_iter, list, next) { +- if (flow_iter->mcam_id == old_ent) +- TAILQ_REMOVE(list, flow_iter, next); +- } +- +- /* Insert node with new mcam id at right place */ +- TAILQ_FOREACH(flow_iter, list, next) { +- if (flow_iter->mcam_id > new_ent) +- TAILQ_INSERT_BEFORE(flow_iter, flow, next); +- } +- return rc; ++ return 0; + } + +-/* Exchange all required entries with a given priority level */ ++enum SHIFT_DIR { ++ SLIDE_ENTRIES_TO_LOWER_INDEX, ++ SLIDE_ENTRIES_TO_HIGHER_INDEX, ++}; ++ + static int +-flow_shift_ent(struct otx2_mbox *mbox, struct rte_flow *flow, +- struct otx2_npc_flow_info *flow_info, +- struct npc_mcam_alloc_entry_rsp *rsp, int dir, int prio_lvl) ++otx2_slide_mcam_entries(struct otx2_mbox *mbox, ++ struct otx2_npc_flow_info *flow_info, int prio, ++ uint16_t *free_mcam_id, int dir) + { +- struct rte_bitmap *fr_bmp, *fr_bmp_rev, *lv_bmp, *lv_bmp_rev, *bmp; +- uint32_t e_fr = 0, e_lv = 0, e, e_id = 0, mcam_entries; +- uint64_t fr_bit_pos = 0, lv_bit_pos = 0, bit_pos = 0; +- /* Bit position within the slab */ +- uint32_t sl_fr_bit_off = 0, sl_lv_bit_off = 0; +- /* Overall bit position of the start of slab */ +- /* free & live entry index */ +- int rc_fr = 0, rc_lv = 0, rc = 0, idx = 0; +- struct otx2_mcam_ents_info *ent_info; +- /* free & live bitmap slab */ +- uint64_t sl_fr = 0, sl_lv = 0, *sl; +- +- fr_bmp = flow_info->free_entries[prio_lvl]; +- fr_bmp_rev = flow_info->free_entries_rev[prio_lvl]; +- lv_bmp = flow_info->live_entries[prio_lvl]; +- lv_bmp_rev = flow_info->live_entries_rev[prio_lvl]; 
+- ent_info = &flow_info->flow_entry_info[prio_lvl]; +- mcam_entries = flow_info->mcam_entries; +- +- +- /* New entries allocated are always contiguous, but older entries +- * already in free/live bitmap can be non-contiguous: so return +- * shifted entries should be in non-contiguous format. +- */ +- while (idx <= rsp->count) { +- if (!sl_fr && !sl_lv) { +- /* Lower index elements to be exchanged */ +- if (dir < 0) { +- rc_fr = rte_bitmap_scan(fr_bmp, &e_fr, &sl_fr); +- rc_lv = rte_bitmap_scan(lv_bmp, &e_lv, &sl_lv); +- otx2_npc_dbg("Fwd slab rc fr %u rc lv %u " +- "e_fr %u e_lv %u", rc_fr, rc_lv, +- e_fr, e_lv); +- } else { +- rc_fr = rte_bitmap_scan(fr_bmp_rev, +- &sl_fr_bit_off, +- &sl_fr); +- rc_lv = rte_bitmap_scan(lv_bmp_rev, +- &sl_lv_bit_off, +- &sl_lv); +- +- otx2_npc_dbg("Rev slab rc fr %u rc lv %u " +- "e_fr %u e_lv %u", rc_fr, rc_lv, +- e_fr, e_lv); +- } +- } +- +- if (rc_fr) { +- fr_bit_pos = flow_first_set_bit(sl_fr); +- e_fr = sl_fr_bit_off + fr_bit_pos; +- otx2_npc_dbg("Fr_bit_pos 0x%" PRIx64, fr_bit_pos); +- } else { +- e_fr = ~(0); +- } +- +- if (rc_lv) { +- lv_bit_pos = flow_first_set_bit(sl_lv); +- e_lv = sl_lv_bit_off + lv_bit_pos; +- otx2_npc_dbg("Lv_bit_pos 0x%" PRIx64, lv_bit_pos); +- } else { +- e_lv = ~(0); +- } ++ uint16_t to_mcam_id = 0, from_mcam_id = 0; ++ struct otx2_prio_flow_list_head *list; ++ struct otx2_prio_flow_entry *curr = 0; ++ int rc = 0; + +- /* First entry is from free_bmap */ +- if (e_fr < e_lv) { +- bmp = fr_bmp; +- e = e_fr; +- sl = &sl_fr; +- bit_pos = fr_bit_pos; +- if (dir > 0) +- e_id = mcam_entries - e - 1; +- else +- e_id = e; +- otx2_npc_dbg("Fr e %u e_id %u", e, e_id); +- } else { +- bmp = lv_bmp; +- e = e_lv; +- sl = &sl_lv; +- bit_pos = lv_bit_pos; +- if (dir > 0) +- e_id = mcam_entries - e - 1; +- else +- e_id = e; +- +- otx2_npc_dbg("Lv e %u e_id %u", e, e_id); +- if (idx < rsp->count) +- rc = +- flow_shift_lv_ent(mbox, flow, +- flow_info, e_id, +- rsp->entry + idx); ++ list = &flow_info->prio_flow_list[prio]; ++ ++ to_mcam_id = *free_mcam_id; ++ if (dir == SLIDE_ENTRIES_TO_HIGHER_INDEX) ++ curr = TAILQ_LAST(list, otx2_prio_flow_list_head); ++ else if (dir == SLIDE_ENTRIES_TO_LOWER_INDEX) ++ curr = TAILQ_FIRST(list); ++ ++ while (curr) { ++ from_mcam_id = curr->flow->mcam_id; ++ if ((dir == SLIDE_ENTRIES_TO_HIGHER_INDEX && ++ from_mcam_id < to_mcam_id) || ++ (dir == SLIDE_ENTRIES_TO_LOWER_INDEX && ++ from_mcam_id > to_mcam_id)) { ++ /* Newly allocated entry and the source entry given to ++ * npc_mcam_shift_entry_req will be in disabled state. ++ * Initialise and enable before moving an entry into ++ * this mcam. ++ */ ++ rc = otx2_initialise_mcam_entry(mbox, flow_info, ++ curr->flow, to_mcam_id); ++ if (rc) ++ return rc; ++ rc = otx2_shift_mcam_entry(mbox, from_mcam_id, ++ to_mcam_id); ++ if (rc) ++ return rc; ++ ++ curr->flow->mcam_id = to_mcam_id; ++ to_mcam_id = from_mcam_id; + } + +- rte_bitmap_clear(bmp, e); +- rte_bitmap_set(bmp, rsp->entry + idx); +- /* Update entry list, use non-contiguous +- * list now. +- */ +- rsp->entry_list[idx] = e_id; +- *sl &= ~(1 << bit_pos); ++ if (dir == SLIDE_ENTRIES_TO_HIGHER_INDEX) ++ curr = TAILQ_PREV(curr, otx2_prio_flow_list_head, next); ++ else if (dir == SLIDE_ENTRIES_TO_LOWER_INDEX) ++ curr = TAILQ_NEXT(curr, next); ++ } + +- /* Update min & max entry identifiers in current +- * priority level. 
+- */ +- if (dir < 0) { +- ent_info->max_id = rsp->entry + idx; +- ent_info->min_id = e_id; +- } else { +- ent_info->max_id = e_id; +- ent_info->min_id = rsp->entry; +- } ++ *free_mcam_id = from_mcam_id; + +- idx++; +- } +- return rc; ++ return 0; + } + +-/* Validate if newly allocated entries lie in the correct priority zone +- * since NPC_MCAM_LOWER_PRIO & NPC_MCAM_HIGHER_PRIO don't ensure zone accuracy. +- * If not properly aligned, shift entries to do so ++/* ++ * The mcam_alloc request is first made with NPC_MCAM_LOWER_PRIO with the last ++ * entry in the requested priority level as the reference entry. If it fails, ++ * the alloc request is retried with NPC_MCAM_HIGHER_PRIO with the first entry ++ * in the next lower priority level as the reference entry. After obtaining ++ * the free MCAM from kernel, we check if it is at the right user requested ++ * priority level. If not, the flow rules are moved across MCAM entries till ++ * the user requested priority levels are met. ++ * The MCAM sorting algorithm works as below. ++ * For any given free MCAM obtained from the kernel, there are 3 possibilities. ++ * Case 1: ++ * There are entries belonging to higher user priority level(numerically ++ * lesser) in higher mcam indices. In this case, the entries with higher user ++ * priority are slided towards lower indices and a free entry is created in the ++ * higher indices. ++ * Example: ++ * Assume free entry = 1610, user requested priority = 2 and ++ * max user priority levels = 5 with below entries in respective priority ++ * levels. ++ * 0: 1630, 1635, 1641 ++ * 1: 1646, 1650, 1651 ++ * 2: 1652, 1655, 1660 ++ * 3: 1661, 1662, 1663, 1664 ++ * 4: 1665, 1667, 1670 ++ * ++ * Entries (1630, 1635, 1641, 1646, 1650, 1651) have to be slided down towards ++ * lower indices. ++ * Shifting sequence will be as below: ++ * 1610 <- 1630 <- 1635 <- 1641 <- 1646 <- 1650 <- 1651 ++ * Entry 1651 will be free-ed for writing the new flow. This entry will now ++ * become the head of priority level 2. ++ * ++ * Case 2: ++ * There are entries belonging to lower user priority level (numerically ++ * bigger) in lower mcam indices. In this case, the entries with lower user ++ * priority are slided towards higher indices and a free entry is created in the ++ * lower indices. ++ * ++ * Example: ++ * free entry = 1653, user requested priority = 0 ++ * 0: 1630, 1635, 1641 ++ * 1: 1646, 1650, 1651 ++ * 2: 1652, 1655, 1660 ++ * 3: 1661, 1662, 1663, 1664 ++ * 4: 1665, 1667, 1670 ++ * ++ * Entries (1646, 1650, 1651, 1652) have to be slided up towards higher ++ * indices. ++ * Shifting sequence will be as below: ++ * 1646 -> 1650 -> 1651 -> 1652 -> 1653 ++ * Entry 1646 will be free-ed for writing the new flow. This entry will now ++ * become the last element in priority level 0. ++ * ++ * Case 3: ++ * Free mcam is at the right place, ie, all higher user priority level ++ * mcams lie in lower indices and all lower user priority level mcams lie in ++ * higher mcam indices. ++ * ++ * The priority level lists are scanned first for case (1) and if the ++ * condition is found true, case(2) is skipped because they are mutually ++ * exclusive. For example, consider below state. 
++ * 0: 1630, 1635, 1641 ++ * 1: 1646, 1650, 1651 ++ * 2: 1652, 1655, 1660 ++ * 3: 1661, 1662, 1663, 1664 ++ * 4: 1665, 1667, 1670 ++ * free entry = 1610, user requested priority = 2 ++ * ++ * Case 1: Here the condition is; ++ * "if (requested_prio > prio_idx && free_mcam < tail->flow->mcam_id ){}" ++ * If this condition is true, it means at some higher priority level than ++ * requested priority level, there are entries at lower indices than the given ++ * free mcam. That is, we have found in levels 0,1 there is an mcam X which is ++ * greater than 1610. ++ * If, for any free entry and user req prio, the above condition is true, then ++ * the below case(2) condition will always be false since the lists are kept ++ * sorted. The case(2) condition is; ++ * "if (requested_prio < prio_idx && free_mcam > head->flow->mcam_id){}" ++ * There can't be entries at lower indices at priority level higher ++ * than the requested priority level. That is, here, at levels 3 & 4 there ++ * cannot be any entry greater than 1610. Because all entries in 3 & 4 must be ++ * greater than X which was found to be greater than 1610 earlier. + */ ++ + static int +-flow_validate_and_shift_prio_ent(struct otx2_mbox *mbox, struct rte_flow *flow, +- struct otx2_npc_flow_info *flow_info, +- struct npc_mcam_alloc_entry_rsp *rsp, +- int req_prio) ++otx2_sort_mcams_by_user_prio_level(struct otx2_mbox *mbox, ++ struct otx2_prio_flow_entry *flow_list_entry, ++ struct otx2_npc_flow_info *flow_info, ++ struct npc_mcam_alloc_entry_rsp *rsp) + { +- int prio_idx = 0, rc = 0, needs_shift = 0, idx, prio = flow->priority; +- struct otx2_mcam_ents_info *info = flow_info->flow_entry_info; +- int dir = (req_prio == NPC_MCAM_HIGHER_PRIO) ? 1 : -1; +- uint32_t tot_ent = 0; +- +- otx2_npc_dbg("Dir %d, priority = %d", dir, prio); +- +- if (dir < 0) +- prio_idx = flow_info->flow_max_priority - 1; +- +- /* Only live entries needs to be shifted, free entries can just be +- * moved by bits manipulation. +- */ +- +- /* For dir = -1(NPC_MCAM_LOWER_PRIO), when shifting, +- * NPC_MAX_PREALLOC_ENT are exchanged with adjoining higher priority +- * level entries(lower indexes). +- * +- * For dir = +1(NPC_MCAM_HIGHER_PRIO), during shift, +- * NPC_MAX_PREALLOC_ENT are exchanged with adjoining lower priority +- * level entries(higher indexes) with highest indexes. +- */ +- do { +- tot_ent = info[prio_idx].free_ent + info[prio_idx].live_ent; +- +- if (dir < 0 && prio_idx != prio && +- rsp->entry > info[prio_idx].max_id && tot_ent) { +- otx2_npc_dbg("Rsp entry %u prio idx %u " +- "max id %u", rsp->entry, prio_idx, +- info[prio_idx].max_id); +- +- needs_shift = 1; +- } else if ((dir > 0) && (prio_idx != prio) && +- (rsp->entry < info[prio_idx].min_id) && tot_ent) { +- otx2_npc_dbg("Rsp entry %u prio idx %u " +- "min id %u", rsp->entry, prio_idx, +- info[prio_idx].min_id); +- needs_shift = 1; ++ int requested_prio = flow_list_entry->flow->priority; ++ struct otx2_prio_flow_entry *head, *tail; ++ struct otx2_prio_flow_list_head *list; ++ uint16_t free_mcam = rsp->entry; ++ bool do_reverse_scan = true; ++ int prio_idx = 0, rc = 0; ++ ++ while (prio_idx <= flow_info->flow_max_priority - 1) { ++ list = &flow_info->prio_flow_list[prio_idx]; ++ tail = TAILQ_LAST(list, otx2_prio_flow_list_head); ++ ++ /* requested priority is lower than current level ++ * ie, numerically req prio is higher ++ */ ++ if (requested_prio > prio_idx && tail) { ++ /* but there are some mcams in current level ++ * at higher indices, ie, at priority lower ++ * than free_mcam. 
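The Case 1 slide described in the comment above can be walked through with a short standalone sketch (a plain array stands in for the per-priority flow lists; the driver performs the moves through otx2_shift_mcam_entry mailbox requests):

#include <stdio.h>

int main(void)
{
        /* Entries of user priority levels 0 and 1, sorted by MCAM index. */
        unsigned int higher_prio[] = { 1630, 1635, 1641, 1646, 1650, 1651 };
        unsigned int n = sizeof(higher_prio) / sizeof(higher_prio[0]);
        unsigned int free_mcam = 1610;  /* entry handed back by the kernel */
        unsigned int i;

        /* Slide every higher-priority entry one slot towards lower indices:
         * 1610 <- 1630 <- 1635 <- 1641 <- 1646 <- 1650 <- 1651
         */
        for (i = 0; i < n; i++) {
                unsigned int from = higher_prio[i];

                printf("move rule from MCAM %u to MCAM %u\n", from, free_mcam);
                higher_prio[i] = free_mcam;
                free_mcam = from;
        }

        /* 1651 is now free and becomes the head of priority level 2. */
        printf("new rule written at MCAM %u\n", free_mcam);
        return 0;
}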
++ */ ++ if (free_mcam < tail->flow->mcam_id) { ++ rc = otx2_slide_mcam_entries(mbox, flow_info, ++ prio_idx, &free_mcam, ++ SLIDE_ENTRIES_TO_LOWER_INDEX); ++ if (rc) ++ return rc; ++ do_reverse_scan = false; ++ } + } ++ prio_idx++; ++ } + +- otx2_npc_dbg("Needs_shift = %d", needs_shift); +- if (needs_shift) { +- needs_shift = 0; +- rc = flow_shift_ent(mbox, flow, flow_info, rsp, dir, +- prio_idx); +- } else { +- for (idx = 0; idx < rsp->count; idx++) +- rsp->entry_list[idx] = rsp->entry + idx; +- } +- } while ((prio_idx != prio) && (prio_idx += dir)); ++ prio_idx = flow_info->flow_max_priority - 1; ++ while (prio_idx && do_reverse_scan) { ++ list = &flow_info->prio_flow_list[prio_idx]; ++ head = TAILQ_FIRST(list); + ++ /* requested priority is higher than current level ++ * ie, numerically req prio is lower ++ */ ++ if (requested_prio < prio_idx && head) { ++ /* but free mcam is higher than lowest priority ++ * mcam in current level ++ */ ++ if (free_mcam > head->flow->mcam_id) { ++ rc = otx2_slide_mcam_entries(mbox, flow_info, ++ prio_idx, &free_mcam, ++ SLIDE_ENTRIES_TO_HIGHER_INDEX); ++ if (rc) ++ return rc; ++ } ++ } ++ prio_idx--; ++ } ++ rsp->entry = free_mcam; + return rc; + } + +-static int +-flow_find_ref_entry(struct otx2_npc_flow_info *flow_info, int *prio, +- int prio_lvl) ++static void ++otx2_insert_into_flow_list(struct otx2_npc_flow_info *flow_info, ++ struct otx2_prio_flow_entry *entry) + { +- struct otx2_mcam_ents_info *info = flow_info->flow_entry_info; +- int step = 1; +- +- while (step < flow_info->flow_max_priority) { +- if (((prio_lvl + step) < flow_info->flow_max_priority) && +- info[prio_lvl + step].live_ent) { +- *prio = NPC_MCAM_HIGHER_PRIO; +- return info[prio_lvl + step].min_id; +- } ++ struct otx2_prio_flow_list_head *list; ++ struct otx2_prio_flow_entry *curr; ++ ++ list = &flow_info->prio_flow_list[entry->flow->priority]; ++ curr = TAILQ_FIRST(list); + +- if (((prio_lvl - step) >= 0) && +- info[prio_lvl - step].live_ent) { +- otx2_npc_dbg("Prio_lvl %u live %u", prio_lvl - step, +- info[prio_lvl - step].live_ent); +- *prio = NPC_MCAM_LOWER_PRIO; +- return info[prio_lvl - step].max_id; ++ if (curr) { ++ while (curr) { ++ if (entry->flow->mcam_id > curr->flow->mcam_id) ++ curr = TAILQ_NEXT(curr, next); ++ else ++ break; + } +- step++; ++ if (curr) ++ TAILQ_INSERT_BEFORE(curr, entry, next); ++ else ++ TAILQ_INSERT_TAIL(list, entry, next); ++ } else { ++ TAILQ_INSERT_HEAD(list, entry, next); + } +- *prio = NPC_MCAM_ANY_PRIO; +- return 0; + } + + static int +-flow_fill_entry_cache(struct otx2_mbox *mbox, struct rte_flow *flow, +- struct otx2_npc_flow_info *flow_info, uint32_t *free_ent) ++otx2_allocate_mcam_entry(struct otx2_mbox *mbox, int prio, ++ struct npc_mcam_alloc_entry_rsp *rsp_local, ++ int ref_entry) + { +- struct rte_bitmap *free_bmp, *free_bmp_rev, *live_bmp, *live_bmp_rev; +- struct npc_mcam_alloc_entry_rsp rsp_local; + struct npc_mcam_alloc_entry_rsp *rsp_cmd; + struct npc_mcam_alloc_entry_req *req; + struct npc_mcam_alloc_entry_rsp *rsp; +- struct otx2_mcam_ents_info *info; +- uint16_t ref_ent, idx; +- int rc, prio; +- +- info = &flow_info->flow_entry_info[flow->priority]; +- free_bmp = flow_info->free_entries[flow->priority]; +- free_bmp_rev = flow_info->free_entries_rev[flow->priority]; +- live_bmp = flow_info->live_entries[flow->priority]; +- live_bmp_rev = flow_info->live_entries_rev[flow->priority]; +- +- ref_ent = flow_find_ref_entry(flow_info, &prio, flow->priority); ++ int rc = -ENOSPC; + + req = 
otx2_mbox_alloc_msg_npc_mcam_alloc_entry(mbox); ++ if (req == NULL) ++ return rc; + req->contig = 1; +- req->count = flow_info->flow_prealloc_size; ++ req->count = 1; + req->priority = prio; +- req->ref_entry = ref_ent; ++ req->ref_entry = ref_entry; + +- otx2_npc_dbg("Fill cache ref entry %u prio %u", ref_ent, prio); +- +- otx2_mbox_msg_send(mbox, 0); +- rc = otx2_mbox_get_rsp(mbox, 0, (void *)&rsp_cmd); ++ rc = otx2_mbox_process_msg(mbox, (void *)&rsp_cmd); + if (rc) + return rc; + +- rsp = &rsp_local; +- memcpy(rsp, rsp_cmd, sizeof(*rsp)); ++ if (!rsp_cmd->count) ++ return -ENOSPC; + +- otx2_npc_dbg("Alloc entry %u count %u , prio = %d", rsp->entry, +- rsp->count, prio); ++ memcpy(rsp_local, rsp_cmd, sizeof(*rsp)); + +- /* Non-first ent cache fill */ +- if (prio != NPC_MCAM_ANY_PRIO) { +- flow_validate_and_shift_prio_ent(mbox, flow, flow_info, rsp, +- prio); +- } else { +- /* Copy into response entry list */ +- for (idx = 0; idx < rsp->count; idx++) +- rsp->entry_list[idx] = rsp->entry + idx; +- } +- +- otx2_npc_dbg("Fill entry cache rsp count %u", rsp->count); +- /* Update free entries, reverse free entries list, +- * min & max entry ids. +- */ +- for (idx = 0; idx < rsp->count; idx++) { +- if (unlikely(rsp->entry_list[idx] < info->min_id)) +- info->min_id = rsp->entry_list[idx]; +- +- if (unlikely(rsp->entry_list[idx] > info->max_id)) +- info->max_id = rsp->entry_list[idx]; ++ return 0; ++} + +- /* Skip entry to be returned, not to be part of free +- * list. +- */ +- if (prio == NPC_MCAM_HIGHER_PRIO) { +- if (unlikely(idx == (rsp->count - 1))) { +- *free_ent = rsp->entry_list[idx]; +- continue; ++static void ++otx2_find_mcam_ref_entry(struct rte_flow *flow, ++ struct otx2_npc_flow_info *flow_info, int *prio, ++ int *ref_entry, int dir) ++{ ++ struct otx2_prio_flow_entry *head, *tail; ++ struct otx2_prio_flow_list_head *list; ++ int prio_idx = flow->priority; ++ ++ if (dir == NPC_MCAM_LOWER_PRIO) { ++ while (prio_idx >= 0) { ++ list = &flow_info->prio_flow_list[prio_idx]; ++ head = TAILQ_FIRST(list); ++ if (head) { ++ *prio = NPC_MCAM_LOWER_PRIO; ++ *ref_entry = head->flow->mcam_id; ++ return; + } +- } else { +- if (unlikely(!idx)) { +- *free_ent = rsp->entry_list[idx]; +- continue; ++ prio_idx--; ++ } ++ } else if (dir == NPC_MCAM_HIGHER_PRIO) { ++ prio_idx = flow->priority; ++ while (prio_idx <= flow_info->flow_max_priority - 1) { ++ list = &flow_info->prio_flow_list[prio_idx]; ++ tail = TAILQ_LAST(list, otx2_prio_flow_list_head); ++ if (tail) { ++ *prio = NPC_MCAM_HIGHER_PRIO; ++ *ref_entry = tail->flow->mcam_id; ++ return; + } ++ prio_idx++; + } +- info->free_ent++; +- rte_bitmap_set(free_bmp, rsp->entry_list[idx]); +- rte_bitmap_set(free_bmp_rev, flow_info->mcam_entries - +- rsp->entry_list[idx] - 1); +- +- otx2_npc_dbg("Final rsp entry %u rsp entry rev %u", +- rsp->entry_list[idx], +- flow_info->mcam_entries - rsp->entry_list[idx] - 1); + } ++ *prio = NPC_MCAM_ANY_PRIO; ++ *ref_entry = 0; ++} + +- otx2_npc_dbg("Cache free entry %u, rev = %u", *free_ent, +- flow_info->mcam_entries - *free_ent - 1); +- info->live_ent++; +- rte_bitmap_set(live_bmp, *free_ent); +- rte_bitmap_set(live_bmp_rev, flow_info->mcam_entries - *free_ent - 1); ++static int ++otx2_alloc_mcam_by_ref_entry(struct otx2_mbox *mbox, struct rte_flow *flow, ++ struct otx2_npc_flow_info *flow_info, ++ struct npc_mcam_alloc_entry_rsp *rsp_local) ++{ ++ int prio, ref_entry = 0, rc = 0, dir = NPC_MCAM_LOWER_PRIO; ++ bool retry_done = false; ++ ++retry: ++ otx2_find_mcam_ref_entry(flow, flow_info, &prio, &ref_entry, dir); 
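For reference, a self-contained version of the sorted per-priority list insertion done by otx2_insert_into_flow_list above, written with the standard <sys/queue.h> TAILQ macros and a hypothetical entry type:

#include <stdint.h>
#include <sys/queue.h>

struct prio_entry {
        uint16_t mcam_id;
        TAILQ_ENTRY(prio_entry) next;
};
TAILQ_HEAD(prio_list, prio_entry);

/* Keep the list ordered by ascending MCAM index, so the head is always the
 * lowest index (highest hardware priority) at this user priority level.
 */
void insert_sorted(struct prio_list *list, struct prio_entry *entry)
{
        struct prio_entry *curr;

        TAILQ_FOREACH(curr, list, next) {
                if (entry->mcam_id <= curr->mcam_id) {
                        TAILQ_INSERT_BEFORE(curr, entry, next);
                        return;
                }
        }
        TAILQ_INSERT_TAIL(list, entry, next);
}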
++ rc = otx2_allocate_mcam_entry(mbox, prio, rsp_local, ref_entry); ++ if (rc && !retry_done) { ++ otx2_info("npc: Lower priority entry not available. " ++ "Retrying for higher priority"); ++ ++ dir = NPC_MCAM_HIGHER_PRIO; ++ retry_done = true; ++ goto retry; ++ } else if (rc && retry_done) { ++ return rc; ++ } + + return 0; + } + + static int +-flow_check_preallocated_entry_cache(struct otx2_mbox *mbox, +- struct rte_flow *flow, +- struct otx2_npc_flow_info *flow_info) ++otx2_get_free_mcam_entry(struct otx2_mbox *mbox, struct rte_flow *flow, ++ struct otx2_npc_flow_info *flow_info) + { +- struct rte_bitmap *free, *free_rev, *live, *live_rev; +- uint32_t pos = 0, free_ent = 0, mcam_entries; +- struct otx2_mcam_ents_info *info; +- uint64_t slab = 0; +- int rc; ++ struct npc_mcam_alloc_entry_rsp rsp_local; ++ struct otx2_prio_flow_entry *new_entry; ++ int rc = 0; + +- otx2_npc_dbg("Flow priority %u", flow->priority); ++ rc = otx2_alloc_mcam_by_ref_entry(mbox, flow, flow_info, &rsp_local); + +- info = &flow_info->flow_entry_info[flow->priority]; ++ if (rc) ++ return rc; + +- free_rev = flow_info->free_entries_rev[flow->priority]; +- free = flow_info->free_entries[flow->priority]; +- live_rev = flow_info->live_entries_rev[flow->priority]; +- live = flow_info->live_entries[flow->priority]; +- mcam_entries = flow_info->mcam_entries; ++ new_entry = rte_zmalloc("otx2_rte_flow", sizeof(*new_entry), 0); ++ if (!new_entry) ++ return -ENOSPC; + +- if (info->free_ent) { +- rc = rte_bitmap_scan(free, &pos, &slab); +- if (rc) { +- /* Get free_ent from free entry bitmap */ +- free_ent = pos + __builtin_ctzll(slab); +- otx2_npc_dbg("Allocated from cache entry %u", free_ent); +- /* Remove from free bitmaps and add to live ones */ +- rte_bitmap_clear(free, free_ent); +- rte_bitmap_set(live, free_ent); +- rte_bitmap_clear(free_rev, +- mcam_entries - free_ent - 1); +- rte_bitmap_set(live_rev, +- mcam_entries - free_ent - 1); +- +- info->free_ent--; +- info->live_ent++; +- return free_ent; +- } ++ new_entry->flow = flow; + +- otx2_npc_dbg("No free entry:its a mess"); +- return -1; +- } ++ otx2_npc_dbg("kernel allocated MCAM entry %d", rsp_local.entry); + +- rc = flow_fill_entry_cache(mbox, flow, flow_info, &free_ent); ++ rc = otx2_sort_mcams_by_user_prio_level(mbox, new_entry, flow_info, ++ &rsp_local); + if (rc) +- return rc; ++ goto err; ++ ++ otx2_npc_dbg("allocated MCAM entry after sorting %d", rsp_local.entry); ++ flow->mcam_id = rsp_local.entry; ++ otx2_insert_into_flow_list(flow_info, new_entry); ++ ++ return rsp_local.entry; ++err: ++ rte_free(new_entry); ++ return rc; ++} ++ ++void ++otx2_delete_prio_list_entry(struct otx2_npc_flow_info *flow_info, ++ struct rte_flow *flow) ++{ ++ struct otx2_prio_flow_list_head *list; ++ struct otx2_prio_flow_entry *curr; + +- return free_ent; ++ list = &flow_info->prio_flow_list[flow->priority]; ++ curr = TAILQ_FIRST(list); ++ ++ if (!curr) ++ return; ++ ++ while (curr) { ++ if (flow->mcam_id == curr->flow->mcam_id) { ++ TAILQ_REMOVE(list, curr, next); ++ rte_free(curr); ++ break; ++ } ++ curr = TAILQ_NEXT(curr, next); ++ } + } + + int +@@ -902,14 +922,15 @@ otx2_flow_mcam_alloc_and_write(struct rte_flow *flow, struct otx2_mbox *mbox, + return rc; + } + +- entry = flow_check_preallocated_entry_cache(mbox, flow, flow_info); ++ entry = otx2_get_free_mcam_entry(mbox, flow, flow_info); + if (entry < 0) { +- otx2_err("Prealloc failed"); +- otx2_flow_mcam_free_counter(mbox, ctr); ++ otx2_err("MCAM allocation failed"); ++ if (use_ctr) ++ otx2_flow_mcam_free_counter(mbox, 
ctr); + return NPC_MCAM_ALLOC_FAILED; + } + +- if (pst->is_vf) { ++ if (pst->is_vf && flow->nix_intf == OTX2_INTF_RX) { + (void)otx2_mbox_alloc_msg_npc_read_base_steer_rule(mbox); + rc = otx2_mbox_process_msg(mbox, (void *)&base_rule_rsp); + if (rc) { +diff --git a/dpdk/drivers/net/octeontx2/otx2_ptp.c b/dpdk/drivers/net/octeontx2/otx2_ptp.c +index b8ef4c181d..c2a7fa1e8d 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_ptp.c ++++ b/dpdk/drivers/net/octeontx2/otx2_ptp.c +@@ -440,7 +440,7 @@ otx2_nix_read_clock(struct rte_eth_dev *eth_dev, uint64_t *clock) + /* This API returns the raw PTP HI clock value. Since LFs doesn't + * have direct access to PTP registers and it requires mbox msg + * to AF for this value. In fastpath reading this value for every +- * packet (which involes mbox call) becomes very expensive, hence ++ * packet (which involves mbox call) becomes very expensive, hence + * we should be able to derive PTP HI clock value from tsc by + * using freq_mult and clk_delta calculated during configure stage. + */ +diff --git a/dpdk/drivers/net/octeontx2/otx2_tm.c b/dpdk/drivers/net/octeontx2/otx2_tm.c +index fdd56697f1..6aff1f9587 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_tm.c ++++ b/dpdk/drivers/net/octeontx2/otx2_tm.c +@@ -2769,6 +2769,12 @@ otx2_nix_tm_node_stats_read(struct rte_eth_dev *eth_dev, uint32_t node_id, + return -EINVAL; + } + ++ if (!(tm_node->flags & NIX_TM_NODE_HWRES)) { ++ error->type = RTE_TM_ERROR_TYPE_NODE_ID; ++ error->message = "HW resources not allocated"; ++ return -EINVAL; ++ } ++ + /* Stats support only for leaf node or TL1 root */ + if (nix_tm_is_leaf(dev, tm_node->lvl)) { + reg = (((uint64_t)tm_node->id) << 32); +diff --git a/dpdk/drivers/net/octeontx2/otx2_tx.c b/dpdk/drivers/net/octeontx2/otx2_tx.c +index 439c46f61c..ff299f00b9 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_tx.c ++++ b/dpdk/drivers/net/octeontx2/otx2_tx.c +@@ -27,6 +27,7 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + struct otx2_eth_txq *txq = tx_queue; uint16_t i; + const rte_iova_t io_addr = txq->io_addr; + void *lmt_addr = txq->lmt_addr; ++ uint64_t lso_tun_fmt; + + NIX_XMIT_FC_OR_RETURN(txq, pkts); + +@@ -34,6 +35,7 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) { ++ lso_tun_fmt = txq->lso_tun_fmt; + for (i = 0; i < pkts; i++) + otx2_nix_xmit_prepare_tso(tx_pkts[i], flags); + } +@@ -45,7 +47,7 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + rte_io_wmb(); + + for (i = 0; i < pkts; i++) { +- otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags); ++ otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt); + /* Passing no of segdw as 4: HDR + EXT + SG + SMEM */ + otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], + tx_pkts[i]->ol_flags, 4, flags); +@@ -65,6 +67,7 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, + struct otx2_eth_txq *txq = tx_queue; uint64_t i; + const rte_iova_t io_addr = txq->io_addr; + void *lmt_addr = txq->lmt_addr; ++ uint64_t lso_tun_fmt; + uint16_t segdw; + + NIX_XMIT_FC_OR_RETURN(txq, pkts); +@@ -73,6 +76,7 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, + + /* Perform header writes before barrier for TSO */ + if (flags & NIX_TX_OFFLOAD_TSO_F) { ++ lso_tun_fmt = txq->lso_tun_fmt; + for (i = 0; i < pkts; i++) + otx2_nix_xmit_prepare_tso(tx_pkts[i], flags); + } +@@ -84,7 +88,7 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, + rte_io_wmb(); + + for (i = 0; i < pkts; i++) { +- 
otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags); ++ otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt); + segdw = otx2_nix_prepare_mseg(tx_pkts[i], cmd, flags); + otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], + tx_pkts[i]->ol_flags, segdw, +diff --git a/dpdk/drivers/net/octeontx2/otx2_tx.h b/dpdk/drivers/net/octeontx2/otx2_tx.h +index a97b160677..4b7bbb6527 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_tx.h ++++ b/dpdk/drivers/net/octeontx2/otx2_tx.h +@@ -61,7 +61,7 @@ otx2_nix_xmit_prepare_tstamp(uint64_t *cmd, const uint64_t *send_mem_desc, + /* Retrieving the default desc values */ + cmd[off] = send_mem_desc[6]; + +- /* Using compiler barier to avoid voilation of C ++ /* Using compiler barrier to avoid violation of C + * aliasing rules. + */ + rte_compiler_barrier(); +@@ -70,7 +70,7 @@ otx2_nix_xmit_prepare_tstamp(uint64_t *cmd, const uint64_t *send_mem_desc, + /* Packets for which PKT_TX_IEEE1588_TMST is not set, tx tstamp + * should not be recorded, hence changing the alg type to + * NIX_SENDMEMALG_SET and also changing send mem addr field to +- * next 8 bytes as it corrpt the actual tx tstamp registered ++ * next 8 bytes as it corrupts the actual tx tstamp registered + * address. + */ + send_mem->alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp); +@@ -197,7 +197,8 @@ otx2_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags) + } + + static __rte_always_inline void +-otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) ++otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags, ++ const uint64_t lso_tun_fmt) + { + struct nix_send_ext_s *send_hdr_ext; + struct nix_send_hdr_s *send_hdr; +@@ -339,14 +340,15 @@ otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags) + (ol_flags & PKT_TX_TUNNEL_MASK)) { + const uint8_t is_udp_tun = (NIX_UDP_TUN_BITMASK >> + ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & 0x1; ++ uint8_t shift = is_udp_tun ? 32 : 0; ++ ++ shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4); ++ shift += (!!(ol_flags & PKT_TX_IPV6) << 3); + + w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM; + w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0; + /* Update format for UDP tunneled packet */ +- send_hdr_ext->w0.lso_format += is_udp_tun ? 
2 : 6; +- +- send_hdr_ext->w0.lso_format += +- !!(ol_flags & PKT_TX_OUTER_IPV6) << 1; ++ send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift); + } + } + +diff --git a/dpdk/drivers/net/octeontx2/otx2_vlan.c b/dpdk/drivers/net/octeontx2/otx2_vlan.c +index 7357b06695..dd730a2574 100644 +--- a/dpdk/drivers/net/octeontx2/otx2_vlan.c ++++ b/dpdk/drivers/net/octeontx2/otx2_vlan.c +@@ -306,12 +306,12 @@ nix_vlan_mcam_config(struct rte_eth_dev *eth_dev, + (0xF & ~(NPC_LT_LB_CTAG ^ NPC_LT_LB_STAG_QINQ)) + << mkex->lb_lt_offset; + +- mcam_data = ((uint32_t)vlan_id << 16); +- mcam_mask = (BIT_ULL(16) - 1) << 16; ++ mcam_data = (uint16_t)vlan_id; ++ mcam_mask = (BIT_ULL(16) - 1); + otx2_mbox_memcpy(key_data + mkex->lb_xtract.key_off, +- &mcam_data, mkex->lb_xtract.len + 1); ++ &mcam_data, mkex->lb_xtract.len); + otx2_mbox_memcpy(key_mask + mkex->lb_xtract.key_off, +- &mcam_mask, mkex->lb_xtract.len + 1); ++ &mcam_mask, mkex->lb_xtract.len); + } + + /* Adds LB STAG flag to MCAM KW */ +@@ -953,7 +953,7 @@ static void nix_vlan_reinstall_vlan_filters(struct rte_eth_dev *eth_dev) + struct vlan_entry *entry; + int rc; + +- /* VLAN filters can't be set without setting filtern on */ ++ /* VLAN filters can't be set without setting filters on */ + rc = nix_vlan_handle_default_rx_entry(eth_dev, false, true, true); + if (rc) { + otx2_err("Failed to reinstall vlan filters"); +diff --git a/dpdk/drivers/net/pcap/rte_eth_pcap.c b/dpdk/drivers/net/pcap/rte_eth_pcap.c +index 40f4fa9021..2e8ac55b72 100644 +--- a/dpdk/drivers/net/pcap/rte_eth_pcap.c ++++ b/dpdk/drivers/net/pcap/rte_eth_pcap.c +@@ -621,9 +621,11 @@ eth_dev_stop(struct rte_eth_dev *dev) + + /* Special iface case. Single pcap is open and shared between tx/rx. */ + if (internals->single_iface) { +- pcap_close(pp->tx_pcap[0]); +- pp->tx_pcap[0] = NULL; +- pp->rx_pcap[0] = NULL; ++ if (pp->tx_pcap[0] != NULL) { ++ pcap_close(pp->tx_pcap[0]); ++ pp->tx_pcap[0] = NULL; ++ pp->rx_pcap[0] = NULL; ++ } + goto status_down; + } + +@@ -755,6 +757,8 @@ eth_dev_close(struct rte_eth_dev *dev) + PMD_LOG(INFO, "Closing pcap ethdev on NUMA socket %d", + rte_socket_id()); + ++ eth_dev_stop(dev); ++ + rte_free(dev->process_private); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) +@@ -827,7 +831,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, + + pcap_pkt_count = count_packets_in_pcap(pcap, pcap_q); + +- snprintf(ring_name, sizeof(ring_name), "PCAP_RING%" PRIu16, ++ snprintf(ring_name, sizeof(ring_name), "PCAP_RING%" PRIu32, + ring_number); + + pcap_q->pkts = rte_ring_create(ring_name, +@@ -1362,6 +1366,33 @@ eth_from_pcaps(struct rte_vdev_device *vdev, + return 0; + } + ++static void ++eth_release_pcaps(struct pmd_devargs *pcaps, ++ struct pmd_devargs *dumpers, ++ int single_iface) ++{ ++ unsigned int i; ++ ++ if (single_iface) { ++ if (pcaps->queue[0].pcap) ++ pcap_close(pcaps->queue[0].pcap); ++ return; ++ } ++ ++ for (i = 0; i < dumpers->num_of_queue; i++) { ++ if (dumpers->queue[i].dumper) ++ pcap_dump_close(dumpers->queue[i].dumper); ++ ++ if (dumpers->queue[i].pcap) ++ pcap_close(dumpers->queue[i].pcap); ++ } ++ ++ for (i = 0; i < pcaps->num_of_queue; i++) { ++ if (pcaps->queue[i].pcap) ++ pcap_close(pcaps->queue[i].pcap); ++ } ++} ++ + static int + pmd_pcap_probe(struct rte_vdev_device *dev) + { +@@ -1582,6 +1613,9 @@ pmd_pcap_probe(struct rte_vdev_device *dev) + free_kvlist: + rte_kvargs_free(kvlist); + ++ if (ret < 0) ++ eth_release_pcaps(&pcaps, &dumpers, devargs_all.single_iface); ++ + return ret; + } + +diff --git a/dpdk/drivers/net/pfe/pfe_ethdev.c 
b/dpdk/drivers/net/pfe/pfe_ethdev.c +index 3b079693fb..873d37d9be 100644 +--- a/dpdk/drivers/net/pfe/pfe_ethdev.c ++++ b/dpdk/drivers/net/pfe/pfe_ethdev.c +@@ -582,11 +582,6 @@ pfe_eth_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused) + struct rte_eth_link link, old; + unsigned int lstatus = 1; + +- if (dev == NULL) { +- PFE_PMD_ERR("Invalid device in link_update.\n"); +- return 0; +- } +- + memset(&old, 0, sizeof(old)); + memset(&link, 0, sizeof(struct rte_eth_link)); + +@@ -793,7 +788,7 @@ pfe_eth_init(struct rte_vdev_device *vdev, struct pfe *pfe, int id) + if (eth_dev == NULL) + return -ENOMEM; + +- /* Extract pltform data */ ++ /* Extract platform data */ + pfe_info = (struct ls1012a_pfe_platform_data *)&pfe->platform_data; + if (!pfe_info) { + PFE_PMD_ERR("pfe missing additional platform data"); +@@ -855,8 +850,6 @@ pfe_eth_init(struct rte_vdev_device *vdev, struct pfe *pfe, int id) + eth_dev->data->nb_rx_queues = 1; + eth_dev->data->nb_tx_queues = 1; + +- eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; +- + /* For link status, open the PFE CDEV; Error from this function + * is silently ignored; In case of error, the link status will not + * be available. +diff --git a/dpdk/drivers/net/pfe/pfe_hal.c b/dpdk/drivers/net/pfe/pfe_hal.c +index 0d25ec0523..0f8f9702e6 100644 +--- a/dpdk/drivers/net/pfe/pfe_hal.c ++++ b/dpdk/drivers/net/pfe/pfe_hal.c +@@ -185,7 +185,7 @@ gemac_set_mode(void *base, __rte_unused int mode) + { + u32 val = readl(base + EMAC_RCNTRL_REG); + +- /*Remove loopbank*/ ++ /* Remove loopback */ + val &= ~EMAC_RCNTRL_LOOP; + + /*Enable flow control and MII mode*/ +diff --git a/dpdk/drivers/net/pfe/pfe_hif.c b/dpdk/drivers/net/pfe/pfe_hif.c +index be5b2ada16..67e5586663 100644 +--- a/dpdk/drivers/net/pfe/pfe_hif.c ++++ b/dpdk/drivers/net/pfe/pfe_hif.c +@@ -113,9 +113,9 @@ pfe_hif_init_buffers(struct pfe_hif *hif) + * results, eth id, queue id from PFE block along with data. + * so we have to provide additional memory for each packet to + * HIF rx rings so that PFE block can write its headers. +- * so, we are giving the data pointor to HIF rings whose ++ * so, we are giving the data pointer to HIF rings whose + * calculation is as below: +- * mbuf->data_pointor - Required_header_size ++ * mbuf->data_pointer - Required_header_size + * + * We are utilizing the HEADROOM area to receive the PFE + * block headers. On packet reception, HIF driver will use +diff --git a/dpdk/drivers/net/pfe/pfe_hif.h b/dpdk/drivers/net/pfe/pfe_hif.h +index 6aaf904bb1..e8d5ba10e1 100644 +--- a/dpdk/drivers/net/pfe/pfe_hif.h ++++ b/dpdk/drivers/net/pfe/pfe_hif.h +@@ -8,7 +8,7 @@ + #define HIF_CLIENT_QUEUES_MAX 16 + #define HIF_RX_PKT_MIN_SIZE RTE_CACHE_LINE_SIZE + /* +- * HIF_TX_DESC_NT value should be always greter than 4, ++ * HIF_TX_DESC_NT value should be always greater than 4, + * Otherwise HIF_TX_POLL_MARK will become zero. + */ + #define HIF_RX_DESC_NT 64 +diff --git a/dpdk/drivers/net/pfe/pfe_hif_lib.c b/dpdk/drivers/net/pfe/pfe_hif_lib.c +index 799050dce3..6fe6d33d23 100644 +--- a/dpdk/drivers/net/pfe/pfe_hif_lib.c ++++ b/dpdk/drivers/net/pfe/pfe_hif_lib.c +@@ -38,7 +38,7 @@ pfe_hif_shm_clean(struct hif_shm *hif_shm) + * This function should be called before initializing HIF driver. 
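The headroom arrangement described in the pfe_hif.c comment above can be pictured with a plain buffer (hypothetical sizes; the real code operates on rte_mbuf headroom and PFE_PKT_HEADER_SZ):

#include <stdint.h>

#define HEADROOM   128  /* space reserved in front of the payload        */
#define HW_HDR_SZ   16  /* stand-in for the hardware-written header size */

struct pkt_buf {
        uint8_t mem[HEADROOM + 1500];
};

/* Payload normally starts after the headroom. */
static inline uint8_t *pkt_data(struct pkt_buf *b)
{
        return b->mem + HEADROOM;
}

/* Address posted to the RX ring: HW_HDR_SZ bytes before the payload, so the
 * hardware header lands in the headroom while the payload still begins at
 * pkt_data(). On completion the payload is recovered by adding HW_HDR_SZ
 * back to the posted address.
 */
static inline uint8_t *pkt_hw_addr(struct pkt_buf *b)
{
        return pkt_data(b) - HW_HDR_SZ;
}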
+ * + * @param[in] hif_shm Shared memory address location in DDR +- * @rerurn 0 - on succes, <0 on fail to initialize ++ * @return 0 - on succes, <0 on fail to initialize + */ + int + pfe_hif_shm_init(struct hif_shm *hif_shm, struct rte_mempool *mb_pool) +@@ -109,9 +109,9 @@ hif_lib_client_release_rx_buffers(struct hif_client_s *client) + for (ii = 0; ii < client->rx_q[qno].size; ii++) { + buf = (void *)desc->data; + if (buf) { +- /* Data pointor to mbuf pointor calculation: ++ /* Data pointer to mbuf pointer calculation: + * "Data - User private data - headroom - mbufsize" +- * Actual data pointor given to HIF BDs was ++ * Actual data pointer given to HIF BDs was + * "mbuf->data_offset - PFE_PKT_HEADER_SZ" + */ + buf = buf + PFE_PKT_HEADER_SZ +@@ -477,7 +477,7 @@ hif_hdr_write(struct hif_hdr *pkt_hdr, unsigned int + client_id, unsigned int qno, + u32 client_ctrl) + { +- /* Optimize the write since the destinaton may be non-cacheable */ ++ /* Optimize the write since the destination may be non-cacheable */ + if (!((unsigned long)pkt_hdr & 0x3)) { + ((u32 *)pkt_hdr)[0] = (client_ctrl << 16) | (qno << 8) | + client_id; +diff --git a/dpdk/drivers/net/qede/base/bcm_osal.h b/dpdk/drivers/net/qede/base/bcm_osal.h +index c5b5399282..9ea579bfc8 100644 +--- a/dpdk/drivers/net/qede/base/bcm_osal.h ++++ b/dpdk/drivers/net/qede/base/bcm_osal.h +@@ -14,7 +14,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -99,7 +98,7 @@ typedef intptr_t osal_int_ptr_t; + } while (0) + #define OSAL_VFREE(dev, memory) OSAL_FREE(dev, memory) + #define OSAL_MEM_ZERO(mem, size) bzero(mem, size) +-#define OSAL_MEMCPY(dst, src, size) rte_memcpy(dst, src, size) ++#define OSAL_MEMCPY(dst, src, size) memcpy(dst, src, size) + #define OSAL_MEMCMP(s1, s2, size) memcmp(s1, s2, size) + #define OSAL_MEMSET(dst, val, length) \ + memset(dst, val, length) +diff --git a/dpdk/drivers/net/qede/base/ecore_int.c b/dpdk/drivers/net/qede/base/ecore_int.c +index 4207b1853e..2c4aac9418 100644 +--- a/dpdk/drivers/net/qede/base/ecore_int.c ++++ b/dpdk/drivers/net/qede/base/ecore_int.c +@@ -928,7 +928,7 @@ static void ecore_int_attn_print(struct ecore_hwfn *p_hwfn, + bool b_clear) + { + /* @DPDK */ +- DP_NOTICE(p_hwfn->p_dev, false, "[block_id %d type %d]\n", id, type); ++ DP_VERBOSE(p_hwfn, ECORE_MSG_INTR, "[block_id %d type %d]\n", id, type); + } + + /** +diff --git a/dpdk/drivers/net/qede/base/ecore_vf.c b/dpdk/drivers/net/qede/base/ecore_vf.c +index db03bc494f..a36ae47c2b 100644 +--- a/dpdk/drivers/net/qede/base/ecore_vf.c ++++ b/dpdk/drivers/net/qede/base/ecore_vf.c +@@ -73,7 +73,7 @@ static void ecore_vf_pf_req_end(struct ecore_hwfn *p_hwfn, + #endif + static enum _ecore_status_t + ecore_send_msg2pf(struct ecore_hwfn *p_hwfn, +- u8 *done, u32 resp_size) ++ u8 *done, __rte_unused u32 resp_size) + { + union vfpf_tlvs *p_req = p_hwfn->vf_iov_info->vf2pf_request; + struct ustorm_trigger_vf_zone trigger; +@@ -86,9 +86,6 @@ ecore_send_msg2pf(struct ecore_hwfn *p_hwfn, + /* output tlvs list */ + ecore_dp_tlv_list(p_hwfn, p_req); + +- /* need to add the END TLV to the message size */ +- resp_size += sizeof(struct channel_list_end_tlv); +- + /* Send TLVs over HW channel */ + OSAL_MEMSET(&trigger, 0, sizeof(struct ustorm_trigger_vf_zone)); + trigger.vf_pf_msg_valid = 1; +diff --git a/dpdk/drivers/net/qede/qede_debug.c b/dpdk/drivers/net/qede/qede_debug.c +index 2297d245c4..18f2d988fb 100644 +--- a/dpdk/drivers/net/qede/qede_debug.c ++++ b/dpdk/drivers/net/qede/qede_debug.c +@@ -1809,7 +1809,8 @@ static u32 
qed_grc_dump_addr_range(struct ecore_hwfn *p_hwfn, + u8 split_id) + { + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; +- u8 port_id = 0, pf_id = 0, vf_id = 0, fid = 0; ++ u8 port_id = 0, pf_id = 0; ++ u16 vf_id = 0, fid = 0; + bool read_using_dmae = false; + u32 thresh; + +@@ -3522,7 +3523,7 @@ static enum dbg_status qed_grc_dump(struct ecore_hwfn *p_hwfn, + + /* Dump MCP HW Dump */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MCP_HW_DUMP) && +- !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP) && 1) ++ !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP)) + offset += qed_grc_dump_mcp_hw_dump(p_hwfn, + p_ptt, + dump_buf + offset, dump); +@@ -5983,7 +5984,7 @@ static char *qed_get_buf_ptr(void *buf, u32 offset) + /* Reads a param from the specified buffer. Returns the number of dwords read. + * If the returned str_param is NULL, the param is numeric and its value is + * returned in num_param. +- * Otheriwise, the param is a string and its pointer is returned in str_param. ++ * Otherwise, the param is a string and its pointer is returned in str_param. + */ + static u32 qed_read_param(u32 *dump_buf, + const char **param_name, +@@ -7558,7 +7559,7 @@ static enum dbg_status format_feature(struct ecore_hwfn *p_hwfn, + text_buf[i] = '\n'; + + +- /* Free the old dump_buf and point the dump_buf to the newly allocagted ++ /* Free the old dump_buf and point the dump_buf to the newly allocated + * and formatted text buffer. + */ + OSAL_VFREE(p_hwfn, feature->dump_buf); +diff --git a/dpdk/drivers/net/qede/qede_ethdev.c b/dpdk/drivers/net/qede/qede_ethdev.c +index ab5f5b1065..02a1d8945a 100644 +--- a/dpdk/drivers/net/qede/qede_ethdev.c ++++ b/dpdk/drivers/net/qede/qede_ethdev.c +@@ -237,9 +237,6 @@ qede_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) + static char ver_str[QEDE_PMD_DRV_VER_STR_SIZE]; + size_t size; + +- if (fw_ver == NULL) +- return 0; +- + if (IS_PF(edev)) + snprintf(ver_str, QEDE_PMD_DRV_VER_STR_SIZE, "%s", + QEDE_PMD_FW_VERSION); +@@ -361,7 +358,7 @@ qede_assign_rxtx_handlers(struct rte_eth_dev *dev, bool is_dummy) + static void + qede_alloc_etherdev(struct qede_dev *qdev, struct qed_dev_eth_info *info) + { +- rte_memcpy(&qdev->dev_info, info, sizeof(*info)); ++ qdev->dev_info = *info; + qdev->ops = qed_ops; + } + +@@ -2139,8 +2136,10 @@ int qede_rss_hash_update(struct rte_eth_dev *eth_dev, + /* RSS hash key */ + if (key) { + if (len > (ECORE_RSS_KEY_SIZE * sizeof(uint32_t))) { +- DP_ERR(edev, "RSS key length exceeds limit\n"); +- return -EINVAL; ++ len = ECORE_RSS_KEY_SIZE * sizeof(uint32_t); ++ DP_NOTICE(edev, false, ++ "RSS key length too big, trimmed to %d\n", ++ len); + } + DP_INFO(edev, "Applying user supplied hash key\n"); + rss_params.update_rss_key = 1; +@@ -2358,7 +2357,7 @@ static int qede_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) + if (fp->rxq != NULL) { + bufsz = (uint16_t)rte_pktmbuf_data_room_size( + fp->rxq->mb_pool) - RTE_PKTMBUF_HEADROOM; +- /* cache align the mbuf size to simplfy rx_buf_size ++ /* cache align the mbuf size to simplify rx_buf_size + * calculation + */ + bufsz = QEDE_FLOOR_TO_CACHE_LINE_SIZE(bufsz); +diff --git a/dpdk/drivers/net/qede/qede_filter.c b/dpdk/drivers/net/qede/qede_filter.c +index df5c07dfe5..5590b9d214 100644 +--- a/dpdk/drivers/net/qede/qede_filter.c ++++ b/dpdk/drivers/net/qede/qede_filter.c +@@ -388,10 +388,8 @@ qede_arfs_construct_pkt(struct rte_eth_dev *eth_dev, + ip6->vtc_flow = + rte_cpu_to_be_32(QEDE_FDIR_IPV6_DEFAULT_VTC_FLOW); + +- rte_memcpy(&ip6->src_addr, arfs->tuple.src_ipv6, +- 
IPV6_ADDR_LEN); +- rte_memcpy(&ip6->dst_addr, arfs->tuple.dst_ipv6, +- IPV6_ADDR_LEN); ++ memcpy(&ip6->src_addr, arfs->tuple.src_ipv6, IPV6_ADDR_LEN); ++ memcpy(&ip6->dst_addr, arfs->tuple.dst_ipv6, IPV6_ADDR_LEN); + len += sizeof(struct rte_ipv6_hdr); + params->ipv6 = true; + +@@ -821,12 +819,10 @@ qede_flow_parse_pattern(__rte_unused struct rte_eth_dev *dev, + const struct rte_flow_item_ipv6 *spec; + + spec = pattern->spec; +- rte_memcpy(flow->entry.tuple.src_ipv6, +- spec->hdr.src_addr, +- IPV6_ADDR_LEN); +- rte_memcpy(flow->entry.tuple.dst_ipv6, +- spec->hdr.dst_addr, +- IPV6_ADDR_LEN); ++ memcpy(flow->entry.tuple.src_ipv6, ++ spec->hdr.src_addr, IPV6_ADDR_LEN); ++ memcpy(flow->entry.tuple.dst_ipv6, ++ spec->hdr.dst_addr, IPV6_ADDR_LEN); + flow->entry.tuple.eth_proto = + RTE_ETHER_TYPE_IPV6; + } +diff --git a/dpdk/drivers/net/qede/qede_main.c b/dpdk/drivers/net/qede/qede_main.c +index caa9d1d4f6..e987b1b390 100644 +--- a/dpdk/drivers/net/qede/qede_main.c ++++ b/dpdk/drivers/net/qede/qede_main.c +@@ -381,7 +381,7 @@ qed_fill_dev_info(struct ecore_dev *edev, struct qed_dev_info *dev_info) + dev_info->mtu = ECORE_LEADING_HWFN(edev)->hw_info.mtu; + dev_info->dev_type = edev->type; + +- rte_memcpy(&dev_info->hw_mac, &edev->hwfns[0].hw_info.hw_mac_addr, ++ memcpy(&dev_info->hw_mac, &edev->hwfns[0].hw_info.hw_mac_addr, + RTE_ETHER_ADDR_LEN); + + dev_info->fw_major = FW_MAJOR_VERSION; +@@ -449,7 +449,7 @@ qed_fill_eth_dev_info(struct ecore_dev *edev, struct qed_dev_eth_info *info) + info->num_vlan_filters = RESC_NUM(&edev->hwfns[0], ECORE_VLAN) - + max_vf_vlan_filters; + +- rte_memcpy(&info->port_mac, &edev->hwfns[0].hw_info.hw_mac_addr, ++ memcpy(&info->port_mac, &edev->hwfns[0].hw_info.hw_mac_addr, + RTE_ETHER_ADDR_LEN); + } else { + ecore_vf_get_num_rxqs(ECORE_LEADING_HWFN(edev), +@@ -480,7 +480,7 @@ static void qed_set_name(struct ecore_dev *edev, char name[NAME_SIZE]) + { + int i; + +- rte_memcpy(edev->name, name, NAME_SIZE); ++ memcpy(edev->name, name, NAME_SIZE); + for_each_hwfn(edev, i) { + snprintf(edev->hwfns[i].name, NAME_SIZE, "%s-%d", name, i); + } +@@ -522,10 +522,9 @@ static void qed_fill_link(struct ecore_hwfn *hwfn, + + /* Prepare source inputs */ + if (IS_PF(hwfn->p_dev)) { +- rte_memcpy(¶ms, ecore_mcp_get_link_params(hwfn), +- sizeof(params)); +- rte_memcpy(&link, ecore_mcp_get_link_state(hwfn), sizeof(link)); +- rte_memcpy(&link_caps, ecore_mcp_get_link_capabilities(hwfn), ++ memcpy(¶ms, ecore_mcp_get_link_params(hwfn), sizeof(params)); ++ memcpy(&link, ecore_mcp_get_link_state(hwfn), sizeof(link)); ++ memcpy(&link_caps, ecore_mcp_get_link_capabilities(hwfn), + sizeof(link_caps)); + } else { + ecore_vf_read_bulletin(hwfn, &change); +diff --git a/dpdk/drivers/net/qede/qede_rxtx.c b/dpdk/drivers/net/qede/qede_rxtx.c +index 75d78cebb5..f357a8f258 100644 +--- a/dpdk/drivers/net/qede/qede_rxtx.c ++++ b/dpdk/drivers/net/qede/qede_rxtx.c +@@ -46,17 +46,14 @@ static inline int qede_alloc_rx_bulk_mbufs(struct qede_rx_queue *rxq, int count) + int i, ret = 0; + uint16_t idx; + +- if (count > QEDE_MAX_BULK_ALLOC_COUNT) +- count = QEDE_MAX_BULK_ALLOC_COUNT; ++ idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq); + + ret = rte_mempool_get_bulk(rxq->mb_pool, obj_p, count); + if (unlikely(ret)) { + PMD_RX_LOG(ERR, rxq, + "Failed to allocate %d rx buffers " + "sw_rx_prod %u sw_rx_cons %u mp entries %u free %u", +- count, +- rxq->sw_rx_prod & NUM_RX_BDS(rxq), +- rxq->sw_rx_cons & NUM_RX_BDS(rxq), ++ count, idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq), + rte_mempool_avail_count(rxq->mb_pool), + 
rte_mempool_in_use_count(rxq->mb_pool)); + return -ENOMEM; +@@ -87,7 +84,7 @@ static inline int qede_alloc_rx_bulk_mbufs(struct qede_rx_queue *rxq, int count) + * (MTU + Maximum L2 Header Size + 2) / ETH_RX_MAX_BUFF_PER_PKT + * 3) In regular mode - minimum rx_buf_size should be + * (MTU + Maximum L2 Header Size + 2) +- * In above cases +2 corrosponds to 2 bytes padding in front of L2 ++ * In above cases +2 corresponds to 2 bytes padding in front of L2 + * header. + * 4) rx_buf_size should be cacheline-size aligned. So considering + * criteria 1, we need to adjust the size to floor instead of ceil, +@@ -103,7 +100,7 @@ qede_calc_rx_buf_size(struct rte_eth_dev *dev, uint16_t mbufsz, + + if (dev->data->scattered_rx) { + /* per HW limitation, only ETH_RX_MAX_BUFF_PER_PKT number of +- * bufferes can be used for single packet. So need to make sure ++ * buffers can be used for single packet. So need to make sure + * mbuf size is sufficient enough for this. + */ + if ((mbufsz * ETH_RX_MAX_BUFF_PER_PKT) < +@@ -244,7 +241,7 @@ qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qid, + + /* Fix up RX buffer size */ + bufsz = (uint16_t)rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; +- /* cache align the mbuf size to simplfy rx_buf_size calculation */ ++ /* cache align the mbuf size to simplify rx_buf_size calculation */ + bufsz = QEDE_FLOOR_TO_CACHE_LINE_SIZE(bufsz); + if ((rxmode->offloads & DEV_RX_OFFLOAD_SCATTER) || + (max_rx_pkt_len + QEDE_ETH_OVERHEAD) > bufsz) { +@@ -1542,25 +1539,26 @@ qede_recv_pkts_regular(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + uint8_t bitfield_val; + #endif + uint8_t offset, flags, bd_num; +- ++ uint16_t count = 0; + + /* Allocate buffers that we used in previous loop */ + if (rxq->rx_alloc_count) { +- if (unlikely(qede_alloc_rx_bulk_mbufs(rxq, +- rxq->rx_alloc_count))) { ++ count = rxq->rx_alloc_count > QEDE_MAX_BULK_ALLOC_COUNT ? ++ QEDE_MAX_BULK_ALLOC_COUNT : rxq->rx_alloc_count; ++ ++ if (unlikely(qede_alloc_rx_bulk_mbufs(rxq, count))) { + struct rte_eth_dev *dev; + + PMD_RX_LOG(ERR, rxq, +- "New buffer allocation failed," +- "dropping incoming packetn"); ++ "New buffers allocation failed," ++ "dropping incoming packets\n"); + dev = &rte_eth_devices[rxq->port_id]; +- dev->data->rx_mbuf_alloc_failed += +- rxq->rx_alloc_count; +- rxq->rx_alloc_errors += rxq->rx_alloc_count; ++ dev->data->rx_mbuf_alloc_failed += count; ++ rxq->rx_alloc_errors += count; + return 0; + } + qede_update_rx_prod(qdev, rxq); +- rxq->rx_alloc_count = 0; ++ rxq->rx_alloc_count -= count; + } + + hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr); +@@ -1728,8 +1726,8 @@ qede_recv_pkts_regular(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + } + } + +- /* Request number of bufferes to be allocated in next loop */ +- rxq->rx_alloc_count = rx_alloc_count; ++ /* Request number of buffers to be allocated in next loop */ ++ rxq->rx_alloc_count += rx_alloc_count; + + rxq->rcv_pkts += rx_pkt; + rxq->rx_segs += rx_pkt; +@@ -1769,25 +1767,26 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + struct qede_agg_info *tpa_info = NULL; + uint32_t rss_hash; + int rx_alloc_count = 0; +- ++ uint16_t count = 0; + + /* Allocate buffers that we used in previous loop */ + if (rxq->rx_alloc_count) { +- if (unlikely(qede_alloc_rx_bulk_mbufs(rxq, +- rxq->rx_alloc_count))) { ++ count = rxq->rx_alloc_count > QEDE_MAX_BULK_ALLOC_COUNT ? 
++ QEDE_MAX_BULK_ALLOC_COUNT : rxq->rx_alloc_count; ++ ++ if (unlikely(qede_alloc_rx_bulk_mbufs(rxq, count))) { + struct rte_eth_dev *dev; + + PMD_RX_LOG(ERR, rxq, +- "New buffer allocation failed," +- "dropping incoming packetn"); ++ "New buffers allocation failed," ++ "dropping incoming packets\n"); + dev = &rte_eth_devices[rxq->port_id]; +- dev->data->rx_mbuf_alloc_failed += +- rxq->rx_alloc_count; +- rxq->rx_alloc_errors += rxq->rx_alloc_count; ++ dev->data->rx_mbuf_alloc_failed += count; ++ rxq->rx_alloc_errors += count; + return 0; + } + qede_update_rx_prod(qdev, rxq); +- rxq->rx_alloc_count = 0; ++ rxq->rx_alloc_count -= count; + } + + hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr); +@@ -2025,8 +2024,8 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) + } + } + +- /* Request number of bufferes to be allocated in next loop */ +- rxq->rx_alloc_count = rx_alloc_count; ++ /* Request number of buffers to be allocated in next loop */ ++ rxq->rx_alloc_count += rx_alloc_count; + + rxq->rcv_pkts += rx_pkt; + +@@ -2489,7 +2488,7 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) + /* Inner L2 header size in two byte words */ + inner_l2_hdr_size = (mbuf->l2_len - + MPLSINUDP_HDR_SIZE) / 2; +- /* Inner L4 header offset from the beggining ++ /* Inner L4 header offset from the beginning + * of inner packet in two byte words + */ + inner_l4_hdr_offset = (mbuf->l2_len - +diff --git a/dpdk/drivers/net/qede/qede_rxtx.h b/dpdk/drivers/net/qede/qede_rxtx.h +index fcb564a1bb..4baa9ec53f 100644 +--- a/dpdk/drivers/net/qede/qede_rxtx.h ++++ b/dpdk/drivers/net/qede/qede_rxtx.h +@@ -242,7 +242,7 @@ struct qede_fastpath { + struct qede_tx_queue *txq; + }; + +-/* This structure holds the inforation of fast path queues ++/* This structure holds the information of fast path queues + * belonging to individual engines in CMT mode. + */ + struct qede_fastpath_cmt { +diff --git a/dpdk/drivers/net/qede/qede_sriov.c b/dpdk/drivers/net/qede/qede_sriov.c +index 0b99a8d6fe..937d339fb8 100644 +--- a/dpdk/drivers/net/qede/qede_sriov.c ++++ b/dpdk/drivers/net/qede/qede_sriov.c +@@ -203,10 +203,10 @@ void qed_inform_vf_link_state(struct ecore_hwfn *hwfn) + if (!hwfn->pf_iov_info) + return; + +- rte_memcpy(¶ms, ecore_mcp_get_link_params(lead_hwfn), ++ memcpy(¶ms, ecore_mcp_get_link_params(lead_hwfn), + sizeof(params)); +- rte_memcpy(&link, ecore_mcp_get_link_state(lead_hwfn), sizeof(link)); +- rte_memcpy(&caps, ecore_mcp_get_link_capabilities(lead_hwfn), ++ memcpy(&link, ecore_mcp_get_link_state(lead_hwfn), sizeof(link)); ++ memcpy(&caps, ecore_mcp_get_link_capabilities(lead_hwfn), + sizeof(caps)); + + /* Update bulletin of all future possible VFs with link configuration */ +diff --git a/dpdk/drivers/net/sfc/meson.build b/dpdk/drivers/net/sfc/meson.build +index be888bd87a..61535f8673 100644 +--- a/dpdk/drivers/net/sfc/meson.build ++++ b/dpdk/drivers/net/sfc/meson.build +@@ -6,7 +6,7 @@ + # This software was jointly developed between OKTET Labs (under contract + # for Solarflare) and Solarflare Communications, Inc. 
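A standalone sketch of the capped replenish pattern introduced in the qede RX hunks above (hypothetical names; the driver caps at QEDE_MAX_BULK_ALLOC_COUNT and allocates through rte_mempool_get_bulk):

#include <stdio.h>

#define MAX_BULK 64  /* stand-in for QEDE_MAX_BULK_ALLOC_COUNT */

/* Pretend bulk allocator: returns 0 on success. */
static int alloc_bufs(unsigned int n) { (void)n; return 0; }

/* One receive call replenishes at most MAX_BULK buffers and carries the
 * remainder over to the next call instead of issuing one oversized request.
 */
static unsigned int replenish_once(unsigned int *pending)
{
        unsigned int count = *pending > MAX_BULK ? MAX_BULK : *pending;

        if (count == 0 || alloc_bufs(count) != 0)
                return 0;  /* nothing done; retry on the next poll */
        *pending -= count;
        return count;
}

int main(void)
{
        unsigned int pending = 200;  /* buffers consumed by earlier polls */

        while (pending != 0) {
                unsigned int done = replenish_once(&pending);

                printf("replenished %u, %u left\n", done, pending);
                if (done == 0)
                        break;
        }
        return 0;
}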
+ +-if (arch_subdir != 'x86' or not dpdk_conf.get('RTE_ARCH_64')) and (arch_subdir != 'arm' or not host_machine.cpu_family().startswith('aarch64')) ++if (arch_subdir != 'x86' and arch_subdir != 'arm') or (not dpdk_conf.get('RTE_ARCH_64')) + build = false + reason = 'only supported on x86_64 and aarch64' + endif +diff --git a/dpdk/drivers/net/sfc/sfc.h b/dpdk/drivers/net/sfc/sfc.h +index ed059e142f..9db465862b 100644 +--- a/dpdk/drivers/net/sfc/sfc.h ++++ b/dpdk/drivers/net/sfc/sfc.h +@@ -128,7 +128,6 @@ struct sfc_port { + unsigned int nb_mcast_addrs; + uint8_t *mcast_addrs; + +- rte_spinlock_t mac_stats_lock; + uint64_t *mac_stats_buf; + unsigned int mac_stats_nb_supported; + efsys_mem_t mac_stats_dma_mem; +@@ -140,6 +139,8 @@ struct sfc_port { + + uint32_t mac_stats_mask[EFX_MAC_STATS_MASK_NPAGES]; + ++ unsigned int mac_stats_by_id[EFX_MAC_NSTATS]; ++ + uint64_t ipackets; + }; + +@@ -398,7 +399,7 @@ int sfc_port_start(struct sfc_adapter *sa); + void sfc_port_stop(struct sfc_adapter *sa); + void sfc_port_link_mode_to_info(efx_link_mode_t link_mode, + struct rte_eth_link *link_info); +-int sfc_port_update_mac_stats(struct sfc_adapter *sa); ++int sfc_port_update_mac_stats(struct sfc_adapter *sa, boolean_t manual_update); + int sfc_port_reset_mac_stats(struct sfc_adapter *sa); + int sfc_set_rx_mode(struct sfc_adapter *sa); + int sfc_set_rx_mode_unchecked(struct sfc_adapter *sa); +diff --git a/dpdk/drivers/net/sfc/sfc_dp.c b/dpdk/drivers/net/sfc/sfc_dp.c +index 027dcaba23..03a6a7fddd 100644 +--- a/dpdk/drivers/net/sfc/sfc_dp.c ++++ b/dpdk/drivers/net/sfc/sfc_dp.c +@@ -65,7 +65,7 @@ sfc_dp_register(struct sfc_dp_list *head, struct sfc_dp *entry) + { + if (sfc_dp_find_by_name(head, entry->type, entry->name) != NULL) { + SFC_GENERIC_LOG(ERR, +- "sfc %s dapapath '%s' already registered", ++ "sfc %s datapath '%s' already registered", + entry->type == SFC_DP_RX ? "Rx" : + entry->type == SFC_DP_TX ? "Tx" : + "unknown", +diff --git a/dpdk/drivers/net/sfc/sfc_dp_rx.h b/dpdk/drivers/net/sfc/sfc_dp_rx.h +index f3e00e2e38..2ee3ee4c73 100644 +--- a/dpdk/drivers/net/sfc/sfc_dp_rx.h ++++ b/dpdk/drivers/net/sfc/sfc_dp_rx.h +@@ -151,7 +151,7 @@ typedef int (sfc_dp_rx_qcreate_t)(uint16_t port_id, uint16_t queue_id, + struct sfc_dp_rxq **dp_rxqp); + + /** +- * Free resources allocated for datapath recevie queue. ++ * Free resources allocated for datapath receive queue. + */ + typedef void (sfc_dp_rx_qdestroy_t)(struct sfc_dp_rxq *dp_rxq); + +@@ -184,7 +184,7 @@ typedef bool (sfc_dp_rx_qrx_ps_ev_t)(struct sfc_dp_rxq *dp_rxq, + /** + * Receive queue purge function called after queue flush. + * +- * Should be used to free unused recevie buffers. ++ * Should be used to free unused receive buffers. 
+ */ + typedef void (sfc_dp_rx_qpurge_t)(struct sfc_dp_rxq *dp_rxq); + +diff --git a/dpdk/drivers/net/sfc/sfc_ef100.h b/dpdk/drivers/net/sfc/sfc_ef100.h +index 97ddb00797..9cb5bf8f6f 100644 +--- a/dpdk/drivers/net/sfc/sfc_ef100.h ++++ b/dpdk/drivers/net/sfc/sfc_ef100.h +@@ -19,7 +19,7 @@ extern "C" { + * + * @param evq_prime Global address of the prime register + * @param evq_hw_index Event queue index +- * @param evq_read_ptr Masked event qeueu read pointer ++ * @param evq_read_ptr Masked event queue read pointer + */ + static inline void + sfc_ef100_evq_prime(volatile void *evq_prime, unsigned int evq_hw_index, +diff --git a/dpdk/drivers/net/sfc/sfc_ef100_rx.c b/dpdk/drivers/net/sfc/sfc_ef100_rx.c +index c1c56d0e75..d3def4e803 100644 +--- a/dpdk/drivers/net/sfc/sfc_ef100_rx.c ++++ b/dpdk/drivers/net/sfc/sfc_ef100_rx.c +@@ -46,6 +46,9 @@ + ((_ndesc) - 1 /* head must not step on tail */ - \ + 1 /* Rx error */ - 1 /* flush */) + ++/** Invalid user mark value when the mark should be treated as unset */ ++#define SFC_EF100_USER_MARK_INVALID 0 ++ + struct sfc_ef100_rx_sw_desc { + struct rte_mbuf *mbuf; + }; +@@ -208,7 +211,7 @@ sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class) + return EFX_WORD_FIELD(class, + ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) == + ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ? +- PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_GOOD; ++ PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD; + } + + static uint32_t +@@ -365,7 +368,6 @@ static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = { + + SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE), + SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE), +- SFC_EF100_RX_PREFIX_FIELD(USER_FLAG, B_FALSE), + SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE), + SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE), + SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE), +@@ -404,12 +406,16 @@ sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq, + ESF_GZ_RX_PREFIX_RSS_HASH); + } + +- if ((rxq->flags & SFC_EF100_RXQ_USER_MARK) && +- EFX_TEST_OWORD_BIT(rx_prefix[0], ESF_GZ_RX_PREFIX_USER_FLAG_LBN)) { +- ol_flags |= PKT_RX_FDIR_ID; ++ if (rxq->flags & SFC_EF100_RXQ_USER_MARK) { ++ uint32_t user_mark; ++ + /* EFX_OWORD_FIELD converts little-endian to CPU */ +- m->hash.fdir.hi = EFX_OWORD_FIELD(rx_prefix[0], +- ESF_GZ_RX_PREFIX_USER_MARK); ++ user_mark = EFX_OWORD_FIELD(rx_prefix[0], ++ ESF_GZ_RX_PREFIX_USER_MARK); ++ if (user_mark != SFC_EF100_USER_MARK_INVALID) { ++ ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; ++ m->hash.fdir.hi = user_mark; ++ } + } + + m->ol_flags = ol_flags; +@@ -780,7 +786,7 @@ sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr, + unsup_rx_prefix_fields = + efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout); + +- /* LENGTH and CLASS filds must always be present */ ++ /* LENGTH and CLASS fields must always be present */ + if ((unsup_rx_prefix_fields & + ((1U << EFX_RX_PREFIX_FIELD_LENGTH) | + (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0) +@@ -794,8 +800,7 @@ sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr, + rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH; + + if ((unsup_rx_prefix_fields & +- ((1U << EFX_RX_PREFIX_FIELD_USER_FLAG) | +- (1U << EFX_RX_PREFIX_FIELD_USER_MARK))) == 0) ++ (1U << EFX_RX_PREFIX_FIELD_USER_MARK)) == 0) + rxq->flags |= SFC_EF100_RXQ_USER_MARK; + else + rxq->flags &= ~SFC_EF100_RXQ_USER_MARK; +diff --git a/dpdk/drivers/net/sfc/sfc_ef10_essb_rx.c b/dpdk/drivers/net/sfc/sfc_ef10_essb_rx.c +index b167e01c59..92822b0c23 100644 +--- 
a/dpdk/drivers/net/sfc/sfc_ef10_essb_rx.c ++++ b/dpdk/drivers/net/sfc/sfc_ef10_essb_rx.c +@@ -625,7 +625,7 @@ sfc_ef10_essb_rx_qcreate(uint16_t port_id, uint16_t queue_id, + rxq->block_size, rxq->buf_stride); + sfc_ef10_essb_rx_info(&rxq->dp.dpq, + "max fill level is %u descs (%u bufs), " +- "refill threashold %u descs (%u bufs)", ++ "refill threshold %u descs (%u bufs)", + rxq->max_fill_level, + rxq->max_fill_level * rxq->block_size, + rxq->refill_threshold, +diff --git a/dpdk/drivers/net/sfc/sfc_ef10_rx_ev.h b/dpdk/drivers/net/sfc/sfc_ef10_rx_ev.h +index d15d24f4c1..00d0619f94 100644 +--- a/dpdk/drivers/net/sfc/sfc_ef10_rx_ev.h ++++ b/dpdk/drivers/net/sfc/sfc_ef10_rx_ev.h +@@ -40,7 +40,7 @@ sfc_ef10_rx_ev_to_offloads(const efx_qword_t rx_ev, struct rte_mbuf *m, + rte_cpu_to_le_64((1ull << ESF_DZ_RX_ECC_ERR_LBN) | + (1ull << ESF_DZ_RX_ECRC_ERR_LBN) | + (1ull << ESF_DZ_RX_PARSE_INCOMPLETE_LBN)))) { +- /* Zero packet type is used as a marker to dicard bad packets */ ++ /* Zero packet type is used as a marker to discard bad packets */ + goto done; + } + +diff --git a/dpdk/drivers/net/sfc/sfc_ethdev.c b/dpdk/drivers/net/sfc/sfc_ethdev.c +index a002e2c037..1b02759f93 100644 +--- a/dpdk/drivers/net/sfc/sfc_ethdev.c ++++ b/dpdk/drivers/net/sfc/sfc_ethdev.c +@@ -46,14 +46,6 @@ sfc_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) + int ret; + int rc; + +- /* +- * Return value of the callback is likely supposed to be +- * equal to or greater than 0, nevertheless, if an error +- * occurs, it will be desirable to pass it to the caller +- */ +- if ((fw_version == NULL) || (fw_size == 0)) +- return -EINVAL; +- + rc = efx_nic_get_fw_version(sa->nic, &enfi); + if (rc != 0) + return -rc; +@@ -94,7 +86,6 @@ sfc_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev); + struct sfc_rss *rss = &sas->rss; + struct sfc_mae *mae = &sa->mae; +- uint64_t txq_offloads_def = 0; + + sfc_log_init(sa, "entry"); + +@@ -146,11 +137,6 @@ sfc_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + dev_info->tx_offload_capa = sfc_tx_get_dev_offload_caps(sa) | + dev_info->tx_queue_offload_capa; + +- if (dev_info->tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) +- txq_offloads_def |= DEV_TX_OFFLOAD_MBUF_FAST_FREE; +- +- dev_info->default_txconf.offloads |= txq_offloads_def; +- + if (rss->context_type != EFX_RX_SCALE_UNAVAILABLE) { + uint64_t rte_hf = 0; + unsigned int i; +@@ -612,9 +598,9 @@ sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + uint64_t *mac_stats; + int ret; + +- rte_spinlock_lock(&port->mac_stats_lock); ++ sfc_adapter_lock(sa); + +- ret = sfc_port_update_mac_stats(sa); ++ ret = sfc_port_update_mac_stats(sa, B_FALSE); + if (ret != 0) + goto unlock; + +@@ -685,7 +671,7 @@ sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + } + + unlock: +- rte_spinlock_unlock(&port->mac_stats_lock); ++ sfc_adapter_unlock(sa); + SFC_ASSERT(ret >= 0); + return -ret; + } +@@ -697,12 +683,15 @@ sfc_stats_reset(struct rte_eth_dev *dev) + struct sfc_port *port = &sa->port; + int rc; + ++ sfc_adapter_lock(sa); ++ + if (sa->state != SFC_ADAPTER_STARTED) { + /* + * The operation cannot be done if port is not started; it + * will be scheduled to be done during the next port start + */ + port->mac_stats_reset_pending = B_TRUE; ++ sfc_adapter_unlock(sa); + return 0; + } + +@@ -710,6 +699,8 @@ sfc_stats_reset(struct rte_eth_dev *dev) + if (rc != 0) + sfc_err(sa, "failed to reset 
statistics (rc = %d)", rc); + ++ sfc_adapter_unlock(sa); ++ + SFC_ASSERT(rc >= 0); + return -rc; + } +@@ -725,9 +716,9 @@ sfc_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, + unsigned int i; + int nstats = 0; + +- rte_spinlock_lock(&port->mac_stats_lock); ++ sfc_adapter_lock(sa); + +- rc = sfc_port_update_mac_stats(sa); ++ rc = sfc_port_update_mac_stats(sa, B_FALSE); + if (rc != 0) { + SFC_ASSERT(rc > 0); + nstats = -rc; +@@ -747,7 +738,7 @@ sfc_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, + } + + unlock: +- rte_spinlock_unlock(&port->mac_stats_lock); ++ sfc_adapter_unlock(sa); + + return nstats; + } +@@ -782,19 +773,16 @@ sfc_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids, + struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev); + struct sfc_port *port = &sa->port; + uint64_t *mac_stats; +- unsigned int nb_supported = 0; +- unsigned int nb_written = 0; + unsigned int i; + int ret; + int rc; + +- if (unlikely(values == NULL) || +- unlikely((ids == NULL) && (n < port->mac_stats_nb_supported))) +- return port->mac_stats_nb_supported; ++ if (unlikely(ids == NULL || values == NULL)) ++ return -EINVAL; + +- rte_spinlock_lock(&port->mac_stats_lock); ++ sfc_adapter_lock(sa); + +- rc = sfc_port_update_mac_stats(sa); ++ rc = sfc_port_update_mac_stats(sa, B_FALSE); + if (rc != 0) { + SFC_ASSERT(rc > 0); + ret = -rc; +@@ -803,20 +791,22 @@ sfc_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids, + + mac_stats = port->mac_stats_buf; + +- for (i = 0; (i < EFX_MAC_NSTATS) && (nb_written < n); ++i) { +- if (!EFX_MAC_STAT_SUPPORTED(port->mac_stats_mask, i)) +- continue; +- +- if ((ids == NULL) || (ids[nb_written] == nb_supported)) +- values[nb_written++] = mac_stats[i]; ++ SFC_ASSERT(port->mac_stats_nb_supported <= ++ RTE_DIM(port->mac_stats_by_id)); + +- ++nb_supported; ++ for (i = 0; i < n; i++) { ++ if (ids[i] < port->mac_stats_nb_supported) { ++ values[i] = mac_stats[port->mac_stats_by_id[ids[i]]]; ++ } else { ++ ret = i; ++ goto unlock; ++ } + } + +- ret = nb_written; ++ ret = n; + + unlock: +- rte_spinlock_unlock(&port->mac_stats_lock); ++ sfc_adapter_unlock(sa); + + return ret; + } +@@ -828,29 +818,39 @@ sfc_xstats_get_names_by_id(struct rte_eth_dev *dev, + { + struct sfc_adapter *sa = sfc_adapter_by_eth_dev(dev); + struct sfc_port *port = &sa->port; +- unsigned int nb_supported = 0; +- unsigned int nb_written = 0; ++ unsigned int nb_supported; + unsigned int i; + +- if (unlikely(xstats_names == NULL) || +- unlikely((ids == NULL) && (size < port->mac_stats_nb_supported))) +- return port->mac_stats_nb_supported; ++ if (unlikely(xstats_names == NULL && ids != NULL) || ++ unlikely(xstats_names != NULL && ids == NULL)) ++ return -EINVAL; + +- for (i = 0; (i < EFX_MAC_NSTATS) && (nb_written < size); ++i) { +- if (!EFX_MAC_STAT_SUPPORTED(port->mac_stats_mask, i)) +- continue; ++ sfc_adapter_lock(sa); ++ ++ if (unlikely(xstats_names == NULL && ids == NULL)) { ++ nb_supported = port->mac_stats_nb_supported; ++ sfc_adapter_unlock(sa); ++ return nb_supported; ++ } + +- if ((ids == NULL) || (ids[nb_written] == nb_supported)) { +- char *name = xstats_names[nb_written++].name; ++ SFC_ASSERT(port->mac_stats_nb_supported <= ++ RTE_DIM(port->mac_stats_by_id)); + +- strlcpy(name, efx_mac_stat_name(sa->nic, i), ++ for (i = 0; i < size; i++) { ++ if (ids[i] < port->mac_stats_nb_supported) { ++ strlcpy(xstats_names[i].name, ++ efx_mac_stat_name(sa->nic, ++ port->mac_stats_by_id[ids[i]]), + sizeof(xstats_names[0].name)); ++ } else { ++ 
sfc_adapter_unlock(sa); ++ return i; + } +- +- ++nb_supported; + } + +- return nb_written; ++ sfc_adapter_unlock(sa); ++ ++ return size; + } + + static int +@@ -2206,7 +2206,6 @@ sfc_eth_dev_init(struct rte_eth_dev *dev) + + /* Copy PCI device info to the dev->data */ + rte_eth_copy_pci_info(dev, pci_dev); +- dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE; + + rc = sfc_kvargs_parse(sa); +diff --git a/dpdk/drivers/net/sfc/sfc_flow.c b/dpdk/drivers/net/sfc/sfc_flow.c +index 4321045d1a..95d1d7cd78 100644 +--- a/dpdk/drivers/net/sfc/sfc_flow.c ++++ b/dpdk/drivers/net/sfc/sfc_flow.c +@@ -1382,6 +1382,9 @@ sfc_flow_parse_rss(struct sfc_adapter *sa, + rxq_hw_index_max = rxq->hw_index; + } + ++ if (rxq_hw_index_max - rxq_hw_index_min + 1 > EFX_MAXRSS) ++ return -EINVAL; ++ + switch (action_rss->func) { + case RTE_ETH_HASH_FUNCTION_DEFAULT: + case RTE_ETH_HASH_FUNCTION_TOEPLITZ: +@@ -1516,9 +1519,8 @@ sfc_flow_filter_insert(struct sfc_adapter *sa, + uint8_t *rss_key; + + if (spec_filter->rss) { +- rss_spread = MIN(flow_rss->rxq_hw_index_max - +- flow_rss->rxq_hw_index_min + 1, +- EFX_MAXRSS); ++ rss_spread = flow_rss->rxq_hw_index_max - ++ flow_rss->rxq_hw_index_min + 1; + rss_hash_types = flow_rss->rss_hash_types; + rss_key = flow_rss->rss_key; + } else { +diff --git a/dpdk/drivers/net/sfc/sfc_intr.c b/dpdk/drivers/net/sfc/sfc_intr.c +index da32d393c2..3e5f86ffb0 100644 +--- a/dpdk/drivers/net/sfc/sfc_intr.c ++++ b/dpdk/drivers/net/sfc/sfc_intr.c +@@ -8,7 +8,7 @@ + */ + + /* +- * At the momemt of writing DPDK v16.07 has notion of two types of ++ * At the moment of writing DPDK v16.07 has notion of two types of + * interrupts: LSC (link status change) and RXQ (receive indication). + * It allows to register interrupt callback for entire device which is + * not intended to be used for receive indication (i.e. link status +diff --git a/dpdk/drivers/net/sfc/sfc_mae.c b/dpdk/drivers/net/sfc/sfc_mae.c +index 4ddfef5563..02f189eca5 100644 +--- a/dpdk/drivers/net/sfc/sfc_mae.c ++++ b/dpdk/drivers/net/sfc/sfc_mae.c +@@ -68,10 +68,7 @@ sfc_mae_attach(struct sfc_adapter *sa) + sfc_log_init(sa, "assign RTE switch port"); + switch_port_request.type = SFC_MAE_SWITCH_PORT_INDEPENDENT; + switch_port_request.entity_mportp = &entity_mport; +- /* +- * As of now, the driver does not support representors, so +- * RTE ethdev MPORT simply matches that of the entity. +- */ ++ /* RTE ethdev MPORT matches that of the entity for independent ports. */ + switch_port_request.ethdev_mportp = &entity_mport; + switch_port_request.ethdev_port_id = sas->port_id; + rc = sfc_mae_assign_switch_port(mae->switch_domain_id, +@@ -221,6 +218,7 @@ sfc_mae_outer_rule_enable(struct sfc_adapter *sa, + if (fw_rsrc->refcnt == 0) { + (void)efx_mae_outer_rule_remove(sa->nic, + &fw_rsrc->rule_id); ++ fw_rsrc->rule_id.id = EFX_MAE_RSRC_ID_INVALID; + } + return rc; + } +@@ -1395,7 +1393,6 @@ sfc_mae_rule_parse_item_tunnel(const struct rte_flow_item *item, + uint8_t supp_mask[sizeof(uint64_t)]; + const uint8_t *spec = NULL; + const uint8_t *mask = NULL; +- const void *def_mask; + int rc; + + /* +@@ -1417,12 +1414,11 @@ sfc_mae_rule_parse_item_tunnel(const struct rte_flow_item *item, + * sfc_mae_rule_encap_parse_init(). Default mask + * was also picked by that helper. Use it here. 
+ */ +- def_mask = ctx_mae->tunnel_def_mask; +- + rc = sfc_flow_parse_init(item, + (const void **)&spec, (const void **)&mask, +- (const void *)&supp_mask, def_mask, +- sizeof(def_mask), error); ++ (const void *)&supp_mask, ++ ctx_mae->tunnel_def_mask, ++ ctx_mae->tunnel_def_mask_size, error); + if (rc != 0) + return rc; + +@@ -1577,12 +1573,12 @@ sfc_mae_rule_process_outer(struct sfc_adapter *sa, + struct sfc_mae_outer_rule **rulep, + struct rte_flow_error *error) + { +- struct sfc_mae_outer_rule *rule; ++ efx_mae_rule_id_t invalid_rule_id = { .id = EFX_MAE_RSRC_ID_INVALID }; + int rc; + + if (ctx->encap_type == EFX_TUNNEL_PROTOCOL_NONE) { + *rulep = NULL; +- return 0; ++ goto no_or_id; + } + + SFC_ASSERT(ctx->match_spec_outer != NULL); +@@ -1610,21 +1606,27 @@ sfc_mae_rule_process_outer(struct sfc_adapter *sa, + /* The spec has now been tracked by the outer rule entry. */ + ctx->match_spec_outer = NULL; + ++no_or_id: + /* +- * Depending on whether we reuse an existing outer rule or create a +- * new one (see above), outer rule ID is either a valid value or +- * EFX_MAE_RSRC_ID_INVALID. Set it in the action rule match +- * specification (and the full mask, too) in order to have correct +- * class comparisons of the new rule with existing ones. +- * Also, action rule match specification will be validated shortly, +- * and having the full mask set for outer rule ID indicates that we +- * will use this field, and support for this field has to be checked. ++ * In MAE, lookup sequence comprises outer parse, outer rule lookup, ++ * inner parse (when some outer rule is hit) and action rule lookup. ++ * If the currently processed flow does not come with an outer rule, ++ * its action rule must be available only for packets which miss in ++ * outer rule table. Set OR_ID match field to 0xffffffff/0xffffffff ++ * in the action rule specification; this ensures correct behaviour. ++ * ++ * If, on the other hand, this flow does have an outer rule, its ID ++ * may be unknown at the moment (not yet allocated), but OR_ID mask ++ * has to be set to 0xffffffff anyway for correct class comparisons. ++ * When the outer rule has been allocated, this match field will be ++ * overridden by sfc_mae_outer_rule_enable() to use the right value. 
+ */ +- rule = *rulep; + rc = efx_mae_match_spec_outer_rule_id_set(ctx->match_spec_action, +- &rule->fw_rsrc.rule_id); ++ &invalid_rule_id); + if (rc != 0) { +- sfc_mae_outer_rule_del(sa, *rulep); ++ if (*rulep != NULL) ++ sfc_mae_outer_rule_del(sa, *rulep); ++ + *rulep = NULL; + + return rte_flow_error_set(error, rc, +@@ -1656,20 +1658,20 @@ sfc_mae_rule_encap_parse_init(struct sfc_adapter *sa, + case RTE_FLOW_ITEM_TYPE_VXLAN: + ctx->encap_type = EFX_TUNNEL_PROTOCOL_VXLAN; + ctx->tunnel_def_mask = &rte_flow_item_vxlan_mask; +- RTE_BUILD_BUG_ON(sizeof(ctx->tunnel_def_mask) != +- sizeof(rte_flow_item_vxlan_mask)); ++ ctx->tunnel_def_mask_size = ++ sizeof(rte_flow_item_vxlan_mask); + break; + case RTE_FLOW_ITEM_TYPE_GENEVE: + ctx->encap_type = EFX_TUNNEL_PROTOCOL_GENEVE; + ctx->tunnel_def_mask = &rte_flow_item_geneve_mask; +- RTE_BUILD_BUG_ON(sizeof(ctx->tunnel_def_mask) != +- sizeof(rte_flow_item_geneve_mask)); ++ ctx->tunnel_def_mask_size = ++ sizeof(rte_flow_item_geneve_mask); + break; + case RTE_FLOW_ITEM_TYPE_NVGRE: + ctx->encap_type = EFX_TUNNEL_PROTOCOL_NVGRE; + ctx->tunnel_def_mask = &rte_flow_item_nvgre_mask; +- RTE_BUILD_BUG_ON(sizeof(ctx->tunnel_def_mask) != +- sizeof(rte_flow_item_nvgre_mask)); ++ ctx->tunnel_def_mask_size = ++ sizeof(rte_flow_item_nvgre_mask); + break; + case RTE_FLOW_ITEM_TYPE_END: + break; +@@ -2001,6 +2003,9 @@ sfc_mae_rule_parse_action_port_id(struct sfc_adapter *sa, + uint16_t port_id; + int rc; + ++ if (conf->id > UINT16_MAX) ++ return EOVERFLOW; ++ + port_id = (conf->original != 0) ? sas->port_id : conf->id; + + rc = sfc_mae_switch_port_by_ethdev(mae->switch_domain_id, +@@ -2103,6 +2108,8 @@ sfc_mae_rule_parse_actions(struct sfc_adapter *sa, + efx_mae_actions_t *spec; + int rc; + ++ rte_errno = 0; ++ + if (actions == NULL) { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, NULL, +@@ -2146,7 +2153,7 @@ sfc_mae_rule_parse_actions(struct sfc_adapter *sa, + efx_mae_action_set_spec_fini(sa->nic, spec); + + fail_action_set_spec_init: +- if (rc > 0) { ++ if (rc > 0 && rte_errno == 0) { + rc = rte_flow_error_set(error, rc, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "Failed to process the action"); +diff --git a/dpdk/drivers/net/sfc/sfc_mae.h b/dpdk/drivers/net/sfc/sfc_mae.h +index 53ddead979..bf432638c1 100644 +--- a/dpdk/drivers/net/sfc/sfc_mae.h ++++ b/dpdk/drivers/net/sfc/sfc_mae.h +@@ -179,7 +179,8 @@ struct sfc_mae_parse_ctx { + * which part of the pattern is being parsed. + */ + const efx_mae_field_id_t *field_ids_remap; +- /* This points to a tunnel-specific default mask. */ ++ /* These two fields correspond to the tunnel-specific default mask. */ ++ size_t tunnel_def_mask_size; + const void *tunnel_def_mask; + bool match_mport_set; + struct sfc_mae_pattern_data pattern_data; +diff --git a/dpdk/drivers/net/sfc/sfc_port.c b/dpdk/drivers/net/sfc/sfc_port.c +index 4de13267d5..fc66430809 100644 +--- a/dpdk/drivers/net/sfc/sfc_port.c ++++ b/dpdk/drivers/net/sfc/sfc_port.c +@@ -26,7 +26,8 @@ + /** + * Update MAC statistics in the buffer. 
+ * +- * @param sa Adapter ++ * @param sa Adapter ++ * @param force_upload Flag to upload MAC stats in any case + * + * @return Status code + * @retval 0 Success +@@ -34,7 +35,7 @@ + * @retval ENOMEM Memory allocation failure + */ + int +-sfc_port_update_mac_stats(struct sfc_adapter *sa) ++sfc_port_update_mac_stats(struct sfc_adapter *sa, boolean_t force_upload) + { + struct sfc_port *port = &sa->port; + efsys_mem_t *esmp = &port->mac_stats_dma_mem; +@@ -43,17 +44,17 @@ sfc_port_update_mac_stats(struct sfc_adapter *sa) + unsigned int nb_attempts = 0; + int rc; + +- SFC_ASSERT(rte_spinlock_is_locked(&port->mac_stats_lock)); ++ SFC_ASSERT(sfc_adapter_is_locked(sa)); + + if (sa->state != SFC_ADAPTER_STARTED) +- return EINVAL; ++ return 0; + + /* + * If periodic statistics DMA'ing is off or if not supported, + * make a manual request and keep an eye on timer if need be + */ + if (!port->mac_stats_periodic_dma_supported || +- (port->mac_stats_update_period_ms == 0)) { ++ (port->mac_stats_update_period_ms == 0) || force_upload) { + if (port->mac_stats_update_period_ms != 0) { + uint64_t timestamp = sfc_get_system_msecs(); + +@@ -103,14 +104,13 @@ sfc_port_reset_sw_stats(struct sfc_adapter *sa) + int + sfc_port_reset_mac_stats(struct sfc_adapter *sa) + { +- struct sfc_port *port = &sa->port; + int rc; + +- rte_spinlock_lock(&port->mac_stats_lock); ++ SFC_ASSERT(sfc_adapter_is_locked(sa)); ++ + rc = efx_mac_stats_clear(sa->nic); + if (rc == 0) + sfc_port_reset_sw_stats(sa); +- rte_spinlock_unlock(&port->mac_stats_lock); + + return rc; + } +@@ -158,6 +158,27 @@ sfc_port_phy_caps_to_max_link_speed(uint32_t phy_caps) + + #endif + ++static void ++sfc_port_fill_mac_stats_info(struct sfc_adapter *sa) ++{ ++ unsigned int mac_stats_nb_supported = 0; ++ struct sfc_port *port = &sa->port; ++ unsigned int stat_idx; ++ ++ efx_mac_stats_get_mask(sa->nic, port->mac_stats_mask, ++ sizeof(port->mac_stats_mask)); ++ ++ for (stat_idx = 0; stat_idx < EFX_MAC_NSTATS; ++stat_idx) { ++ if (!EFX_MAC_STAT_SUPPORTED(port->mac_stats_mask, stat_idx)) ++ continue; ++ ++ port->mac_stats_by_id[mac_stats_nb_supported] = stat_idx; ++ mac_stats_nb_supported++; ++ } ++ ++ port->mac_stats_nb_supported = mac_stats_nb_supported; ++} ++ + int + sfc_port_start(struct sfc_adapter *sa) + { +@@ -166,7 +187,6 @@ sfc_port_start(struct sfc_adapter *sa) + uint32_t phy_adv_cap; + const uint32_t phy_pause_caps = + ((1u << EFX_PHY_CAP_PAUSE) | (1u << EFX_PHY_CAP_ASYM)); +- unsigned int i; + + sfc_log_init(sa, "entry"); + +@@ -260,12 +280,7 @@ sfc_port_start(struct sfc_adapter *sa) + port->mac_stats_reset_pending = B_FALSE; + } + +- efx_mac_stats_get_mask(sa->nic, port->mac_stats_mask, +- sizeof(port->mac_stats_mask)); +- +- for (i = 0, port->mac_stats_nb_supported = 0; i < EFX_MAC_NSTATS; ++i) +- if (EFX_MAC_STAT_SUPPORTED(port->mac_stats_mask, i)) +- port->mac_stats_nb_supported++; ++ sfc_port_fill_mac_stats_info(sa); + + port->mac_stats_update_generation = 0; + +@@ -353,6 +368,8 @@ sfc_port_stop(struct sfc_adapter *sa) + (void)efx_mac_stats_periodic(sa->nic, &sa->port.mac_stats_dma_mem, + 0, B_FALSE); + ++ sfc_port_update_mac_stats(sa, B_TRUE); ++ + efx_port_fini(sa->nic); + efx_filter_fini(sa->nic); + +@@ -416,8 +433,6 @@ sfc_port_attach(struct sfc_adapter *sa) + goto fail_mcast_addr_list_buf_alloc; + } + +- rte_spinlock_init(&port->mac_stats_lock); +- + rc = ENOMEM; + port->mac_stats_buf = rte_calloc_socket("mac_stats_buf", EFX_MAC_NSTATS, + sizeof(uint64_t), 0, +diff --git a/dpdk/drivers/net/sfc/sfc_rx.c b/dpdk/drivers/net/sfc/sfc_rx.c 
+index 3415dcad7e..8a7f51a6b2 100644 +--- a/dpdk/drivers/net/sfc/sfc_rx.c ++++ b/dpdk/drivers/net/sfc/sfc_rx.c +@@ -1028,7 +1028,7 @@ sfc_rx_mb_pool_buf_size(struct sfc_adapter *sa, struct rte_mempool *mb_pool) + /* Make sure that end padding does not write beyond the buffer */ + if (buf_aligned < nic_align_end) { + /* +- * Estimate space which can be lost. If guarnteed buffer ++ * Estimate space which can be lost. If guaranteed buffer + * size is odd, lost space is (nic_align_end - 1). More + * accurate formula is below. + */ +diff --git a/dpdk/drivers/net/sfc/sfc_switch.c b/dpdk/drivers/net/sfc/sfc_switch.c +index bdea2a2446..4aa9ab3060 100644 +--- a/dpdk/drivers/net/sfc/sfc_switch.c ++++ b/dpdk/drivers/net/sfc/sfc_switch.c +@@ -214,9 +214,9 @@ sfc_mae_assign_switch_domain(struct sfc_adapter *sa, + + fail_mem_alloc: + sfc_hw_switch_id_fini(sa, hw_switch_id); +- rte_spinlock_unlock(&sfc_mae_switch.lock); + + fail_hw_switch_id_init: ++ rte_spinlock_unlock(&sfc_mae_switch.lock); + return rc; + } + +diff --git a/dpdk/drivers/net/sfc/sfc_tx.c b/dpdk/drivers/net/sfc/sfc_tx.c +index 24602e3d10..7c6bac694d 100644 +--- a/dpdk/drivers/net/sfc/sfc_tx.c ++++ b/dpdk/drivers/net/sfc/sfc_tx.c +@@ -269,6 +269,7 @@ sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index) + static int + sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode) + { ++ uint64_t dev_tx_offload_cap = sfc_tx_get_dev_offload_caps(sa); + int rc = 0; + + switch (txmode->mq_mode) { +@@ -280,6 +281,13 @@ sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode) + rc = EINVAL; + } + ++ if ((dev_tx_offload_cap & DEV_TX_OFFLOAD_MBUF_FAST_FREE) != 0 && ++ (txmode->offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE) == 0) { ++ sfc_err(sa, "There is no FAST_FREE flag in the attempted Tx mode configuration"); ++ sfc_err(sa, "FAST_FREE is always active as per the current Tx datapath variant"); ++ rc = EINVAL; ++ } ++ + /* + * These features are claimed to be i40e-specific, + * but it does make sense to double-check their absence +diff --git a/dpdk/drivers/net/softnic/conn.c b/dpdk/drivers/net/softnic/conn.c +index 8b66580887..5b031358d5 100644 +--- a/dpdk/drivers/net/softnic/conn.c ++++ b/dpdk/drivers/net/softnic/conn.c +@@ -144,6 +144,7 @@ softnic_conn_free(struct softnic_conn *conn) + + free(conn->msg_out); + free(conn->msg_in); ++ free(conn->buf); + free(conn->prompt); + free(conn->welcome); + free(conn); +diff --git a/dpdk/drivers/net/softnic/rte_eth_softnic.c b/dpdk/drivers/net/softnic/rte_eth_softnic.c +index 3387ab485f..0ac3c0b0ec 100644 +--- a/dpdk/drivers/net/softnic/rte_eth_softnic.c ++++ b/dpdk/drivers/net/softnic/rte_eth_softnic.c +@@ -447,6 +447,7 @@ pmd_parse_args(struct pmd_params *p, const char *params) + { + struct rte_kvargs *kvlist; + int ret = 0; ++ char *firmware = NULL; + + kvlist = rte_kvargs_parse(params, pmd_valid_args); + if (kvlist == NULL) +@@ -454,7 +455,14 @@ pmd_parse_args(struct pmd_params *p, const char *params) + + /* Set default values */ + memset(p, 0, sizeof(*p)); +- p->firmware = SOFTNIC_FIRMWARE; ++ if (rte_strscpy(p->firmware, SOFTNIC_FIRMWARE, ++ sizeof(p->firmware)) < 0) { ++ PMD_LOG(WARNING, ++ "\"%s\": firmware path should be shorter than %zu", ++ SOFTNIC_FIRMWARE, sizeof(p->firmware)); ++ ret = -EINVAL; ++ goto out_free; ++ } + p->cpu_id = SOFTNIC_CPU_ID; + p->sc = SOFTNIC_SC; + p->tm.n_queues = SOFTNIC_TM_N_QUEUES; +@@ -475,11 +483,22 @@ pmd_parse_args(struct pmd_params *p, const char *params) + /* Firmware script (optional) */ + if 
(rte_kvargs_count(kvlist, PMD_PARAM_FIRMWARE) == 1) { + ret = rte_kvargs_process(kvlist, PMD_PARAM_FIRMWARE, +- &get_string, &p->firmware); ++ &get_string, &firmware); + if (ret < 0) + goto out_free; +- } + ++ if (rte_strscpy(p->firmware, firmware, ++ sizeof(p->firmware)) < 0) { ++ PMD_LOG(WARNING, ++ "\"%s\": " ++ "firmware path should be shorter than %zu", ++ firmware, sizeof(p->firmware)); ++ free(firmware); ++ ret = -EINVAL; ++ goto out_free; ++ } ++ free(firmware); ++ } + /* Connection listening port (optional) */ + if (rte_kvargs_count(kvlist, PMD_PARAM_CONN_PORT) == 1) { + ret = rte_kvargs_process(kvlist, PMD_PARAM_CONN_PORT, +@@ -628,7 +647,12 @@ pmd_probe(struct rte_vdev_device *vdev) + if (status) + return status; + +- p.name = name; ++ if (rte_strscpy(p.name, name, sizeof(p.name)) < 0) { ++ PMD_LOG(WARNING, ++ "\"%s\": device name should be shorter than %zu", ++ name, sizeof(p.name)); ++ return -EINVAL; ++ } + + /* Allocate and initialize soft ethdev private data */ + dev_private = pmd_init(&p); +diff --git a/dpdk/drivers/net/softnic/rte_eth_softnic_action.c b/dpdk/drivers/net/softnic/rte_eth_softnic_action.c +index 92c744dc9a..33be9552a6 100644 +--- a/dpdk/drivers/net/softnic/rte_eth_softnic_action.c ++++ b/dpdk/drivers/net/softnic/rte_eth_softnic_action.c +@@ -183,6 +183,7 @@ softnic_table_action_profile_free(struct pmd_internals *p) + break; + + TAILQ_REMOVE(&p->table_action_profile_list, profile, node); ++ rte_table_action_profile_free(profile->ap); + free(profile); + } + } +diff --git a/dpdk/drivers/net/softnic/rte_eth_softnic_cli.c b/dpdk/drivers/net/softnic/rte_eth_softnic_cli.c +index 932ec15f49..b04e78c6e0 100644 +--- a/dpdk/drivers/net/softnic/rte_eth_softnic_cli.c ++++ b/dpdk/drivers/net/softnic/rte_eth_softnic_cli.c +@@ -631,7 +631,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[0], + .shared_shaper_id = &shared_shaper_id[0], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[0]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[0]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -641,7 +641,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[1], + .shared_shaper_id = &shared_shaper_id[1], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[1]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[1]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -651,7 +651,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[2], + .shared_shaper_id = &shared_shaper_id[2], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[2]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[2]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -661,7 +661,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[3], + .shared_shaper_id = &shared_shaper_id[3], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[3]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[3]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -671,7 +671,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[4], + .shared_shaper_id = &shared_shaper_id[4], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[4]) ? 
1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -681,7 +681,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[5], + .shared_shaper_id = &shared_shaper_id[5], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[5]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[5]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -691,7 +691,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[6], + .shared_shaper_id = &shared_shaper_id[6], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[6]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[6]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -701,7 +701,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[7], + .shared_shaper_id = &shared_shaper_id[7], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[7]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[7]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -711,7 +711,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[8], + .shared_shaper_id = &shared_shaper_id[8], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[8]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[8]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -721,7 +721,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[9], + .shared_shaper_id = &shared_shaper_id[9], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[9]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[9]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -731,7 +731,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[10], + .shared_shaper_id = &shared_shaper_id[10], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[10]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[10]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -741,7 +741,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[11], + .shared_shaper_id = &shared_shaper_id[11], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[11]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[11]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +@@ -751,7 +751,7 @@ tmgr_hierarchy_default(struct pmd_internals *softnic, + .shaper_profile_id = params->shaper_profile_id.tc[12], + .shared_shaper_id = &shared_shaper_id[12], + .n_shared_shapers = +- (&params->shared_shaper_id.tc_valid[12]) ? 1 : 0, ++ (params->shared_shaper_id.tc_valid[12]) ? 1 : 0, + .nonleaf = { + .n_sp_priorities = 1, + }, +diff --git a/dpdk/drivers/net/softnic/rte_eth_softnic_flow.c b/dpdk/drivers/net/softnic/rte_eth_softnic_flow.c +index 7925bad1c0..dc3684d397 100644 +--- a/dpdk/drivers/net/softnic/rte_eth_softnic_flow.c ++++ b/dpdk/drivers/net/softnic/rte_eth_softnic_flow.c +@@ -930,7 +930,7 @@ flow_rule_match_acl_get(struct pmd_internals *softnic __rte_unused, + * Both *tmask* and *fmask* are byte arrays of size *tsize* and *fsize* + * respectively. + * They are located within a larger buffer at offsets *toffset* and *foffset* +- * respectivelly. Both *tmask* and *fmask* represent bitmasks for the larger ++ * respectively. Both *tmask* and *fmask* represent bitmasks for the larger + * buffer. + * Question: are the two masks equivalent? 
+ * +diff --git a/dpdk/drivers/net/softnic/rte_eth_softnic_internals.h b/dpdk/drivers/net/softnic/rte_eth_softnic_internals.h +index 9c8737c9e2..263c48ba3b 100644 +--- a/dpdk/drivers/net/softnic/rte_eth_softnic_internals.h ++++ b/dpdk/drivers/net/softnic/rte_eth_softnic_internals.h +@@ -28,14 +28,15 @@ + #include "conn.h" + + #define NAME_SIZE 64 ++#define SOFTNIC_PATH_MAX 4096 + + /** + * PMD Parameters + */ + + struct pmd_params { +- const char *name; +- const char *firmware; ++ char name[NAME_SIZE]; ++ char firmware[SOFTNIC_PATH_MAX]; + uint16_t conn_port; + uint32_t cpu_id; + int sc; /**< Service cores. */ +diff --git a/dpdk/drivers/net/tap/rte_eth_tap.c b/dpdk/drivers/net/tap/rte_eth_tap.c +index 2542de3065..dcc12f32e7 100644 +--- a/dpdk/drivers/net/tap/rte_eth_tap.c ++++ b/dpdk/drivers/net/tap/rte_eth_tap.c +@@ -67,6 +67,7 @@ + + /* IPC key for queue fds sync */ + #define TAP_MP_KEY "tap_mp_sync_queues" ++#define TAP_MP_REQ_START_RXTX "tap_mp_req_start_rxtx" + + #define TAP_IOV_DEFAULT_MAX 1024 + +@@ -342,15 +343,21 @@ tap_verify_csum(struct rte_mbuf *mbuf) + rte_pktmbuf_data_len(mbuf)) + return; + } else { +- /* IPv6 extensions are not supported */ ++ /* - RTE_PTYPE_L3_IPV4_EXT_UNKNOWN cannot happen because ++ * mbuf->packet_type is filled by rte_net_get_ptype() which ++ * never returns this value. ++ * - IPv6 extensions are not supported. ++ */ + return; + } + if (l4 == RTE_PTYPE_L4_UDP || l4 == RTE_PTYPE_L4_TCP) { ++ int cksum_ok; ++ + l4_hdr = rte_pktmbuf_mtod_offset(mbuf, void *, l2_len + l3_len); + /* Don't verify checksum for multi-segment packets. */ + if (mbuf->nb_segs > 1) + return; +- if (l3 == RTE_PTYPE_L3_IPV4) { ++ if (l3 == RTE_PTYPE_L3_IPV4 || l3 == RTE_PTYPE_L3_IPV4_EXT) { + if (l4 == RTE_PTYPE_L4_UDP) { + udp_hdr = (struct rte_udp_hdr *)l4_hdr; + if (udp_hdr->dgram_cksum == 0) { +@@ -363,13 +370,13 @@ tap_verify_csum(struct rte_mbuf *mbuf) + return; + } + } +- cksum = ~rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr); +- } else if (l3 == RTE_PTYPE_L3_IPV6) { +- cksum = ~rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr); ++ cksum = rte_ipv4_udptcp_cksum(l3_hdr, l4_hdr); ++ } else { /* l3 == RTE_PTYPE_L3_IPV6, checked above */ ++ cksum = rte_ipv6_udptcp_cksum(l3_hdr, l4_hdr); + } +- mbuf->ol_flags |= cksum ? +- PKT_RX_L4_CKSUM_BAD : +- PKT_RX_L4_CKSUM_GOOD; ++ cksum_ok = (cksum == 0) || (cksum == 0xffff); ++ mbuf->ol_flags |= cksum_ok ? 
++ PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD; + } + } + +@@ -544,7 +551,7 @@ tap_tx_l4_cksum(uint16_t *l4_cksum, uint16_t l4_phdr_cksum, + } + } + +-/* Accumaulate L4 raw checksums */ ++/* Accumulate L4 raw checksums */ + static void + tap_tx_l4_add_rcksum(char *l4_data, unsigned int l4_len, uint16_t *l4_cksum, + uint32_t *l4_raw_cksum) +@@ -899,11 +906,49 @@ tap_link_set_up(struct rte_eth_dev *dev) + return tap_ioctl(pmd, SIOCSIFFLAGS, &ifr, 1, LOCAL_AND_REMOTE); + } + ++static int ++tap_mp_req_on_rxtx(struct rte_eth_dev *dev) ++{ ++ struct rte_mp_msg msg; ++ struct ipc_queues *request_param = (struct ipc_queues *)msg.param; ++ int err; ++ int fd_iterator = 0; ++ struct pmd_process_private *process_private = dev->process_private; ++ int i; ++ ++ memset(&msg, 0, sizeof(msg)); ++ strlcpy(msg.name, TAP_MP_REQ_START_RXTX, sizeof(msg.name)); ++ strlcpy(request_param->port_name, dev->data->name, sizeof(request_param->port_name)); ++ msg.len_param = sizeof(*request_param); ++ for (i = 0; i < dev->data->nb_tx_queues; i++) { ++ msg.fds[fd_iterator++] = process_private->txq_fds[i]; ++ msg.num_fds++; ++ request_param->txq_count++; ++ } ++ for (i = 0; i < dev->data->nb_rx_queues; i++) { ++ msg.fds[fd_iterator++] = process_private->rxq_fds[i]; ++ msg.num_fds++; ++ request_param->rxq_count++; ++ } ++ ++ err = rte_mp_sendmsg(&msg); ++ if (err < 0) { ++ TAP_LOG(ERR, "Failed to send start req to secondary %d", ++ rte_errno); ++ return -1; ++ } ++ ++ return 0; ++} ++ + static int + tap_dev_start(struct rte_eth_dev *dev) + { + int err, i; + ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) ++ tap_mp_req_on_rxtx(dev); ++ + err = tap_intr_handle_set(dev, 1); + if (err) + return err; +@@ -920,6 +965,34 @@ tap_dev_start(struct rte_eth_dev *dev) + return err; + } + ++static int ++tap_mp_req_start_rxtx(const struct rte_mp_msg *request, __rte_unused const void *peer) ++{ ++ struct rte_eth_dev *dev; ++ const struct ipc_queues *request_param = ++ (const struct ipc_queues *)request->param; ++ int fd_iterator; ++ int queue; ++ struct pmd_process_private *process_private; ++ ++ dev = rte_eth_dev_get_by_name(request_param->port_name); ++ if (!dev) { ++ TAP_LOG(ERR, "Failed to get dev for %s", ++ request_param->port_name); ++ return -1; ++ } ++ process_private = dev->process_private; ++ fd_iterator = 0; ++ TAP_LOG(DEBUG, "tap_attach rx_q:%d tx_q:%d\n", request_param->rxq_count, ++ request_param->txq_count); ++ for (queue = 0; queue < request_param->txq_count; queue++) ++ process_private->txq_fds[queue] = request->fds[fd_iterator++]; ++ for (queue = 0; queue < request_param->rxq_count; queue++) ++ process_private->rxq_fds[queue] = request->fds[fd_iterator++]; ++ ++ return 0; ++} ++ + /* This function gets called when the current port gets stopped. 
+ */ + static int +@@ -1104,6 +1177,9 @@ tap_dev_close(struct rte_eth_dev *dev) + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + rte_free(dev->process_private); ++ if (tap_devices_count == 1) ++ rte_mp_action_unregister(TAP_MP_REQ_START_RXTX); ++ tap_devices_count--; + return 0; + } + +@@ -1133,8 +1209,11 @@ tap_dev_close(struct rte_eth_dev *dev) + + if (internals->remote_if_index) { + /* Restore initial remote state */ +- ioctl(internals->ioctl_sock, SIOCSIFFLAGS, ++ int ret = ioctl(internals->ioctl_sock, SIOCSIFFLAGS, + &internals->remote_initial_flags); ++ if (ret) ++ TAP_LOG(ERR, "restore remote state failed: %d", ret); ++ + } + + rte_mempool_free(internals->gso_ctx_mp); +@@ -2455,6 +2534,16 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev) + ret = tap_mp_attach_queues(name, eth_dev); + if (ret != 0) + return -1; ++ ++ if (!tap_devices_count) { ++ ret = rte_mp_action_register(TAP_MP_REQ_START_RXTX, tap_mp_req_start_rxtx); ++ if (ret < 0 && rte_errno != ENOTSUP) { ++ TAP_LOG(ERR, "tap: Failed to register IPC callback: %s", ++ strerror(rte_errno)); ++ return -1; ++ } ++ } ++ tap_devices_count++; + rte_eth_dev_probing_finish(eth_dev); + return 0; + } +diff --git a/dpdk/drivers/net/tap/tap_bpf_api.c b/dpdk/drivers/net/tap/tap_bpf_api.c +index 98f6a76011..15283f8917 100644 +--- a/dpdk/drivers/net/tap/tap_bpf_api.c ++++ b/dpdk/drivers/net/tap/tap_bpf_api.c +@@ -96,7 +96,7 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, + * Load BPF instructions to kernel + * + * @param[in] type +- * BPF program type: classifieir or action ++ * BPF program type: classifier or action + * + * @param[in] insns + * Array of BPF instructions (equivalent to BPF instructions) +@@ -104,7 +104,7 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, + * @param[in] insns_cnt + * Number of BPF instructions (size of array) + * +- * @param[in] lincense ++ * @param[in] license + * License string that must be acknowledged by the kernel + * + * @return +diff --git a/dpdk/drivers/net/tap/tap_flow.c b/dpdk/drivers/net/tap/tap_flow.c +index 1538349e9c..6e51b1c2a3 100644 +--- a/dpdk/drivers/net/tap/tap_flow.c ++++ b/dpdk/drivers/net/tap/tap_flow.c +@@ -961,7 +961,7 @@ add_action(struct rte_flow *flow, size_t *act_index, struct action_data *adata) + } + + /** +- * Helper function to send a serie of TC actions to the kernel ++ * Helper function to send a series of TC actions to the kernel + * + * @param[in] flow + * Pointer to rte flow containing the netlink message +@@ -1300,10 +1300,16 @@ tap_flow_validate(struct rte_eth_dev *dev, + static void + tap_flow_set_handle(struct rte_flow *flow) + { ++ union { ++ struct rte_flow *flow; ++ const void *key; ++ } tmp; + uint32_t handle = 0; + ++ tmp.flow = flow; ++ + if (sizeof(flow) > 4) +- handle = rte_jhash(&flow, sizeof(flow), 1); ++ handle = rte_jhash(tmp.key, sizeof(flow), 1); + else + handle = (uintptr_t)flow; + /* must be at least 1 to avoid letting the kernel choose one for us */ +@@ -2011,7 +2017,7 @@ static int bpf_rss_key(enum bpf_rss_key_e cmd, __u32 *key_idx) + break; + + /* +- * Subtract offest to restore real key index ++ * Subtract offset to restore real key index + * If a non RSS flow is falsely trying to release map + * entry 0 - the offset subtraction will calculate the real + * map index as an out-of-range value and the release operation +diff --git a/dpdk/drivers/net/tap/tap_intr.c b/dpdk/drivers/net/tap/tap_intr.c +index 5cf4f173a0..1cacc15d9f 100644 +--- a/dpdk/drivers/net/tap/tap_intr.c ++++ 
b/dpdk/drivers/net/tap/tap_intr.c +@@ -59,7 +59,7 @@ tap_rx_intr_vec_install(struct rte_eth_dev *dev) + + if (!dev->data->dev_conf.intr_conf.rxq) + return 0; +- intr_handle->intr_vec = malloc(sizeof(intr_handle->intr_vec[rxqs_n])); ++ intr_handle->intr_vec = malloc(sizeof(int) * rxqs_n); + if (intr_handle->intr_vec == NULL) { + rte_errno = ENOMEM; + TAP_LOG(ERR, +diff --git a/dpdk/drivers/net/thunderx/nicvf_svf.c b/dpdk/drivers/net/thunderx/nicvf_svf.c +index bccf290599..1bcf73d9fc 100644 +--- a/dpdk/drivers/net/thunderx/nicvf_svf.c ++++ b/dpdk/drivers/net/thunderx/nicvf_svf.c +@@ -21,7 +21,7 @@ nicvf_svf_push(struct nicvf *vf) + + entry = rte_zmalloc("nicvf", sizeof(*entry), RTE_CACHE_LINE_SIZE); + if (entry == NULL) +- rte_panic("Cannoc allocate memory for svf_entry\n"); ++ rte_panic("Cannot allocate memory for svf_entry\n"); + + entry->vf = vf; + +diff --git a/dpdk/drivers/net/txgbe/base/meson.build b/dpdk/drivers/net/txgbe/base/meson.build +index 3c63bf5f4c..cf4e8cb0b4 100644 +--- a/dpdk/drivers/net/txgbe/base/meson.build ++++ b/dpdk/drivers/net/txgbe/base/meson.build +@@ -21,6 +21,6 @@ foreach flag: error_cflags + endforeach + + base_lib = static_library('txgbe_base', sources, +- dependencies: static_rte_eal, ++ dependencies: [static_rte_eal, static_rte_net], + c_args: c_args) + base_objs = base_lib.extract_all_objects() +diff --git a/dpdk/drivers/net/txgbe/base/txgbe_eeprom.c b/dpdk/drivers/net/txgbe/base/txgbe_eeprom.c +index 72cd3ff307..6ff0f23f18 100644 +--- a/dpdk/drivers/net/txgbe/base/txgbe_eeprom.c ++++ b/dpdk/drivers/net/txgbe/base/txgbe_eeprom.c +@@ -20,8 +20,6 @@ s32 txgbe_init_eeprom_params(struct txgbe_hw *hw) + u16 eeprom_size; + int err = 0; + +- DEBUGFUNC("txgbe_init_eeprom_params"); +- + if (eeprom->type != txgbe_eeprom_unknown) + return 0; + +@@ -53,12 +51,12 @@ s32 txgbe_init_eeprom_params(struct txgbe_hw *hw) + + err = eeprom->read32(hw, TXGBE_SW_REGION_PTR << 1, &eeprom->sw_addr); + if (err) { +- DEBUGOUT("EEPROM read failed.\n"); ++ DEBUGOUT("EEPROM read failed."); + return err; + } + +- DEBUGOUT("eeprom params: type = %d, size = %d, address bits: " +- "%d %d\n", eeprom->type, eeprom->word_size, ++ DEBUGOUT("eeprom params: type = %d, size = %d, address bits: %d %d", ++ eeprom->type, eeprom->word_size, + eeprom->address_bits, eeprom->sw_addr); + + return 0; +@@ -77,9 +75,6 @@ s32 txgbe_get_eeprom_semaphore(struct txgbe_hw *hw) + u32 i; + u32 swsm; + +- DEBUGFUNC("txgbe_get_eeprom_semaphore"); +- +- + /* Get SMBI software semaphore between device drivers first */ + for (i = 0; i < timeout; i++) { + /* +@@ -95,8 +90,7 @@ s32 txgbe_get_eeprom_semaphore(struct txgbe_hw *hw) + } + + if (i == timeout) { +- DEBUGOUT("Driver can't access the eeprom - SMBI Semaphore " +- "not granted.\n"); ++ DEBUGOUT("Driver can't access the eeprom - SMBI Semaphore not granted."); + /* + * this release is particularly important because our attempts + * above to get the semaphore may have succeeded, and if there +@@ -139,13 +133,12 @@ s32 txgbe_get_eeprom_semaphore(struct txgbe_hw *hw) + * was not granted because we don't have access to the EEPROM + */ + if (i >= timeout) { +- DEBUGOUT("SWESMBI Software EEPROM semaphore not granted.\n"); ++ DEBUGOUT("SWESMBI Software EEPROM semaphore not granted."); + txgbe_release_eeprom_semaphore(hw); + status = TXGBE_ERR_EEPROM; + } + } else { +- DEBUGOUT("Software semaphore SMBI between device drivers " +- "not granted.\n"); ++ DEBUGOUT("Software semaphore SMBI between device drivers not granted."); + } + + return status; +@@ -159,8 +152,6 @@ s32 
txgbe_get_eeprom_semaphore(struct txgbe_hw *hw) + **/ + void txgbe_release_eeprom_semaphore(struct txgbe_hw *hw) + { +- DEBUGFUNC("txgbe_release_eeprom_semaphore"); +- + wr32m(hw, TXGBE_MNGSWSYNC, TXGBE_MNGSWSYNC_REQ, 0); + wr32m(hw, TXGBE_SWSEM, TXGBE_SWSEM_PF, 0); + txgbe_flush(hw); +@@ -193,7 +184,7 @@ s32 txgbe_ee_read16(struct txgbe_hw *hw, u32 offset, + } + + /** +- * txgbe_ee_read_buffer- Read EEPROM word(s) using hostif ++ * txgbe_ee_readw_buffer- Read EEPROM word(s) using hostif + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @words: number of words +@@ -274,42 +265,6 @@ s32 txgbe_ee_read32(struct txgbe_hw *hw, u32 addr, u32 *data) + return err; + } + +-/** +- * txgbe_ee_read_buffer - Read EEPROM byte(s) using hostif +- * @hw: pointer to hardware structure +- * @addr: offset of bytes in the EEPROM to read +- * @len: number of bytes +- * @data: byte(s) read from the EEPROM +- * +- * Reads a 8 bit byte(s) from the EEPROM using the hostif. +- **/ +-s32 txgbe_ee_read_buffer(struct txgbe_hw *hw, +- u32 addr, u32 len, void *data) +-{ +- const u32 mask = TXGBE_MNGSEM_SWMBX | TXGBE_MNGSEM_SWFLASH; +- u8 *buf = (u8 *)data; +- int err; +- +- err = hw->mac.acquire_swfw_sync(hw, mask); +- if (err) +- return err; +- +- while (len) { +- u32 seg = (len <= TXGBE_PMMBX_DATA_SIZE +- ? len : TXGBE_PMMBX_DATA_SIZE); +- +- err = txgbe_hic_sr_read(hw, addr, buf, seg); +- if (err) +- break; +- +- len -= seg; +- buf += seg; +- } +- +- hw->mac.release_swfw_sync(hw, mask); +- return err; +-} +- + /** + * txgbe_ee_write - Write EEPROM word using hostif + * @hw: pointer to hardware structure +@@ -325,8 +280,6 @@ s32 txgbe_ee_write16(struct txgbe_hw *hw, u32 offset, + u32 addr = (offset << 1); + int err; + +- DEBUGFUNC("\n"); +- + err = hw->mac.acquire_swfw_sync(hw, mask); + if (err) + return err; +@@ -339,7 +292,7 @@ s32 txgbe_ee_write16(struct txgbe_hw *hw, u32 offset, + } + + /** +- * txgbe_ee_write_buffer - Write EEPROM word(s) using hostif ++ * txgbe_ee_writew_buffer - Write EEPROM word(s) using hostif + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to write + * @words: number of words +@@ -383,8 +336,6 @@ s32 txgbe_ee_writew_sw(struct txgbe_hw *hw, u32 offset, + u32 addr = hw->rom.sw_addr + (offset << 1); + int err; + +- DEBUGFUNC("\n"); +- + err = hw->mac.acquire_swfw_sync(hw, mask); + if (err) + return err; +@@ -420,42 +371,6 @@ s32 txgbe_ee_write32(struct txgbe_hw *hw, u32 addr, u32 data) + return err; + } + +-/** +- * txgbe_ee_write_buffer - Write EEPROM byte(s) using hostif +- * @hw: pointer to hardware structure +- * @addr: offset of bytes in the EEPROM to write +- * @len: number of bytes +- * @data: word(s) write to the EEPROM +- * +- * Write a 8 bit byte(s) to the EEPROM using the hostif. +- **/ +-s32 txgbe_ee_write_buffer(struct txgbe_hw *hw, +- u32 addr, u32 len, void *data) +-{ +- const u32 mask = TXGBE_MNGSEM_SWMBX | TXGBE_MNGSEM_SWFLASH; +- u8 *buf = (u8 *)data; +- int err; +- +- err = hw->mac.acquire_swfw_sync(hw, mask); +- if (err) +- return err; +- +- while (len) { +- u32 seg = (len <= TXGBE_PMMBX_DATA_SIZE +- ? 
len : TXGBE_PMMBX_DATA_SIZE); +- +- err = txgbe_hic_sr_write(hw, addr, buf, seg); +- if (err) +- break; +- +- len -= seg; +- buf += seg; +- } +- +- hw->mac.release_swfw_sync(hw, mask); +- return err; +-} +- + /** + * txgbe_calc_eeprom_checksum - Calculates and returns the checksum + * @hw: pointer to hardware structure +@@ -470,11 +385,9 @@ s32 txgbe_calc_eeprom_checksum(struct txgbe_hw *hw) + int err; + u16 buffer[BUFF_SIZE]; + +- DEBUGFUNC("txgbe_calc_eeprom_checksum"); +- + err = hw->rom.readw_sw(hw, TXGBE_EEPROM_CHECKSUM, &read_checksum); + if (err) { +- DEBUGOUT("EEPROM read failed\n"); ++ DEBUGOUT("EEPROM read failed"); + return err; + } + +@@ -508,15 +421,13 @@ s32 txgbe_validate_eeprom_checksum(struct txgbe_hw *hw, + u16 read_checksum = 0; + int err; + +- DEBUGFUNC("txgbe_validate_eeprom_checksum"); +- + /* Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + err = hw->rom.read16(hw, 0, &checksum); + if (err) { +- DEBUGOUT("EEPROM read failed\n"); ++ DEBUGOUT("EEPROM read failed"); + return err; + } + +@@ -528,7 +439,7 @@ s32 txgbe_validate_eeprom_checksum(struct txgbe_hw *hw, + + err = hw->rom.readw_sw(hw, TXGBE_EEPROM_CHECKSUM, &read_checksum); + if (err) { +- DEBUGOUT("EEPROM read failed\n"); ++ DEBUGOUT("EEPROM read failed"); + return err; + } + +@@ -537,7 +448,7 @@ s32 txgbe_validate_eeprom_checksum(struct txgbe_hw *hw, + */ + if (read_checksum != checksum) { + err = TXGBE_ERR_EEPROM_CHECKSUM; +- DEBUGOUT("EEPROM checksum error\n"); ++ DEBUGOUT("EEPROM checksum error"); + } + + /* If the user cares, return the calculated checksum */ +@@ -556,15 +467,13 @@ s32 txgbe_update_eeprom_checksum(struct txgbe_hw *hw) + s32 status; + u16 checksum; + +- DEBUGFUNC("txgbe_update_eeprom_checksum"); +- + /* Read the first word from the EEPROM. 
If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + status = hw->rom.read16(hw, 0, &checksum); + if (status) { +- DEBUGOUT("EEPROM read failed\n"); ++ DEBUGOUT("EEPROM read failed"); + return status; + } + +diff --git a/dpdk/drivers/net/txgbe/base/txgbe_eeprom.h b/dpdk/drivers/net/txgbe/base/txgbe_eeprom.h +index d0e142dba5..78b8af978b 100644 +--- a/dpdk/drivers/net/txgbe/base/txgbe_eeprom.h ++++ b/dpdk/drivers/net/txgbe/base/txgbe_eeprom.h +@@ -51,14 +51,12 @@ s32 txgbe_ee_readw_sw(struct txgbe_hw *hw, u32 offset, u16 *data); + s32 txgbe_ee_readw_buffer(struct txgbe_hw *hw, u32 offset, u32 words, + void *data); + s32 txgbe_ee_read32(struct txgbe_hw *hw, u32 addr, u32 *data); +-s32 txgbe_ee_read_buffer(struct txgbe_hw *hw, u32 addr, u32 len, void *data); + + s32 txgbe_ee_write16(struct txgbe_hw *hw, u32 offset, u16 data); + s32 txgbe_ee_writew_sw(struct txgbe_hw *hw, u32 offset, u16 data); + s32 txgbe_ee_writew_buffer(struct txgbe_hw *hw, u32 offset, u32 words, + void *data); + s32 txgbe_ee_write32(struct txgbe_hw *hw, u32 addr, u32 data); +-s32 txgbe_ee_write_buffer(struct txgbe_hw *hw, u32 addr, u32 len, void *data); + + + #endif /* _TXGBE_EEPROM_H_ */ +diff --git a/dpdk/drivers/net/txgbe/base/txgbe_hw.c b/dpdk/drivers/net/txgbe/base/txgbe_hw.c +index 5ee13b0f82..e6ffbd900e 100644 +--- a/dpdk/drivers/net/txgbe/base/txgbe_hw.c ++++ b/dpdk/drivers/net/txgbe/base/txgbe_hw.c +@@ -40,8 +40,6 @@ bool txgbe_device_supports_autoneg_fc(struct txgbe_hw *hw) + u32 speed; + bool link_up; + +- DEBUGFUNC("txgbe_device_supports_autoneg_fc"); +- + switch (hw->phy.media_type) { + case txgbe_media_type_fiber_qsfp: + case txgbe_media_type_fiber: +@@ -92,11 +90,9 @@ s32 txgbe_setup_fc(struct txgbe_hw *hw) + u64 reg_bp = 0; + bool locked = false; + +- DEBUGFUNC("txgbe_setup_fc"); +- + /* Validate the requested mode */ + if (hw->fc.strict_ieee && hw->fc.requested_mode == txgbe_fc_rx_pause) { +- DEBUGOUT("txgbe_fc_rx_pause not valid in strict IEEE mode\n"); ++ DEBUGOUT("txgbe_fc_rx_pause not valid in strict IEEE mode"); + err = TXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } +@@ -194,7 +190,7 @@ s32 txgbe_setup_fc(struct txgbe_hw *hw) + SR_AN_MMD_ADV_REG1_PAUSE_ASM; + break; + default: +- DEBUGOUT("Flow control param set incorrectly\n"); ++ DEBUGOUT("Flow control param set incorrectly"); + err = TXGBE_ERR_CONFIG; + goto out; + } +@@ -225,7 +221,7 @@ s32 txgbe_setup_fc(struct txgbe_hw *hw) + TXGBE_MD_DEV_AUTO_NEG, reg_cu); + } + +- DEBUGOUT("Set up FC; reg = 0x%08X\n", reg); ++ DEBUGOUT("Set up FC; reg = 0x%08X", reg); + out: + return err; + } +@@ -244,8 +240,6 @@ s32 txgbe_start_hw(struct txgbe_hw *hw) + s32 err; + u16 device_caps; + +- DEBUGFUNC("txgbe_start_hw"); +- + /* Set the media type */ + hw->phy.media_type = hw->phy.get_media_type(hw); + +@@ -258,7 +252,7 @@ s32 txgbe_start_hw(struct txgbe_hw *hw) + /* Setup flow control */ + err = txgbe_setup_fc(hw); + if (err != 0 && err != TXGBE_NOT_IMPLEMENTED) { +- DEBUGOUT("Flow control setup failed, returning %d\n", err); ++ DEBUGOUT("Flow control setup failed, returning %d", err); + return err; + } + +@@ -320,8 +314,6 @@ s32 txgbe_init_hw(struct txgbe_hw *hw) + { + s32 status; + +- DEBUGFUNC("txgbe_init_hw"); +- + /* Reset the hardware */ + status = hw->mac.reset_hw(hw); + if (status == 0 || status == TXGBE_ERR_SFP_NOT_PRESENT) { +@@ -330,7 +322,7 @@ s32 txgbe_init_hw(struct txgbe_hw *hw) + } + + if (status != 0) +- DEBUGOUT("Failed to initialize HW, STATUS = %d\n", status); ++ 
DEBUGOUT("Failed to initialize HW, STATUS = %d", status); + + return status; + } +@@ -346,8 +338,6 @@ s32 txgbe_clear_hw_cntrs(struct txgbe_hw *hw) + { + u16 i = 0; + +- DEBUGFUNC("txgbe_clear_hw_cntrs"); +- + /* QP Stats */ + /* don't write clear queue stats */ + for (i = 0; i < TXGBE_MAX_QP; i++) { +@@ -467,8 +457,6 @@ s32 txgbe_get_mac_addr(struct txgbe_hw *hw, u8 *mac_addr) + u32 rar_low; + u16 i; + +- DEBUGFUNC("txgbe_get_mac_addr"); +- + wr32(hw, TXGBE_ETHADDRIDX, 0); + rar_high = rd32(hw, TXGBE_ETHADDRH); + rar_low = rd32(hw, TXGBE_ETHADDRL); +@@ -494,8 +482,6 @@ void txgbe_set_lan_id_multi_port(struct txgbe_hw *hw) + struct txgbe_bus_info *bus = &hw->bus; + u32 reg; + +- DEBUGFUNC("txgbe_set_lan_id_multi_port_pcie"); +- + reg = rd32(hw, TXGBE_PORTSTAT); + bus->lan_id = TXGBE_PORTSTAT_ID(reg); + +@@ -521,8 +507,6 @@ s32 txgbe_stop_hw(struct txgbe_hw *hw) + u32 reg_val; + u16 i; + +- DEBUGFUNC("txgbe_stop_hw"); +- + /* + * Set the adapter_stopped flag so other driver functions stop touching + * the hardware +@@ -569,8 +553,6 @@ s32 txgbe_led_on(struct txgbe_hw *hw, u32 index) + { + u32 led_reg = rd32(hw, TXGBE_LEDCTL); + +- DEBUGFUNC("txgbe_led_on"); +- + if (index > 4) + return TXGBE_ERR_PARAM; + +@@ -592,8 +574,6 @@ s32 txgbe_led_off(struct txgbe_hw *hw, u32 index) + { + u32 led_reg = rd32(hw, TXGBE_LEDCTL); + +- DEBUGFUNC("txgbe_led_off"); +- + if (index > 4) + return TXGBE_ERR_PARAM; + +@@ -616,8 +596,6 @@ s32 txgbe_validate_mac_addr(u8 *mac_addr) + { + s32 status = 0; + +- DEBUGFUNC("txgbe_validate_mac_addr"); +- + /* Make sure it is not a multicast address */ + if (TXGBE_IS_MULTICAST(mac_addr)) { + status = TXGBE_ERR_INVALID_MAC_ADDR; +@@ -648,11 +626,9 @@ s32 txgbe_set_rar(struct txgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, + u32 rar_low, rar_high; + u32 rar_entries = hw->mac.num_rar_entries; + +- DEBUGFUNC("txgbe_set_rar"); +- + /* Make sure we are using a valid rar index range */ + if (index >= rar_entries) { +- DEBUGOUT("RAR index %d is out of range.\n", index); ++ DEBUGOUT("RAR index %d is out of range.", index); + return TXGBE_ERR_INVALID_ARGUMENT; + } + +@@ -700,11 +676,9 @@ s32 txgbe_clear_rar(struct txgbe_hw *hw, u32 index) + u32 rar_high; + u32 rar_entries = hw->mac.num_rar_entries; + +- DEBUGFUNC("txgbe_clear_rar"); +- + /* Make sure we are using a valid rar index range */ + if (index >= rar_entries) { +- DEBUGOUT("RAR index %d is out of range.\n", index); ++ DEBUGOUT("RAR index %d is out of range.", index); + return TXGBE_ERR_INVALID_ARGUMENT; + } + +@@ -740,8 +714,6 @@ s32 txgbe_init_rx_addrs(struct txgbe_hw *hw) + u32 psrctl; + u32 rar_entries = hw->mac.num_rar_entries; + +- DEBUGFUNC("txgbe_init_rx_addrs"); +- + /* + * If the current mac address is valid, assume it is a software override + * to the permanent address. +@@ -759,7 +731,7 @@ s32 txgbe_init_rx_addrs(struct txgbe_hw *hw) + hw->mac.addr[4], hw->mac.addr[5]); + } else { + /* Setup the receive address. */ +- DEBUGOUT("Overriding MAC Address in RAR[0]\n"); ++ DEBUGOUT("Overriding MAC Address in RAR[0]"); + DEBUGOUT(" New MAC Addr =%.2X %.2X %.2X ", + hw->mac.addr[0], hw->mac.addr[1], + hw->mac.addr[2]); +@@ -777,7 +749,7 @@ s32 txgbe_init_rx_addrs(struct txgbe_hw *hw) + hw->addr_ctrl.rar_used_count = 1; + + /* Zero out the other receive addresses. 
*/ +- DEBUGOUT("Clearing RAR[1-%d]\n", rar_entries - 1); ++ DEBUGOUT("Clearing RAR[1-%d]", rar_entries - 1); + for (i = 1; i < rar_entries; i++) { + wr32(hw, TXGBE_ETHADDRIDX, i); + wr32(hw, TXGBE_ETHADDRL, 0); +@@ -791,7 +763,7 @@ s32 txgbe_init_rx_addrs(struct txgbe_hw *hw) + psrctl |= TXGBE_PSRCTL_ADHF12(hw->mac.mc_filter_type); + wr32(hw, TXGBE_PSRCTL, psrctl); + +- DEBUGOUT(" Clearing MTA\n"); ++ DEBUGOUT(" Clearing MTA"); + for (i = 0; i < hw->mac.mcft_size; i++) + wr32(hw, TXGBE_MCADDRTBL(i), 0); + +@@ -816,8 +788,6 @@ static s32 txgbe_mta_vector(struct txgbe_hw *hw, u8 *mc_addr) + { + u32 vector = 0; + +- DEBUGFUNC("txgbe_mta_vector"); +- + switch (hw->mac.mc_filter_type) { + case 0: /* use bits [47:36] of the address */ + vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4)); +@@ -832,7 +802,7 @@ static s32 txgbe_mta_vector(struct txgbe_hw *hw, u8 *mc_addr) + vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8)); + break; + default: /* Invalid mc_filter_type */ +- DEBUGOUT("MC filter type param set incorrectly\n"); ++ DEBUGOUT("MC filter type param set incorrectly"); + ASSERT(0); + break; + } +@@ -855,12 +825,10 @@ void txgbe_set_mta(struct txgbe_hw *hw, u8 *mc_addr) + u32 vector_bit; + u32 vector_reg; + +- DEBUGFUNC("txgbe_set_mta"); +- + hw->addr_ctrl.mta_in_use++; + + vector = txgbe_mta_vector(hw, mc_addr); +- DEBUGOUT(" bit-vector = 0x%03X\n", vector); ++ DEBUGOUT(" bit-vector = 0x%03X", vector); + + /* + * The MTA is a register array of 128 32-bit registers. It is treated +@@ -894,8 +862,6 @@ s32 txgbe_update_mc_addr_list(struct txgbe_hw *hw, u8 *mc_addr_list, + u32 i; + u32 vmdq; + +- DEBUGFUNC("txgbe_update_mc_addr_list"); +- + /* + * Set the new number of MC addresses that we are being requested to + * use. +@@ -905,13 +871,13 @@ s32 txgbe_update_mc_addr_list(struct txgbe_hw *hw, u8 *mc_addr_list, + + /* Clear mta_shadow */ + if (clear) { +- DEBUGOUT(" Clearing MTA\n"); ++ DEBUGOUT(" Clearing MTA"); + memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); + } + + /* Update mta_shadow */ + for (i = 0; i < mc_addr_count; i++) { +- DEBUGOUT(" Adding the multicast addresses:\n"); ++ DEBUGOUT(" Adding the multicast addresses:"); + txgbe_set_mta(hw, next(hw, &mc_addr_list, &vmdq)); + } + +@@ -928,7 +894,7 @@ s32 txgbe_update_mc_addr_list(struct txgbe_hw *hw, u8 *mc_addr_list, + wr32(hw, TXGBE_PSRCTL, psrctl); + } + +- DEBUGOUT("txgbe update mc addr list complete\n"); ++ DEBUGOUT("txgbe update mc addr list complete"); + return 0; + } + +@@ -946,8 +912,6 @@ s32 txgbe_fc_enable(struct txgbe_hw *hw) + u32 fcrtl, fcrth; + int i; + +- DEBUGFUNC("txgbe_fc_enable"); +- + /* Validate the water mark configuration */ + if (!hw->fc.pause_time) { + err = TXGBE_ERR_INVALID_LINK_SETTINGS; +@@ -960,7 +924,7 @@ s32 txgbe_fc_enable(struct txgbe_hw *hw) + hw->fc.high_water[i]) { + if (!hw->fc.low_water[i] || + hw->fc.low_water[i] >= hw->fc.high_water[i]) { +- DEBUGOUT("Invalid water mark configuration\n"); ++ DEBUGOUT("Invalid water mark configuration"); + err = TXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } +@@ -1018,7 +982,7 @@ s32 txgbe_fc_enable(struct txgbe_hw *hw) + fccfg_reg |= TXGBE_TXFCCFG_FC; + break; + default: +- DEBUGOUT("Flow control param set incorrectly\n"); ++ DEBUGOUT("Flow control param set incorrectly"); + err = TXGBE_ERR_CONFIG; + goto out; + } +@@ -1079,8 +1043,7 @@ s32 txgbe_negotiate_fc(struct txgbe_hw *hw, u32 adv_reg, u32 lp_reg, + u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) + { + if ((!(adv_reg)) || (!(lp_reg))) { +- DEBUGOUT("Local or link partner's 
advertised flow control " +- "settings are NULL. Local: %x, link partner: %x\n", ++ DEBUGOUT("Local or link partner's advertised flow control settings are NULL. Local: %x, link partner: %x", + adv_reg, lp_reg); + return TXGBE_ERR_FC_NOT_NEGOTIATED; + } +@@ -1095,22 +1058,22 @@ s32 txgbe_negotiate_fc(struct txgbe_hw *hw, u32 adv_reg, u32 lp_reg, + */ + if (hw->fc.requested_mode == txgbe_fc_full) { + hw->fc.current_mode = txgbe_fc_full; +- DEBUGOUT("Flow Control = FULL.\n"); ++ DEBUGOUT("Flow Control = FULL."); + } else { + hw->fc.current_mode = txgbe_fc_rx_pause; +- DEBUGOUT("Flow Control=RX PAUSE frames only\n"); ++ DEBUGOUT("Flow Control=RX PAUSE frames only"); + } + } else if (!(adv_reg & adv_sym) && (adv_reg & adv_asm) && + (lp_reg & lp_sym) && (lp_reg & lp_asm)) { + hw->fc.current_mode = txgbe_fc_tx_pause; +- DEBUGOUT("Flow Control = TX PAUSE frames only.\n"); ++ DEBUGOUT("Flow Control = TX PAUSE frames only."); + } else if ((adv_reg & adv_sym) && (adv_reg & adv_asm) && + !(lp_reg & lp_sym) && (lp_reg & lp_asm)) { + hw->fc.current_mode = txgbe_fc_rx_pause; +- DEBUGOUT("Flow Control = RX PAUSE frames only.\n"); ++ DEBUGOUT("Flow Control = RX PAUSE frames only."); + } else { + hw->fc.current_mode = txgbe_fc_none; +- DEBUGOUT("Flow Control = NONE.\n"); ++ DEBUGOUT("Flow Control = NONE."); + } + return 0; + } +@@ -1210,8 +1173,6 @@ void txgbe_fc_autoneg(struct txgbe_hw *hw) + u32 speed; + bool link_up; + +- DEBUGFUNC("txgbe_fc_autoneg"); +- + /* + * AN should have completed when the cable was plugged in. + * Look for reasons to bail out. Bail out if: +@@ -1277,8 +1238,6 @@ s32 txgbe_acquire_swfw_sync(struct txgbe_hw *hw, u32 mask) + u32 timeout = 200; + u32 i; + +- DEBUGFUNC("txgbe_acquire_swfw_sync"); +- + for (i = 0; i < timeout; i++) { + /* + * SW NVM semaphore bit is used for access to all +@@ -1321,8 +1280,6 @@ void txgbe_release_swfw_sync(struct txgbe_hw *hw, u32 mask) + u32 mngsem; + u32 swmask = mask; + +- DEBUGFUNC("txgbe_release_swfw_sync"); +- + txgbe_get_eeprom_semaphore(hw); + + mngsem = rd32(hw, TXGBE_MNGSEM); +@@ -1346,8 +1303,6 @@ s32 txgbe_disable_sec_rx_path(struct txgbe_hw *hw) + int i; + u32 secrxreg; + +- DEBUGFUNC("txgbe_disable_sec_rx_path"); +- + secrxreg = rd32(hw, TXGBE_SECRXCTL); + secrxreg |= TXGBE_SECRXCTL_XDSA; + wr32(hw, TXGBE_SECRXCTL, secrxreg); +@@ -1362,8 +1317,7 @@ s32 txgbe_disable_sec_rx_path(struct txgbe_hw *hw) + + /* For informational purposes only */ + if (i >= TXGBE_MAX_SECRX_POLL) +- DEBUGOUT("Rx unit being enabled before security " +- "path fully disabled. Continuing with init.\n"); ++ DEBUGOUT("Rx unit being enabled before security path fully disabled. Continuing with init."); + + return 0; + } +@@ -1378,8 +1332,6 @@ s32 txgbe_enable_sec_rx_path(struct txgbe_hw *hw) + { + u32 secrxreg; + +- DEBUGFUNC("txgbe_enable_sec_rx_path"); +- + secrxreg = rd32(hw, TXGBE_SECRXCTL); + secrxreg &= ~TXGBE_SECRXCTL_XDSA; + wr32(hw, TXGBE_SECRXCTL, secrxreg); +@@ -1415,8 +1367,7 @@ int txgbe_disable_sec_tx_path(struct txgbe_hw *hw) + + /* For informational purposes only */ + if (i >= TXGBE_MAX_SECTX_POLL) +- PMD_DRV_LOG(DEBUG, "Tx unit being enabled before security " +- "path fully disabled. Continuing with init."); ++ DEBUGOUT("Tx unit being enabled before security path fully disabled. 
Continuing with init."); + + return 0; + } +@@ -1453,8 +1404,6 @@ static s32 txgbe_get_san_mac_addr_offset(struct txgbe_hw *hw, + { + s32 err; + +- DEBUGFUNC("txgbe_get_san_mac_addr_offset"); +- + /* + * First read the EEPROM pointer to see if the MAC addresses are + * available. +@@ -1485,8 +1434,6 @@ s32 txgbe_get_san_mac_addr(struct txgbe_hw *hw, u8 *san_mac_addr) + u8 i; + s32 err; + +- DEBUGFUNC("txgbe_get_san_mac_addr"); +- + /* + * First read the EEPROM pointer to see if the MAC addresses are + * available. If they're not, no point in calling set_lan_id() here. +@@ -1535,8 +1482,6 @@ s32 txgbe_set_san_mac_addr(struct txgbe_hw *hw, u8 *san_mac_addr) + u16 san_mac_data, san_mac_offset; + u8 i; + +- DEBUGFUNC("txgbe_set_san_mac_addr"); +- + /* Look for SAN mac address pointer. If not defined, return */ + err = txgbe_get_san_mac_addr_offset(hw, &san_mac_offset); + if (err || san_mac_offset == 0 || san_mac_offset == 0xFFFF) +@@ -1567,11 +1512,9 @@ s32 txgbe_clear_vmdq(struct txgbe_hw *hw, u32 rar, u32 vmdq) + u32 mpsar_lo, mpsar_hi; + u32 rar_entries = hw->mac.num_rar_entries; + +- DEBUGFUNC("txgbe_clear_vmdq"); +- + /* Make sure we are using a valid rar index range */ + if (rar >= rar_entries) { +- DEBUGOUT("RAR index %d is out of range.\n", rar); ++ DEBUGOUT("RAR index %d is out of range.", rar); + return TXGBE_ERR_INVALID_ARGUMENT; + } + +@@ -1621,11 +1564,9 @@ s32 txgbe_set_vmdq(struct txgbe_hw *hw, u32 rar, u32 vmdq) + u32 mpsar; + u32 rar_entries = hw->mac.num_rar_entries; + +- DEBUGFUNC("txgbe_set_vmdq"); +- + /* Make sure we are using a valid rar index range */ + if (rar >= rar_entries) { +- DEBUGOUT("RAR index %d is out of range.\n", rar); ++ DEBUGOUT("RAR index %d is out of range.", rar); + return TXGBE_ERR_INVALID_ARGUMENT; + } + +@@ -1650,8 +1591,7 @@ s32 txgbe_init_uta_tables(struct txgbe_hw *hw) + { + int i; + +- DEBUGFUNC("txgbe_init_uta_tables"); +- DEBUGOUT(" Clearing UTA\n"); ++ DEBUGOUT(" Clearing UTA"); + + for (i = 0; i < 128; i++) + wr32(hw, TXGBE_UCADDRTBL(i), 0); +@@ -1706,7 +1646,7 @@ s32 txgbe_find_vlvf_slot(struct txgbe_hw *hw, u32 vlan, bool vlvf_bypass) + * slot we found during our search, else error. + */ + if (!first_empty_slot) +- DEBUGOUT("No space in VLVF.\n"); ++ DEBUGOUT("No space in VLVF."); + + return first_empty_slot ? first_empty_slot : TXGBE_ERR_NO_SPACE; + } +@@ -1727,8 +1667,6 @@ s32 txgbe_set_vfta(struct txgbe_hw *hw, u32 vlan, u32 vind, + u32 regidx, vfta_delta, vfta; + s32 err; + +- DEBUGFUNC("txgbe_set_vfta"); +- + if (vlan > 4095 || vind > 63) + return TXGBE_ERR_PARAM; + +@@ -1796,8 +1734,6 @@ s32 txgbe_set_vlvf(struct txgbe_hw *hw, u32 vlan, u32 vind, + u32 portctl; + s32 vlvf_index; + +- DEBUGFUNC("txgbe_set_vlvf"); +- + if (vlan > 4095 || vind > 63) + return TXGBE_ERR_PARAM; + +@@ -1877,8 +1813,6 @@ s32 txgbe_clear_vfta(struct txgbe_hw *hw) + { + u32 offset; + +- DEBUGFUNC("txgbe_clear_vfta"); +- + for (offset = 0; offset < hw->mac.vft_size; offset++) + wr32(hw, TXGBE_VLANTBL(offset), 0); + +@@ -1932,8 +1866,6 @@ s32 txgbe_check_mac_link(struct txgbe_hw *hw, u32 *speed, + u32 links_reg, links_orig; + u32 i; + +- DEBUGFUNC("txgbe_check_mac_link"); +- + /* If Crosstalk fix enabled do the sanity check of making sure + * the SFP+ cage is full. 
+ */ +@@ -1964,7 +1896,7 @@ s32 txgbe_check_mac_link(struct txgbe_hw *hw, u32 *speed, + links_reg = rd32(hw, TXGBE_PORTSTAT); + + if (links_orig != links_reg) { +- DEBUGOUT("LINKS changed from %08X to %08X\n", ++ DEBUGOUT("LINKS changed from %08X to %08X", + links_orig, links_reg); + } + +@@ -2019,8 +1951,6 @@ s32 txgbe_get_wwn_prefix(struct txgbe_hw *hw, u16 *wwnn_prefix, + u16 offset, caps; + u16 alt_san_mac_blk_offset; + +- DEBUGFUNC("txgbe_get_wwn_prefix"); +- + /* clear output first */ + *wwnn_prefix = 0xFFFF; + *wwpn_prefix = 0xFFFF; +@@ -2110,8 +2040,6 @@ void txgbe_set_ethertype_anti_spoofing(struct txgbe_hw *hw, + **/ + s32 txgbe_get_device_caps(struct txgbe_hw *hw, u16 *device_caps) + { +- DEBUGFUNC("txgbe_get_device_caps"); +- + hw->rom.readw_sw(hw, TXGBE_DEVICE_CAPS, device_caps); + + return 0; +@@ -2233,8 +2161,6 @@ s32 txgbe_get_thermal_sensor_data(struct txgbe_hw *hw) + s64 tsv; + u32 ts_stat; + +- DEBUGFUNC("txgbe_get_thermal_sensor_data"); +- + /* Only support thermal sensors attached to physical port 0 */ + if (hw->bus.lan_id != 0) + return TXGBE_NOT_IMPLEMENTED; +@@ -2265,8 +2191,6 @@ s32 txgbe_init_thermal_sensor_thresh(struct txgbe_hw *hw) + { + struct txgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; + +- DEBUGFUNC("txgbe_init_thermal_sensor_thresh"); +- + memset(data, 0, sizeof(struct txgbe_thermal_sensor_data)); + + if (hw->bus.lan_id != 0) +@@ -2337,8 +2261,6 @@ s32 txgbe_setup_mac_link_multispeed_fiber(struct txgbe_hw *hw, + u32 i = 0; + bool autoneg, link_up = false; + +- DEBUGFUNC("txgbe_setup_mac_link_multispeed_fiber"); +- + /* Mask off requested but non-supported speeds */ + status = hw->mac.get_link_capabilities(hw, &link_speed, &autoneg); + if (status != 0) +@@ -2363,7 +2285,7 @@ s32 txgbe_setup_mac_link_multispeed_fiber(struct txgbe_hw *hw, + /* QSFP module automatically detects MAC link speed */ + break; + default: +- DEBUGOUT("Unexpected media type.\n"); ++ DEBUGOUT("Unexpected media type."); + break; + } + +@@ -2413,7 +2335,7 @@ s32 txgbe_setup_mac_link_multispeed_fiber(struct txgbe_hw *hw, + /* QSFP module automatically detects link speed */ + break; + default: +- DEBUGOUT("Unexpected media type.\n"); ++ DEBUGOUT("Unexpected media type."); + break; + } + +@@ -2479,8 +2401,6 @@ s32 txgbe_init_shared_code(struct txgbe_hw *hw) + { + s32 status; + +- DEBUGFUNC("txgbe_init_shared_code"); +- + /* + * Set the mac type + */ +@@ -2513,8 +2433,6 @@ s32 txgbe_set_mac_type(struct txgbe_hw *hw) + { + s32 err = 0; + +- DEBUGFUNC("txgbe_set_mac_type"); +- + if (hw->vendor_id != PCI_VENDOR_ID_WANGXUN) { + DEBUGOUT("Unsupported vendor id: %x", hw->vendor_id); + return TXGBE_ERR_DEVICE_NOT_SUPPORTED; +@@ -2550,7 +2468,7 @@ s32 txgbe_set_mac_type(struct txgbe_hw *hw) + break; + } + +- DEBUGOUT("found mac: %d media: %d, returns: %d\n", ++ DEBUGOUT("found mac: %d media: %d, returns: %d", + hw->mac.type, hw->phy.media_type, err); + return err; + } +@@ -2559,8 +2477,6 @@ void txgbe_init_mac_link_ops(struct txgbe_hw *hw) + { + struct txgbe_mac_info *mac = &hw->mac; + +- DEBUGFUNC("txgbe_init_mac_link_ops"); +- + /* + * enable the laser control functions for SFP+ fiber + * and MNG not enabled +@@ -2607,8 +2523,6 @@ s32 txgbe_init_phy_raptor(struct txgbe_hw *hw) + struct txgbe_phy_info *phy = &hw->phy; + s32 err = 0; + +- DEBUGFUNC("txgbe_init_phy_raptor"); +- + if (hw->device_id == TXGBE_DEV_ID_RAPTOR_QSFP) { + /* Store flag indicating I2C bus access control unit. 
*/ + hw->phy.qsfp_shared_i2c_bus = TRUE; +@@ -2650,8 +2564,6 @@ s32 txgbe_setup_sfp_modules(struct txgbe_hw *hw) + { + s32 err = 0; + +- DEBUGFUNC("txgbe_setup_sfp_modules"); +- + if (hw->phy.sfp_type == txgbe_sfp_type_unknown) + return 0; + +@@ -2671,7 +2583,7 @@ s32 txgbe_setup_sfp_modules(struct txgbe_hw *hw) + msec_delay(hw->rom.semaphore_delay); + + if (err) { +- DEBUGOUT("sfp module setup not complete\n"); ++ DEBUGOUT("sfp module setup not complete"); + return TXGBE_ERR_SFP_SETUP_NOT_COMPLETE; + } + +@@ -2769,8 +2681,6 @@ s32 txgbe_init_ops_pf(struct txgbe_hw *hw) + struct txgbe_rom_info *rom = &hw->rom; + struct txgbe_mbx_info *mbx = &hw->mbx; + +- DEBUGFUNC("txgbe_init_ops_pf"); +- + /* BUS */ + bus->set_lan_id = txgbe_set_lan_id_multi_port; + +@@ -2895,8 +2805,6 @@ s32 txgbe_get_link_capabilities_raptor(struct txgbe_hw *hw, + s32 status = 0; + u32 autoc = 0; + +- DEBUGFUNC("txgbe_get_link_capabilities_raptor"); +- + /* Check if 1G SFP module. */ + if (hw->phy.sfp_type == txgbe_sfp_type_1g_cu_core0 || + hw->phy.sfp_type == txgbe_sfp_type_1g_cu_core1 || +@@ -3000,8 +2908,6 @@ u32 txgbe_get_media_type_raptor(struct txgbe_hw *hw) + { + u32 media_type; + +- DEBUGFUNC("txgbe_get_media_type_raptor"); +- + /* Detect if there is a copper PHY attached. */ + switch (hw->phy.type) { + case txgbe_phy_cu_unknown: +@@ -3050,8 +2956,6 @@ s32 txgbe_start_mac_link_raptor(struct txgbe_hw *hw, + s32 status = 0; + bool got_lock = false; + +- DEBUGFUNC("txgbe_start_mac_link_raptor"); +- + UNREFERENCED_PARAMETER(autoneg_wait_to_complete); + + /* reset_pipeline requires us to hold this lock as it writes to +@@ -3134,8 +3038,6 @@ void txgbe_enable_tx_laser_multispeed_fiber(struct txgbe_hw *hw) + **/ + void txgbe_flap_tx_laser_multispeed_fiber(struct txgbe_hw *hw) + { +- DEBUGFUNC("txgbe_flap_tx_laser_multispeed_fiber"); +- + /* Blocked by MNG FW so bail */ + if (txgbe_check_reset_blocked(hw)) + return; +@@ -3167,7 +3069,7 @@ void txgbe_set_hard_rate_select_speed(struct txgbe_hw *hw, + esdp_reg &= ~(TXGBE_GPIOBIT_4 | TXGBE_GPIOBIT_5); + break; + default: +- DEBUGOUT("Invalid fixed module speed\n"); ++ DEBUGOUT("Invalid fixed module speed"); + return; + } + +@@ -3193,8 +3095,6 @@ s32 txgbe_setup_mac_link_smartspeed(struct txgbe_hw *hw, + bool link_up = false; + u32 autoc_reg = rd32_epcs(hw, SR_AN_MMD_ADV_REG1); + +- DEBUGFUNC("txgbe_setup_mac_link_smartspeed"); +- + /* Set autoneg_advertised value based on input link speed */ + hw->phy.autoneg_advertised = 0; + +@@ -3283,8 +3183,7 @@ s32 txgbe_setup_mac_link_smartspeed(struct txgbe_hw *hw, + + out: + if (link_up && link_speed == TXGBE_LINK_SPEED_1GB_FULL) +- DEBUGOUT("Smartspeed has downgraded the link speed " +- "from the maximum advertised\n"); ++ DEBUGOUT("Smartspeed has downgraded the link speed from the maximum advertised"); + return status; + } + +@@ -3313,8 +3212,6 @@ s32 txgbe_setup_mac_link(struct txgbe_hw *hw, + u32 i; + u32 link_capabilities = TXGBE_LINK_SPEED_UNKNOWN; + +- DEBUGFUNC("txgbe_setup_mac_link"); +- + /* Check to see if speed passed in is supported. 
*/ + status = hw->mac.get_link_capabilities(hw, + &link_capabilities, &autoneg); +@@ -3421,8 +3318,6 @@ static s32 txgbe_setup_copper_link_raptor(struct txgbe_hw *hw, + { + s32 status; + +- DEBUGFUNC("txgbe_setup_copper_link_raptor"); +- + /* Setup the PHY according to input speed */ + status = hw->phy.setup_link_speed(hw, speed, + autoneg_wait_to_complete); +@@ -3530,8 +3425,6 @@ s32 txgbe_reset_hw(struct txgbe_hw *hw) + s32 status; + u32 autoc; + +- DEBUGFUNC("txgbe_reset_hw"); +- + /* Call adapter stop to disable tx/rx and clear interrupts */ + status = hw->mac.stop_hw(hw); + if (status != 0) +@@ -3661,8 +3554,6 @@ s32 txgbe_start_hw_raptor(struct txgbe_hw *hw) + { + s32 err = 0; + +- DEBUGFUNC("txgbe_start_hw_raptor"); +- + err = txgbe_start_hw(hw); + if (err != 0) + goto out; +@@ -3687,8 +3578,6 @@ s32 txgbe_start_hw_raptor(struct txgbe_hw *hw) + **/ + s32 txgbe_enable_rx_dma_raptor(struct txgbe_hw *hw, u32 regval) + { +- DEBUGFUNC("txgbe_enable_rx_dma_raptor"); +- + /* + * Workaround silicon errata when enabling the Rx datapath. + * If traffic is incoming before we enable the Rx unit, it could hang +@@ -3721,8 +3610,6 @@ bool txgbe_verify_lesm_fw_enabled_raptor(struct txgbe_hw *hw) + u16 fw_offset, fw_lesm_param_offset, fw_lesm_state; + s32 status; + +- DEBUGFUNC("txgbe_verify_lesm_fw_enabled_raptor"); +- + /* get the offset to the Firmware Module block */ + status = hw->rom.read16(hw, TXGBE_FW_PTR, &fw_offset); + +diff --git a/dpdk/drivers/net/txgbe/base/txgbe_mbx.c b/dpdk/drivers/net/txgbe/base/txgbe_mbx.c +index bfe53478ea..3ef4fce1f2 100644 +--- a/dpdk/drivers/net/txgbe/base/txgbe_mbx.c ++++ b/dpdk/drivers/net/txgbe/base/txgbe_mbx.c +@@ -20,8 +20,6 @@ s32 txgbe_read_mbx(struct txgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id) + struct txgbe_mbx_info *mbx = &hw->mbx; + s32 ret_val = TXGBE_ERR_MBX; + +- DEBUGFUNC("txgbe_read_mbx"); +- + /* limit read to size of mailbox */ + if (size > mbx->size) + size = mbx->size; +@@ -46,8 +44,6 @@ s32 txgbe_write_mbx(struct txgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id) + struct txgbe_mbx_info *mbx = &hw->mbx; + s32 ret_val = 0; + +- DEBUGFUNC("txgbe_write_mbx"); +- + if (size > mbx->size) { + ret_val = TXGBE_ERR_MBX; + DEBUGOUT("Invalid mailbox message size %d", size); +@@ -70,8 +66,6 @@ s32 txgbe_check_for_msg(struct txgbe_hw *hw, u16 mbx_id) + struct txgbe_mbx_info *mbx = &hw->mbx; + s32 ret_val = TXGBE_ERR_MBX; + +- DEBUGFUNC("txgbe_check_for_msg"); +- + if (mbx->check_for_msg) + ret_val = mbx->check_for_msg(hw, mbx_id); + +@@ -90,8 +84,6 @@ s32 txgbe_check_for_ack(struct txgbe_hw *hw, u16 mbx_id) + struct txgbe_mbx_info *mbx = &hw->mbx; + s32 ret_val = TXGBE_ERR_MBX; + +- DEBUGFUNC("txgbe_check_for_ack"); +- + if (mbx->check_for_ack) + ret_val = mbx->check_for_ack(hw, mbx_id); + +@@ -110,8 +102,6 @@ s32 txgbe_check_for_rst(struct txgbe_hw *hw, u16 mbx_id) + struct txgbe_mbx_info *mbx = &hw->mbx; + s32 ret_val = TXGBE_ERR_MBX; + +- DEBUGFUNC("txgbe_check_for_rst"); +- + if (mbx->check_for_rst) + ret_val = mbx->check_for_rst(hw, mbx_id); + +@@ -144,8 +134,6 @@ s32 txgbe_check_for_msg_pf(struct txgbe_hw *hw, u16 vf_number) + s32 index = TXGBE_MBVFICR_INDEX(vf_number); + u32 vf_bit = vf_number % 16; + +- DEBUGFUNC("txgbe_check_for_msg_pf"); +- + if (!txgbe_check_for_bit_pf(hw, TXGBE_MBVFICR_VFREQ_VF1 << vf_bit, + index)) { + ret_val = 0; +@@ -168,8 +156,6 @@ s32 txgbe_check_for_ack_pf(struct txgbe_hw *hw, u16 vf_number) + s32 index = TXGBE_MBVFICR_INDEX(vf_number); + u32 vf_bit = vf_number % 16; + +- DEBUGFUNC("txgbe_check_for_ack_pf"); +- + if 
(!txgbe_check_for_bit_pf(hw, TXGBE_MBVFICR_VFACK_VF1 << vf_bit, + index)) { + ret_val = 0; +@@ -193,8 +179,6 @@ s32 txgbe_check_for_rst_pf(struct txgbe_hw *hw, u16 vf_number) + u32 vflre = 0; + s32 ret_val = TXGBE_ERR_MBX; + +- DEBUGFUNC("txgbe_check_for_rst_pf"); +- + vflre = rd32(hw, TXGBE_FLRVFE(reg_offset)); + if (vflre & (1 << vf_shift)) { + ret_val = 0; +@@ -217,8 +201,6 @@ STATIC s32 txgbe_obtain_mbx_lock_pf(struct txgbe_hw *hw, u16 vf_number) + s32 ret_val = TXGBE_ERR_MBX; + u32 p2v_mailbox; + +- DEBUGFUNC("txgbe_obtain_mbx_lock_pf"); +- + /* Take ownership of the buffer */ + wr32(hw, TXGBE_MBCTL(vf_number), TXGBE_MBCTL_PFU); + +@@ -247,8 +229,6 @@ s32 txgbe_write_mbx_pf(struct txgbe_hw *hw, u32 *msg, u16 size, u16 vf_number) + s32 ret_val; + u16 i; + +- DEBUGFUNC("txgbe_write_mbx_pf"); +- + /* lock the mailbox to prevent pf/vf race condition */ + ret_val = txgbe_obtain_mbx_lock_pf(hw, vf_number); + if (ret_val) +@@ -288,8 +268,6 @@ s32 txgbe_read_mbx_pf(struct txgbe_hw *hw, u32 *msg, u16 size, u16 vf_number) + s32 ret_val; + u16 i; + +- DEBUGFUNC("txgbe_read_mbx_pf"); +- + /* lock the mailbox to prevent pf/vf race condition */ + ret_val = txgbe_obtain_mbx_lock_pf(hw, vf_number); + if (ret_val) +diff --git a/dpdk/drivers/net/txgbe/base/txgbe_mng.c b/dpdk/drivers/net/txgbe/base/txgbe_mng.c +index 224e48f5e1..b492dc8f11 100644 +--- a/dpdk/drivers/net/txgbe/base/txgbe_mng.c ++++ b/dpdk/drivers/net/txgbe/base/txgbe_mng.c +@@ -44,10 +44,8 @@ txgbe_hic_unlocked(struct txgbe_hw *hw, u32 *buffer, u32 length, u32 timeout) + u32 value, loop; + u16 i, dword_len; + +- DEBUGFUNC("txgbe_hic_unlocked"); +- + if (!length || length > TXGBE_PMMBX_BSIZE) { +- DEBUGOUT("Buffer length failure buffersize=%d.\n", length); ++ DEBUGOUT("Buffer length failure buffersize=%d.", length); + return TXGBE_ERR_HOST_INTERFACE_COMMAND; + } + +@@ -77,7 +75,7 @@ txgbe_hic_unlocked(struct txgbe_hw *hw, u32 *buffer, u32 length, u32 timeout) + TXGBE_MNGMBXCTL_FWRDY, TXGBE_MNGMBXCTL_FWRDY, + &value, timeout, 1000); + if (!loop || !(value & TXGBE_MNGMBXCTL_FWACK)) { +- DEBUGOUT("Command has failed with no status valid.\n"); ++ DEBUGOUT("Command has failed with no status valid."); + return TXGBE_ERR_HOST_INTERFACE_COMMAND; + } + +@@ -113,10 +111,8 @@ txgbe_host_interface_command(struct txgbe_hw *hw, u32 *buffer, + u32 bi; + u32 dword_len; + +- DEBUGFUNC("txgbe_host_interface_command"); +- + if (length == 0 || length > TXGBE_PMMBX_BSIZE) { +- DEBUGOUT("Buffer length failure buffersize=%d.\n", length); ++ DEBUGOUT("Buffer length failure buffersize=%d.", length); + return TXGBE_ERR_HOST_INTERFACE_COMMAND; + } + +@@ -158,7 +154,7 @@ txgbe_host_interface_command(struct txgbe_hw *hw, u32 *buffer, + goto rel_out; + + if (length < buf_len + hdr_size) { +- DEBUGOUT("Buffer not large enough for reply message.\n"); ++ DEBUGOUT("Buffer not large enough for reply message."); + err = TXGBE_ERR_HOST_INTERFACE_COMMAND; + goto rel_out; + } +@@ -284,7 +280,6 @@ s32 txgbe_hic_set_drv_ver(struct txgbe_hw *hw, u8 maj, u8 min, + int i; + s32 ret_val = 0; + +- DEBUGFUNC("txgbe_hic_set_drv_ver"); + UNREFERENCED_PARAMETER(len, driver_ver); + + fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO; +@@ -337,8 +332,6 @@ txgbe_hic_reset(struct txgbe_hw *hw) + int i; + s32 err = 0; + +- DEBUGFUNC("\n"); +- + reset_cmd.hdr.cmd = FW_RESET_CMD; + reset_cmd.hdr.buf_len = FW_RESET_LEN; + reset_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED; +diff --git a/dpdk/drivers/net/txgbe/base/txgbe_osdep.h b/dpdk/drivers/net/txgbe/base/txgbe_osdep.h +index 
e18e400af3..4a0dd385b3 100644 +--- a/dpdk/drivers/net/txgbe/base/txgbe_osdep.h ++++ b/dpdk/drivers/net/txgbe/base/txgbe_osdep.h +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #include "../txgbe_logs.h" + +diff --git a/dpdk/drivers/net/txgbe/base/txgbe_phy.c b/dpdk/drivers/net/txgbe/base/txgbe_phy.c +index bdd6bf780e..ce6d580636 100644 +--- a/dpdk/drivers/net/txgbe/base/txgbe_phy.c ++++ b/dpdk/drivers/net/txgbe/base/txgbe_phy.c +@@ -7,7 +7,7 @@ + #include "txgbe_mng.h" + #include "txgbe_phy.h" + +-static void txgbe_i2c_start(struct txgbe_hw *hw); ++static void txgbe_i2c_start(struct txgbe_hw *hw, u8 dev_addr); + static void txgbe_i2c_stop(struct txgbe_hw *hw); + + /** +@@ -22,7 +22,7 @@ static bool txgbe_identify_extphy(struct txgbe_hw *hw) + u16 phy_addr = 0; + + if (!txgbe_validate_phy_addr(hw, phy_addr)) { +- DEBUGOUT("Unable to validate PHY address 0x%04X\n", ++ DEBUGOUT("Unable to validate PHY address 0x%04X", + phy_addr); + return false; + } +@@ -87,8 +87,6 @@ s32 txgbe_identify_phy(struct txgbe_hw *hw) + { + s32 err = TXGBE_ERR_PHY_ADDR_INVALID; + +- DEBUGFUNC("txgbe_identify_phy"); +- + txgbe_read_phy_if(hw); + + if (hw->phy.type != txgbe_phy_unknown) +@@ -124,11 +122,9 @@ s32 txgbe_check_reset_blocked(struct txgbe_hw *hw) + { + u32 mmngc; + +- DEBUGFUNC("txgbe_check_reset_blocked"); +- + mmngc = rd32(hw, TXGBE_STAT); + if (mmngc & TXGBE_STAT_MNGVETO) { +- DEBUGOUT("MNG_VETO bit detected.\n"); ++ DEBUGOUT("MNG_VETO bit detected."); + return true; + } + +@@ -146,8 +142,6 @@ bool txgbe_validate_phy_addr(struct txgbe_hw *hw, u32 phy_addr) + u16 phy_id = 0; + bool valid = false; + +- DEBUGFUNC("txgbe_validate_phy_addr"); +- + hw->phy.addr = phy_addr; + hw->phy.read_reg(hw, TXGBE_MD_PHY_ID_HIGH, + TXGBE_MD_DEV_PMA_PMD, &phy_id); +@@ -155,7 +149,7 @@ bool txgbe_validate_phy_addr(struct txgbe_hw *hw, u32 phy_addr) + if (phy_id != 0xFFFF && phy_id != 0x0) + valid = true; + +- DEBUGOUT("PHY ID HIGH is 0x%04X\n", phy_id); ++ DEBUGOUT("PHY ID HIGH is 0x%04X", phy_id); + + return valid; + } +@@ -171,8 +165,6 @@ s32 txgbe_get_phy_id(struct txgbe_hw *hw) + u16 phy_id_high = 0; + u16 phy_id_low = 0; + +- DEBUGFUNC("txgbe_get_phy_id"); +- + err = hw->phy.read_reg(hw, TXGBE_MD_PHY_ID_HIGH, + TXGBE_MD_DEV_PMA_PMD, + &phy_id_high); +@@ -185,7 +177,7 @@ s32 txgbe_get_phy_id(struct txgbe_hw *hw) + hw->phy.id |= (u32)(phy_id_low & TXGBE_PHY_REVISION_MASK); + hw->phy.revision = (u32)(phy_id_low & ~TXGBE_PHY_REVISION_MASK); + } +- DEBUGOUT("PHY_ID_HIGH 0x%04X, PHY_ID_LOW 0x%04X\n", ++ DEBUGOUT("PHY_ID_HIGH 0x%04X, PHY_ID_LOW 0x%04X", + phy_id_high, phy_id_low); + + return err; +@@ -200,8 +192,6 @@ enum txgbe_phy_type txgbe_get_phy_type_from_id(u32 phy_id) + { + enum txgbe_phy_type phy_type; + +- DEBUGFUNC("txgbe_get_phy_type_from_id"); +- + switch (phy_id) { + case TXGBE_PHYID_TN1010: + phy_type = txgbe_phy_tn; +@@ -259,7 +249,7 @@ txgbe_reset_extphy(struct txgbe_hw *hw) + + if (ctrl & TXGBE_MD_PORT_CTRL_RESET) { + err = TXGBE_ERR_RESET_FAILED; +- DEBUGOUT("PHY reset polling failed to complete.\n"); ++ DEBUGOUT("PHY reset polling failed to complete."); + } + + return err; +@@ -273,8 +263,6 @@ s32 txgbe_reset_phy(struct txgbe_hw *hw) + { + s32 err = 0; + +- DEBUGFUNC("txgbe_reset_phy"); +- + if (hw->phy.type == txgbe_phy_unknown) + err = txgbe_identify_phy(hw); + +@@ -330,7 +318,7 @@ s32 txgbe_read_phy_reg_mdi(struct txgbe_hw *hw, u32 reg_addr, u32 device_type, + */ + if (!po32m(hw, TXGBE_MDIOSCD, TXGBE_MDIOSCD_BUSY, + 0, NULL, 100, 100)) { +- DEBUGOUT("PHY address command did not 
complete\n"); ++ DEBUGOUT("PHY address command did not complete"); + return TXGBE_ERR_PHY; + } + +@@ -354,8 +342,6 @@ s32 txgbe_read_phy_reg(struct txgbe_hw *hw, u32 reg_addr, + s32 err; + u32 gssr = hw->phy.phy_semaphore_mask; + +- DEBUGFUNC("txgbe_read_phy_reg"); +- + if (hw->mac.acquire_swfw_sync(hw, gssr)) + return TXGBE_ERR_SWFW_SYNC; + +@@ -393,7 +379,7 @@ s32 txgbe_write_phy_reg_mdi(struct txgbe_hw *hw, u32 reg_addr, + /* wait for completion */ + if (!po32m(hw, TXGBE_MDIOSCD, TXGBE_MDIOSCD_BUSY, + 0, NULL, 100, 100)) { +- TLOG_DEBUG("PHY write cmd didn't complete\n"); ++ DEBUGOUT("PHY write cmd didn't complete"); + return -TERR_PHY; + } + +@@ -414,8 +400,6 @@ s32 txgbe_write_phy_reg(struct txgbe_hw *hw, u32 reg_addr, + s32 err; + u32 gssr = hw->phy.phy_semaphore_mask; + +- DEBUGFUNC("txgbe_write_phy_reg"); +- + if (hw->mac.acquire_swfw_sync(hw, gssr)) + err = TXGBE_ERR_SWFW_SYNC; + +@@ -439,8 +423,6 @@ s32 txgbe_setup_phy_link(struct txgbe_hw *hw) + bool autoneg = false; + u32 speed; + +- DEBUGFUNC("txgbe_setup_phy_link"); +- + txgbe_get_copper_link_capabilities(hw, &speed, &autoneg); + + /* Set or unset auto-negotiation 10G advertisement */ +@@ -526,8 +508,6 @@ s32 txgbe_setup_phy_link_speed(struct txgbe_hw *hw, + { + UNREFERENCED_PARAMETER(autoneg_wait_to_complete); + +- DEBUGFUNC("txgbe_setup_phy_link_speed"); +- + /* + * Clear autoneg_advertised and set new values based on input link + * speed. +@@ -598,8 +578,6 @@ s32 txgbe_get_copper_link_capabilities(struct txgbe_hw *hw, + { + s32 err = 0; + +- DEBUGFUNC("txgbe_get_copper_link_capabilities"); +- + *autoneg = true; + if (!hw->phy.speeds_supported) + err = txgbe_get_copper_speeds_supported(hw); +@@ -627,8 +605,6 @@ s32 txgbe_check_phy_link_tnx(struct txgbe_hw *hw, u32 *speed, + u16 phy_speed = 0; + u16 phy_data = 0; + +- DEBUGFUNC("txgbe_check_phy_link_tnx"); +- + /* Initialize speed and link to default case */ + *link_up = false; + *speed = TXGBE_LINK_SPEED_10GB_FULL; +@@ -672,8 +648,6 @@ s32 txgbe_setup_phy_link_tnx(struct txgbe_hw *hw) + bool autoneg = false; + u32 speed; + +- DEBUGFUNC("txgbe_setup_phy_link_tnx"); +- + txgbe_get_copper_link_capabilities(hw, &speed, &autoneg); + + if (speed & TXGBE_LINK_SPEED_10GB_FULL) { +@@ -747,8 +721,6 @@ s32 txgbe_identify_module(struct txgbe_hw *hw) + { + s32 err = TXGBE_ERR_SFP_NOT_PRESENT; + +- DEBUGFUNC("txgbe_identify_module"); +- + switch (hw->phy.media_type) { + case txgbe_media_type_fiber: + err = txgbe_identify_sfp_module(hw); +@@ -786,8 +758,6 @@ s32 txgbe_identify_sfp_module(struct txgbe_hw *hw) + u8 cable_spec = 0; + u16 enforce_sfp = 0; + +- DEBUGFUNC("txgbe_identify_sfp_module"); +- + if (hw->phy.media_type != txgbe_media_type_fiber) { + hw->phy.sfp_type = txgbe_sfp_type_not_present; + return TXGBE_ERR_SFP_NOT_PRESENT; +@@ -967,7 +937,7 @@ s32 txgbe_identify_sfp_module(struct txgbe_hw *hw) + hw->phy.sfp_type == txgbe_sfp_type_1g_lx_core1 || + hw->phy.sfp_type == txgbe_sfp_type_1g_sx_core0 || + hw->phy.sfp_type == txgbe_sfp_type_1g_sx_core1)) { +- DEBUGOUT("SFP+ module not supported\n"); ++ DEBUGOUT("SFP+ module not supported"); + hw->phy.type = txgbe_phy_sfp_unsupported; + return TXGBE_ERR_SFP_NOT_SUPPORTED; + } +@@ -996,8 +966,6 @@ s32 txgbe_identify_qsfp_module(struct txgbe_hw *hw) + u8 device_tech = 0; + bool active_cable = false; + +- DEBUGFUNC("txgbe_identify_qsfp_module"); +- + if (hw->phy.media_type != txgbe_media_type_fiber_qsfp) { + hw->phy.sfp_type = txgbe_sfp_type_not_present; + err = TXGBE_ERR_SFP_NOT_PRESENT; +@@ -1140,10 +1108,10 @@ s32 
txgbe_identify_qsfp_module(struct txgbe_hw *hw) + if (hw->allow_unsupported_sfp) { + DEBUGOUT("WARNING: Wangxun (R) Network Connections are quality tested using Wangxun (R) Ethernet Optics. " + "Using untested modules is not supported and may cause unstable operation or damage to the module or the adapter. " +- "Wangxun Corporation is not responsible for any harm caused by using untested modules.\n"); ++ "Wangxun Corporation is not responsible for any harm caused by using untested modules."); + err = 0; + } else { +- DEBUGOUT("QSFP module not supported\n"); ++ DEBUGOUT("QSFP module not supported"); + hw->phy.type = + txgbe_phy_sfp_unsupported; + err = TXGBE_ERR_SFP_NOT_SUPPORTED; +@@ -1169,8 +1137,6 @@ s32 txgbe_identify_qsfp_module(struct txgbe_hw *hw) + s32 txgbe_read_i2c_eeprom(struct txgbe_hw *hw, u8 byte_offset, + u8 *eeprom_data) + { +- DEBUGFUNC("txgbe_read_i2c_eeprom"); +- + return hw->phy.read_i2c_byte(hw, byte_offset, + TXGBE_I2C_EEPROM_DEV_ADDR, + eeprom_data); +@@ -1203,8 +1169,6 @@ s32 txgbe_read_i2c_sff8472(struct txgbe_hw *hw, u8 byte_offset, + s32 txgbe_write_i2c_eeprom(struct txgbe_hw *hw, u8 byte_offset, + u8 eeprom_data) + { +- DEBUGFUNC("txgbe_write_i2c_eeprom"); +- + return hw->phy.write_i2c_byte(hw, byte_offset, + TXGBE_I2C_EEPROM_DEV_ADDR, + eeprom_data); +@@ -1223,11 +1187,7 @@ s32 txgbe_write_i2c_eeprom(struct txgbe_hw *hw, u8 byte_offset, + s32 txgbe_read_i2c_byte_unlocked(struct txgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data) + { +- UNREFERENCED_PARAMETER(dev_addr); +- +- DEBUGFUNC("txgbe_read_i2c_byte"); +- +- txgbe_i2c_start(hw); ++ txgbe_i2c_start(hw, dev_addr); + + /* wait tx empty */ + if (!po32m(hw, TXGBE_I2CICR, TXGBE_I2CICR_TXEMPTY, +@@ -1289,11 +1249,7 @@ s32 txgbe_read_i2c_byte(struct txgbe_hw *hw, u8 byte_offset, + s32 txgbe_write_i2c_byte_unlocked(struct txgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data) + { +- UNREFERENCED_PARAMETER(dev_addr); +- +- DEBUGFUNC("txgbe_write_i2c_byte"); +- +- txgbe_i2c_start(hw); ++ txgbe_i2c_start(hw, dev_addr); + + /* wait tx empty */ + if (!po32m(hw, TXGBE_I2CICR, TXGBE_I2CICR_TXEMPTY, +@@ -1344,10 +1300,8 @@ s32 txgbe_write_i2c_byte(struct txgbe_hw *hw, u8 byte_offset, + * + * Sets I2C start condition (High -> Low on SDA while SCL is High) + **/ +-static void txgbe_i2c_start(struct txgbe_hw *hw) ++static void txgbe_i2c_start(struct txgbe_hw *hw, u8 dev_addr) + { +- DEBUGFUNC("txgbe_i2c_start"); +- + wr32(hw, TXGBE_I2CENA, 0); + + wr32(hw, TXGBE_I2CCON, +@@ -1355,9 +1309,9 @@ static void txgbe_i2c_start(struct txgbe_hw *hw) + TXGBE_I2CCON_SPEED(1) | + TXGBE_I2CCON_RESTART | + TXGBE_I2CCON_SDIA)); +- wr32(hw, TXGBE_I2CTAR, TXGBE_I2C_SLAVEADDR); +- wr32(hw, TXGBE_I2CSSSCLHCNT, 600); +- wr32(hw, TXGBE_I2CSSSCLLCNT, 600); ++ wr32(hw, TXGBE_I2CTAR, dev_addr >> 1); ++ wr32(hw, TXGBE_I2CSSSCLHCNT, 200); ++ wr32(hw, TXGBE_I2CSSSCLLCNT, 200); + wr32(hw, TXGBE_I2CRXTL, 0); /* 1byte for rx full signal */ + wr32(hw, TXGBE_I2CTXTL, 4); + wr32(hw, TXGBE_I2CSCLTMOUT, 0xFFFFFF); +@@ -1375,12 +1329,10 @@ static void txgbe_i2c_start(struct txgbe_hw *hw) + **/ + static void txgbe_i2c_stop(struct txgbe_hw *hw) + { +- DEBUGFUNC("txgbe_i2c_stop"); +- + /* wait for completion */ + if (!po32m(hw, TXGBE_I2CSTAT, TXGBE_I2CSTAT_MST, + 0, NULL, 100, 100)) { +- DEBUGFUNC("i2c stop timeout."); ++ DEBUGOUT("i2c stop timeout."); + } + + wr32(hw, TXGBE_I2CENA, 0); +diff --git a/dpdk/drivers/net/txgbe/base/txgbe_regs.h b/dpdk/drivers/net/txgbe/base/txgbe_regs.h +index 052609e3c1..2802e59e16 100644 +--- 
a/dpdk/drivers/net/txgbe/base/txgbe_regs.h ++++ b/dpdk/drivers/net/txgbe/base/txgbe_regs.h +@@ -1071,30 +1071,30 @@ enum txgbe_5tuple_protocol { + #define TXGBE_MACRXERRCRCH 0x01192C + #define TXGBE_MACRXERRLENL 0x011978 + #define TXGBE_MACRXERRLENH 0x01197C +-#define TXGBE_MACRX1TO64L 0x001940 +-#define TXGBE_MACRX1TO64H 0x001944 +-#define TXGBE_MACRX65TO127L 0x001948 +-#define TXGBE_MACRX65TO127H 0x00194C +-#define TXGBE_MACRX128TO255L 0x001950 +-#define TXGBE_MACRX128TO255H 0x001954 +-#define TXGBE_MACRX256TO511L 0x001958 +-#define TXGBE_MACRX256TO511H 0x00195C +-#define TXGBE_MACRX512TO1023L 0x001960 +-#define TXGBE_MACRX512TO1023H 0x001964 +-#define TXGBE_MACRX1024TOMAXL 0x001968 +-#define TXGBE_MACRX1024TOMAXH 0x00196C +-#define TXGBE_MACTX1TO64L 0x001834 +-#define TXGBE_MACTX1TO64H 0x001838 +-#define TXGBE_MACTX65TO127L 0x00183C +-#define TXGBE_MACTX65TO127H 0x001840 +-#define TXGBE_MACTX128TO255L 0x001844 +-#define TXGBE_MACTX128TO255H 0x001848 +-#define TXGBE_MACTX256TO511L 0x00184C +-#define TXGBE_MACTX256TO511H 0x001850 +-#define TXGBE_MACTX512TO1023L 0x001854 +-#define TXGBE_MACTX512TO1023H 0x001858 +-#define TXGBE_MACTX1024TOMAXL 0x00185C +-#define TXGBE_MACTX1024TOMAXH 0x001860 ++#define TXGBE_MACRX1TO64L 0x011940 ++#define TXGBE_MACRX1TO64H 0x011944 ++#define TXGBE_MACRX65TO127L 0x011948 ++#define TXGBE_MACRX65TO127H 0x01194C ++#define TXGBE_MACRX128TO255L 0x011950 ++#define TXGBE_MACRX128TO255H 0x011954 ++#define TXGBE_MACRX256TO511L 0x011958 ++#define TXGBE_MACRX256TO511H 0x01195C ++#define TXGBE_MACRX512TO1023L 0x011960 ++#define TXGBE_MACRX512TO1023H 0x011964 ++#define TXGBE_MACRX1024TOMAXL 0x011968 ++#define TXGBE_MACRX1024TOMAXH 0x01196C ++#define TXGBE_MACTX1TO64L 0x011834 ++#define TXGBE_MACTX1TO64H 0x011838 ++#define TXGBE_MACTX65TO127L 0x01183C ++#define TXGBE_MACTX65TO127H 0x011840 ++#define TXGBE_MACTX128TO255L 0x011844 ++#define TXGBE_MACTX128TO255H 0x011848 ++#define TXGBE_MACTX256TO511L 0x01184C ++#define TXGBE_MACTX256TO511H 0x011850 ++#define TXGBE_MACTX512TO1023L 0x011854 ++#define TXGBE_MACTX512TO1023H 0x011858 ++#define TXGBE_MACTX1024TOMAXL 0x01185C ++#define TXGBE_MACTX1024TOMAXH 0x011860 + + #define TXGBE_MACRXUNDERSIZE 0x011938 + #define TXGBE_MACRXOVERSIZE 0x01193C +@@ -1218,6 +1218,7 @@ enum txgbe_5tuple_protocol { + #define TXGBE_IVARMISC 0x0004FC + #define TXGBE_IVARMISC_VEC(v) LS(v, 0, 0x7) + #define TXGBE_IVARMISC_VLD MS(7, 0x1) ++#define TXGBE_PX_INTA 0x000110 + #define TXGBE_ICR(i) (0x000120 + (i) * 4) /* 0-1 */ + #define TXGBE_ICR_MASK MS(0, 0xFFFFFFFF) + #define TXGBE_ICS(i) (0x000130 + (i) * 4) /* 0-1 */ +@@ -1816,8 +1817,13 @@ po32m(struct txgbe_hw *hw, u32 reg, u32 mask, u32 expect, u32 *actual, + } + + do { +- all |= rd32(hw, reg); +- value |= mask & all; ++ if (expect != 0) { ++ all |= rd32(hw, reg); ++ value |= mask & all; ++ } else { ++ all = rd32(hw, reg); ++ value = mask & all; ++ } + if (value == expect) + break; + +@@ -1845,7 +1851,7 @@ po32m(struct txgbe_hw *hw, u32 reg, u32 mask, u32 expect, u32 *actual, + + #define wr32w(hw, reg, val, mask, slice) do { \ + wr32((hw), reg, val); \ +- po32m((hw), reg, mask, mask, NULL, 5, slice); \ ++ po32m((hw), reg, mask, 0, NULL, 5, slice); \ + } while (0) + + #define TXGBE_XPCS_IDAADDR 0x13000 +diff --git a/dpdk/drivers/net/txgbe/base/txgbe_type.h b/dpdk/drivers/net/txgbe/base/txgbe_type.h +index b322a2cac8..4e9a7deb12 100644 +--- a/dpdk/drivers/net/txgbe/base/txgbe_type.h ++++ b/dpdk/drivers/net/txgbe/base/txgbe_type.h +@@ -284,6 +284,7 @@ struct txgbe_hw_stats { + u64 rx_management_packets; 
+ u64 tx_management_packets; + u64 rx_management_dropped; ++ u64 rx_dma_drop; + u64 rx_drop_packets; + + /* Basic Error */ +diff --git a/dpdk/drivers/net/txgbe/txgbe_ethdev.c b/dpdk/drivers/net/txgbe/txgbe_ethdev.c +index f8dffe1f12..1a2c9ff976 100644 +--- a/dpdk/drivers/net/txgbe/txgbe_ethdev.c ++++ b/dpdk/drivers/net/txgbe/txgbe_ethdev.c +@@ -102,7 +102,8 @@ static void txgbe_dev_link_status_print(struct rte_eth_dev *dev); + static int txgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev, uint8_t on); + static int txgbe_dev_macsec_interrupt_setup(struct rte_eth_dev *dev); + static int txgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev); +-static int txgbe_dev_interrupt_get_status(struct rte_eth_dev *dev); ++static int txgbe_dev_interrupt_get_status(struct rte_eth_dev *dev, ++ struct rte_intr_handle *handle); + static int txgbe_dev_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *handle); + static void txgbe_dev_interrupt_handler(void *param); +@@ -365,7 +366,7 @@ txgbe_dev_queue_stats_mapping_set(struct rte_eth_dev *eth_dev, + if (hw->mac.type != txgbe_mac_raptor) + return -ENOSYS; + +- if (stat_idx & !QMAP_FIELD_RESERVED_BITS_MASK) ++ if (stat_idx & ~QMAP_FIELD_RESERVED_BITS_MASK) + return -EIO; + + PMD_INIT_LOG(DEBUG, "Setting port %d, %s queue_id %d to stat index %d", +@@ -977,7 +978,6 @@ txgbe_vlan_hw_extend_disable(struct rte_eth_dev *dev) + + ctrl = rd32(hw, TXGBE_PORTCTL); + ctrl &= ~TXGBE_PORTCTL_VLANEXT; +- ctrl &= ~TXGBE_PORTCTL_QINQ; + wr32(hw, TXGBE_PORTCTL, ctrl); + } + +@@ -985,17 +985,38 @@ static void + txgbe_vlan_hw_extend_enable(struct rte_eth_dev *dev) + { + struct txgbe_hw *hw = TXGBE_DEV_HW(dev); +- struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; +- struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode; + uint32_t ctrl; + + PMD_INIT_FUNC_TRACE(); + + ctrl = rd32(hw, TXGBE_PORTCTL); + ctrl |= TXGBE_PORTCTL_VLANEXT; +- if (rxmode->offloads & DEV_RX_OFFLOAD_QINQ_STRIP || +- txmode->offloads & DEV_TX_OFFLOAD_QINQ_INSERT) +- ctrl |= TXGBE_PORTCTL_QINQ; ++ wr32(hw, TXGBE_PORTCTL, ctrl); ++} ++ ++static void ++txgbe_qinq_hw_strip_disable(struct rte_eth_dev *dev) ++{ ++ struct txgbe_hw *hw = TXGBE_DEV_HW(dev); ++ uint32_t ctrl; ++ ++ PMD_INIT_FUNC_TRACE(); ++ ++ ctrl = rd32(hw, TXGBE_PORTCTL); ++ ctrl &= ~TXGBE_PORTCTL_QINQ; ++ wr32(hw, TXGBE_PORTCTL, ctrl); ++} ++ ++static void ++txgbe_qinq_hw_strip_enable(struct rte_eth_dev *dev) ++{ ++ struct txgbe_hw *hw = TXGBE_DEV_HW(dev); ++ uint32_t ctrl; ++ ++ PMD_INIT_FUNC_TRACE(); ++ ++ ctrl = rd32(hw, TXGBE_PORTCTL); ++ ctrl |= TXGBE_PORTCTL_QINQ | TXGBE_PORTCTL_VLANEXT; + wr32(hw, TXGBE_PORTCTL, ctrl); + } + +@@ -1062,6 +1083,13 @@ txgbe_vlan_offload_config(struct rte_eth_dev *dev, int mask) + txgbe_vlan_hw_extend_disable(dev); + } + ++ if (mask & ETH_QINQ_STRIP_MASK) { ++ if (rxmode->offloads & DEV_RX_OFFLOAD_QINQ_STRIP) ++ txgbe_qinq_hw_strip_enable(dev); ++ else ++ txgbe_qinq_hw_strip_disable(dev); ++ } ++ + return 0; + } + +@@ -1452,7 +1480,7 @@ txgbe_dev_start(struct rte_eth_dev *dev) + } + } + +- /* confiugre msix for sleep until rx interrupt */ ++ /* configure msix for sleep until rx interrupt */ + txgbe_configure_msix(dev); + + /* initialize transmission unit */ +@@ -1881,6 +1909,7 @@ txgbe_read_stats_registers(struct txgbe_hw *hw, + + hw_stats->rx_bytes += rd64(hw, TXGBE_DMARXOCTL); + hw_stats->tx_bytes += rd64(hw, TXGBE_DMATXOCTL); ++ hw_stats->rx_dma_drop += rd32(hw, TXGBE_DMARXDROP); + hw_stats->rx_drop_packets += rd32(hw, TXGBE_PBRXDROP); + + /* MAC Stats */ +@@ -2029,7 
+2058,8 @@ txgbe_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + } + + /* Rx Errors */ +- stats->imissed = hw_stats->rx_total_missed_packets; ++ stats->imissed = hw_stats->rx_total_missed_packets + ++ hw_stats->rx_dma_drop; + stats->ierrors = hw_stats->rx_crc_errors + + hw_stats->rx_mac_short_packet_dropped + + hw_stats->rx_length_errors + +@@ -2311,9 +2341,11 @@ txgbe_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) + + etrack_id = (eeprom_verh << 16) | eeprom_verl; + ret = snprintf(fw_version, fw_size, "0x%08x", etrack_id); ++ if (ret < 0) ++ return -EINVAL; + + ret += 1; /* add the size of '\0' */ +- if (fw_size < (u32)ret) ++ if (fw_size < (size_t)ret) + return ret; + else + return 0; +@@ -2638,12 +2670,17 @@ txgbe_dev_macsec_interrupt_setup(struct rte_eth_dev *dev) + * - On failure, a negative value. + */ + static int +-txgbe_dev_interrupt_get_status(struct rte_eth_dev *dev) ++txgbe_dev_interrupt_get_status(struct rte_eth_dev *dev, ++ struct rte_intr_handle *intr_handle) + { + uint32_t eicr; + struct txgbe_hw *hw = TXGBE_DEV_HW(dev); + struct txgbe_interrupt *intr = TXGBE_DEV_INTR(dev); + ++ if (intr_handle->type != RTE_INTR_HANDLE_UIO && ++ intr_handle->type != RTE_INTR_HANDLE_VFIO_MSIX) ++ wr32(hw, TXGBE_PX_INTA, 1); ++ + /* clear all cause mask */ + txgbe_disable_intr(hw); + +@@ -2846,7 +2883,7 @@ txgbe_dev_interrupt_handler(void *param) + { + struct rte_eth_dev *dev = (struct rte_eth_dev *)param; + +- txgbe_dev_interrupt_get_status(dev); ++ txgbe_dev_interrupt_get_status(dev, dev->intr_handle); + txgbe_dev_interrupt_action(dev, dev->intr_handle); + } + +@@ -3385,7 +3422,7 @@ txgbe_set_ivar_map(struct txgbe_hw *hw, int8_t direction, + wr32(hw, TXGBE_IVARMISC, tmp); + } else { + /* rx or tx causes */ +- /* Workround for ICR lost */ ++ /* Workaround for ICR lost */ + idx = ((16 * (queue & 1)) + (8 * direction)); + tmp = rd32(hw, TXGBE_IVAR(queue >> 1)); + tmp &= ~(0xFF << idx); +@@ -3684,7 +3721,7 @@ txgbe_timesync_disable(struct rte_eth_dev *dev) + /* Disable L2 filtering of IEEE1588/802.1AS Ethernet frame types. */ + wr32(hw, TXGBE_ETFLT(TXGBE_ETF_ID_1588), 0); + +- /* Stop incrementating the System Time registers. */ ++ /* Stop incrementing the System Time registers. */ + wr32(hw, TXGBE_TSTIMEINC, 0); + + return 0; +diff --git a/dpdk/drivers/net/txgbe/txgbe_logs.h b/dpdk/drivers/net/txgbe/txgbe_logs.h +index f44ca06ee2..337e2cb5e8 100644 +--- a/dpdk/drivers/net/txgbe/txgbe_logs.h ++++ b/dpdk/drivers/net/txgbe/txgbe_logs.h +@@ -45,10 +45,7 @@ extern int txgbe_logtype_tx_free; + #define PMD_TX_FREE_LOG(level, fmt, args...) do { } while (0) + #endif + +-#define TLOG_DEBUG(fmt, args...) PMD_DRV_LOG(DEBUG, fmt, ##args) +- +-#define DEBUGOUT(fmt, args...) TLOG_DEBUG(fmt, ##args) +-#define PMD_INIT_FUNC_TRACE() TLOG_DEBUG(" >>") +-#define DEBUGFUNC(fmt) TLOG_DEBUG(fmt) ++#define DEBUGOUT(fmt, args...) 
PMD_DRV_LOG(DEBUG, fmt, ##args) ++#define PMD_INIT_FUNC_TRACE() PMD_DRV_LOG(DEBUG, ">>") + + #endif /* _TXGBE_LOGS_H_ */ +diff --git a/dpdk/drivers/net/txgbe/txgbe_pf.c b/dpdk/drivers/net/txgbe/txgbe_pf.c +index 151f2c7a45..20e19b47d8 100644 +--- a/dpdk/drivers/net/txgbe/txgbe_pf.c ++++ b/dpdk/drivers/net/txgbe/txgbe_pf.c +@@ -109,7 +109,7 @@ int txgbe_pf_host_init(struct rte_eth_dev *eth_dev) + nb_queue = 4; + RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS; + } else { +- nb_queue = 8; ++ nb_queue = 4; + RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_16_POOLS; + } + +@@ -237,7 +237,7 @@ int txgbe_pf_host_configure(struct rte_eth_dev *eth_dev) + + wr32(hw, TXGBE_PSRCTL, TXGBE_PSRCTL_LBENA); + +- /* clear VMDq map to perment rar 0 */ ++ /* clear VMDq map to permanent rar 0 */ + hw->mac.clear_vmdq(hw, 0, BIT_MASK32); + + /* clear VMDq map to scan rar 127 */ +diff --git a/dpdk/drivers/net/txgbe/txgbe_ptypes.c b/dpdk/drivers/net/txgbe/txgbe_ptypes.c +index cd160ebbaf..7009f20821 100644 +--- a/dpdk/drivers/net/txgbe/txgbe_ptypes.c ++++ b/dpdk/drivers/net/txgbe/txgbe_ptypes.c +@@ -50,6 +50,7 @@ + static u32 txgbe_ptype_lookup[TXGBE_PTID_MAX] __rte_cache_aligned = { + /* L2:0-3 L3:4-7 L4:8-11 TUN:12-15 EL2:16-19 EL3:20-23 EL2:24-27 */ + /* L2: ETH */ ++ TPTE(0x10, ETHER, NONE, NONE, NONE, NONE, NONE, NONE), + TPTE(0x11, ETHER, NONE, NONE, NONE, NONE, NONE, NONE), + TPTE(0x12, ETHER_TIMESYNC, NONE, NONE, NONE, NONE, NONE, NONE), + TPTE(0x13, ETHER_FIP, NONE, NONE, NONE, NONE, NONE, NONE), +@@ -67,6 +68,7 @@ static u32 txgbe_ptype_lookup[TXGBE_PTID_MAX] __rte_cache_aligned = { + TPTE(0x1E, ETHER_FILTER, NONE, NONE, NONE, NONE, NONE, NONE), + TPTE(0x1F, ETHER_FILTER, NONE, NONE, NONE, NONE, NONE, NONE), + /* L3: IP */ ++ TPTE(0x20, ETHER, IPV4, NONFRAG, NONE, NONE, NONE, NONE), + TPTE(0x21, ETHER, IPV4, FRAG, NONE, NONE, NONE, NONE), + TPTE(0x22, ETHER, IPV4, NONFRAG, NONE, NONE, NONE, NONE), + TPTE(0x23, ETHER, IPV4, UDP, NONE, NONE, NONE, NONE), +@@ -339,7 +341,7 @@ txgbe_encode_ptype_tunnel(u32 ptype) + break; + case RTE_PTYPE_INNER_L2_ETHER_QINQ: + ptid |= TXGBE_PTID_TUN_EIGMV; +- return ptid; ++ break; + default: + break; + } +diff --git a/dpdk/drivers/net/vhost/rte_eth_vhost.c b/dpdk/drivers/net/vhost/rte_eth_vhost.c +index 5845bb15f3..c994f2429b 100644 +--- a/dpdk/drivers/net/vhost/rte_eth_vhost.c ++++ b/dpdk/drivers/net/vhost/rte_eth_vhost.c +@@ -715,10 +715,11 @@ eth_vhost_install_intr(struct rte_eth_dev *dev) + } + + static void +-update_queuing_status(struct rte_eth_dev *dev) ++update_queuing_status(struct rte_eth_dev *dev, bool wait_queuing) + { + struct pmd_internal *internal = dev->data->dev_private; + struct vhost_queue *vq; ++ struct rte_vhost_vring_state *state; + unsigned int i; + int allow_queuing = 1; + +@@ -729,13 +730,18 @@ update_queuing_status(struct rte_eth_dev *dev) + rte_atomic32_read(&internal->dev_attached) == 0) + allow_queuing = 0; + ++ state = vring_states[dev->data->port_id]; ++ + /* Wait until rx/tx_pkt_burst stops accessing vhost device */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + vq = dev->data->rx_queues[i]; + if (vq == NULL) + continue; +- rte_atomic32_set(&vq->allow_queuing, allow_queuing); +- while (rte_atomic32_read(&vq->while_queuing)) ++ if (allow_queuing && state->cur[vq->virtqueue_id]) ++ rte_atomic32_set(&vq->allow_queuing, 1); ++ else ++ rte_atomic32_set(&vq->allow_queuing, 0); ++ while (wait_queuing && rte_atomic32_read(&vq->while_queuing)) + rte_pause(); + } + +@@ -743,8 +749,11 @@ update_queuing_status(struct rte_eth_dev *dev) + vq = 
dev->data->tx_queues[i]; + if (vq == NULL) + continue; +- rte_atomic32_set(&vq->allow_queuing, allow_queuing); +- while (rte_atomic32_read(&vq->while_queuing)) ++ if (allow_queuing && state->cur[vq->virtqueue_id]) ++ rte_atomic32_set(&vq->allow_queuing, 1); ++ else ++ rte_atomic32_set(&vq->allow_queuing, 0); ++ while (wait_queuing && rte_atomic32_read(&vq->while_queuing)) + rte_pause(); + } + } +@@ -826,7 +835,7 @@ new_device(int vid) + eth_dev->data->dev_link.link_status = ETH_LINK_UP; + + rte_atomic32_set(&internal->dev_attached, 1); +- update_queuing_status(eth_dev); ++ update_queuing_status(eth_dev, false); + + VHOST_LOG(INFO, "Vhost device %d created\n", vid); + +@@ -856,7 +865,7 @@ destroy_device(int vid) + internal = eth_dev->data->dev_private; + + rte_atomic32_set(&internal->dev_attached, 0); +- update_queuing_status(eth_dev); ++ update_queuing_status(eth_dev, true); + + eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; + +@@ -963,6 +972,8 @@ vring_state_changed(int vid, uint16_t vring, int enable) + state->max_vring = RTE_MAX(vring, state->max_vring); + rte_spinlock_unlock(&state->lock); + ++ update_queuing_status(eth_dev, false); ++ + VHOST_LOG(INFO, "vring%u is %s\n", + vring, enable ? "enabled" : "disabled"); + +@@ -1148,7 +1159,7 @@ eth_dev_start(struct rte_eth_dev *eth_dev) + } + + rte_atomic32_set(&internal->started, 1); +- update_queuing_status(eth_dev); ++ update_queuing_status(eth_dev, false); + + return 0; + } +@@ -1160,7 +1171,7 @@ eth_dev_stop(struct rte_eth_dev *dev) + + dev->data->dev_started = 0; + rte_atomic32_set(&internal->started, 0); +- update_queuing_status(dev); ++ update_queuing_status(dev, true); + + return 0; + } +@@ -1593,11 +1604,11 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev) + &open_int, &tso); + if (ret < 0) + goto out_free; ++ } + +- if (tso == 0) { +- disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO4); +- disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO6); +- } ++ if (tso == 0) { ++ disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO4); ++ disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO6); + } + + if (rte_kvargs_count(kvlist, ETH_VHOST_LINEAR_BUF) == 1) { +diff --git a/dpdk/drivers/net/virtio/meson.build b/dpdk/drivers/net/virtio/meson.build +index eaed46373d..eff90489a2 100644 +--- a/dpdk/drivers/net/virtio/meson.build ++++ b/dpdk/drivers/net/virtio/meson.build +@@ -20,18 +20,18 @@ if arch_subdir == 'x86' + c_args: [cflags, '-mavx512f', '-mavx512bw', '-mavx512vl']) + objs += virtio_avx512_lib.extract_objects('virtio_rxtx_packed_avx.c') + if (toolchain == 'gcc' and cc.version().version_compare('>=8.3.0')) +- cflags += '-DVHOST_GCC_UNROLL_PRAGMA' ++ cflags += '-DVIRTIO_GCC_UNROLL_PRAGMA' + elif (toolchain == 'clang' and cc.version().version_compare('>=3.7.0')) +- cflags += '-DVHOST_CLANG_UNROLL_PRAGMA' ++ cflags += '-DVIRTIO_CLANG_UNROLL_PRAGMA' + elif (toolchain == 'icc' and cc.version().version_compare('>=16.0.0')) +- cflags += '-DVHOST_ICC_UNROLL_PRAGMA' ++ cflags += '-DVIRTIO_ICC_UNROLL_PRAGMA' + endif + endif + endif + sources += files('virtio_rxtx_simple_sse.c') + elif arch_subdir == 'ppc' + sources += files('virtio_rxtx_simple_altivec.c') +-elif arch_subdir == 'arm' and host_machine.cpu_family().startswith('aarch64') ++elif arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64') + sources += files('virtio_rxtx_simple_neon.c') + endif + +diff --git a/dpdk/drivers/net/virtio/virtio_ethdev.c b/dpdk/drivers/net/virtio/virtio_ethdev.c +index 6c233b75ba..511735a6bf 100644 +--- a/dpdk/drivers/net/virtio/virtio_ethdev.c ++++ 
b/dpdk/drivers/net/virtio/virtio_ethdev.c +@@ -647,6 +647,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx) + rte_memzone_free(hdr_mz); + rte_memzone_free(mz); + rte_free(vq); ++ hw->vqs[vtpci_queue_idx] = NULL; + + return ret; + } +@@ -867,6 +868,59 @@ virtio_dev_allmulticast_disable(struct rte_eth_dev *dev) + return 0; + } + ++uint16_t ++virtio_rx_mem_pool_buf_size(struct rte_mempool *mp) ++{ ++ return rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM; ++} ++ ++bool ++virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size, ++ bool rx_scatter_enabled, const char **error) ++{ ++ if (!rx_scatter_enabled && max_rx_pkt_len > rx_buf_size) { ++ *error = "Rx scatter is disabled and RxQ mbuf pool object size is too small"; ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool ++virtio_check_scatter_on_all_rx_queues(struct rte_eth_dev *dev, ++ uint16_t frame_size) ++{ ++ struct virtio_hw *hw = dev->data->dev_private; ++ struct virtnet_rx *rxvq; ++ struct virtqueue *vq; ++ unsigned int qidx; ++ uint16_t buf_size; ++ const char *error; ++ ++ if (hw->vqs == NULL) ++ return true; ++ ++ for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) { ++ vq = hw->vqs[2 * qidx + VTNET_SQ_RQ_QUEUE_IDX]; ++ if (vq == NULL) ++ continue; ++ ++ rxvq = &vq->rxq; ++ if (rxvq->mpool == NULL) ++ continue; ++ buf_size = virtio_rx_mem_pool_buf_size(rxvq->mpool); ++ ++ if (!virtio_rx_check_scatter(frame_size, buf_size, ++ hw->rx_ol_scatter, &error)) { ++ PMD_INIT_LOG(ERR, "MTU check for RxQ %u failed: %s", ++ qidx, error); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + #define VLAN_TAG_LEN 4 /* 802.3ac tag (not DMA'd) */ + static int + virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +@@ -884,6 +938,15 @@ virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) + RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len); + return -EINVAL; + } ++ ++ if (!virtio_check_scatter_on_all_rx_queues(dev, frame_size)) { ++ PMD_INIT_LOG(ERR, "MTU vs Rx scatter and Rx buffers check failed"); ++ return -EINVAL; ++ } ++ ++ hw->max_rx_pkt_len = frame_size; ++ dev->data->dev_conf.rxmode.max_rx_pkt_len = hw->max_rx_pkt_len; ++ + return 0; + } + +@@ -1654,13 +1717,15 @@ virtio_configure_intr(struct rte_eth_dev *dev) + } + } + +- /* Re-register callback to update max_intr */ +- rte_intr_callback_unregister(dev->intr_handle, +- virtio_interrupt_handler, +- dev); +- rte_intr_callback_register(dev->intr_handle, +- virtio_interrupt_handler, +- dev); ++ if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) { ++ /* Re-register callback to update max_intr */ ++ rte_intr_callback_unregister(dev->intr_handle, ++ virtio_interrupt_handler, ++ dev); ++ rte_intr_callback_register(dev->intr_handle, ++ virtio_interrupt_handler, ++ dev); ++ } + + /* DO NOT try to remove this! This function will enable msix, or QEMU + * will encounter SIGSEGV when DRIVER_OK is sent. 
+@@ -1680,6 +1745,32 @@ virtio_configure_intr(struct rte_eth_dev *dev) + + return 0; + } ++ ++static void ++virtio_get_speed_duplex(struct rte_eth_dev *eth_dev, ++ struct rte_eth_link *link) ++{ ++ struct virtio_hw *hw = eth_dev->data->dev_private; ++ struct virtio_net_config *config; ++ struct virtio_net_config local_config; ++ ++ config = &local_config; ++ vtpci_read_dev_config(hw, ++ offsetof(struct virtio_net_config, speed), ++ &config->speed, sizeof(config->speed)); ++ vtpci_read_dev_config(hw, ++ offsetof(struct virtio_net_config, duplex), ++ &config->duplex, sizeof(config->duplex)); ++ hw->speed = config->speed; ++ hw->duplex = config->duplex; ++ if (link != NULL) { ++ link->link_duplex = hw->duplex; ++ link->link_speed = hw->speed; ++ } ++ PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d", ++ hw->speed, hw->duplex); ++} ++ + #define DUPLEX_UNKNOWN 0xff + /* reset device and renegotiate features if needed */ + static int +@@ -1738,19 +1829,10 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features) + hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2], + hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]); + +- if (hw->speed == ETH_SPEED_NUM_UNKNOWN) { +- if (vtpci_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX)) { +- config = &local_config; +- vtpci_read_dev_config(hw, +- offsetof(struct virtio_net_config, speed), +- &config->speed, sizeof(config->speed)); +- vtpci_read_dev_config(hw, +- offsetof(struct virtio_net_config, duplex), +- &config->duplex, sizeof(config->duplex)); +- hw->speed = config->speed; +- hw->duplex = config->duplex; +- } +- } ++ hw->get_speed_via_feat = hw->speed == ETH_SPEED_NUM_UNKNOWN && ++ vtpci_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX); ++ if (hw->get_speed_via_feat) ++ virtio_get_speed_duplex(eth_dev, NULL); + if (hw->duplex == DUPLEX_UNKNOWN) + hw->duplex = ETH_LINK_FULL_DUPLEX; + PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d", +@@ -1935,6 +2017,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) + if (ret < 0) + return ret; + hw->speed = speed; ++ hw->duplex = DUPLEX_UNKNOWN; + + /* Allocate memory for storing MAC addresses */ + eth_dev->data->mac_addrs = rte_zmalloc("virtio", +@@ -2239,9 +2322,15 @@ virtio_dev_configure(struct rte_eth_dev *dev) + return ret; + } + +- if (rxmode->max_rx_pkt_len > hw->max_mtu + ether_hdr_len) ++ if ((rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) && ++ (rxmode->max_rx_pkt_len > hw->max_mtu + ether_hdr_len)) + req_features &= ~(1ULL << VIRTIO_NET_F_MTU); + ++ if (rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) ++ hw->max_rx_pkt_len = rxmode->max_rx_pkt_len; ++ else ++ hw->max_rx_pkt_len = ether_hdr_len + dev->data->mtu; ++ + if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM | + DEV_RX_OFFLOAD_TCP_CKSUM)) + req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM); +@@ -2290,6 +2379,8 @@ virtio_dev_configure(struct rte_eth_dev *dev) + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP) + hw->vlan_strip = 1; + ++ hw->rx_ol_scatter = (rx_offloads & DEV_RX_OFFLOAD_SCATTER); ++ + if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) + && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) { + PMD_DRV_LOG(ERR, +@@ -2301,7 +2392,7 @@ virtio_dev_configure(struct rte_eth_dev *dev) + hw->has_rx_offload = rx_offload_enabled(hw); + + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) +- /* Enable vector (0) for Link State Intrerrupt */ ++ /* Enable vector (0) for Link State Interrupt */ + if (VTPCI_OPS(hw)->set_config_irq(hw, 0) == + VIRTIO_MSI_NO_VECTOR) { + PMD_DRV_LOG(ERR, "failed to set config vector"); +@@ -2409,7 +2500,7 @@ virtio_dev_start(struct 
rte_eth_dev *dev) + } + } + +- /* Enable uio/vfio intr/eventfd mapping: althrough we already did that ++ /* Enable uio/vfio intr/eventfd mapping: although we already did that + * in device configure, but it could be unmapped when device is + * stopped. + */ +@@ -2519,6 +2610,35 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev) + PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num); + } + ++static void ++virtio_tx_completed_cleanup(struct rte_eth_dev *dev) ++{ ++ struct virtio_hw *hw = dev->data->dev_private; ++ struct virtqueue *vq; ++ uint32_t qidx; ++ void (*xmit_cleanup)(struct virtqueue *vq, uint16_t nb_used); ++ ++ if (vtpci_packed_queue(hw)) { ++ if (hw->use_vec_tx) ++ xmit_cleanup = &virtio_xmit_cleanup_inorder_packed; ++ else if (vtpci_with_feature(hw, VIRTIO_F_IN_ORDER)) ++ xmit_cleanup = &virtio_xmit_cleanup_inorder_packed; ++ else ++ xmit_cleanup = &virtio_xmit_cleanup_normal_packed; ++ } else { ++ if (hw->use_inorder_tx) ++ xmit_cleanup = &virtio_xmit_cleanup_inorder; ++ else ++ xmit_cleanup = &virtio_xmit_cleanup; ++ } ++ ++ for (qidx = 0; qidx < hw->max_queue_pairs; qidx++) { ++ vq = hw->vqs[2 * qidx + VTNET_SQ_TQ_QUEUE_IDX]; ++ if (vq != NULL) ++ xmit_cleanup(vq, virtqueue_nused(vq)); ++ } ++} ++ + /* + * Stop device: disable interrupt and mark link down + */ +@@ -2537,6 +2657,8 @@ virtio_dev_stop(struct rte_eth_dev *dev) + goto out_unlock; + hw->started = false; + ++ virtio_tx_completed_cleanup(dev); ++ + if (intr_conf->lsc || intr_conf->rxq) { + virtio_intr_disable(dev); + +@@ -2583,11 +2705,15 @@ virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complet + dev->data->port_id); + } else { + link.link_status = ETH_LINK_UP; ++ if (hw->get_speed_via_feat) ++ virtio_get_speed_duplex(dev, &link); + PMD_INIT_LOG(DEBUG, "Port %d is up", + dev->data->port_id); + } + } else { + link.link_status = ETH_LINK_UP; ++ if (hw->get_speed_via_feat) ++ virtio_get_speed_duplex(dev, &link); + } + + return rte_eth_linkstatus_set(dev, &link); +@@ -2631,10 +2757,13 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) + dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; + dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN; + dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS; ++ dev_info->max_mtu = hw->max_mtu; + + host_features = VTPCI_OPS(hw)->get_features(hw); + dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP; + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME; ++ if (host_features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) ++ dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_SCATTER; + if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) { + dev_info->rx_offload_capa |= + DEV_RX_OFFLOAD_TCP_CKSUM | +diff --git a/dpdk/drivers/net/virtio/virtio_ethdev.h b/dpdk/drivers/net/virtio/virtio_ethdev.h +index b7d52d497f..916ff11f7a 100644 +--- a/dpdk/drivers/net/virtio/virtio_ethdev.h ++++ b/dpdk/drivers/net/virtio/virtio_ethdev.h +@@ -120,4 +120,9 @@ void virtio_dev_resume(struct rte_eth_dev *dev); + int virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts, + int nb_pkts); + ++bool virtio_rx_check_scatter(uint16_t max_rx_pkt_len, uint16_t rx_buf_size, ++ bool rx_scatter_enabled, const char **error); ++ ++uint16_t virtio_rx_mem_pool_buf_size(struct rte_mempool *mp); ++ + #endif /* _VIRTIO_ETHDEV_H_ */ +diff --git a/dpdk/drivers/net/virtio/virtio_pci.c b/dpdk/drivers/net/virtio/virtio_pci.c +index d6b950ee69..878a18555d 100644 +--- a/dpdk/drivers/net/virtio/virtio_pci.c ++++ b/dpdk/drivers/net/virtio/virtio_pci.c +@@ -175,7 +175,7 @@ 
legacy_get_isr(struct virtio_hw *hw) + return dst; + } + +-/* Enable one vector (0) for Link State Intrerrupt */ ++/* Enable one vector (0) for Link State Interrupt */ + static uint16_t + legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec) + { +diff --git a/dpdk/drivers/net/virtio/virtio_pci.h b/dpdk/drivers/net/virtio/virtio_pci.h +index ab61e911b8..a1696bce1a 100644 +--- a/dpdk/drivers/net/virtio/virtio_pci.h ++++ b/dpdk/drivers/net/virtio/virtio_pci.h +@@ -259,15 +259,22 @@ struct virtio_hw { + uint8_t use_inorder_rx; + uint8_t use_inorder_tx; + uint8_t weak_barriers; ++ bool rx_ol_scatter; + bool has_tx_offload; + bool has_rx_offload; + uint16_t port_id; + uint8_t mac_addr[RTE_ETHER_ADDR_LEN]; ++ /* ++ * Speed is specified via 'speed' devarg or ++ * negotiated via VIRTIO_NET_F_SPEED_DUPLEX ++ */ ++ bool get_speed_via_feat; + uint32_t notify_off_multiplier; + uint32_t speed; /* link speed in MB */ + uint8_t duplex; + uint8_t *isr; + uint16_t *notify_base; ++ size_t max_rx_pkt_len; + struct virtio_pci_common_cfg *common_cfg; + struct virtio_net_config *dev_cfg; + void *virtio_user_dev; +diff --git a/dpdk/drivers/net/virtio/virtio_rxtx.c b/dpdk/drivers/net/virtio/virtio_rxtx.c +index 77934e8c58..fe6f9942a9 100644 +--- a/dpdk/drivers/net/virtio/virtio_rxtx.c ++++ b/dpdk/drivers/net/virtio/virtio_rxtx.c +@@ -333,13 +333,35 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie, + return 0; + } + ++static inline void ++virtqueue_refill_single_packed(struct virtqueue *vq, ++ struct vring_packed_desc *dp, ++ struct rte_mbuf *cookie) ++{ ++ uint16_t flags = vq->vq_packed.cached_flags; ++ struct virtio_hw *hw = vq->hw; ++ ++ dp->addr = cookie->buf_iova + ++ RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; ++ dp->len = cookie->buf_len - ++ RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; ++ ++ virtqueue_store_flags_packed(dp, flags, ++ hw->weak_barriers); ++ ++ if (++vq->vq_avail_idx >= vq->vq_nentries) { ++ vq->vq_avail_idx -= vq->vq_nentries; ++ vq->vq_packed.cached_flags ^= ++ VRING_PACKED_DESC_F_AVAIL_USED; ++ flags = vq->vq_packed.cached_flags; ++ } ++} ++ + static inline int +-virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq, ++virtqueue_enqueue_recv_refill_packed_init(struct virtqueue *vq, + struct rte_mbuf **cookie, uint16_t num) + { + struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc; +- uint16_t flags = vq->vq_packed.cached_flags; +- struct virtio_hw *hw = vq->hw; + struct vq_desc_extra *dxp; + uint16_t idx; + int i; +@@ -355,24 +377,34 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq, + dxp->cookie = (void *)cookie[i]; + dxp->ndescs = 1; + +- start_dp[idx].addr = VIRTIO_MBUF_ADDR(cookie[i], vq) + +- RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; +- start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM +- + hw->vtnet_hdr_size; ++ virtqueue_refill_single_packed(vq, &start_dp[idx], cookie[i]); ++ } ++ vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num); ++ return 0; ++} + +- vq->vq_desc_head_idx = dxp->next; +- if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) +- vq->vq_desc_tail_idx = vq->vq_desc_head_idx; ++static inline int ++virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq, ++ struct rte_mbuf **cookie, uint16_t num) ++{ ++ struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc; ++ struct vq_desc_extra *dxp; ++ uint16_t idx, did; ++ int i; + +- virtqueue_store_flags_packed(&start_dp[idx], flags, +- hw->weak_barriers); ++ if (unlikely(vq->vq_free_cnt == 0)) ++ return -ENOSPC; ++ if (unlikely(vq->vq_free_cnt < num)) 
++ return -EMSGSIZE; + +- if (++vq->vq_avail_idx >= vq->vq_nentries) { +- vq->vq_avail_idx -= vq->vq_nentries; +- vq->vq_packed.cached_flags ^= +- VRING_PACKED_DESC_F_AVAIL_USED; +- flags = vq->vq_packed.cached_flags; +- } ++ for (i = 0; i < num; i++) { ++ idx = vq->vq_avail_idx; ++ did = start_dp[idx].id; ++ dxp = &vq->vq_descx[did]; ++ dxp->cookie = (void *)cookie[i]; ++ dxp->ndescs = 1; ++ ++ virtqueue_refill_single_packed(vq, &start_dp[idx], cookie[i]); + } + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num); + return 0; +@@ -639,6 +671,8 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, + struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; + struct virtnet_rx *rxvq; + uint16_t rx_free_thresh; ++ uint16_t buf_size; ++ const char *error; + + PMD_INIT_FUNC_TRACE(); + +@@ -647,30 +681,44 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, + return -EINVAL; + } + ++ buf_size = virtio_rx_mem_pool_buf_size(mp); ++ if (!virtio_rx_check_scatter(hw->max_rx_pkt_len, buf_size, ++ hw->rx_ol_scatter, &error)) { ++ PMD_INIT_LOG(ERR, "RxQ %u Rx scatter check failed: %s", ++ queue_idx, error); ++ return -EINVAL; ++ } ++ + rx_free_thresh = rx_conf->rx_free_thresh; + if (rx_free_thresh == 0) + rx_free_thresh = + RTE_MIN(vq->vq_nentries / 4, DEFAULT_RX_FREE_THRESH); + + if (rx_free_thresh & 0x3) { +- RTE_LOG(ERR, PMD, "rx_free_thresh must be multiples of four." +- " (rx_free_thresh=%u port=%u queue=%u)\n", ++ PMD_INIT_LOG(ERR, "rx_free_thresh must be multiples of four." ++ " (rx_free_thresh=%u port=%u queue=%u)", + rx_free_thresh, dev->data->port_id, queue_idx); + return -EINVAL; + } + + if (rx_free_thresh >= vq->vq_nentries) { +- RTE_LOG(ERR, PMD, "rx_free_thresh must be less than the " ++ PMD_INIT_LOG(ERR, "rx_free_thresh must be less than the " + "number of RX entries (%u)." +- " (rx_free_thresh=%u port=%u queue=%u)\n", ++ " (rx_free_thresh=%u port=%u queue=%u)", + vq->vq_nentries, + rx_free_thresh, dev->data->port_id, queue_idx); + return -EINVAL; + } + vq->vq_free_thresh = rx_free_thresh; + +- if (nb_desc == 0 || nb_desc > vq->vq_nentries) ++ /* ++ * For split ring vectorized path descriptors number must be ++ * equal to the ring size. 
++ */ ++ if (nb_desc > vq->vq_nentries || ++ (!vtpci_packed_queue(hw) && hw->use_vec_rx)) { + nb_desc = vq->vq_nentries; ++ } + vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); + + rxvq = &vq->rxq; +@@ -734,10 +782,11 @@ virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx) + if (unlikely(error)) { + for (i = 0; i < free_cnt; i++) + rte_pktmbuf_free(pkts[i]); ++ } else { ++ nbufs += free_cnt; + } + } + +- nbufs += free_cnt; + vq_update_avail_idx(vq); + } + } else { +@@ -748,7 +797,7 @@ virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx) + + /* Enqueue allocated buffers */ + if (vtpci_packed_queue(vq->hw)) +- error = virtqueue_enqueue_recv_refill_packed(vq, ++ error = virtqueue_enqueue_recv_refill_packed_init(vq, + &m, 1); + else + error = virtqueue_enqueue_recv_refill(vq, +@@ -785,7 +834,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, + unsigned int socket_id __rte_unused, + const struct rte_eth_txconf *tx_conf) + { +- uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; ++ uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; + struct virtio_hw *hw = dev->data->dev_private; + struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; + struct virtnet_tx *txvq; +@@ -829,7 +878,7 @@ int + virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev, + uint16_t queue_idx) + { +- uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; ++ uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; + struct virtio_hw *hw = dev->data->dev_private; + struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; + +@@ -933,7 +982,7 @@ virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr) + return -EINVAL; + } + +- /* Update mss lengthes in mbuf */ ++ /* Update mss lengths in mbuf */ + m->tso_segsz = hdr->gso_size; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: +diff --git a/dpdk/drivers/net/virtio/virtio_rxtx_simple_altivec.c b/dpdk/drivers/net/virtio/virtio_rxtx_simple_altivec.c +index a260ebdf57..bcd7d5ee59 100644 +--- a/dpdk/drivers/net/virtio/virtio_rxtx_simple_altivec.c ++++ b/dpdk/drivers/net/virtio/virtio_rxtx_simple_altivec.c +@@ -85,6 +85,12 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) + return 0; + ++ if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { ++ virtio_rxq_rearm_vec(rxvq); ++ if (unlikely(virtqueue_kick_prepare(vq))) ++ virtqueue_notify(vq); ++ } ++ + nb_used = virtqueue_nused(vq); + + rte_compiler_barrier(); +@@ -102,12 +108,6 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + + rte_prefetch0(rused); + +- if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { +- virtio_rxq_rearm_vec(rxvq); +- if (unlikely(virtqueue_kick_prepare(vq))) +- virtqueue_notify(vq); +- } +- + nb_total = nb_used; + ref_rx_pkts = rx_pkts; + for (nb_pkts_received = 0; +diff --git a/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c b/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c +index 12e034dc0a..afe6a25944 100644 +--- a/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c ++++ b/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c +@@ -84,6 +84,12 @@ virtio_recv_pkts_vec(void *rx_queue, + if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) + return 0; + ++ if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { ++ virtio_rxq_rearm_vec(rxvq); ++ if (unlikely(virtqueue_kick_prepare(vq))) ++ virtqueue_notify(vq); ++ } ++ + /* virtqueue_nused has a load-acquire or rte_io_rmb inside */ + 
nb_used = virtqueue_nused(vq); + +@@ -100,12 +106,6 @@ virtio_recv_pkts_vec(void *rx_queue, + + rte_prefetch_non_temporal(rused); + +- if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { +- virtio_rxq_rearm_vec(rxvq); +- if (unlikely(virtqueue_kick_prepare(vq))) +- virtqueue_notify(vq); +- } +- + nb_total = nb_used; + ref_rx_pkts = rx_pkts; + for (nb_pkts_received = 0; +diff --git a/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c b/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c +index 1056e9c20b..e4e85c2b80 100644 +--- a/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c ++++ b/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c +@@ -85,6 +85,12 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) + return 0; + ++ if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { ++ virtio_rxq_rearm_vec(rxvq); ++ if (unlikely(virtqueue_kick_prepare(vq))) ++ virtqueue_notify(vq); ++ } ++ + nb_used = virtqueue_nused(vq); + + if (unlikely(nb_used == 0)) +@@ -100,12 +106,6 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + + rte_prefetch0(rused); + +- if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { +- virtio_rxq_rearm_vec(rxvq); +- if (unlikely(virtqueue_kick_prepare(vq))) +- virtqueue_notify(vq); +- } +- + nb_total = nb_used; + ref_rx_pkts = rx_pkts; + for (nb_pkts_received = 0; +diff --git a/dpdk/drivers/net/virtio/virtio_user/vhost_user.c b/dpdk/drivers/net/virtio/virtio_user/vhost_user.c +index 350eed4182..dba5368456 100644 +--- a/dpdk/drivers/net/virtio/virtio_user/vhost_user.c ++++ b/dpdk/drivers/net/virtio/virtio_user/vhost_user.c +@@ -468,8 +468,10 @@ vhost_user_setup(struct virtio_user_dev *dev) + } + + flag = fcntl(fd, F_GETFD); +- if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) +- PMD_DRV_LOG(WARNING, "fcntl failed, %s", strerror(errno)); ++ if (flag == -1) ++ PMD_DRV_LOG(WARNING, "fcntl get fd failed, %s", strerror(errno)); ++ else if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) ++ PMD_DRV_LOG(WARNING, "fcntl set fd failed, %s", strerror(errno)); + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; +diff --git a/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c b/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c +index 202431ca22..73f6fd7313 100644 +--- a/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c ++++ b/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c +@@ -312,7 +312,7 @@ virtio_user_fill_intr_handle(struct virtio_user_dev *dev) + } + + for (i = 0; i < dev->max_queue_pairs; ++i) +- eth_dev->intr_handle->efds[i] = dev->callfds[i]; ++ eth_dev->intr_handle->efds[i] = dev->callfds[2 * i]; + eth_dev->intr_handle->nb_efd = dev->max_queue_pairs; + eth_dev->intr_handle->max_intr = dev->max_queue_pairs + 1; + eth_dev->intr_handle->type = RTE_INTR_HANDLE_VDEV; +@@ -599,6 +599,12 @@ void + virtio_user_dev_uninit(struct virtio_user_dev *dev) + { + uint32_t i; ++ struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id]; ++ ++ if (eth_dev->intr_handle) { ++ free(eth_dev->intr_handle); ++ eth_dev->intr_handle = NULL; ++ } + + virtio_user_stop_device(dev); + +diff --git a/dpdk/drivers/net/virtio/virtio_user_ethdev.c b/dpdk/drivers/net/virtio/virtio_user_ethdev.c +index 78998427cc..347caeef41 100644 +--- a/dpdk/drivers/net/virtio/virtio_user_ethdev.c ++++ b/dpdk/drivers/net/virtio/virtio_user_ethdev.c +@@ -63,6 +63,32 @@ virtio_user_reset_queues_packed(struct rte_eth_dev *dev) + rte_spinlock_unlock(&hw->state_lock); + } + ++static void 
++virtio_user_delayed_intr_reconfig_handler(void *param) ++{ ++ struct virtio_hw *hw = (struct virtio_hw *)param; ++ struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id]; ++ struct virtio_user_dev *dev = virtio_user_get_dev(hw); ++ ++ PMD_DRV_LOG(DEBUG, "Unregistering intr fd: %d", ++ eth_dev->intr_handle->fd); ++ ++ if (rte_intr_callback_unregister(eth_dev->intr_handle, ++ virtio_interrupt_handler, ++ eth_dev) != 1) ++ PMD_DRV_LOG(ERR, "interrupt unregister failed"); ++ ++ eth_dev->intr_handle->fd = dev->vhostfd; ++ ++ PMD_DRV_LOG(DEBUG, "Registering intr fd: %d", eth_dev->intr_handle->fd); ++ ++ if (rte_intr_callback_register(eth_dev->intr_handle, ++ virtio_interrupt_handler, eth_dev)) ++ PMD_DRV_LOG(ERR, "interrupt register failed"); ++ ++ if (rte_intr_enable(eth_dev->intr_handle) < 0) ++ PMD_DRV_LOG(ERR, "interrupt enable failed"); ++} + + static int + virtio_user_server_reconnect(struct virtio_user_dev *dev) +@@ -148,24 +174,21 @@ virtio_user_server_reconnect(struct virtio_user_dev *dev) + PMD_DRV_LOG(ERR, "interrupt disable failed"); + return -1; + } +- rte_intr_callback_unregister(eth_dev->intr_handle, +- virtio_interrupt_handler, +- eth_dev); +- eth_dev->intr_handle->fd = connectfd; +- rte_intr_callback_register(eth_dev->intr_handle, +- virtio_interrupt_handler, eth_dev); +- +- if (rte_intr_enable(eth_dev->intr_handle) < 0) { +- PMD_DRV_LOG(ERR, "interrupt enable failed"); +- return -1; +- } ++ /* ++ * This function can be called from the interrupt handler, so ++ * we can't unregister interrupt handler here. Setting ++ * alarm to do that later. ++ */ ++ rte_eal_alarm_set(1, ++ virtio_user_delayed_intr_reconfig_handler, ++ (void *)hw); + } + PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeds!"); + return 0; + } + + static void +-virtio_user_delayed_handler(void *param) ++virtio_user_delayed_disconnect_handler(void *param) + { + struct virtio_hw *hw = (struct virtio_hw *)param; + struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id]; +@@ -175,8 +198,14 @@ virtio_user_delayed_handler(void *param) + PMD_DRV_LOG(ERR, "interrupt disable failed"); + return; + } +- rte_intr_callback_unregister(eth_dev->intr_handle, +- virtio_interrupt_handler, eth_dev); ++ ++ PMD_DRV_LOG(DEBUG, "Unregistering intr fd: %d", ++ eth_dev->intr_handle->fd); ++ if (rte_intr_callback_unregister(eth_dev->intr_handle, ++ virtio_interrupt_handler, ++ eth_dev) != 1) ++ PMD_DRV_LOG(ERR, "interrupt unregister failed"); ++ + if (dev->is_server) { + if (dev->vhostfd >= 0) { + close(dev->vhostfd); +@@ -188,8 +217,15 @@ virtio_user_delayed_handler(void *param) + ~(1ULL << VHOST_USER_PROTOCOL_F_STATUS); + } + eth_dev->intr_handle->fd = dev->listenfd; +- rte_intr_callback_register(eth_dev->intr_handle, +- virtio_interrupt_handler, eth_dev); ++ ++ PMD_DRV_LOG(DEBUG, "Registering intr fd: %d", ++ eth_dev->intr_handle->fd); ++ ++ if (rte_intr_callback_register(eth_dev->intr_handle, ++ virtio_interrupt_handler, ++ eth_dev)) ++ PMD_DRV_LOG(ERR, "interrupt register failed"); ++ + if (rte_intr_enable(eth_dev->intr_handle) < 0) { + PMD_DRV_LOG(ERR, "interrupt enable failed"); + return; +@@ -216,15 +252,9 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset, + + if (dev->vhostfd >= 0) { + int r; +- int flags; + +- flags = fcntl(dev->vhostfd, F_GETFL); +- if (fcntl(dev->vhostfd, F_SETFL, +- flags | O_NONBLOCK) == -1) { +- PMD_DRV_LOG(ERR, "error setting O_NONBLOCK flag"); +- return; +- } +- r = recv(dev->vhostfd, buf, 128, MSG_PEEK); ++ r = recv(dev->vhostfd, buf, 128, ++ MSG_PEEK | 
MSG_DONTWAIT); + if (r == 0 || (r < 0 && errno != EAGAIN)) { + dev->net_status &= (~VIRTIO_NET_S_LINK_UP); + PMD_DRV_LOG(ERR, "virtio-user port %u is down", +@@ -235,16 +265,11 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset, + * unregistered here, set an alarm to do it. + */ + rte_eal_alarm_set(1, +- virtio_user_delayed_handler, +- (void *)hw); ++ virtio_user_delayed_disconnect_handler, ++ (void *)hw); + } else { + dev->net_status |= VIRTIO_NET_S_LINK_UP; + } +- if (fcntl(dev->vhostfd, F_SETFL, +- flags & ~O_NONBLOCK) == -1) { +- PMD_DRV_LOG(ERR, "error clearing O_NONBLOCK flag"); +- return; +- } + } else if (dev->is_server) { + dev->net_status &= (~VIRTIO_NET_S_LINK_UP); + if (virtio_user_server_reconnect(dev) >= 0) +@@ -548,7 +573,7 @@ vdpa_dynamic_major_num(void) + { + FILE *fp; + char *line = NULL; +- size_t size; ++ size_t size = 0; + char name[11]; + bool found = false; + uint32_t num; +@@ -568,6 +593,7 @@ vdpa_dynamic_major_num(void) + break; + } + } ++ free(line); + fclose(fp); + return found ? num : UNNAMED_MAJOR; + } +@@ -850,6 +876,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev) + /* previously called by pci probing for physical dev */ + if (eth_virtio_dev_init(eth_dev) < 0) { + PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails"); ++ virtio_user_dev_uninit(hw->virtio_user_dev); + virtio_user_eth_dev_free(eth_dev); + goto end; + } +diff --git a/dpdk/drivers/net/virtio/virtqueue.c b/dpdk/drivers/net/virtio/virtqueue.c +index 2702e120ee..5b2c05649c 100644 +--- a/dpdk/drivers/net/virtio/virtqueue.c ++++ b/dpdk/drivers/net/virtio/virtqueue.c +@@ -13,7 +13,7 @@ + /* + * Two types of mbuf to be cleaned: + * 1) mbuf that has been consumed by backend but not used by virtio. +- * 2) mbuf that hasn't been consued by backend. ++ * 2) mbuf that hasn't been consumed by backend. 
+ */ + struct rte_mbuf * + virtqueue_detach_unused(struct virtqueue *vq) +@@ -186,6 +186,8 @@ virtqueue_txvq_reset_packed(struct virtqueue *vq) + struct vq_desc_extra *dxp; + struct virtnet_tx *txvq; + uint16_t desc_idx; ++ struct virtio_tx_region *txr; ++ struct vring_packed_desc *start_dp; + + vq->vq_used_cons_idx = 0; + vq->vq_desc_head_idx = 0; +@@ -198,6 +200,7 @@ virtqueue_txvq_reset_packed(struct virtqueue *vq) + vq->vq_packed.event_flags_shadow = 0; + + txvq = &vq->txq; ++ txr = txvq->virtio_net_hdr_mz->addr; + memset(txvq->mz->addr, 0, txvq->mz->len); + memset(txvq->virtio_net_hdr_mz->addr, 0, + txvq->virtio_net_hdr_mz->len); +@@ -208,6 +211,17 @@ virtqueue_txvq_reset_packed(struct virtqueue *vq) + rte_pktmbuf_free(dxp->cookie); + dxp->cookie = NULL; + } ++ ++ if (vtpci_with_feature(vq->hw, VIRTIO_RING_F_INDIRECT_DESC)) { ++ /* first indirect descriptor is always the tx header */ ++ start_dp = txr[desc_idx].tx_packed_indir; ++ vring_desc_init_indirect_packed(start_dp, ++ RTE_DIM(txr[desc_idx].tx_packed_indir)); ++ start_dp->addr = txvq->virtio_net_hdr_mem ++ + desc_idx * sizeof(*txr) ++ + offsetof(struct virtio_tx_region, tx_hdr); ++ start_dp->len = vq->hw->vtnet_hdr_size; ++ } + } + + vring_desc_init_packed(vq, size); +diff --git a/dpdk/drivers/net/virtio/virtqueue.h b/dpdk/drivers/net/virtio/virtqueue.h +index 42c4c9882f..8e6fd22412 100644 +--- a/dpdk/drivers/net/virtio/virtqueue.h ++++ b/dpdk/drivers/net/virtio/virtqueue.h +@@ -210,7 +210,7 @@ struct virtio_net_ctrl_mac { + * Control link announce acknowledgement + * + * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that +- * driver has recevied the notification; device would clear the ++ * driver has received the notification; device would clear the + * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives + * this command. + */ +@@ -290,7 +290,7 @@ struct virtqueue { + struct vq_desc_extra vq_descx[0]; + }; + +-/* If multiqueue is provided by host, then we suppport it. */ ++/* If multiqueue is provided by host, then we support it. */ + #define VIRTIO_NET_CTRL_MQ 4 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 +@@ -642,19 +642,25 @@ virtqueue_xmit_offload(struct virtio_net_hdr *hdr, + bool offload) + { + if (offload) { ++ uint16_t o_l23_len = ++ (cookie->ol_flags & PKT_TX_TUNNEL_MASK) ? 
++ cookie->outer_l2_len + cookie->outer_l3_len : 0; ++ + if (cookie->ol_flags & PKT_TX_TCP_SEG) + cookie->ol_flags |= PKT_TX_TCP_CKSUM; + + switch (cookie->ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_UDP_CKSUM: +- hdr->csum_start = cookie->l2_len + cookie->l3_len; ++ hdr->csum_start = o_l23_len + ++ cookie->l2_len + cookie->l3_len; + hdr->csum_offset = offsetof(struct rte_udp_hdr, + dgram_cksum); + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + break; + + case PKT_TX_TCP_CKSUM: +- hdr->csum_start = cookie->l2_len + cookie->l3_len; ++ hdr->csum_start = o_l23_len + ++ cookie->l2_len + cookie->l3_len; + hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum); + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + break; +@@ -673,6 +679,7 @@ virtqueue_xmit_offload(struct virtio_net_hdr *hdr, + VIRTIO_NET_HDR_GSO_TCPV4; + hdr->gso_size = cookie->tso_segsz; + hdr->hdr_len = ++ o_l23_len + + cookie->l2_len + + cookie->l3_len + + cookie->l4_len; +@@ -735,6 +742,9 @@ virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie, + RTE_PTR_DIFF(&txr[idx].tx_packed_indir, txr); + start_dp[idx].len = (seg_num + 1) * + sizeof(struct vring_packed_desc); ++ /* Packed descriptor id needs to be restored when inorder. */ ++ if (in_order) ++ start_dp[idx].id = idx; + /* reset flags for indirect desc */ + head_flags = VRING_DESC_F_INDIRECT; + head_flags |= vq->vq_packed.cached_flags; +@@ -828,25 +838,26 @@ vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id) + } + + static void +-virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num) ++virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, uint16_t num) + { + uint16_t used_idx, id, curr_id, free_cnt = 0; + uint16_t size = vq->vq_nentries; + struct vring_packed_desc *desc = vq->vq_packed.ring.desc; + struct vq_desc_extra *dxp; ++ int nb = num; + + used_idx = vq->vq_used_cons_idx; + /* desc_is_used has a load-acquire or rte_io_rmb inside + * and wait for used desc in virtqueue. + */ +- while (num > 0 && desc_is_used(&desc[used_idx], vq)) { ++ while (nb > 0 && desc_is_used(&desc[used_idx], vq)) { + id = desc[used_idx].id; + do { + curr_id = used_idx; + dxp = &vq->vq_descx[used_idx]; + used_idx += dxp->ndescs; + free_cnt += dxp->ndescs; +- num -= dxp->ndescs; ++ nb -= dxp->ndescs; + if (used_idx >= size) { + used_idx -= size; + vq->vq_packed.used_wrap_counter ^= 1; +@@ -862,7 +873,7 @@ virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num) + } + + static void +-virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num) ++virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, uint16_t num) + { + uint16_t used_idx, id; + uint16_t size = vq->vq_nentries; +@@ -892,7 +903,7 @@ virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num) + + /* Cleanup from completed transmits. */ + static inline void +-virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order) ++virtio_xmit_cleanup_packed(struct virtqueue *vq, uint16_t num, int in_order) + { + if (in_order) + virtio_xmit_cleanup_inorder_packed(vq, num); +diff --git a/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c b/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c +index 95c697f8e3..40b366854d 100644 +--- a/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c ++++ b/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c +@@ -341,6 +341,9 @@ vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq) + } + + PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed); ++ ++ /* To avoid compiler warnings when not in DEBUG mode. 
*/ ++ RTE_SET_USED(completed); + } + + uint16_t +diff --git a/dpdk/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/dpdk/drivers/raw/dpaa2_qdma/dpaa2_qdma.c +index c961e18d67..789c807d89 100644 +--- a/dpdk/drivers/raw/dpaa2_qdma/dpaa2_qdma.c ++++ b/dpdk/drivers/raw/dpaa2_qdma/dpaa2_qdma.c +@@ -653,7 +653,7 @@ dpdmai_dev_dequeue_multijob_prefetch( + rte_prefetch0((void *)(size_t)(dq_storage + 1)); + + /* Prepare next pull descriptor. This will give space for the +- * prefething done on DQRR entries ++ * prefetching done on DQRR entries + */ + q_storage->toggle ^= 1; + dq_storage1 = q_storage->dq_storage[q_storage->toggle]; +diff --git a/dpdk/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/dpdk/drivers/raw/dpaa2_qdma/dpaa2_qdma.h +index d6f6bb5522..1973d5d2b2 100644 +--- a/dpdk/drivers/raw/dpaa2_qdma/dpaa2_qdma.h ++++ b/dpdk/drivers/raw/dpaa2_qdma/dpaa2_qdma.h +@@ -82,7 +82,7 @@ struct qdma_device { + /** total number of hw queues. */ + uint16_t num_hw_queues; + /** +- * Maximum number of hw queues to be alocated per core. ++ * Maximum number of hw queues to be allocated per core. + * This is limited by MAX_HW_QUEUE_PER_CORE + */ + uint16_t max_hw_queues_per_core; +@@ -268,7 +268,7 @@ struct dpaa2_dpdmai_dev { + struct fsl_mc_io dpdmai; + /** HW ID for DPDMAI object */ + uint32_t dpdmai_id; +- /** Tocken of this device */ ++ /** Token of this device */ + uint16_t token; + /** Number of queue in this DPDMAI device */ + uint8_t num_queues; +diff --git a/dpdk/drivers/raw/ifpga/base/README b/dpdk/drivers/raw/ifpga/base/README +index 6b2b171b01..55d92d590a 100644 +--- a/dpdk/drivers/raw/ifpga/base/README ++++ b/dpdk/drivers/raw/ifpga/base/README +@@ -42,5 +42,5 @@ Some features added in this version: + 3. Add altera SPI master driver and Intel MAX10 device driver. + 4. Add Altera I2C master driver and AT24 eeprom driver. + 5. Add Device Tree support to get the configuration from card. +-6. Instruding and exposing APIs to DPDK PMD driver to access networking ++6. Instruding and exposing APIs to DPDK PMD to access networking + functionality. 
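The vmxnet3_tq_tx_complete() hunk above keeps the 'completed' counter only for the
debug-only PMD_TX_LOG() call, so RTE_SET_USED() (a plain (void) cast from
rte_common.h) is added to silence the "set but not used" warning in non-debug
builds. A minimal standalone sketch of the same idiom follows; the APP_DEBUG flag
and APP_LOG() macro are hypothetical stand-ins, not part of the patch or driver:

    #include <stdio.h>
    #include <rte_common.h>   /* RTE_SET_USED(x) expands to (void)(x) */

    /* Hypothetical debug-only log macro, modeled on the driver's PMD_TX_LOG(). */
    #ifdef APP_DEBUG
    #define APP_LOG(fmt, args...) printf(fmt "\n", ##args)
    #else
    #define APP_LOG(fmt, args...) do { } while (0)
    #endif

    void
    drain_completions(int nb_done)
    {
            int completed = 0;
            int i;

            /* Stand-in for the per-descriptor accounting in the real driver. */
            for (i = 0; i < nb_done; i++)
                    completed++;

            APP_LOG("Processed %d completions.", completed);

            /* Without this, the compiler warns that 'completed' is set but
             * never used whenever APP_DEBUG is not defined and APP_LOG()
             * expands to nothing. */
            RTE_SET_USED(completed);
    }
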
+diff --git a/dpdk/drivers/raw/ifpga/base/ifpga_defines.h b/dpdk/drivers/raw/ifpga/base/ifpga_defines.h +index 9f0147d1ed..7853d2976a 100644 +--- a/dpdk/drivers/raw/ifpga/base/ifpga_defines.h ++++ b/dpdk/drivers/raw/ifpga/base/ifpga_defines.h +@@ -93,9 +93,9 @@ enum fpga_id_type { + + #define PORT_FEATURE_ID_HEADER FEATURE_ID_FIU_HEADER + #define PORT_FEATURE_ID_ERROR 0x10 +-#define PORT_FEATURE_ID_UMSG 0x12 +-#define PORT_FEATURE_ID_UINT 0x13 +-#define PORT_FEATURE_ID_STP 0x14 ++#define PORT_FEATURE_ID_UMSG 0x11 ++#define PORT_FEATURE_ID_UINT 0x12 ++#define PORT_FEATURE_ID_STP 0x13 + #define PORT_FEATURE_ID_UAFU FEATURE_ID_AFU + + /* +diff --git a/dpdk/drivers/raw/ifpga/base/meson.build b/dpdk/drivers/raw/ifpga/base/meson.build +index da2d6e33ca..949f7f1271 100644 +--- a/dpdk/drivers/raw/ifpga/base/meson.build ++++ b/dpdk/drivers/raw/ifpga/base/meson.build +@@ -25,7 +25,7 @@ sources = [ + + rtdep = dependency('librt', required: false) + if not rtdep.found() +- rtdep = cc.find_library('librt', required: false) ++ rtdep = cc.find_library('rt', required: false) + endif + if not rtdep.found() + build = false +diff --git a/dpdk/drivers/raw/ifpga/base/opae_spi.c b/dpdk/drivers/raw/ifpga/base/opae_spi.c +index 9efeecb791..ca3d41fb92 100644 +--- a/dpdk/drivers/raw/ifpga/base/opae_spi.c ++++ b/dpdk/drivers/raw/ifpga/base/opae_spi.c +@@ -239,6 +239,18 @@ int spi_command(struct altera_spi_device *dev, unsigned int chip_select, + return 0; + } + ++int spi_write(struct altera_spi_device *dev, unsigned int chip_select, ++ unsigned int wlen, void *wdata) ++{ ++ return spi_command(dev, chip_select, wlen, wdata, 0, NULL); ++} ++ ++int spi_read(struct altera_spi_device *dev, unsigned int chip_select, ++ unsigned int rlen, void *rdata) ++{ ++ return spi_command(dev, chip_select, 0, NULL, rlen, rdata); ++} ++ + struct altera_spi_device *altera_spi_alloc(void *base, int type) + { + struct altera_spi_device *spi_dev = +diff --git a/dpdk/drivers/raw/ifpga/base/opae_spi.h b/dpdk/drivers/raw/ifpga/base/opae_spi.h +index af11656e4d..bcff67dd66 100644 +--- a/dpdk/drivers/raw/ifpga/base/opae_spi.h ++++ b/dpdk/drivers/raw/ifpga/base/opae_spi.h +@@ -117,6 +117,10 @@ struct spi_tran_header { + u32 addr; + }; + ++int spi_read(struct altera_spi_device *dev, unsigned int chip_select, ++ unsigned int rlen, void *rdata); ++int spi_write(struct altera_spi_device *dev, unsigned int chip_select, ++ unsigned int wlen, void *wdata); + int spi_command(struct altera_spi_device *dev, unsigned int chip_select, + unsigned int wlen, void *wdata, unsigned int rlen, void *rdata); + void spi_cs_deactivate(struct altera_spi_device *dev); +diff --git a/dpdk/drivers/raw/ifpga/base/opae_spi_transaction.c b/dpdk/drivers/raw/ifpga/base/opae_spi_transaction.c +index 006cdb4c1a..cd50d40629 100644 +--- a/dpdk/drivers/raw/ifpga/base/opae_spi_transaction.c ++++ b/dpdk/drivers/raw/ifpga/base/opae_spi_transaction.c +@@ -40,7 +40,7 @@ static void print_buffer(const char *string, void *buffer, int len) + printf("%s print buffer, len=%d\n", string, len); + + for (i = 0; i < len; i++) +- printf("%x ", *(p+i)); ++ printf("%02x ", *(p+i)); + printf("\n"); + } + #else +@@ -72,43 +72,6 @@ static void reorder_phy_data(u8 bits_per_word, + } + } + +-enum { +- SPI_FOUND_SOP, +- SPI_FOUND_EOP, +- SPI_NOT_FOUND, +-}; +- +-static int resp_find_sop_eop(unsigned char *resp, unsigned int len, +- int flags) +-{ +- int ret = SPI_NOT_FOUND; +- +- unsigned char *b = resp; +- +- /* find SOP */ +- if (flags != SPI_FOUND_SOP) { +- while (b < resp + len && *b != 
SPI_PACKET_SOP) +- b++; +- +- if (*b != SPI_PACKET_SOP) +- goto done; +- +- ret = SPI_FOUND_SOP; +- } +- +- /* find EOP */ +- while (b < resp + len && *b != SPI_PACKET_EOP) +- b++; +- +- if (*b != SPI_PACKET_EOP) +- goto done; +- +- ret = SPI_FOUND_EOP; +- +-done: +- return ret; +-} +- + static void phy_tx_pad(unsigned char *phy_buf, unsigned int phy_buf_len, + unsigned int *aligned_len) + { +@@ -137,6 +100,104 @@ static void phy_tx_pad(unsigned char *phy_buf, unsigned int phy_buf_len, + *p++ = SPI_BYTE_IDLE; + } + ++#define RX_ALL_IDLE_DATA (SPI_BYTE_IDLE << 24 | SPI_BYTE_IDLE << 16 | \ ++ SPI_BYTE_IDLE << 8 | SPI_BYTE_IDLE) ++ ++static bool all_idle_data(u8 *rxbuf) ++{ ++ return *(u32 *)rxbuf == RX_ALL_IDLE_DATA; ++} ++ ++static unsigned char *find_eop(u8 *rxbuf, u32 BPW) ++{ ++ return memchr(rxbuf, SPI_PACKET_EOP, BPW); ++} ++ ++static int do_spi_txrx(struct spi_transaction_dev *dev, ++ unsigned char *tx_buffer, ++ unsigned int tx_len, unsigned char *rx_buffer, ++ unsigned int rx_len, ++ unsigned int *actual_rx) ++{ ++ unsigned int rx_cnt = 0; ++ int ret = 0; ++ unsigned int BPW = 4; ++ bool eop_found = false; ++ unsigned char *eop; ++ unsigned char *ptr; ++ unsigned char *rxbuf = rx_buffer; ++ int add_byte = 0; ++ unsigned long ticks; ++ unsigned long timeout; ++ ++ /* send command */ ++ ret = spi_write(dev->dev, dev->chipselect, tx_len, tx_buffer); ++ if (ret) ++ return -EBUSY; ++ ++ timeout = rte_get_timer_cycles() + ++ msecs_to_timer_cycles(2000); ++ ++ /* read out data */ ++ while (rx_cnt < rx_len) { ++ ret = spi_read(dev->dev, dev->chipselect, BPW, rxbuf); ++ if (ret) ++ return -EBUSY; ++ ++ /* skip all of invalid data */ ++ if (!eop_found && all_idle_data(rxbuf)) { ++ ticks = rte_get_timer_cycles(); ++ if (!time_after(ticks, timeout)) { ++ continue; ++ } else { ++ dev_err(dev, "read spi data timeout\n"); ++ return -ETIMEDOUT; ++ } ++ } ++ ++ rx_cnt += BPW; ++ if (!eop_found) { ++ /* EOP is found, we read 2 more bytes and exit. */ ++ eop = find_eop(rxbuf, BPW); ++ if (eop) { ++ if ((BPW + rxbuf - eop) > 2) { ++ /* ++ * check if the last 2 bytes are already ++ * received in current word. ++ */ ++ break; ++ } else if ((BPW + rxbuf - eop) == 2) { ++ /* ++ * skip if last byte is not SPI_BYTE_ESC ++ * or SPI_PACKET_ESC. this is the valid ++ * end of a response too. 
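++					 * If it is an escape byte, one more
++					 * byte is needed to complete the
++					 * escaped value (add_byte = 1 below).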
++ */ ++ ptr = eop + 1; ++ ++ if (*ptr != SPI_BYTE_ESC && ++ *ptr != SPI_PACKET_ESC) ++ break; ++ ++ add_byte = 1; ++ } else { ++ add_byte = 2; ++ } ++ ++ rx_len = min(rx_len, ++ IFPGA_ALIGN(rx_cnt + ++ add_byte, BPW)); ++ eop_found = true; ++ } ++ } ++ rxbuf += BPW; ++ } ++ ++ *actual_rx = rx_cnt; ++ print_buffer("found valid data:", rx_buffer, rx_cnt); ++ ++ return ret; ++} ++ + static int byte_to_core_convert(struct spi_transaction_dev *dev, + unsigned int send_len, unsigned char *send_data, + unsigned int resp_len, unsigned char *resp_data, +@@ -148,15 +209,9 @@ static int byte_to_core_convert(struct spi_transaction_dev *dev, + unsigned char *resp_packet = dev->buffer->bytes_resp; + unsigned char *p; + unsigned char current_byte; +- unsigned char *tx_buffer; + unsigned int tx_len = 0; +- unsigned char *rx_buffer; +- unsigned int rx_len = 0; +- int retry = 0; +- int spi_flags; +- unsigned long timeout = msecs_to_timer_cycles(1000); +- unsigned long ticks; + unsigned int resp_max_len = 2 * resp_len; ++ unsigned int actual_rx; + + print_buffer("before bytes:", send_data, send_len); + +@@ -190,48 +245,15 @@ static int byte_to_core_convert(struct spi_transaction_dev *dev, + + print_buffer("after order to spi:", send_packet, tx_len); + +- /* call spi */ +- tx_buffer = send_packet; +- rx_buffer = resp_packet; +- rx_len = resp_max_len; +- spi_flags = SPI_NOT_FOUND; +- +-read_again: +- ret = spi_command(dev->dev, dev->chipselect, tx_len, tx_buffer, +- rx_len, rx_buffer); ++ ret = do_spi_txrx(dev, send_packet, tx_len, resp_packet, ++ resp_max_len, &actual_rx); + if (ret) +- return -EBUSY; +- +- print_buffer("read from spi:", rx_buffer, rx_len); +- +- /* look for SOP firstly*/ +- ret = resp_find_sop_eop(rx_buffer, rx_len - 1, spi_flags); +- if (ret != SPI_FOUND_EOP) { +- tx_buffer = NULL; +- tx_len = 0; +- ticks = rte_get_timer_cycles(); +- if (time_after(ticks, timeout) && +- retry++ > SPI_MAX_RETRY) { +- dev_err(NULL, "Have retry %d, found invalid packet data\n", +- retry); +- return -EBUSY; +- } +- +- if (ret == SPI_FOUND_SOP) { +- rx_buffer += rx_len; +- resp_max_len += rx_len; +- } +- +- spi_flags = ret; +- goto read_again; +- } +- +- print_buffer("found valid data:", resp_packet, resp_max_len); ++ return ret; + + /* analyze response packet */ + i = 0; + p = resp_data; +- while (i < resp_max_len) { ++ while (i < actual_rx) { + current_byte = resp_packet[i]; + switch (current_byte) { + case SPI_BYTE_IDLE: +@@ -337,9 +359,13 @@ static int packet_to_byte_conver(struct spi_transaction_dev *dev, + current_byte = resp_packet[i]; + + switch (current_byte) { +- case SPI_PACKET_ESC: +- case SPI_PACKET_CHANNEL: + case SPI_PACKET_SOP: ++ dev_err(dev, "error on get SOP after SOP\n"); ++ return -EINVAL; ++ case SPI_PACKET_CHANNEL: ++ i += 2; ++ break; ++ case SPI_PACKET_ESC: + i++; + current_byte = resp_packet[i]; + *p++ = xor_20(current_byte); +@@ -348,23 +374,30 @@ static int packet_to_byte_conver(struct spi_transaction_dev *dev, + case SPI_PACKET_EOP: + i++; + current_byte = resp_packet[i]; +- if (current_byte == SPI_PACKET_ESC || +- current_byte == SPI_PACKET_CHANNEL || +- current_byte == SPI_PACKET_SOP) { ++ switch (current_byte) { ++ case SPI_PACKET_ESC: + i++; + current_byte = resp_packet[i]; + *p++ = xor_20(current_byte); +- } else ++ break; ++ case SPI_PACKET_CHANNEL: ++ case SPI_PACKET_SOP: ++ case SPI_PACKET_EOP: ++ dev_err(dev, "error get SOP/EOP after EOP\n"); ++ return -EINVAL; ++ default: + *p++ = current_byte; +- i = valid_resp_len; +- break; ++ break; ++ } ++ goto done; ++ + default: + 
*p++ = current_byte; + i++; + } +- + } + ++done: + *valid = p - resp_buf; + + print_buffer("after packet:", resp_buf, *valid); +diff --git a/dpdk/drivers/raw/ifpga/ifpga_rawdev.c b/dpdk/drivers/raw/ifpga/ifpga_rawdev.c +index 27129b133e..ee6589ad01 100644 +--- a/dpdk/drivers/raw/ifpga/ifpga_rawdev.c ++++ b/dpdk/drivers/raw/ifpga/ifpga_rawdev.c +@@ -68,13 +68,9 @@ static const struct rte_pci_id pci_ifpga_map[] = { + + static struct ifpga_rawdev ifpga_rawdevices[IFPGA_RAWDEV_NUM]; + +-static int ifpga_monitor_start; ++static int ifpga_monitor_refcnt; + static pthread_t ifpga_monitor_start_thread; + +-#define IFPGA_MAX_IRQ 12 +-/* 0 for FME interrupt, others are reserved for AFU irq */ +-static struct rte_intr_handle ifpga_irq_handle[IFPGA_MAX_IRQ]; +- + static struct ifpga_rawdev * + ifpga_rawdev_allocate(struct rte_rawdev *rawdev); + static int set_surprise_link_check_aer( +@@ -82,6 +78,7 @@ static int set_surprise_link_check_aer( + static int ifpga_pci_find_next_ext_capability(unsigned int fd, + int start, uint32_t cap); + static int ifpga_pci_find_ext_capability(unsigned int fd, uint32_t cap); ++static void fme_interrupt_handler(void *param); + + struct ifpga_rawdev * + ifpga_rawdev_get(const struct rte_rawdev *rawdev) +@@ -118,6 +115,7 @@ ifpga_rawdev_allocate(struct rte_rawdev *rawdev) + { + struct ifpga_rawdev *dev; + uint16_t dev_id; ++ int i = 0; + + dev = ifpga_rawdev_get(rawdev); + if (dev != NULL) { +@@ -134,6 +132,11 @@ ifpga_rawdev_allocate(struct rte_rawdev *rawdev) + dev = &ifpga_rawdevices[dev_id]; + dev->rawdev = rawdev; + dev->dev_id = dev_id; ++ for (i = 0; i < IFPGA_MAX_IRQ; i++) ++ dev->intr_handle[i] = NULL; ++ dev->poll_enabled = 0; ++ for (i = 0; i < IFPGA_MAX_VDEV; i++) ++ dev->vdev_name[i] = NULL; + + return dev; + } +@@ -208,15 +211,16 @@ static int ifpga_get_dev_vendor_id(const char *bdf, + + return 0; + } +-static int ifpga_rawdev_fill_info(struct ifpga_rawdev *ifpga_dev, +- const char *bdf) ++ ++static int ifpga_rawdev_fill_info(struct ifpga_rawdev *ifpga_dev) + { +- char path[1024] = "/sys/bus/pci/devices/0000:"; ++ struct opae_adapter *adapter = NULL; ++ char path[1024] = "/sys/bus/pci/devices/"; + char link[1024], link1[1024]; + char dir[1024] = "/sys/devices/"; + char *c; + int ret; +- char sub_brg_bdf[4][16]; ++ char sub_brg_bdf[4][16] = {{0}}; + int point; + DIR *dp = NULL; + struct dirent *entry; +@@ -224,9 +228,14 @@ static int ifpga_rawdev_fill_info(struct ifpga_rawdev *ifpga_dev, + + unsigned int dom, bus, dev; + int func; +- uint32_t dev_id, vendor_id; ++ uint32_t dev_id = 0; ++ uint32_t vendor_id = 0; + +- strlcat(path, bdf, sizeof(path)); ++ adapter = ifpga_dev ? 
ifpga_rawdev_get_priv(ifpga_dev->rawdev) : NULL; ++ if (!adapter) ++ return -ENODEV; ++ ++ strlcat(path, adapter->name, sizeof(path)); + memset(link, 0, sizeof(link)); + memset(link1, 0, sizeof(link1)); + ret = readlink(path, link, (sizeof(link)-1)); +@@ -376,13 +385,13 @@ ifpga_monitor_sensor(struct rte_rawdev *raw_dev, + /* monitor temperature sensors */ + if (!strcmp(sensor->name, "Board Temperature") || + !strcmp(sensor->name, "FPGA Die Temperature")) { +- IFPGA_RAWDEV_PMD_INFO("read sensor %s %d %d %d\n", ++ IFPGA_RAWDEV_PMD_DEBUG("read sensor %s %d %d %d\n", + sensor->name, value, sensor->high_warn, + sensor->high_fatal); + + if (HIGH_WARN(sensor, value) || + LOW_WARN(sensor, value)) { +- IFPGA_RAWDEV_PMD_INFO("%s reach theshold %d\n", ++ IFPGA_RAWDEV_PMD_INFO("%s reach threshold %d\n", + sensor->name, value); + *gsd_start = true; + break; +@@ -393,7 +402,7 @@ ifpga_monitor_sensor(struct rte_rawdev *raw_dev, + if (!strcmp(sensor->name, "12V AUX Voltage")) { + if (value < AUX_VOLTAGE_WARN) { + IFPGA_RAWDEV_PMD_INFO( +- "%s reach theshold %d mV\n", ++ "%s reach threshold %d mV\n", + sensor->name, value); + *gsd_start = true; + break; +@@ -418,7 +427,7 @@ static int set_surprise_link_check_aer( + bool enable = 0; + uint32_t aer_new0, aer_new1; + +- if (!ifpga_rdev) { ++ if (!ifpga_rdev || !ifpga_rdev->rawdev) { + printf("\n device does not exist\n"); + return -EFAULT; + } +@@ -441,12 +450,12 @@ static int set_surprise_link_check_aer( + pos = ifpga_pci_find_ext_capability(fd, RTE_PCI_EXT_CAP_ID_ERR); + if (!pos) + goto end; +- /* save previout ECAP_AER+0x08 */ ++ /* save previous ECAP_AER+0x08 */ + ret = pread(fd, &data, sizeof(data), pos+0x08); + if (ret == -1) + goto end; + ifpga_rdev->aer_old[0] = data; +- /* save previout ECAP_AER+0x14 */ ++ /* save previous ECAP_AER+0x14 */ + ret = pread(fd, &data, sizeof(data), pos+0x14); + if (ret == -1) + goto end; +@@ -497,11 +506,11 @@ ifpga_rawdev_gsd_handle(__rte_unused void *param) + int gsd_enable, ret; + #define MS 1000 + +- while (1) { ++ while (__atomic_load_n(&ifpga_monitor_refcnt, __ATOMIC_RELAXED)) { + gsd_enable = 0; + for (i = 0; i < IFPGA_RAWDEV_NUM; i++) { + ifpga_rdev = &ifpga_rawdevices[i]; +- if (ifpga_rdev->rawdev) { ++ if (ifpga_rdev->poll_enabled) { + ret = set_surprise_link_check_aer(ifpga_rdev, + gsd_enable); + if (ret == 1 && !gsd_enable) { +@@ -521,30 +530,46 @@ ifpga_rawdev_gsd_handle(__rte_unused void *param) + } + + static int +-ifpga_monitor_start_func(void) ++ifpga_monitor_start_func(struct ifpga_rawdev *dev) + { + int ret; + +- if (ifpga_monitor_start == 0) { ++ if (!dev) ++ return -ENODEV; ++ ++ ret = ifpga_rawdev_fill_info(dev); ++ if (ret) ++ return ret; ++ ++ dev->poll_enabled = 1; ++ ++ if (!__atomic_fetch_add(&ifpga_monitor_refcnt, 1, __ATOMIC_RELAXED)) { + ret = pthread_create(&ifpga_monitor_start_thread, + NULL, + ifpga_rawdev_gsd_handle, NULL); + if (ret) { ++ ifpga_monitor_start_thread = 0; + IFPGA_RAWDEV_PMD_ERR( +- "Fail to create ifpga nonitor thread"); ++ "Fail to create ifpga monitor thread"); + return -1; + } +- ifpga_monitor_start = 1; + } + + return 0; + } ++ + static int +-ifpga_monitor_stop_func(void) ++ifpga_monitor_stop_func(struct ifpga_rawdev *dev) + { + int ret; + +- if (ifpga_monitor_start == 1) { ++ if (!dev || !dev->poll_enabled) ++ return 0; ++ ++ dev->poll_enabled = 0; ++ ++ if (!__atomic_sub_fetch(&ifpga_monitor_refcnt, 1, __ATOMIC_RELAXED) && ++ ifpga_monitor_start_thread) { + ret = pthread_cancel(ifpga_monitor_start_thread); + if (ret) + IFPGA_RAWDEV_PMD_ERR("Can't cancel the 
thread"); +@@ -553,8 +578,6 @@ ifpga_monitor_stop_func(void) + if (ret) + IFPGA_RAWDEV_PMD_ERR("Can't join the thread"); + +- ifpga_monitor_start = 0; +- + return ret; + } + +@@ -716,17 +739,38 @@ ifpga_rawdev_stop(struct rte_rawdev *dev) + static int + ifpga_rawdev_close(struct rte_rawdev *dev) + { ++ struct ifpga_rawdev *ifpga_rdev = NULL; + struct opae_adapter *adapter; ++ struct opae_manager *mgr; ++ char *vdev_name = NULL; ++ int i, ret = 0; + + if (dev) { ++ ifpga_rdev = ifpga_rawdev_get(dev); ++ if (ifpga_rdev) { ++ for (i = 0; i < IFPGA_MAX_VDEV; i++) { ++ vdev_name = ifpga_rdev->vdev_name[i]; ++ if (vdev_name) ++ rte_vdev_uninit(vdev_name); ++ } ++ ifpga_monitor_stop_func(ifpga_rdev); ++ ifpga_rdev->rawdev = NULL; ++ } + adapter = ifpga_rawdev_get_priv(dev); + if (adapter) { ++ mgr = opae_adapter_get_mgr(adapter); ++ if (ifpga_rdev && mgr) { ++ if (ifpga_unregister_msix_irq(ifpga_rdev, ++ IFPGA_FME_IRQ, 0, ++ fme_interrupt_handler, mgr) < 0) ++ ret = -EINVAL; ++ } + opae_adapter_destroy(adapter); + opae_adapter_data_free(adapter->data); + } + } + +- return dev ? 0:1; ++ return ret; + } + + static int +@@ -1341,36 +1385,62 @@ fme_interrupt_handler(void *param) + } + + int +-ifpga_unregister_msix_irq(enum ifpga_irq_type type, ++ifpga_unregister_msix_irq(struct ifpga_rawdev *dev, enum ifpga_irq_type type, + int vec_start, rte_intr_callback_fn handler, void *arg) + { +- struct rte_intr_handle *intr_handle; ++ struct rte_intr_handle **intr_handle; ++ int rc = 0; ++ int i = vec_start + 1; ++ ++ if (!dev) ++ return -ENODEV; + + if (type == IFPGA_FME_IRQ) +- intr_handle = &ifpga_irq_handle[0]; ++ intr_handle = (struct rte_intr_handle **)&dev->intr_handle[0]; + else if (type == IFPGA_AFU_IRQ) +- intr_handle = &ifpga_irq_handle[vec_start + 1]; ++ intr_handle = (struct rte_intr_handle **)&dev->intr_handle[i]; + else +- return 0; ++ return -EINVAL; ++ ++ if ((*intr_handle) == NULL) { ++ IFPGA_RAWDEV_PMD_ERR("%s interrupt %d not registered\n", ++ type == IFPGA_FME_IRQ ? "FME" : "AFU", ++ type == IFPGA_FME_IRQ ? 0 : vec_start); ++ return -ENOENT; ++ } + +- rte_intr_efd_disable(intr_handle); ++ rte_intr_efd_disable(*intr_handle); + +- return rte_intr_callback_unregister(intr_handle, handler, arg); ++ rc = rte_intr_callback_unregister(*intr_handle, handler, arg); ++ if (rc < 0) { ++ IFPGA_RAWDEV_PMD_ERR("Failed to unregister %s interrupt %d\n", ++ type == IFPGA_FME_IRQ ? "FME" : "AFU", ++ type == IFPGA_FME_IRQ ? 
0 : vec_start); ++ } else { ++ rte_free(*intr_handle); ++ *intr_handle = NULL; ++ } ++ ++ return rc; + } + + int +-ifpga_register_msix_irq(struct rte_rawdev *dev, int port_id, ++ifpga_register_msix_irq(struct ifpga_rawdev *dev, int port_id, + enum ifpga_irq_type type, int vec_start, int count, + rte_intr_callback_fn handler, const char *name, + void *arg) + { + int ret; +- struct rte_intr_handle *intr_handle; ++ struct rte_intr_handle **intr_handle; + struct opae_adapter *adapter; + struct opae_manager *mgr; + struct opae_accelerator *acc; ++ int i = 0; + +- adapter = ifpga_rawdev_get_priv(dev); ++ if (!dev || !dev->rawdev) ++ return -ENODEV; ++ ++ adapter = ifpga_rawdev_get_priv(dev->rawdev); + if (!adapter) + return -ENODEV; + +@@ -1379,29 +1449,37 @@ ifpga_register_msix_irq(struct rte_rawdev *dev, int port_id, + return -ENODEV; + + if (type == IFPGA_FME_IRQ) { +- intr_handle = &ifpga_irq_handle[0]; ++ intr_handle = (struct rte_intr_handle **)&dev->intr_handle[0]; + count = 1; + } else if (type == IFPGA_AFU_IRQ) { +- intr_handle = &ifpga_irq_handle[vec_start + 1]; ++ i = vec_start + 1; ++ intr_handle = (struct rte_intr_handle **)&dev->intr_handle[i]; + } else { + return -EINVAL; + } + +- intr_handle->type = RTE_INTR_HANDLE_VFIO_MSIX; ++ if (*intr_handle) ++ return -EBUSY; ++ ++ *intr_handle = rte_zmalloc(NULL, sizeof(struct rte_intr_handle), 0); ++ if (!(*intr_handle)) ++ return -ENOMEM; ++ ++ (*intr_handle)->type = RTE_INTR_HANDLE_VFIO_MSIX; + +- ret = rte_intr_efd_enable(intr_handle, count); ++ ret = rte_intr_efd_enable(*intr_handle, count); + if (ret) + return -ENODEV; + +- intr_handle->fd = intr_handle->efds[0]; ++ (*intr_handle)->fd = (*intr_handle)->efds[0]; + + IFPGA_RAWDEV_PMD_DEBUG("register %s irq, vfio_fd=%d, fd=%d\n", +- name, intr_handle->vfio_dev_fd, +- intr_handle->fd); ++ name, (*intr_handle)->vfio_dev_fd, ++ (*intr_handle)->fd); + + if (type == IFPGA_FME_IRQ) { + struct fpga_fme_err_irq_set err_irq_set; +- err_irq_set.evtfd = intr_handle->efds[0]; ++ err_irq_set.evtfd = (*intr_handle)->efds[0]; + + ret = opae_manager_ifpga_set_err_irq(mgr, &err_irq_set); + if (ret) +@@ -1412,13 +1490,13 @@ ifpga_register_msix_irq(struct rte_rawdev *dev, int port_id, + return -EINVAL; + + ret = opae_acc_set_irq(acc, vec_start, count, +- intr_handle->efds); ++ (*intr_handle)->efds); + if (ret) + return -EINVAL; + } + + /* register interrupt handler using DPDK API */ +- ret = rte_intr_callback_register(intr_handle, ++ ret = rte_intr_callback_register(*intr_handle, + handler, (void *)arg); + if (ret) + return -EINVAL; +@@ -1448,7 +1526,7 @@ ifpga_rawdev_create(struct rte_pci_device *pci_dev, + } + + memset(name, 0, sizeof(name)); +- snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "IFPGA:%02x:%02x.%x", ++ snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, IFPGA_RAWDEV_NAME_FMT, + pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function); + + IFPGA_RAWDEV_PMD_INFO("Init %s on NUMA node %d", name, rte_socket_id()); +@@ -1517,11 +1595,15 @@ ifpga_rawdev_create(struct rte_pci_device *pci_dev, + IFPGA_RAWDEV_PMD_INFO("this is a PF function"); + } + +- ret = ifpga_register_msix_irq(rawdev, 0, IFPGA_FME_IRQ, 0, 0, ++ ret = ifpga_register_msix_irq(dev, 0, IFPGA_FME_IRQ, 0, 0, + fme_interrupt_handler, "fme_irq", mgr); + if (ret) + goto free_adapter_data; + ++ ret = ifpga_monitor_start_func(dev); ++ if (ret) ++ goto free_adapter_data; ++ + return ret; + + free_adapter_data: +@@ -1540,9 +1622,6 @@ ifpga_rawdev_destroy(struct rte_pci_device *pci_dev) + int ret; + struct rte_rawdev *rawdev; + char 
name[RTE_RAWDEV_NAME_MAX_LEN]; +- struct opae_adapter *adapter; +- struct opae_manager *mgr; +- struct ifpga_rawdev *dev; + + if (!pci_dev) { + IFPGA_RAWDEV_PMD_ERR("Invalid pci_dev of the device!"); +@@ -1551,7 +1630,7 @@ ifpga_rawdev_destroy(struct rte_pci_device *pci_dev) + } + + memset(name, 0, sizeof(name)); +- snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "IFPGA:%x:%02x.%x", ++ snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, IFPGA_RAWDEV_NAME_FMT, + pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function); + + IFPGA_RAWDEV_PMD_INFO("Closing %s on NUMA node %d", +@@ -1562,21 +1641,6 @@ ifpga_rawdev_destroy(struct rte_pci_device *pci_dev) + IFPGA_RAWDEV_PMD_ERR("Invalid device name (%s)", name); + return -EINVAL; + } +- dev = ifpga_rawdev_get(rawdev); +- if (dev) +- dev->rawdev = NULL; +- +- adapter = ifpga_rawdev_get_priv(rawdev); +- if (!adapter) +- return -ENODEV; +- +- mgr = opae_adapter_get_mgr(adapter); +- if (!mgr) +- return -ENODEV; +- +- if (ifpga_unregister_msix_irq(IFPGA_FME_IRQ, 0, +- fme_interrupt_handler, mgr) < 0) +- return -EINVAL; + + /* rte_rawdev_close is called by pmd_release */ + ret = rte_rawdev_pmd_release(rawdev); +@@ -1597,7 +1661,7 @@ ifpga_rawdev_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + static int + ifpga_rawdev_pci_remove(struct rte_pci_device *pci_dev) + { +- ifpga_monitor_stop_func(); ++ IFPGA_RAWDEV_PMD_INFO("remove pci_dev %s", pci_dev->device.name); + return ifpga_rawdev_destroy(pci_dev); + } + +@@ -1639,80 +1703,118 @@ static int ifpga_rawdev_get_string_arg(const char *key __rte_unused, + + return 0; + } ++ + static int +-ifpga_cfg_probe(struct rte_vdev_device *dev) ++ifpga_vdev_parse_devargs(struct rte_devargs *devargs, ++ struct ifpga_vdev_args *args) + { +- struct rte_devargs *devargs; +- struct rte_kvargs *kvlist = NULL; +- struct rte_rawdev *rawdev = NULL; +- struct ifpga_rawdev *ifpga_dev; +- int port; ++ struct rte_kvargs *kvlist; + char *name = NULL; +- const char *bdf; +- char dev_name[RTE_RAWDEV_NAME_MAX_LEN]; +- int ret = -1; ++ int port = 0; ++ int ret = -EINVAL; + +- devargs = dev->device.devargs; ++ if (!devargs || !args) ++ return ret; + + kvlist = rte_kvargs_parse(devargs->args, valid_args); + if (!kvlist) { +- IFPGA_RAWDEV_PMD_LOG(ERR, "error when parsing param"); +- goto end; ++ IFPGA_RAWDEV_PMD_ERR("error when parsing devargs"); ++ return ret; + } + + if (rte_kvargs_count(kvlist, IFPGA_ARG_NAME) == 1) { + if (rte_kvargs_process(kvlist, IFPGA_ARG_NAME, +- &ifpga_rawdev_get_string_arg, +- &name) < 0) { ++ &ifpga_rawdev_get_string_arg, &name) < 0) { + IFPGA_RAWDEV_PMD_ERR("error to parse %s", +- IFPGA_ARG_NAME); ++ IFPGA_ARG_NAME); + goto end; ++ } else { ++ strlcpy(args->bdf, name, sizeof(args->bdf)); ++ rte_free(name); + } + } else { + IFPGA_RAWDEV_PMD_ERR("arg %s is mandatory for ifpga bus", +- IFPGA_ARG_NAME); ++ IFPGA_ARG_NAME); + goto end; + } + + if (rte_kvargs_count(kvlist, IFPGA_ARG_PORT) == 1) { +- if (rte_kvargs_process(kvlist, +- IFPGA_ARG_PORT, +- &rte_ifpga_get_integer32_arg, +- &port) < 0) { ++ if (rte_kvargs_process(kvlist, IFPGA_ARG_PORT, ++ &rte_ifpga_get_integer32_arg, &port) < 0) { + IFPGA_RAWDEV_PMD_ERR("error to parse %s", + IFPGA_ARG_PORT); + goto end; ++ } else { ++ args->port = port; + } + } else { + IFPGA_RAWDEV_PMD_ERR("arg %s is mandatory for ifpga bus", +- IFPGA_ARG_PORT); ++ IFPGA_ARG_PORT); + goto end; + } + ++ ret = 0; ++ ++end: ++ if (kvlist) ++ rte_kvargs_free(kvlist); ++ ++ return ret; ++} ++ ++static int ++ifpga_cfg_probe(struct rte_vdev_device *vdev) ++{ ++ struct rte_rawdev *rawdev = NULL; 
++ struct ifpga_rawdev *ifpga_dev; ++ struct ifpga_vdev_args args; ++ char dev_name[RTE_RAWDEV_NAME_MAX_LEN]; ++ const char *vdev_name = NULL; ++ int i, n, ret = 0; ++ ++ vdev_name = rte_vdev_device_name(vdev); ++ if (!vdev_name) ++ return -EINVAL; ++ ++ IFPGA_RAWDEV_PMD_INFO("probe ifpga virtual device %s", vdev_name); ++ ++ ret = ifpga_vdev_parse_devargs(vdev->device.devargs, &args); ++ if (ret) ++ return ret; ++ + memset(dev_name, 0, sizeof(dev_name)); +- snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "IFPGA:%s", name); ++ snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "IFPGA:%s", args.bdf); + rawdev = rte_rawdev_pmd_get_named_dev(dev_name); + if (!rawdev) +- goto end; ++ return -ENODEV; + ifpga_dev = ifpga_rawdev_get(rawdev); + if (!ifpga_dev) +- goto end; +- bdf = name; +- ifpga_rawdev_fill_info(ifpga_dev, bdf); ++ return -ENODEV; + +- ifpga_monitor_start_func(); ++ for (i = 0; i < IFPGA_MAX_VDEV; i++) { ++ if (ifpga_dev->vdev_name[i] == NULL) { ++ n = strlen(vdev_name) + 1; ++ ifpga_dev->vdev_name[i] = rte_malloc(NULL, n, 0); ++ if (ifpga_dev->vdev_name[i] == NULL) ++ return -ENOMEM; ++ strlcpy(ifpga_dev->vdev_name[i], vdev_name, n); ++ break; ++ } ++ } + +- memset(dev_name, 0, sizeof(dev_name)); +- snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "%d|%s", +- port, name); ++ if (i >= IFPGA_MAX_VDEV) { ++ IFPGA_RAWDEV_PMD_ERR("Can't create more virtual device!"); ++ return -ENOENT; ++ } + ++ snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "%d|%s", ++ args.port, args.bdf); + ret = rte_eal_hotplug_add(RTE_STR(IFPGA_BUS_NAME), +- dev_name, devargs->args); +-end: +- if (kvlist) +- rte_kvargs_free(kvlist); +- if (name) +- free(name); ++ dev_name, vdev->device.devargs->args); ++ if (ret) { ++ rte_free(ifpga_dev->vdev_name[i]); ++ ifpga_dev->vdev_name[i] = NULL; ++ } + + return ret; + } +@@ -1720,10 +1822,47 @@ ifpga_cfg_probe(struct rte_vdev_device *dev) + static int + ifpga_cfg_remove(struct rte_vdev_device *vdev) + { +- IFPGA_RAWDEV_PMD_INFO("Remove ifpga_cfg %p", +- vdev); ++ struct rte_rawdev *rawdev = NULL; ++ struct ifpga_rawdev *ifpga_dev; ++ struct ifpga_vdev_args args; ++ char dev_name[RTE_RAWDEV_NAME_MAX_LEN]; ++ const char *vdev_name = NULL; ++ char *tmp_vdev = NULL; ++ int i, ret = 0; + +- return 0; ++ vdev_name = rte_vdev_device_name(vdev); ++ if (!vdev_name) ++ return -EINVAL; ++ ++ IFPGA_RAWDEV_PMD_INFO("remove ifpga virtual device %s", vdev_name); ++ ++ ret = ifpga_vdev_parse_devargs(vdev->device.devargs, &args); ++ if (ret) ++ return ret; ++ ++ memset(dev_name, 0, sizeof(dev_name)); ++ snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "IFPGA:%s", args.bdf); ++ rawdev = rte_rawdev_pmd_get_named_dev(dev_name); ++ if (!rawdev) ++ return -ENODEV; ++ ifpga_dev = ifpga_rawdev_get(rawdev); ++ if (!ifpga_dev) ++ return -ENODEV; ++ ++ snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "%d|%s", ++ args.port, args.bdf); ++ ret = rte_eal_hotplug_remove(RTE_STR(IFPGA_BUS_NAME), dev_name); ++ ++ for (i = 0; i < IFPGA_MAX_VDEV; i++) { ++ tmp_vdev = ifpga_dev->vdev_name[i]; ++ if (tmp_vdev && !strcmp(tmp_vdev, vdev_name)) { ++ free(tmp_vdev); ++ ifpga_dev->vdev_name[i] = NULL; ++ break; ++ } ++ } ++ ++ return ret; + } + + static struct rte_vdev_driver ifpga_cfg_driver = { +diff --git a/dpdk/drivers/raw/ifpga/ifpga_rawdev.h b/dpdk/drivers/raw/ifpga/ifpga_rawdev.h +index 7754beb02b..b4aaa15fda 100644 +--- a/dpdk/drivers/raw/ifpga/ifpga_rawdev.h ++++ b/dpdk/drivers/raw/ifpga/ifpga_rawdev.h +@@ -7,6 +7,8 @@ + + extern int ifpga_rawdev_logtype; + ++#define IFPGA_RAWDEV_NAME_FMT "IFPGA:%02x:%02x.%x" ++ + #define 
IFPGA_RAWDEV_PMD_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, ifpga_rawdev_logtype, "%s(): " fmt "\n", \ + __func__, ##args) +@@ -48,6 +50,8 @@ ifpga_rawdev_get_priv(const struct rte_rawdev *rawdev) + + #define IFPGA_RAWDEV_MSIX_IRQ_NUM 7 + #define IFPGA_RAWDEV_NUM 32 ++#define IFPGA_MAX_VDEV 4 ++#define IFPGA_MAX_IRQ 12 + + struct ifpga_rawdev { + int dev_id; +@@ -57,6 +61,17 @@ struct ifpga_rawdev { + uint32_t aer_old[2]; + char fvl_bdf[8][16]; + char parent_bdf[16]; ++ /* 0 for FME interrupt, others are reserved for AFU irq */ ++ void *intr_handle[IFPGA_MAX_IRQ]; ++ /* enable monitor thread poll device's sensors or not */ ++ int poll_enabled; ++ /* name of virtual devices created on raw device */ ++ char *vdev_name[IFPGA_MAX_VDEV]; ++}; ++ ++struct ifpga_vdev_args { ++ char bdf[PCI_PRI_STR_SIZE]; ++ int port; + }; + + struct ifpga_rawdev * +@@ -68,12 +83,12 @@ enum ifpga_irq_type { + }; + + int +-ifpga_register_msix_irq(struct rte_rawdev *dev, int port_id, ++ifpga_register_msix_irq(struct ifpga_rawdev *dev, int port_id, + enum ifpga_irq_type type, int vec_start, int count, + rte_intr_callback_fn handler, const char *name, + void *arg); + int +-ifpga_unregister_msix_irq(enum ifpga_irq_type type, ++ifpga_unregister_msix_irq(struct ifpga_rawdev *dev, enum ifpga_irq_type type, + int vec_start, rte_intr_callback_fn handler, void *arg); + + #endif /* _IFPGA_RAWDEV_H_ */ +diff --git a/dpdk/drivers/raw/ioat/dpdk_idxd_cfg.py b/dpdk/drivers/raw/ioat/dpdk_idxd_cfg.py +index bce4bb5bd4..6d6d3a801b 100755 +--- a/dpdk/drivers/raw/ioat/dpdk_idxd_cfg.py ++++ b/dpdk/drivers/raw/ioat/dpdk_idxd_cfg.py +@@ -29,25 +29,32 @@ def write_values(self, values): + f.write(str(contents)) + + ++def get_drv_dir(dtype): ++ "Get the sysfs path for the driver, either 'idxd' or 'user'" ++ drv_dir = "/sys/bus/dsa/drivers/" + dtype ++ if not os.path.exists(drv_dir): ++ return "/sys/bus/dsa/drivers/dsa" ++ return drv_dir ++ ++ + def configure_dsa(dsa_id, queues): + "Configure the DSA instance with appropriate number of queues" + dsa_dir = SysfsDir(f"/sys/bus/dsa/devices/dsa{dsa_id}") +- drv_dir = SysfsDir("/sys/bus/dsa/drivers/dsa") + + max_groups = dsa_dir.read_int("max_groups") + max_engines = dsa_dir.read_int("max_engines") + max_queues = dsa_dir.read_int("max_work_queues") +- max_tokens = dsa_dir.read_int("max_tokens") +- +- # we want one engine per group +- nb_groups = min(max_engines, max_groups) +- for grp in range(nb_groups): +- dsa_dir.write_values({f"engine{dsa_id}.{grp}/group_id": grp}) ++ max_work_queues_size = dsa_dir.read_int("max_work_queues_size") + + nb_queues = min(queues, max_queues) + if queues > nb_queues: + print(f"Setting number of queues to max supported value: {max_queues}") + ++ # we want one engine per group, and no more engines than queues ++ nb_groups = min(max_engines, max_groups, nb_queues) ++ for grp in range(nb_groups): ++ dsa_dir.write_values({f"engine{dsa_id}.{grp}/group_id": grp}) ++ + # configure each queue + for q in range(nb_queues): + wq_dir = SysfsDir(os.path.join(dsa_dir.path, f"wq{dsa_id}.{q}")) +@@ -56,12 +63,16 @@ def configure_dsa(dsa_id, queues): + "mode": "dedicated", + "name": f"dpdk_wq{dsa_id}.{q}", + "priority": 1, +- "size": int(max_tokens / nb_queues)}) ++ "max_batch_size": 1024, ++ "size": int(max_work_queues_size / nb_queues)}) + + # enable device and then queues +- drv_dir.write_values({"bind": f"dsa{dsa_id}"}) ++ idxd_dir = SysfsDir(get_drv_dir("idxd")) ++ idxd_dir.write_values({"bind": f"dsa{dsa_id}"}) ++ ++ user_dir = SysfsDir(get_drv_dir("user")) + for q 
in range(nb_queues): +- drv_dir.write_values({"bind": f"wq{dsa_id}.{q}"}) ++ user_dir.write_values({"bind": f"wq{dsa_id}.{q}"}) + + + def main(args): +diff --git a/dpdk/drivers/raw/ioat/ioat_common.c b/dpdk/drivers/raw/ioat/ioat_common.c +index 142e171bc9..414b753be7 100644 +--- a/dpdk/drivers/raw/ioat/ioat_common.c ++++ b/dpdk/drivers/raw/ioat/ioat_common.c +@@ -9,6 +9,8 @@ + + #include "ioat_private.h" + ++RTE_LOG_REGISTER(ioat_rawdev_logtype, rawdev.ioat, INFO); ++ + static const char * const xstat_names[] = { + "failed_enqueues", "successful_enqueues", + "copies_started", "copies_completed" +diff --git a/dpdk/drivers/raw/ioat/ioat_private.h b/dpdk/drivers/raw/ioat/ioat_private.h +index 6c423811ec..2564eb51e4 100644 +--- a/dpdk/drivers/raw/ioat/ioat_private.h ++++ b/dpdk/drivers/raw/ioat/ioat_private.h +@@ -18,10 +18,10 @@ + #include + #include "rte_ioat_rawdev.h" + +-extern int ioat_pmd_logtype; ++extern int ioat_rawdev_logtype; + + #define IOAT_PMD_LOG(level, fmt, args...) rte_log(RTE_LOG_ ## level, \ +- ioat_pmd_logtype, "%s(): " fmt "\n", __func__, ##args) ++ ioat_rawdev_logtype, "%s(): " fmt "\n", __func__, ##args) + + #define IOAT_PMD_DEBUG(fmt, args...) IOAT_PMD_LOG(DEBUG, fmt, ## args) + #define IOAT_PMD_INFO(fmt, args...) IOAT_PMD_LOG(INFO, fmt, ## args) +diff --git a/dpdk/drivers/raw/ioat/ioat_rawdev.c b/dpdk/drivers/raw/ioat/ioat_rawdev.c +index 2c88b4369f..ea193f9d11 100644 +--- a/dpdk/drivers/raw/ioat/ioat_rawdev.c ++++ b/dpdk/drivers/raw/ioat/ioat_rawdev.c +@@ -28,8 +28,6 @@ static struct rte_pci_driver ioat_pmd_drv; + #define IOAT_DEVICE_ID_BDXF 0x6f2F + #define IOAT_DEVICE_ID_ICX 0x0b00 + +-RTE_LOG_REGISTER(ioat_pmd_logtype, rawdev.ioat, INFO); +- + #define DESC_SZ sizeof(struct rte_ioat_generic_hw_desc) + #define COMPLETION_SZ sizeof(__m128i) + +diff --git a/dpdk/drivers/raw/ntb/ntb.c b/dpdk/drivers/raw/ntb/ntb.c +index 6dd213ef6e..4e22fb3733 100644 +--- a/dpdk/drivers/raw/ntb/ntb.c ++++ b/dpdk/drivers/raw/ntb/ntb.c +@@ -923,6 +923,11 @@ ntb_dev_start(struct rte_rawdev *dev) + + hw->peer_mw_base = rte_zmalloc("ntb_peer_mw_base", hw->mw_cnt * + sizeof(uint64_t), 0); ++ if (hw->peer_mw_base == NULL) { ++ NTB_LOG(ERR, "Cannot allocate memory for peer mw base."); ++ ret = -ENOMEM; ++ goto err_q_init; ++ } + + if (hw->ntb_ops->spad_read == NULL) { + ret = -ENOTSUP; +@@ -1080,6 +1085,10 @@ ntb_attr_set(struct rte_rawdev *dev, const char *attr_name, + if (hw->ntb_ops->spad_write == NULL) + return -ENOTSUP; + index = atoi(&attr_name[NTB_SPAD_USER_LEN]); ++ if (index < 0 || index >= NTB_SPAD_USER_MAX_NUM) { ++ NTB_LOG(ERR, "Invalid attribute (%s)", attr_name); ++ return -EINVAL; ++ } + (*hw->ntb_ops->spad_write)(dev, hw->spad_user_list[index], + 1, attr_value); + NTB_LOG(DEBUG, "Set attribute (%s) Value (%" PRIu64 ")", +@@ -1174,6 +1183,10 @@ ntb_attr_get(struct rte_rawdev *dev, const char *attr_name, + if (hw->ntb_ops->spad_read == NULL) + return -ENOTSUP; + index = atoi(&attr_name[NTB_SPAD_USER_LEN]); ++ if (index < 0 || index >= NTB_SPAD_USER_MAX_NUM) { ++ NTB_LOG(ERR, "Attribute (%s) out of range", attr_name); ++ return -EINVAL; ++ } + *attr_value = (*hw->ntb_ops->spad_read)(dev, + hw->spad_user_list[index], 0); + NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")", +@@ -1388,6 +1401,10 @@ ntb_init_hw(struct rte_rawdev *dev, struct rte_pci_device *pci_dev) + + /* Init doorbell. 
*/ + hw->db_valid_mask = RTE_LEN2MASK(hw->db_cnt, uint64_t); ++ /* Clear all valid doorbell bits before registering intr handler */ ++ if (hw->ntb_ops->db_clear == NULL) ++ return -ENOTSUP; ++ (*hw->ntb_ops->db_clear)(dev, hw->db_valid_mask); + + intr_handle = &pci_dev->intr_handle; + /* Register callback func to eal lib */ +diff --git a/dpdk/drivers/raw/ntb/ntb.h b/dpdk/drivers/raw/ntb/ntb.h +index cdf7667d5d..c9ff33aa59 100644 +--- a/dpdk/drivers/raw/ntb/ntb.h ++++ b/dpdk/drivers/raw/ntb/ntb.h +@@ -95,7 +95,7 @@ enum ntb_spad_idx { + * @spad_write: Write val to local/peer spad register. + * @db_read: Read doorbells status. + * @db_clear: Clear local doorbells. +- * @db_set_mask: Set bits in db mask, preventing db interrpts generated ++ * @db_set_mask: Set bits in db mask, preventing db interrupts generated + * for those db bits. + * @peer_db_set: Set doorbell bit to generate peer interrupt for that bit. + * @vector_bind: Bind vector source [intr] to msix vector [msix]. +diff --git a/dpdk/drivers/raw/ntb/ntb_hw_intel.c b/dpdk/drivers/raw/ntb/ntb_hw_intel.c +index 4427e11458..a742e8fbb9 100644 +--- a/dpdk/drivers/raw/ntb/ntb_hw_intel.c ++++ b/dpdk/drivers/raw/ntb/ntb_hw_intel.c +@@ -148,6 +148,11 @@ intel_ntb_dev_init(const struct rte_rawdev *dev) + + hw->mw_size = rte_zmalloc("ntb_mw_size", + hw->mw_cnt * sizeof(uint64_t), 0); ++ if (hw->mw_size == NULL) { ++ NTB_LOG(ERR, "Cannot allocate memory for mw size."); ++ return -ENOMEM; ++ } ++ + for (i = 0; i < hw->mw_cnt; i++) { + bar = intel_ntb_bar[i]; + hw->mw_size[i] = hw->pci_dev->mem_resource[bar].len; +diff --git a/dpdk/drivers/raw/octeontx2_dma/otx2_dpi_rawdev.c b/dpdk/drivers/raw/octeontx2_dma/otx2_dpi_rawdev.c +index efdba2779b..8c01f25ec7 100644 +--- a/dpdk/drivers/raw/octeontx2_dma/otx2_dpi_rawdev.c ++++ b/dpdk/drivers/raw/octeontx2_dma/otx2_dpi_rawdev.c +@@ -389,6 +389,7 @@ otx2_dpi_rawdev_probe(struct rte_pci_driver *pci_drv __rte_unused, + vf_id = ((pci_dev->addr.devid & 0x1F) << 3) | + (pci_dev->addr.function & 0x7); + vf_id -= 1; ++ dpivf->dev = pci_dev; + dpivf->state = DPI_QUEUE_START; + dpivf->vf_id = vf_id; + dpivf->vf_bar0 = (uintptr_t)pci_dev->mem_resource[0].addr; +diff --git a/dpdk/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c b/dpdk/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c +index d04e957d82..31459c182e 100644 +--- a/dpdk/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c ++++ b/dpdk/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c +@@ -694,7 +694,6 @@ sdp_droq_read_packet(struct sdp_device *sdpvf __rte_unused, + struct sdp_droq_pkt *droq_pkt) + { + struct sdp_droq_info *info; +- uint32_t total_len = 0; + uint32_t pkt_len = 0; + + info = &droq->info_list[droq->read_idx]; +@@ -706,7 +705,6 @@ sdp_droq_read_packet(struct sdp_device *sdpvf __rte_unused, + + /* Deduce the actual data size */ + info->length -= SDP_RH_SIZE; +- total_len += (uint32_t)info->length; + + otx2_sdp_dbg("OQ: pkt_len[%ld], buffer_size %d", + (long)info->length, droq->buffer_size); +diff --git a/dpdk/drivers/raw/skeleton/skeleton_rawdev_test.c b/dpdk/drivers/raw/skeleton/skeleton_rawdev_test.c +index 1405df080d..484468eeb4 100644 +--- a/dpdk/drivers/raw/skeleton/skeleton_rawdev_test.c ++++ b/dpdk/drivers/raw/skeleton/skeleton_rawdev_test.c +@@ -295,6 +295,7 @@ test_rawdev_attr_set_get(void) + dummy_value = &set_value; + *dummy_value = 200; + ret = rte_rawdev_set_attr(test_dev_id, "Test2", (uintptr_t)dummy_value); ++ RTE_TEST_ASSERT(!ret, "Unable to set an attribute (Test2)"); + + /* Check if attributes have been set */ + ret = rte_rawdev_get_attr(test_dev_id, "Test1", 
&ret_value); +diff --git a/dpdk/drivers/regex/mlx5/mlx5_regex.c b/dpdk/drivers/regex/mlx5/mlx5_regex.c +index c91c444dda..9ec9346a62 100644 +--- a/dpdk/drivers/regex/mlx5/mlx5_regex.c ++++ b/dpdk/drivers/regex/mlx5/mlx5_regex.c +@@ -159,6 +159,7 @@ mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + rte_errno = ENOMEM; + goto dev_error; + } ++ priv->sq_ts_format = attr.sq_ts_format; + priv->ctx = ctx; + priv->nb_engines = 2; /* attr.regexp_num_of_engines */ + /* Default RXP programming mode to Shared. */ +@@ -247,8 +248,6 @@ mlx5_regex_pci_remove(struct rte_pci_device *pci_dev) + rte_regexdev_unregister(priv->regexdev); + if (priv->ctx) + mlx5_glue->close_device(priv->ctx); +- if (priv->regexdev) +- rte_regexdev_unregister(priv->regexdev); + rte_free(priv); + } + return 0; +diff --git a/dpdk/drivers/regex/mlx5/mlx5_regex.h b/dpdk/drivers/regex/mlx5/mlx5_regex.h +index 2c4877c37d..3fe7bb4a52 100644 +--- a/dpdk/drivers/regex/mlx5/mlx5_regex.h ++++ b/dpdk/drivers/regex/mlx5/mlx5_regex.h +@@ -80,6 +80,7 @@ struct mlx5_regex_priv { + struct ibv_pd *pd; + struct mlx5_dbr_page_list dbrpgs; /* Door-bell pages. */ + struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */ ++ uint8_t sq_ts_format; /* Whether SQ supports timestamp formats. */ + }; + + /* mlx5_regex.c */ +diff --git a/dpdk/drivers/regex/mlx5/mlx5_regex_control.c b/dpdk/drivers/regex/mlx5/mlx5_regex_control.c +index d6f452bb6b..7373495e6b 100644 +--- a/dpdk/drivers/regex/mlx5/mlx5_regex_control.c ++++ b/dpdk/drivers/regex/mlx5/mlx5_regex_control.c +@@ -233,6 +233,7 @@ regex_ctrl_create_sq(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp, + attr.tis_num = 0; + attr.user_index = q_ind; + attr.cqn = qp->cq.obj->id; ++ attr.ts_format = mlx5_ts_format_conv(priv->sq_ts_format); + wq_attr->uar_page = priv->uar->page_id; + regex_get_pdn(priv->pd, &pd_num); + wq_attr->pd = pd_num; +diff --git a/dpdk/drivers/regex/mlx5/mlx5_regex_fastpath.c b/dpdk/drivers/regex/mlx5/mlx5_regex_fastpath.c +index 8d134ac98e..2d28e85e4d 100644 +--- a/dpdk/drivers/regex/mlx5/mlx5_regex_fastpath.c ++++ b/dpdk/drivers/regex/mlx5/mlx5_regex_fastpath.c +@@ -25,8 +25,8 @@ + #include "mlx5_regex.h" + + #define MLX5_REGEX_MAX_WQE_INDEX 0xffff +-#define MLX5_REGEX_METADATA_SIZE UINT32_C(64) +-#define MLX5_REGEX_MAX_OUTPUT RTE_BIT32(11) ++#define MLX5_REGEX_METADATA_SIZE ((size_t)64) ++#define MLX5_REGEX_MAX_OUTPUT (((size_t)1) << 11) + #define MLX5_REGEX_WQE_CTRL_OFFSET 12 + #define MLX5_REGEX_WQE_METADATA_OFFSET 16 + #define MLX5_REGEX_WQE_GATHER_OFFSET 32 +diff --git a/dpdk/drivers/regex/mlx5/mlx5_rxp.c b/dpdk/drivers/regex/mlx5/mlx5_rxp.c +index 0753ab3bdc..b7ac6ed4a8 100644 +--- a/dpdk/drivers/regex/mlx5/mlx5_rxp.c ++++ b/dpdk/drivers/regex/mlx5/mlx5_rxp.c +@@ -989,7 +989,7 @@ mlx5_regex_configure(struct rte_regexdev *dev, + dev->data->dev_conf.nb_queue_pairs = priv->nb_queues; + priv->qps = rte_zmalloc(NULL, sizeof(struct mlx5_regex_qp) * + priv->nb_queues, 0); +- if (!priv->nb_queues) { ++ if (!priv->qps) { + DRV_LOG(ERR, "can't allocate qps memory"); + rte_errno = ENOMEM; + return -rte_errno; +diff --git a/dpdk/drivers/regex/octeontx2/meson.build b/dpdk/drivers/regex/octeontx2/meson.build +index 34e51728c2..c42d83d549 100644 +--- a/dpdk/drivers/regex/octeontx2/meson.build ++++ b/dpdk/drivers/regex/octeontx2/meson.build +@@ -12,7 +12,6 @@ lib = cc.find_library('librxp_compiler', required: false) + if lib.found() + ext_deps += lib + ext_deps += cc.find_library('libstdc++', required: true) +- includes += include_directories(inc_dir) + 
cflags += ['-DREE_COMPILER_SDK'] + endif + +diff --git a/dpdk/drivers/vdpa/ifc/base/ifcvf.c b/dpdk/drivers/vdpa/ifc/base/ifcvf.c +index 3c0b2dff66..d10c1fd6a4 100644 +--- a/dpdk/drivers/vdpa/ifc/base/ifcvf.c ++++ b/dpdk/drivers/vdpa/ifc/base/ifcvf.c +@@ -65,8 +65,13 @@ ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev) + hw->common_cfg = get_cap_addr(hw, &cap); + break; + case IFCVF_PCI_CAP_NOTIFY_CFG: +- PCI_READ_CONFIG_DWORD(dev, &hw->notify_off_multiplier, ++ ret = PCI_READ_CONFIG_DWORD(dev, ++ &hw->notify_off_multiplier, + pos + sizeof(cap)); ++ if (ret < 0) { ++ DEBUGOUT("failed to read notify_off_multiplier\n"); ++ return -1; ++ } + hw->notify_base = get_cap_addr(hw, &cap); + hw->notify_region = cap.bar; + break; +@@ -89,12 +94,14 @@ ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev) + return -1; + } + +- DEBUGOUT("capability mapping:\ncommon cfg: %p\n" +- "notify base: %p\nisr cfg: %p\ndevice cfg: %p\n" +- "multiplier: %u\n", +- hw->common_cfg, hw->dev_cfg, +- hw->isr, hw->notify_base, +- hw->notify_off_multiplier); ++ DEBUGOUT("capability mapping:\n" ++ "common cfg: %p\n" ++ "notify base: %p\n" ++ "isr cfg: %p\n" ++ "device cfg: %p\n" ++ "multiplier: %u\n", ++ hw->common_cfg, hw->notify_base, hw->isr, hw->dev_cfg, ++ hw->notify_off_multiplier); + + return 0; + } +diff --git a/dpdk/drivers/vdpa/ifc/ifcvf_vdpa.c b/dpdk/drivers/vdpa/ifc/ifcvf_vdpa.c +index 6a1b44bc77..85bca09377 100644 +--- a/dpdk/drivers/vdpa/ifc/ifcvf_vdpa.c ++++ b/dpdk/drivers/vdpa/ifc/ifcvf_vdpa.c +@@ -357,6 +357,8 @@ vdpa_enable_vfio_intr(struct ifcvf_internal *internal, bool m_rx) + vring.callfd = -1; + + nr_vring = rte_vhost_get_vring_num(internal->vid); ++ if (nr_vring > IFCVF_MAX_QUEUES * 2) ++ return -1; + + irq_set = (struct vfio_irq_set *)irq_set_buf; + irq_set->argsz = sizeof(irq_set_buf); +diff --git a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.c b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.c +index 0b2f1ab68e..65a1edc33c 100644 +--- a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.c ++++ b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.c +@@ -82,7 +82,7 @@ mlx5_vdpa_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num) + DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name); + return -1; + } +- *queue_num = priv->caps.max_num_virtio_queues; ++ *queue_num = priv->caps.max_num_virtio_queues / 2; + return 0; + } + +@@ -139,7 +139,7 @@ mlx5_vdpa_set_vring_state(int vid, int vring, int state) + DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name); + return -EINVAL; + } +- if (vring >= (int)priv->caps.max_num_virtio_queues * 2) { ++ if (vring >= (int)priv->caps.max_num_virtio_queues) { + DRV_LOG(ERR, "Too big vring id: %d.", vring); + return -E2BIG; + } +@@ -281,10 +281,10 @@ mlx5_vdpa_dev_close(int vid) + DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name); + return -1; + } +- if (priv->configured) +- ret |= mlx5_vdpa_lm_log(priv); + mlx5_vdpa_err_event_unset(priv); + mlx5_vdpa_cqe_event_unset(priv); ++ if (priv->configured) ++ ret |= mlx5_vdpa_lm_log(priv); + mlx5_vdpa_steer_unset(priv); + mlx5_vdpa_virtqs_release(priv); + mlx5_vdpa_event_qp_global_release(priv); +@@ -504,7 +504,7 @@ mlx5_vdpa_get_ib_device_match(struct rte_pci_addr *addr) + static int + mlx5_vdpa_nl_roce_disable(const char *addr) + { +- int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC); ++ int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC, 0); + int devlink_id; + int enable; + int ret; +@@ -686,6 +686,7 @@ mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + struct mlx5_vdpa_priv *priv = NULL; + struct ibv_context *ctx = NULL; + struct mlx5_hca_attr attr; 
++ int retry; + int ret; + + ibv = mlx5_vdpa_get_ib_device_match(&pci_dev->addr); +@@ -725,7 +726,7 @@ mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + DRV_LOG(DEBUG, "No capability to support virtq statistics."); + priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) + + sizeof(struct mlx5_vdpa_virtq) * +- attr.vdpa.max_num_virtio_queues * 2, ++ attr.vdpa.max_num_virtio_queues, + RTE_CACHE_LINE_SIZE); + if (!priv) { + DRV_LOG(ERR, "Failed to allocate private memory."); +@@ -735,11 +736,19 @@ mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + priv->caps = attr.vdpa; + priv->log_max_rqt_size = attr.log_max_rqt_size; + priv->num_lag_ports = attr.num_lag_ports; ++ priv->qp_ts_format = attr.qp_ts_format; + if (attr.num_lag_ports == 0) + priv->num_lag_ports = 1; + priv->ctx = ctx; + priv->pci_dev = pci_dev; +- priv->var = mlx5_glue->dv_alloc_var(ctx, 0); ++ for (retry = 0; retry < 7; retry++) { ++ priv->var = mlx5_glue->dv_alloc_var(priv->ctx, 0); ++ if (priv->var != NULL) ++ break; ++ DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.\n", retry); ++ /* Wait Qemu release VAR during vdpa restart, 0.1 sec based. */ ++ usleep(100000U << retry); ++ } + if (!priv->var) { + DRV_LOG(ERR, "Failed to allocate VAR %u.\n", errno); + goto error; +@@ -804,6 +813,8 @@ mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev) + mlx5_glue->dv_free_var(priv->var); + priv->var = NULL; + } ++ if (priv->vdev) ++ rte_vdpa_unregister_device(priv->vdev); + mlx5_glue->close_device(priv->ctx); + pthread_mutex_destroy(&priv->vq_config_lock); + rte_free(priv); +diff --git a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.h b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.h +index d039ada65b..0ffe16e295 100644 +--- a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.h ++++ b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.h +@@ -67,10 +67,10 @@ struct mlx5_vdpa_event_qp { + + struct mlx5_vdpa_query_mr { + SLIST_ENTRY(mlx5_vdpa_query_mr) next; +- void *addr; +- uint64_t length; +- struct mlx5dv_devx_umem *umem; +- struct mlx5_devx_obj *mkey; ++ union { ++ struct ibv_mr *mr; ++ struct mlx5_devx_obj *mkey; ++ }; + int is_indirect; + }; + +@@ -154,11 +154,13 @@ struct mlx5_vdpa_priv { + struct mlx5_devx_obj *tiss[16]; /* TIS list for each LAG port. */ + uint16_t nr_virtqs; + uint8_t num_lag_ports; ++ uint8_t qp_ts_format; + uint64_t features; /* Negotiated features. 
*/ + uint16_t log_max_rqt_size; + struct mlx5_vdpa_steer steer; + struct mlx5dv_var *var; + void *virtq_db_addr; ++ struct mlx5_pmd_wrapped_mr lm_mr; + SLIST_HEAD(mr_list, mlx5_vdpa_query_mr) mr_list; + struct mlx5_vdpa_virtq virtqs[]; + }; +diff --git a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_event.c b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_event.c +index 3aeaeb893f..03c7f849ca 100644 +--- a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_event.c ++++ b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_event.c +@@ -659,6 +659,7 @@ mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n, + if (mlx5_vdpa_cq_create(priv, log_desc_n, callfd, &eqp->cq)) + return -1; + attr.pd = priv->pdn; ++ attr.ts_format = mlx5_ts_format_conv(priv->qp_ts_format); + eqp->fw_qp = mlx5_devx_cmd_create_qp(priv->ctx, &attr); + if (!eqp->fw_qp) { + DRV_LOG(ERR, "Failed to create FW QP(%u).", rte_errno); +@@ -688,7 +689,8 @@ mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n, + attr.wq_umem_id = eqp->umem_obj->umem_id; + attr.wq_umem_offset = 0; + attr.dbr_umem_id = eqp->umem_obj->umem_id; +- attr.dbr_address = (1 << log_desc_n) * MLX5_WSEG_SIZE; ++ attr.ts_format = mlx5_ts_format_conv(priv->qp_ts_format); ++ attr.dbr_address = RTE_BIT64(log_desc_n) * MLX5_WSEG_SIZE; + eqp->sw_qp = mlx5_devx_cmd_create_qp(priv->ctx, &attr); + if (!eqp->sw_qp) { + DRV_LOG(ERR, "Failed to create SW QP(%u).", rte_errno); +diff --git a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_lm.c b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_lm.c +index 6c4284f7f7..bb5bd3b887 100644 +--- a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_lm.c ++++ b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_lm.c +@@ -36,42 +36,22 @@ int + mlx5_vdpa_dirty_bitmap_set(struct mlx5_vdpa_priv *priv, uint64_t log_base, + uint64_t log_size) + { +- struct mlx5_devx_mkey_attr mkey_attr = { +- .addr = (uintptr_t)log_base, +- .size = log_size, +- .pd = priv->pdn, +- .pg_access = 1, +- .klm_array = NULL, +- .klm_num = 0, +- .relaxed_ordering_read = 0, +- .relaxed_ordering_write = 0, +- }; + struct mlx5_devx_virtq_attr attr = { + .type = MLX5_VIRTQ_MODIFY_TYPE_DIRTY_BITMAP_PARAMS, + .dirty_bitmap_addr = log_base, + .dirty_bitmap_size = log_size, + }; +- struct mlx5_vdpa_query_mr *mr = rte_malloc(__func__, sizeof(*mr), 0); + int i; ++ int ret = mlx5_os_wrapped_mkey_create(priv->ctx, priv->pd, ++ priv->pdn, ++ (void *)(uintptr_t)log_base, ++ log_size, &priv->lm_mr); + +- if (!mr) { +- DRV_LOG(ERR, "Failed to allocate mem for lm mr."); ++ if (ret) { ++ DRV_LOG(ERR, "Failed to allocate wrapped MR for lm."); + return -1; + } +- mr->umem = mlx5_glue->devx_umem_reg(priv->ctx, +- (void *)(uintptr_t)log_base, +- log_size, IBV_ACCESS_LOCAL_WRITE); +- if (!mr->umem) { +- DRV_LOG(ERR, "Failed to register umem for lm mr."); +- goto err; +- } +- mkey_attr.umem_id = mr->umem->umem_id; +- mr->mkey = mlx5_devx_cmd_mkey_create(priv->ctx, &mkey_attr); +- if (!mr->mkey) { +- DRV_LOG(ERR, "Failed to create Mkey for lm."); +- goto err; +- } +- attr.dirty_bitmap_mkey = mr->mkey->id; ++ attr.dirty_bitmap_mkey = priv->lm_mr.lkey; + for (i = 0; i < priv->nr_virtqs; ++i) { + attr.queue_index = i; + if (!priv->virtqs[i].virtq) { +@@ -82,15 +62,9 @@ mlx5_vdpa_dirty_bitmap_set(struct mlx5_vdpa_priv *priv, uint64_t log_base, + goto err; + } + } +- mr->is_indirect = 0; +- SLIST_INSERT_HEAD(&priv->mr_list, mr, next); + return 0; + err: +- if (mr->mkey) +- mlx5_devx_cmd_destroy(mr->mkey); +- if (mr->umem) +- mlx5_glue->devx_umem_dereg(mr->umem); +- rte_free(mr); ++ mlx5_os_wrapped_mkey_destroy(&priv->lm_mr); + return -1; + } + +diff --git 
a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_mem.c b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_mem.c +index f8861d5d26..6b5359bcf8 100644 +--- a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_mem.c ++++ b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_mem.c +@@ -23,14 +23,17 @@ mlx5_vdpa_mem_dereg(struct mlx5_vdpa_priv *priv) + entry = SLIST_FIRST(&priv->mr_list); + while (entry) { + next = SLIST_NEXT(entry, next); +- claim_zero(mlx5_devx_cmd_destroy(entry->mkey)); +- if (!entry->is_indirect) +- claim_zero(mlx5_glue->devx_umem_dereg(entry->umem)); ++ if (entry->is_indirect) ++ claim_zero(mlx5_devx_cmd_destroy(entry->mkey)); ++ else ++ claim_zero(mlx5_glue->dereg_mr(entry->mr)); + SLIST_REMOVE(&priv->mr_list, entry, mlx5_vdpa_query_mr, next); + rte_free(entry); + entry = next; + } + SLIST_INIT(&priv->mr_list); ++ if (priv->lm_mr.addr) ++ mlx5_os_wrapped_mkey_destroy(&priv->lm_mr); + if (priv->null_mr) { + claim_zero(mlx5_glue->dereg_mr(priv->null_mr)); + priv->null_mr = NULL; +@@ -103,15 +106,15 @@ mlx5_vdpa_vhost_mem_regions_prepare(int vid, uint8_t *mode, uint64_t *mem_size, + size = mem->regions[i].guest_phys_addr - + (mem->regions[i - 1].guest_phys_addr + + mem->regions[i - 1].size); +- *gcd = rte_get_gcd(*gcd, size); ++ *gcd = rte_get_gcd64(*gcd, size); + klm_entries_num += KLM_NUM_MAX_ALIGN(size); + } + size = mem->regions[i].size; +- *gcd = rte_get_gcd(*gcd, size); ++ *gcd = rte_get_gcd64(*gcd, size); + klm_entries_num += KLM_NUM_MAX_ALIGN(size); + } + if (*gcd > MLX5_MAX_KLM_BYTE_COUNT) +- *gcd = rte_get_gcd(*gcd, MLX5_MAX_KLM_BYTE_COUNT); ++ *gcd = rte_get_gcd64(*gcd, MLX5_MAX_KLM_BYTE_COUNT); + if (!RTE_IS_POWER_OF_2(*gcd)) { + uint64_t candidate_gcd = rte_align64prevpow2(*gcd); + +@@ -157,7 +160,7 @@ mlx5_vdpa_vhost_mem_regions_prepare(int vid, uint8_t *mode, uint64_t *mem_size, + * The target here is to group all the physical memory regions of the + * virtio device in one indirect mkey. + * For KLM Fixed Buffer Size mode (HW find the translation entry in one +- * read according to the guest phisical address): ++ * read according to the guest physical address): + * All the sub-direct mkeys of it must be in the same size, hence, each + * one of them should be in the GCD size of all the virtio memory + * regions and the holes between them. 
+@@ -208,31 +211,18 @@ mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv) + DRV_LOG(ERR, "Failed to allocate mem entry memory."); + goto error; + } +- entry->umem = mlx5_glue->devx_umem_reg(priv->ctx, +- (void *)(uintptr_t)reg->host_user_addr, +- reg->size, IBV_ACCESS_LOCAL_WRITE); +- if (!entry->umem) { +- DRV_LOG(ERR, "Failed to register Umem by Devx."); +- ret = -errno; +- goto error; +- } +- mkey_attr.addr = (uintptr_t)(reg->guest_phys_addr); +- mkey_attr.size = reg->size; +- mkey_attr.umem_id = entry->umem->umem_id; +- mkey_attr.pd = priv->pdn; +- mkey_attr.pg_access = 1; +- mkey_attr.klm_array = NULL; +- mkey_attr.klm_num = 0; ++ entry->mr = mlx5_glue->reg_mr_iova(priv->pd, ++ (void *)(uintptr_t)(reg->host_user_addr), ++ reg->size, reg->guest_phys_addr, ++ IBV_ACCESS_LOCAL_WRITE); ++ if (!entry->mr) { + mkey_attr.relaxed_ordering_read = 0; + mkey_attr.relaxed_ordering_write = 0; + entry->mkey = mlx5_devx_cmd_mkey_create(priv->ctx, &mkey_attr); +- if (!entry->mkey) { + DRV_LOG(ERR, "Failed to create direct Mkey."); + ret = -rte_errno; + goto error; + } +- entry->addr = (void *)(uintptr_t)(reg->host_user_addr); +- entry->length = reg->size; + entry->is_indirect = 0; + if (i > 0) { + uint64_t sadd; +@@ -262,12 +252,13 @@ mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv) + for (k = 0; k < reg->size; k += klm_size) { + klm_array[klm_index].byte_count = k + klm_size > + reg->size ? reg->size - k : klm_size; +- klm_array[klm_index].mkey = entry->mkey->id; ++ klm_array[klm_index].mkey = entry->mr->lkey; + klm_array[klm_index].address = reg->guest_phys_addr + k; + klm_index++; + } + SLIST_INSERT_HEAD(&priv->mr_list, entry, next); + } ++ memset(&mkey_attr, 0, sizeof(mkey_attr)); + mkey_attr.addr = (uintptr_t)(mem->regions[0].guest_phys_addr); + mkey_attr.size = mem_size; + mkey_attr.pd = priv->pdn; +@@ -295,13 +286,8 @@ mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv) + priv->gpa_mkey_index = entry->mkey->id; + return 0; + error: +- if (entry) { +- if (entry->mkey) +- mlx5_devx_cmd_destroy(entry->mkey); +- if (entry->umem) +- mlx5_glue->devx_umem_dereg(entry->umem); ++ if (entry) + rte_free(entry); +- } + mlx5_vdpa_mem_dereg(priv); + rte_errno = -ret; + return ret; +diff --git a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c +index 3e882e4000..77bfa80fdc 100644 +--- a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c ++++ b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c +@@ -4,10 +4,12 @@ + #include + #include + #include ++#include + + #include + #include + #include ++#include + + #include + +@@ -16,14 +18,15 @@ + + + static void +-mlx5_vdpa_virtq_handler(void *cb_arg) ++mlx5_vdpa_virtq_kick_handler(void *cb_arg) + { + struct mlx5_vdpa_virtq *virtq = cb_arg; + struct mlx5_vdpa_priv *priv = virtq->priv; + uint64_t buf; + int nbytes; ++ int retry; + +- do { ++ for (retry = 0; retry < 3; ++retry) { + nbytes = read(virtq->intr_handle.fd, &buf, 8); + if (nbytes < 0) { + if (errno == EINTR || +@@ -34,7 +37,9 @@ mlx5_vdpa_virtq_handler(void *cb_arg) + virtq->index, strerror(errno)); + } + break; +- } while (1); ++ } ++ if (nbytes < 0) ++ return; + rte_write32(virtq->index, priv->virtq_db_addr); + if (virtq->notifier_state == MLX5_VDPA_NOTIFIER_STATE_DISABLED) { + if (rte_vhost_host_notifier_ctrl(priv->vid, virtq->index, true)) +@@ -54,19 +59,16 @@ static int + mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq) + { + unsigned int i; +- int retries = MLX5_VDPA_INTR_RETRIES; + int ret = -EAGAIN; + +- if (virtq->intr_handle.fd != -1) { +- while (retries-- && ret == -EAGAIN) 
{ ++ if (virtq->intr_handle.fd >= 0) { ++ while (ret == -EAGAIN) { + ret = rte_intr_callback_unregister(&virtq->intr_handle, +- mlx5_vdpa_virtq_handler, +- virtq); ++ mlx5_vdpa_virtq_kick_handler, virtq); + if (ret == -EAGAIN) { +- DRV_LOG(DEBUG, "Try again to unregister fd %d " +- "of virtq %d interrupt, retries = %d.", ++ DRV_LOG(DEBUG, "Try again to unregister fd %d of virtq %hu interrupt", + virtq->intr_handle.fd, +- (int)virtq->index, retries); ++ virtq->index); + usleep(MLX5_VDPA_INTR_RETRIES_USEC); + } + } +@@ -103,13 +105,8 @@ mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv) + for (i = 0; i < priv->nr_virtqs; i++) { + virtq = &priv->virtqs[i]; + mlx5_vdpa_virtq_unset(virtq); +- if (virtq->counters) { ++ if (virtq->counters) + claim_zero(mlx5_devx_cmd_destroy(virtq->counters)); +- virtq->counters = NULL; +- memset(&virtq->reset, 0, sizeof(virtq->reset)); +- } +- memset(virtq->err_time, 0, sizeof(virtq->err_time)); +- virtq->n_retry = 0; + } + for (i = 0; i < priv->num_lag_ports; i++) { + if (priv->tiss[i]) { +@@ -122,10 +119,13 @@ mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv) + priv->td = NULL; + } + if (priv->virtq_db_addr) { +- claim_zero(munmap(priv->virtq_db_addr, priv->var->length)); ++ /* Mask out the within page offset for munmap. */ ++ claim_zero(munmap((void *)((uintptr_t)priv->virtq_db_addr & ++ ~(rte_mem_page_size() - 1)), priv->var->length)); + priv->virtq_db_addr = NULL; + } + priv->features = 0; ++ memset(priv->virtqs, 0, sizeof(*virtq) * priv->nr_virtqs); + priv->nr_virtqs = 0; + } + +@@ -343,7 +343,7 @@ mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index) + } else { + virtq->intr_handle.type = RTE_INTR_HANDLE_EXT; + if (rte_intr_callback_register(&virtq->intr_handle, +- mlx5_vdpa_virtq_handler, ++ mlx5_vdpa_virtq_kick_handler, + virtq)) { + virtq->intr_handle.fd = -1; + DRV_LOG(ERR, "Failed to register virtq %d interrupt.", +@@ -368,6 +368,9 @@ mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv, int index) + goto error; + } + virtq->stopped = false; ++ /* Initial notification to ask Qemu handling completed buffers. */ ++ if (virtq->eqp.cq.callfd != -1) ++ eventfd_write(virtq->eqp.cq.callfd, (eventfd_t)1); + DRV_LOG(DEBUG, "vid %u virtq %u was created successfully.", priv->vid, + index); + return 0; +@@ -382,7 +385,7 @@ mlx5_vdpa_features_validate(struct mlx5_vdpa_priv *priv) + if (priv->features & (1ULL << VIRTIO_F_RING_PACKED)) { + if (!(priv->caps.virtio_queue_type & (1 << + MLX5_VIRTQ_TYPE_PACKED))) { +- DRV_LOG(ERR, "Failed to configur PACKED mode for vdev " ++ DRV_LOG(ERR, "Failed to configure PACKED mode for vdev " + "%d - it was not reported by HW/driver" + " capability.", priv->vid); + return -ENOTSUP; +@@ -443,9 +446,16 @@ mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv) + DRV_LOG(ERR, "Failed to configure negotiated features."); + return -1; + } +- if (nr_vring > priv->caps.max_num_virtio_queues * 2) { ++ if ((priv->features & (1ULL << VIRTIO_NET_F_CSUM)) == 0 && ++ ((priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO4)) > 0 || ++ (priv->features & (1ULL << VIRTIO_NET_F_HOST_TSO6)) > 0)) { ++ /* Packet may be corrupted if TSO is enabled without CSUM. 
*/ ++ DRV_LOG(INFO, "TSO is enabled without CSUM, force CSUM."); ++ priv->features |= (1ULL << VIRTIO_NET_F_CSUM); ++ } ++ if (nr_vring > priv->caps.max_num_virtio_queues) { + DRV_LOG(ERR, "Do not support more than %d virtqs(%d).", +- (int)priv->caps.max_num_virtio_queues * 2, ++ (int)priv->caps.max_num_virtio_queues, + (int)nr_vring); + return -1; + } +@@ -458,6 +468,10 @@ mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv) + priv->virtq_db_addr = NULL; + goto error; + } else { ++ /* Add within page offset for 64K page system. */ ++ priv->virtq_db_addr = (char *)priv->virtq_db_addr + ++ ((rte_mem_page_size() - 1) & ++ priv->caps.doorbell_bar_offset); + DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.", + priv->virtq_db_addr); + } +diff --git a/dpdk/examples/bbdev_app/Makefile b/dpdk/examples/bbdev_app/Makefile +index 2f156736d1..942e106ac2 100644 +--- a/dpdk/examples/bbdev_app/Makefile ++++ b/dpdk/examples/bbdev_app/Makefile +@@ -7,8 +7,10 @@ APP = bbdev + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/bbdev_app/main.c b/dpdk/examples/bbdev_app/main.c +index 2e170caf84..a11148f9ab 100644 +--- a/dpdk/examples/bbdev_app/main.c ++++ b/dpdk/examples/bbdev_app/main.c +@@ -8,7 +8,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -379,7 +379,7 @@ add_awgn(struct rte_mbuf **mbufs, uint16_t num_pkts) + /* Encoder output to Decoder input adapter. The Decoder accepts only soft input + * so each bit of the encoder output must be translated into one byte of LLR. If + * Sub-block Deinterleaver is bypassed, which is the case, the padding bytes +- * must additionally be insterted at the end of each sub-block. ++ * must additionally be inserted at the end of each sub-block. 
+ */ + static inline void + transform_enc_out_dec_in(struct rte_mbuf **mbufs, uint8_t *temp_buf, +@@ -1195,5 +1195,8 @@ main(int argc, char **argv) + ret |= rte_eal_wait_lcore(lcore_id); + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return ret; + } +diff --git a/dpdk/examples/bond/Makefile b/dpdk/examples/bond/Makefile +index 8700b589fb..d96afe2cba 100644 +--- a/dpdk/examples/bond/Makefile ++++ b/dpdk/examples/bond/Makefile +@@ -7,8 +7,10 @@ APP = bond_app + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -21,8 +23,6 @@ static: build/$(APP)-static + + LDFLAGS += -lrte_net_bond + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/bond/main.c b/dpdk/examples/bond/main.c +index 81a6fa976b..43d5df83ae 100644 +--- a/dpdk/examples/bond/main.c ++++ b/dpdk/examples/bond/main.c +@@ -233,7 +233,7 @@ bond_port_init(struct rte_mempool *mbuf_pool) + 0 /*SOCKET_ID_ANY*/); + if (retval < 0) + rte_exit(EXIT_FAILURE, +- "Faled to create bond port\n"); ++ "Failed to create bond port\n"); + + BOND_PORT = retval; + +@@ -376,7 +376,7 @@ static int lcore_main(__rte_unused void *arg1) + bond_ip = BOND_IP_1 | (BOND_IP_2 << 8) | + (BOND_IP_3 << 16) | (BOND_IP_4 << 24); + +- rte_spinlock_trylock(&global_flag_stru_p->lock); ++ rte_spinlock_lock(&global_flag_stru_p->lock); + + while (global_flag_stru_p->LcoreMainIsRunning) { + rte_spinlock_unlock(&global_flag_stru_p->lock); +@@ -408,7 +408,7 @@ static int lcore_main(__rte_unused void *arg1) + struct rte_ether_hdr *); + ether_type = eth_hdr->ether_type; + if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN)) +- printf("VLAN taged frame, offset:"); ++ printf("VLAN tagged frame, offset:"); + offset = get_vlan_offset(eth_hdr, ðer_type); + if (offset > 0) + printf("%d\n", offset); +@@ -457,7 +457,7 @@ static int lcore_main(__rte_unused void *arg1) + if (is_free == 0) + rte_pktmbuf_free(pkts[i]); + } +- rte_spinlock_trylock(&global_flag_stru_p->lock); ++ rte_spinlock_lock(&global_flag_stru_p->lock); + } + rte_spinlock_unlock(&global_flag_stru_p->lock); + printf("BYE lcore_main\n"); +@@ -572,7 +572,7 @@ static void cmd_start_parsed(__rte_unused void *parsed_result, + { + int worker_core_id = rte_lcore_id(); + +- rte_spinlock_trylock(&global_flag_stru_p->lock); ++ rte_spinlock_lock(&global_flag_stru_p->lock); + if (global_flag_stru_p->LcoreMainIsRunning == 0) { + if (rte_eal_get_lcore_state(global_flag_stru_p->LcoreMainCore) + != WAIT) { +@@ -592,7 +592,7 @@ static void cmd_start_parsed(__rte_unused void *parsed_result, + if ((worker_core_id >= RTE_MAX_LCORE) || (worker_core_id == 0)) + return; + +- rte_spinlock_trylock(&global_flag_stru_p->lock); ++ rte_spinlock_lock(&global_flag_stru_p->lock); + global_flag_stru_p->LcoreMainIsRunning = 1; + rte_spinlock_unlock(&global_flag_stru_p->lock); + cmdline_printf(cl, +@@ -660,7 +660,7 @@ static void cmd_stop_parsed(__rte_unused void *parsed_result, + struct cmdline *cl, + __rte_unused void *data) + { +- rte_spinlock_trylock(&global_flag_stru_p->lock); ++ rte_spinlock_lock(&global_flag_stru_p->lock); + if (global_flag_stru_p->LcoreMainIsRunning == 0) { + cmdline_printf(cl, + "lcore_main not 
running on core:%d\n", +@@ -701,7 +701,7 @@ static void cmd_quit_parsed(__rte_unused void *parsed_result, + struct cmdline *cl, + __rte_unused void *data) + { +- rte_spinlock_trylock(&global_flag_stru_p->lock); ++ rte_spinlock_lock(&global_flag_stru_p->lock); + if (global_flag_stru_p->LcoreMainIsRunning == 0) { + cmdline_printf(cl, + "lcore_main not running on core:%d\n", +@@ -763,7 +763,7 @@ static void cmd_show_parsed(__rte_unused void *parsed_result, + printf("\n"); + } + +- rte_spinlock_trylock(&global_flag_stru_p->lock); ++ rte_spinlock_lock(&global_flag_stru_p->lock); + cmdline_printf(cl, + "Active_slaves:%d " + "packets received:Tot:%d Arp:%d IPv4:%d\n", +@@ -876,5 +876,9 @@ main(int argc, char *argv[]) + prompt(NULL); + + rte_delay_ms(100); ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/cmdline/Makefile b/dpdk/examples/cmdline/Makefile +index 09da84ba0b..fd7ae682a4 100644 +--- a/dpdk/examples/cmdline/Makefile ++++ b/dpdk/examples/cmdline/Makefile +@@ -7,8 +7,10 @@ APP = cmdline + # all source are stored in SRCS-y + SRCS-y := main.c commands.c parse_obj_list.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/cmdline/main.c b/dpdk/examples/cmdline/main.c +index bb79542452..94002f0582 100644 +--- a/dpdk/examples/cmdline/main.c ++++ b/dpdk/examples/cmdline/main.c +@@ -36,5 +36,8 @@ int main(int argc, char **argv) + cmdline_interact(cl); + cmdline_stdin_exit(cl); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/distributor/Makefile b/dpdk/examples/distributor/Makefile +index d7615f9a32..ade6df08b3 100644 +--- a/dpdk/examples/distributor/Makefile ++++ b/dpdk/examples/distributor/Makefile +@@ -7,8 +7,10 @@ APP = distributor_app + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/distributor/main.c b/dpdk/examples/distributor/main.c +index caa7c46cb1..96c0ac8046 100644 +--- a/dpdk/examples/distributor/main.c ++++ b/dpdk/examples/distributor/main.c +@@ -109,7 +109,7 @@ static inline int + port_init(uint16_t port, struct rte_mempool *mbuf_pool) + { + struct rte_eth_conf port_conf = port_conf_default; +- const uint16_t rxRings = 1, txRings = rte_lcore_count() - 1; ++ const uint16_t rxRings = 1, txRings = 1; + int retval; + uint16_t q; + uint16_t nb_rxd = RX_RING_SIZE; +@@ -265,8 +265,8 @@ lcore_rx(struct lcore_params *p) + * packets are then send straight to the tx core. 
+ */ + #if 0 +- rte_distributor_process(d, bufs, nb_rx); +- const uint16_t nb_ret = rte_distributor_returned_pktsd, ++ rte_distributor_process(p->d, bufs, nb_rx); ++ const uint16_t nb_ret = rte_distributor_returned_pkts(p->d, + bufs, BURST_SIZE*2); + + app_stats.rx.returned_pkts += nb_ret; +@@ -932,5 +932,8 @@ main(int argc, char *argv[]) + rte_free(pd); + rte_free(pr); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/ethtool/ethtool-app/Makefile b/dpdk/examples/ethtool/ethtool-app/Makefile +index 93ef5c27c3..cda3f14252 100644 +--- a/dpdk/examples/ethtool/ethtool-app/Makefile ++++ b/dpdk/examples/ethtool/ethtool-app/Makefile +@@ -12,8 +12,10 @@ LDFLAGS += -L../lib/build + LDFLAGS_STATIC = -l:librte_ethtool.a + LDFLAGS_SHARED = -lrte_ethtool + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -24,8 +26,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED += $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/ethtool/ethtool-app/ethapp.c b/dpdk/examples/ethtool/ethtool-app/ethapp.c +index e6c93e13a7..36a1c374f4 100644 +--- a/dpdk/examples/ethtool/ethtool-app/ethapp.c ++++ b/dpdk/examples/ethtool/ethtool-app/ethapp.c +@@ -528,7 +528,6 @@ pcmd_mtu_callback(void *ptr_params, + printf("Error: Invalid port number %i\n", params->port); + return; + } +- new_mtu = atoi(params->opt); + new_mtu = strtoul(params->opt, &ptr_parse_end, 10); + if (*ptr_parse_end != '\0' || + new_mtu < RTE_ETHER_MIN_MTU || +diff --git a/dpdk/examples/ethtool/ethtool-app/main.c b/dpdk/examples/ethtool/ethtool-app/main.c +index c6023a1d41..21ed85c7d6 100644 +--- a/dpdk/examples/ethtool/ethtool-app/main.c ++++ b/dpdk/examples/ethtool/ethtool-app/main.c +@@ -299,5 +299,8 @@ int main(int argc, char **argv) + return -1; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/ethtool/lib/Makefile b/dpdk/examples/ethtool/lib/Makefile +index b4af9b0c91..a33040e66a 100644 +--- a/dpdk/examples/ethtool/lib/Makefile ++++ b/dpdk/examples/ethtool/lib/Makefile +@@ -2,15 +2,15 @@ + # Copyright(c) 2015-2020 Intel Corporation + + +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++PKGCONF ?= pkg-config ++ ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + ifneq ($(shell uname),Linux) + $(error This application can only operate in a linux environment) + endif + +-PKGCONF ?= pkg-config +- + # library name + LIB = librte_ethtool.so + LIB_STATIC = librte_ethtool.a +diff --git a/dpdk/examples/ethtool/lib/rte_ethtool.c b/dpdk/examples/ethtool/lib/rte_ethtool.c +index 4132516307..20e1988353 100644 +--- a/dpdk/examples/ethtool/lib/rte_ethtool.c ++++ b/dpdk/examples/ethtool/lib/rte_ethtool.c +@@ -402,7 +402,7 @@ rte_ethtool_net_set_rx_mode(uint16_t port_id) + #endif + } + +- /* Enable Rx vlan filter, VF unspport status is discard */ ++ /* Enable Rx vlan filter, VF unsupported status is discard */ + ret = rte_eth_dev_set_vlan_offload(port_id, ETH_VLAN_FILTER_MASK); + if (ret != 0) + return ret; +diff --git a/dpdk/examples/ethtool/lib/rte_ethtool.h 
b/dpdk/examples/ethtool/lib/rte_ethtool.h +index f177096636..d27e0102b1 100644 +--- a/dpdk/examples/ethtool/lib/rte_ethtool.h ++++ b/dpdk/examples/ethtool/lib/rte_ethtool.h +@@ -189,7 +189,7 @@ int rte_ethtool_get_module_eeprom(uint16_t port_id, + + /** + * Retrieve the Ethernet device pause frame configuration according to +- * parameter attributes desribed by ethtool data structure, ++ * parameter attributes described by ethtool data structure, + * ethtool_pauseparam. + * + * @param port_id +@@ -209,7 +209,7 @@ int rte_ethtool_get_pauseparam(uint16_t port_id, + + /** + * Setting the Ethernet device pause frame configuration according to +- * parameter attributes desribed by ethtool data structure, ethtool_pauseparam. ++ * parameter attributes described by ethtool data structure, ethtool_pauseparam. + * + * @param port_id + * The port identifier of the Ethernet device. +diff --git a/dpdk/examples/eventdev_pipeline/Makefile b/dpdk/examples/eventdev_pipeline/Makefile +index f5072a2b0c..faf667a54a 100644 +--- a/dpdk/examples/eventdev_pipeline/Makefile ++++ b/dpdk/examples/eventdev_pipeline/Makefile +@@ -9,8 +9,10 @@ SRCS-y := main.c + SRCS-y += pipeline_worker_generic.c + SRCS-y += pipeline_worker_tx.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -21,8 +23,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/fips_validation/Makefile b/dpdk/examples/fips_validation/Makefile +index 8f82a4c6c5..c41fdb0006 100644 +--- a/dpdk/examples/fips_validation/Makefile ++++ b/dpdk/examples/fips_validation/Makefile +@@ -17,8 +17,10 @@ SRCS-y += fips_dev_self_test.c + SRCS-y += fips_validation_xts.c + SRCS-y += main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -29,8 +31,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/fips_validation/fips_validation.c b/dpdk/examples/fips_validation/fips_validation.c +index 4c3ed80c82..39dfe8e8e6 100644 +--- a/dpdk/examples/fips_validation/fips_validation.c ++++ b/dpdk/examples/fips_validation/fips_validation.c +@@ -522,7 +522,7 @@ parse_uint8_hex_str(const char *key, char *src, struct fips_val *val) + val->val = NULL; + } + +- val->val = rte_zmalloc(NULL, len, 0); ++ val->val = rte_zmalloc(NULL, len + 1, 0); + if (!val->val) + return -ENOMEM; + +diff --git a/dpdk/examples/fips_validation/main.c b/dpdk/examples/fips_validation/main.c +index cad6bcb180..7f4beef94c 100644 +--- a/dpdk/examples/fips_validation/main.c ++++ b/dpdk/examples/fips_validation/main.c +@@ -128,6 +128,10 @@ cryptodev_fips_validate_app_int(void) + if (ret < 0) + goto error_exit; + ++ ret = rte_cryptodev_start(env.dev_id); ++ if (ret < 0) ++ goto error_exit; ++ + 
return 0; + + error_exit: +@@ -483,6 +487,9 @@ main(int argc, char *argv[]) + fips_test_clear(); + cryptodev_fips_validate_app_uninit(); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return ret; + + } +@@ -1614,7 +1621,6 @@ fips_mct_sha_test(void) + int ret; + uint32_t i, j; + +- val.val = rte_malloc(NULL, (MAX_DIGEST_SIZE*SHA_MD_BLOCK), 0); + for (i = 0; i < SHA_MD_BLOCK; i++) + md[i].val = rte_malloc(NULL, (MAX_DIGEST_SIZE*2), 0); + +@@ -1826,8 +1832,10 @@ fips_test_one_file(void) + + fips_test_clear(); + +- if (env.digest) ++ if (env.digest) { + rte_free(env.digest); ++ env.digest = NULL; ++ } + if (env.mbuf) + rte_pktmbuf_free(env.mbuf); + +diff --git a/dpdk/examples/flow_classify/Makefile b/dpdk/examples/flow_classify/Makefile +index 4c215daf1b..7e892405de 100644 +--- a/dpdk/examples/flow_classify/Makefile ++++ b/dpdk/examples/flow_classify/Makefile +@@ -7,8 +7,10 @@ APP = flow_classify + # all source are stored in SRCS-y + SRCS-y := flow_classify.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/flow_classify/flow_classify.c b/dpdk/examples/flow_classify/flow_classify.c +index 335d7d2ad8..54c7dda20f 100644 +--- a/dpdk/examples/flow_classify/flow_classify.c ++++ b/dpdk/examples/flow_classify/flow_classify.c +@@ -284,7 +284,7 @@ lcore_main(struct flow_classifier *cls_app) + * for best performance. + */ + RTE_ETH_FOREACH_DEV(port) +- if (rte_eth_dev_socket_id(port) > 0 && ++ if (rte_eth_dev_socket_id(port) >= 0 && + rte_eth_dev_socket_id(port) != (int)rte_socket_id()) { + printf("\n\n"); + printf("WARNING: port %u is on remote NUMA node\n", +@@ -424,7 +424,7 @@ parse_ipv4_5tuple_rule(char *str, struct rte_eth_ntuple_filter *ntuple_filter) + &ntuple_filter->dst_ip, + &ntuple_filter->dst_ip_mask); + if (ret != 0) { +- flow_classify_log("failed to read source address/mask: %s\n", ++ flow_classify_log("failed to read destination address/mask: %s\n", + in[CB_FLD_DST_ADDR]); + return ret; + } +@@ -853,5 +853,8 @@ main(int argc, char *argv[]) + /* Call lcore_main on the main core only. 
*/ + lcore_main(cls_app); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/flow_filtering/Makefile b/dpdk/examples/flow_filtering/Makefile +index 9bc9179346..7453414d24 100644 +--- a/dpdk/examples/flow_filtering/Makefile ++++ b/dpdk/examples/flow_filtering/Makefile +@@ -5,8 +5,10 @@ APP = flow + + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -17,8 +19,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/flow_filtering/main.c b/dpdk/examples/flow_filtering/main.c +index 93523d625b..932f49fa7f 100644 +--- a/dpdk/examples/flow_filtering/main.c ++++ b/dpdk/examples/flow_filtering/main.c +@@ -259,5 +259,10 @@ main(int argc, char **argv) + rte_exit(EXIT_FAILURE, "error in creating flow"); + } + +- return main_loop(); ++ ret = main_loop(); ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ ++ return ret; + } +diff --git a/dpdk/examples/helloworld/Makefile b/dpdk/examples/helloworld/Makefile +index 436569f5a6..b16773a02f 100644 +--- a/dpdk/examples/helloworld/Makefile ++++ b/dpdk/examples/helloworld/Makefile +@@ -7,8 +7,10 @@ APP = helloworld + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/helloworld/main.c b/dpdk/examples/helloworld/main.c +index 8a4cee60ff..ac72145c73 100644 +--- a/dpdk/examples/helloworld/main.c ++++ b/dpdk/examples/helloworld/main.c +@@ -43,5 +43,9 @@ main(int argc, char **argv) + lcore_hello(NULL); + + rte_eal_mp_wait_lcore(); ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/ioat/Makefile b/dpdk/examples/ioat/Makefile +index c13ad8d8af..c7a54bffbf 100644 +--- a/dpdk/examples/ioat/Makefile ++++ b/dpdk/examples/ioat/Makefile +@@ -7,8 +7,10 @@ APP = ioatfwd + # all source are stored in SRCS-y + SRCS-y := ioatfwd.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/ioat/ioatfwd.c b/dpdk/examples/ioat/ioatfwd.c +index 6502e4531f..e30df8118a 100644 +--- a/dpdk/examples/ioat/ioatfwd.c ++++ 
b/dpdk/examples/ioat/ioatfwd.c +@@ -83,7 +83,7 @@ static uint16_t nb_queues = 1; + /* MAC updating enabled by default. */ + static int mac_updating = 1; + +-/* hardare copy mode enabled by default. */ ++/* hardware copy mode enabled by default. */ + static copy_mode_t copy_mode = COPY_MODE_IOAT_NUM; + + /* size of IOAT rawdev ring for hardware copy mode or +@@ -103,7 +103,6 @@ static volatile bool force_quit; + /* ethernet addresses of ports */ + static struct rte_ether_addr ioat_ports_eth_addr[RTE_MAX_ETHPORTS]; + +-static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS]; + struct rte_mempool *ioat_pktmbuf_pool; + + /* Print out statistics for one port. */ +@@ -478,11 +477,14 @@ ioat_tx_port(struct rxtx_port_config *tx_config) + + port_statistics.tx[tx_config->rxtx_port] += nb_tx; + +- /* Free any unsent packets. */ +- if (unlikely(nb_tx < nb_dq)) ++ if (unlikely(nb_tx < nb_dq)) { ++ port_statistics.tx_dropped[tx_config->rxtx_port] += ++ (nb_dq - nb_tx); ++ /* Free any unsent packets. */ + rte_mempool_put_bulk(ioat_pktmbuf_pool, + (void *)&mbufs_dst[nb_tx], + nb_dq - nb_tx); ++ } + } + } + +@@ -873,25 +875,6 @@ port_init(uint16_t portid, struct rte_mempool *mbuf_pool, uint16_t nb_queues) + "rte_eth_tx_queue_setup:err=%d,port=%u\n", + ret, portid); + +- /* Initialize TX buffers */ +- tx_buffer[portid] = rte_zmalloc_socket("tx_buffer", +- RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0, +- rte_eth_dev_socket_id(portid)); +- if (tx_buffer[portid] == NULL) +- rte_exit(EXIT_FAILURE, +- "Cannot allocate buffer for tx on port %u\n", +- portid); +- +- rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST); +- +- ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid], +- rte_eth_tx_buffer_count_callback, +- &port_statistics.tx_dropped[portid]); +- if (ret < 0) +- rte_exit(EXIT_FAILURE, +- "Cannot set error callback for tx buffer on port %u\n", +- portid); +- + /* Start device */ + ret = rte_eth_dev_start(portid); + if (ret < 0) +@@ -1011,6 +994,9 @@ main(int argc, char **argv) + rte_ring_free(cfg.ports[i].rx_to_tx_ring); + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + printf("Bye...\n"); + return 0; + } +diff --git a/dpdk/examples/ip_fragmentation/Makefile b/dpdk/examples/ip_fragmentation/Makefile +index f6baf635bb..c7a27e4f14 100644 +--- a/dpdk/examples/ip_fragmentation/Makefile ++++ b/dpdk/examples/ip_fragmentation/Makefile +@@ -8,8 +8,10 @@ APP = ip_fragmentation + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -20,8 +22,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + CFLAGS += -DALLOW_EXPERIMENTAL_API +diff --git a/dpdk/examples/ip_fragmentation/main.c b/dpdk/examples/ip_fragmentation/main.c +index 5a96841dfc..77a6a18d19 100644 +--- a/dpdk/examples/ip_fragmentation/main.c ++++ b/dpdk/examples/ip_fragmentation/main.c +@@ -1075,5 +1075,8 @@ main(int argc, char **argv) + return -1; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/ip_pipeline/Makefile b/dpdk/examples/ip_pipeline/Makefile +index 4b391973cb..e71cd61b43 100644 +--- a/dpdk/examples/ip_pipeline/Makefile ++++ 
b/dpdk/examples/ip_pipeline/Makefile +@@ -20,8 +20,10 @@ SRCS-y += thread.c + SRCS-y += tmgr.c + SRCS-y += cryptodev.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -32,8 +34,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/ip_reassembly/Makefile b/dpdk/examples/ip_reassembly/Makefile +index 740f4721d0..28158225a6 100644 +--- a/dpdk/examples/ip_reassembly/Makefile ++++ b/dpdk/examples/ip_reassembly/Makefile +@@ -8,8 +8,10 @@ APP = ip_reassembly + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -20,8 +22,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/ip_reassembly/main.c b/dpdk/examples/ip_reassembly/main.c +index 954a11512a..6cc8fdf6f8 100644 +--- a/dpdk/examples/ip_reassembly/main.c ++++ b/dpdk/examples/ip_reassembly/main.c +@@ -241,7 +241,7 @@ static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES]; + #endif /* RTE_LIBRTE_IP_FRAG_TBL_STAT */ + + /* +- * If number of queued packets reached given threahold, then ++ * If number of queued packets reached given threshold, then + * send burst of packets on an output interface. + */ + static inline uint32_t +@@ -870,7 +870,7 @@ setup_queue_tbl(struct rx_queue *rxq, uint32_t lcore, uint32_t queue) + + /* + * At any given moment up to +- * mbufs could be stored int the fragment table. ++ * mbufs could be stored in the fragment table. + * Plus, each TX queue can hold up to packets. 
+ */ + +@@ -1201,5 +1201,8 @@ main(int argc, char **argv) + return -1; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/ipsec-secgw/Makefile b/dpdk/examples/ipsec-secgw/Makefile +index 7670cc3684..94a685a9ce 100644 +--- a/dpdk/examples/ipsec-secgw/Makefile ++++ b/dpdk/examples/ipsec-secgw/Makefile +@@ -22,8 +22,10 @@ SRCS-y += flow.c + + CFLAGS += -gdwarf-2 + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -34,8 +36,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/ipsec-secgw/event_helper.c b/dpdk/examples/ipsec-secgw/event_helper.c +index 8475d542b2..eb4640572d 100644 +--- a/dpdk/examples/ipsec-secgw/event_helper.c ++++ b/dpdk/examples/ipsec-secgw/event_helper.c +@@ -712,6 +712,16 @@ eh_initialize_eventdev(struct eventmode_conf *em_conf) + } + } + ++ return 0; ++} ++ ++static int ++eh_start_eventdev(struct eventmode_conf *em_conf) ++{ ++ struct eventdev_params *eventdev_config; ++ int nb_eventdev = em_conf->nb_eventdev; ++ int i, ret; ++ + /* Start event devices */ + for (i = 0; i < nb_eventdev; i++) { + +@@ -1280,7 +1290,7 @@ eh_display_rx_adapter_conf(struct eventmode_conf *em_conf) + for (i = 0; i < nb_rx_adapter; i++) { + adapter = &(em_conf->rx_adapter[i]); + sprintf(print_buf, +- "\tRx adaper ID: %-2d\tConnections: %-2d\tEvent dev ID: %-2d", ++ "\tRx adapter ID: %-2d\tConnections: %-2d\tEvent dev ID: %-2d", + adapter->adapter_id, + adapter->nb_connections, + adapter->eventdev_id); +@@ -1612,6 +1622,13 @@ eh_devs_init(struct eh_conf *conf) + return ret; + } + ++ /* Start eventdev */ ++ ret = eh_start_eventdev(em_conf); ++ if (ret < 0) { ++ EH_LOG_ERR("Failed to start event dev %d", ret); ++ return ret; ++ } ++ + /* Start eth devices after setting up adapter */ + RTE_ETH_FOREACH_DEV(port_id) { + +diff --git a/dpdk/examples/ipsec-secgw/flow.c b/dpdk/examples/ipsec-secgw/flow.c +index 69f8405d15..c217b9e475 100644 +--- a/dpdk/examples/ipsec-secgw/flow.c ++++ b/dpdk/examples/ipsec-secgw/flow.c +@@ -188,7 +188,9 @@ parse_flow_tokens(char **tokens, uint32_t n_tokens, + return; + + rule->port = atoi(tokens[ti]); ++ } + ++ if (strcmp(tokens[ti], "queue") == 0) { + INCREMENT_TOKEN_INDEX(ti, n_tokens, status); + if (status->status < 0) + return; +@@ -212,7 +214,7 @@ flow_init_single(struct flow_rule_entry *rule) + struct rte_flow_item pattern[MAX_RTE_FLOW_PATTERN] = {}; + struct rte_flow_action action[MAX_RTE_FLOW_ACTIONS] = {}; + struct rte_flow_attr attr = {}; +- struct rte_flow_error err; ++ struct rte_flow_error err = {}; + int ret; + + attr.egress = 0; +diff --git a/dpdk/examples/ipsec-secgw/ipsec-secgw.c b/dpdk/examples/ipsec-secgw/ipsec-secgw.c +index 20d69ba813..091f54f065 100644 +--- a/dpdk/examples/ipsec-secgw/ipsec-secgw.c ++++ b/dpdk/examples/ipsec-secgw/ipsec-secgw.c +@@ -160,7 +160,7 @@ uint32_t single_sa_idx; + /* mask of enabled ports */ + static uint32_t enabled_port_mask; + static uint64_t enabled_cryptodev_mask = UINT64_MAX; +-static int32_t promiscuous_on = 1; ++static int32_t promiscuous_on; + static int32_t numa_on = 1; /**< NUMA is enabled by default. 
*/ + static uint32_t nb_lcores; + static uint32_t single_sa; +@@ -255,7 +255,7 @@ struct socket_ctx socket_ctx[NB_SOCKETS]; + /* + * Determine is multi-segment support required: + * - either frame buffer size is smaller then mtu +- * - or reassmeble support is requested ++ * - or reassemble support is requested + */ + static int + multi_seg_required(void) +@@ -292,6 +292,8 @@ adjust_ipv6_pktlen(struct rte_mbuf *m, const struct rte_ipv6_hdr *iph, + + #if (STATS_INTERVAL > 0) + ++struct ipsec_core_statistics core_statistics[RTE_MAX_LCORE]; ++ + /* Print out statistics on packet distribution */ + static void + print_stats_cb(__rte_unused void *param) +@@ -1936,7 +1938,7 @@ add_mapping(struct rte_hash *map, const char *str, uint16_t cdev_id, + + ret = rte_hash_add_key_data(map, &key, (void *)i); + if (ret < 0) { +- printf("Faled to insert cdev mapping for (lcore %u, " ++ printf("Failed to insert cdev mapping for (lcore %u, " + "cdev %u, qp %u), errno %d\n", + key.lcore_id, ipsec_ctx->tbl[i].id, + ipsec_ctx->tbl[i].qp, ret); +@@ -1969,7 +1971,7 @@ add_cdev_mapping(struct rte_cryptodev_info *dev_info, uint16_t cdev_id, + str = "Inbound"; + } + +- /* Required cryptodevs with operation chainning */ ++ /* Required cryptodevs with operation chaining */ + if (!(dev_info->feature_flags & + RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) + return ret; +@@ -2138,7 +2140,7 @@ port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t req_tx_offloads) + "Error during getting device (port %u) info: %s\n", + portid, strerror(-ret)); + +- /* limit allowed HW offloafs, as user requested */ ++ /* limit allowed HW offloads, as user requested */ + dev_info.rx_offload_capa &= dev_rx_offload; + dev_info.tx_offload_capa &= dev_tx_offload; + +@@ -2188,7 +2190,7 @@ port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t req_tx_offloads) + local_port_conf.rxmode.offloads) + rte_exit(EXIT_FAILURE, + "Error: port %u required RX offloads: 0x%" PRIx64 +- ", avaialbe RX offloads: 0x%" PRIx64 "\n", ++ ", available RX offloads: 0x%" PRIx64 "\n", + portid, local_port_conf.rxmode.offloads, + dev_info.rx_offload_capa); + +@@ -2196,7 +2198,7 @@ port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t req_tx_offloads) + local_port_conf.txmode.offloads) + rte_exit(EXIT_FAILURE, + "Error: port %u required TX offloads: 0x%" PRIx64 +- ", avaialbe TX offloads: 0x%" PRIx64 "\n", ++ ", available TX offloads: 0x%" PRIx64 "\n", + portid, local_port_conf.txmode.offloads, + dev_info.tx_offload_capa); + +@@ -2207,7 +2209,7 @@ port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t req_tx_offloads) + if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) + local_port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM; + +- printf("port %u configurng rx_offloads=0x%" PRIx64 ++ printf("port %u configuring rx_offloads=0x%" PRIx64 + ", tx_offloads=0x%" PRIx64 "\n", + portid, local_port_conf.rxmode.offloads, + local_port_conf.txmode.offloads); +@@ -2929,13 +2931,14 @@ main(int32_t argc, char **argv) + if ((enabled_port_mask & (1 << portid)) == 0) + continue; + +- /* Create flow before starting the device */ +- create_default_ipsec_flow(portid, req_rx_offloads[portid]); +- + ret = rte_eth_dev_start(portid); + if (ret < 0) + rte_exit(EXIT_FAILURE, "rte_eth_dev_start: " + "err=%d, port=%d\n", ret, portid); ++ ++ /* Create flow after starting the device */ ++ create_default_ipsec_flow(portid, req_rx_offloads[portid]); ++ + /* + * If enabled, put device in promiscuous mode. 
+ * This allows IO forwarding mode to forward packets +@@ -3037,6 +3040,9 @@ main(int32_t argc, char **argv) + rte_eth_dev_close(portid); + printf(" Done\n"); + } ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); + printf("Bye...\n"); + + return 0; +diff --git a/dpdk/examples/ipsec-secgw/ipsec-secgw.h b/dpdk/examples/ipsec-secgw/ipsec-secgw.h +index f2281e73cf..7e094ed495 100644 +--- a/dpdk/examples/ipsec-secgw/ipsec-secgw.h ++++ b/dpdk/examples/ipsec-secgw/ipsec-secgw.h +@@ -90,7 +90,7 @@ struct ipsec_core_statistics { + uint64_t burst_rx; + } __rte_cache_aligned; + +-struct ipsec_core_statistics core_statistics[RTE_MAX_LCORE]; ++extern struct ipsec_core_statistics core_statistics[RTE_MAX_LCORE]; + #endif /* STATS_INTERVAL */ + + extern struct ethaddr_info ethaddr_tbl[RTE_MAX_ETHPORTS]; +diff --git a/dpdk/examples/ipsec-secgw/ipsec.c b/dpdk/examples/ipsec-secgw/ipsec.c +index 6baeeb342f..a8a8cdbe2e 100644 +--- a/dpdk/examples/ipsec-secgw/ipsec.c ++++ b/dpdk/examples/ipsec-secgw/ipsec.c +@@ -425,7 +425,7 @@ int + create_ipsec_esp_flow(struct ipsec_sa *sa) + { + int ret = 0; +- struct rte_flow_error err; ++ struct rte_flow_error err = {}; + if (sa->direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS) { + RTE_LOG(ERR, IPSEC, + "No Flow director rule for Egress traffic\n"); +diff --git a/dpdk/examples/ipsec-secgw/sa.c b/dpdk/examples/ipsec-secgw/sa.c +index cd1397531a..34f0d8a39a 100644 +--- a/dpdk/examples/ipsec-secgw/sa.c ++++ b/dpdk/examples/ipsec-secgw/sa.c +@@ -758,7 +758,7 @@ parse_sa_tokens(char **tokens, uint32_t n_tokens, + continue; + } + +- /* unrecognizeable input */ ++ /* unrecognizable input */ + APP_CHECK(0, status, "unrecognized input \"%s\"", + tokens[ti]); + return; +@@ -998,7 +998,7 @@ get_spi_proto(uint32_t spi, enum rte_security_ipsec_sa_direction dir, + if (rc4 >= 0) { + if (rc6 >= 0) { + RTE_LOG(ERR, IPSEC, +- "%s: SPI %u used simultaeously by " ++ "%s: SPI %u used simultaneously by " + "IPv4(%d) and IPv6 (%d) SP rules\n", + __func__, spi, rc4, rc6); + return -EINVAL; +@@ -1377,7 +1377,7 @@ ipsec_sa_init(struct ipsec_sa *lsa, struct rte_ipsec_sa *sa, uint32_t sa_size) + } + + /* +- * Allocate space and init rte_ipsec_sa strcutures, ++ * Allocate space and init rte_ipsec_sa structures, + * one per session. + */ + static int +diff --git a/dpdk/examples/ipsec-secgw/sp4.c b/dpdk/examples/ipsec-secgw/sp4.c +index beddd7bc1d..fc4101a4a2 100644 +--- a/dpdk/examples/ipsec-secgw/sp4.c ++++ b/dpdk/examples/ipsec-secgw/sp4.c +@@ -410,7 +410,7 @@ parse_sp4_tokens(char **tokens, uint32_t n_tokens, + continue; + } + +- /* unrecognizeable input */ ++ /* unrecognizable input */ + APP_CHECK(0, status, "unrecognized input \"%s\"", + tokens[ti]); + return; +diff --git a/dpdk/examples/ipsec-secgw/sp6.c b/dpdk/examples/ipsec-secgw/sp6.c +index 328e085288..cce4da7862 100644 +--- a/dpdk/examples/ipsec-secgw/sp6.c ++++ b/dpdk/examples/ipsec-secgw/sp6.c +@@ -515,7 +515,7 @@ parse_sp6_tokens(char **tokens, uint32_t n_tokens, + continue; + } + +- /* unrecognizeable input */ ++ /* unrecognizable input */ + APP_CHECK(0, status, "unrecognized input \"%s\"", + tokens[ti]); + return; +diff --git a/dpdk/examples/ipsec-secgw/test/common_defs.sh b/dpdk/examples/ipsec-secgw/test/common_defs.sh +index f22eb3ab12..3ef06bc761 100644 +--- a/dpdk/examples/ipsec-secgw/test/common_defs.sh ++++ b/dpdk/examples/ipsec-secgw/test/common_defs.sh +@@ -20,7 +20,7 @@ REMOTE_MAC=`ssh ${REMOTE_HOST} ip addr show dev ${REMOTE_IFACE}` + st=$? 
+ REMOTE_MAC=`echo ${REMOTE_MAC} | sed -e 's/^.*ether //' -e 's/ brd.*$//'` + if [[ $st -ne 0 || -z "${REMOTE_MAC}" ]]; then +- echo "coouldn't retrieve ether addr from ${REMOTE_IFACE}" ++ echo "couldn't retrieve ether addr from ${REMOTE_IFACE}" + exit 127 + fi + +@@ -40,7 +40,7 @@ DPDK_VARS="" + + # by default ipsec-secgw can't deal with multi-segment packets + # make sure our local/remote host wouldn't generate fragmented packets +-# if reassmebly option is not enabled ++# if reassembly option is not enabled + DEF_MTU_LEN=1400 + DEF_PING_LEN=1200 + +diff --git a/dpdk/examples/ipv4_multicast/Makefile b/dpdk/examples/ipv4_multicast/Makefile +index 7ea44e6f63..2f054fe27d 100644 +--- a/dpdk/examples/ipv4_multicast/Makefile ++++ b/dpdk/examples/ipv4_multicast/Makefile +@@ -8,8 +8,10 @@ APP = ipv4_multicast + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -20,8 +22,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/ipv4_multicast/main.c b/dpdk/examples/ipv4_multicast/main.c +index e18726a5d2..fd6207a18b 100644 +--- a/dpdk/examples/ipv4_multicast/main.c ++++ b/dpdk/examples/ipv4_multicast/main.c +@@ -805,5 +805,8 @@ main(int argc, char **argv) + return -1; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/kni/Makefile b/dpdk/examples/kni/Makefile +index bbf3bcae12..3dad5329d8 100644 +--- a/dpdk/examples/kni/Makefile ++++ b/dpdk/examples/kni/Makefile +@@ -7,8 +7,10 @@ APP = kni + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + CFLAGS += -DALLOW_EXPERIMENTAL_API +diff --git a/dpdk/examples/kni/main.c b/dpdk/examples/kni/main.c +index fe93b8618a..df9666f78c 100644 +--- a/dpdk/examples/kni/main.c ++++ b/dpdk/examples/kni/main.c +@@ -720,7 +720,7 @@ log_link_state(struct rte_kni *kni, int prev, struct rte_eth_link *link) + + rte_eth_link_to_str(link_status_text, sizeof(link_status_text), link); + if (prev != link->link_status) +- RTE_LOG(INFO, APP, "%s NIC %s", ++ RTE_LOG(INFO, APP, "%s NIC %s\n", + rte_kni_get_name(kni), + link_status_text); + } +@@ -1046,7 +1046,7 @@ main(int argc, char** argv) + pthread_t kni_link_tid; + int pid; + +- /* Associate signal_hanlder function with USR signals */ ++ /* Associate signal_handler function with USR signals */ + signal(SIGUSR1, signal_handler); + signal(SIGUSR2, signal_handler); + signal(SIGRTMIN, signal_handler); +@@ -1140,5 +1140,8 @@ main(int argc, char** argv) + kni_port_params_array[i] = NULL; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git 
a/dpdk/examples/l2fwd-cat/Makefile b/dpdk/examples/l2fwd-cat/Makefile +index 9ba1135612..532db37ba6 100644 +--- a/dpdk/examples/l2fwd-cat/Makefile ++++ b/dpdk/examples/l2fwd-cat/Makefile +@@ -7,8 +7,10 @@ APP = l2fwd-cat + # all source are stored in SRCS-y + SRCS-y := l2fwd-cat.c cat.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/l2fwd-cat/l2fwd-cat.c b/dpdk/examples/l2fwd-cat/l2fwd-cat.c +index 2e632c5cb6..8e7eb32485 100644 +--- a/dpdk/examples/l2fwd-cat/l2fwd-cat.c ++++ b/dpdk/examples/l2fwd-cat/l2fwd-cat.c +@@ -107,7 +107,7 @@ lcore_main(void) + * for best performance. + */ + RTE_ETH_FOREACH_DEV(port) +- if (rte_eth_dev_socket_id(port) > 0 && ++ if (rte_eth_dev_socket_id(port) >= 0 && + rte_eth_dev_socket_id(port) != + (int)rte_socket_id()) + printf("WARNING, port %u is on remote NUMA node to " +@@ -201,5 +201,8 @@ main(int argc, char *argv[]) + /* Call lcore_main on the main core only. */ + lcore_main(); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/l2fwd-crypto/Makefile b/dpdk/examples/l2fwd-crypto/Makefile +index 7731eccd03..09aec4d52d 100644 +--- a/dpdk/examples/l2fwd-crypto/Makefile ++++ b/dpdk/examples/l2fwd-crypto/Makefile +@@ -7,8 +7,10 @@ APP = l2fwd-crypto + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/l2fwd-crypto/main.c b/dpdk/examples/l2fwd-crypto/main.c +index a96cb94cc4..755a28f341 100644 +--- a/dpdk/examples/l2fwd-crypto/main.c ++++ b/dpdk/examples/l2fwd-crypto/main.c +@@ -252,11 +252,9 @@ struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS]; + struct l2fwd_crypto_statistics crypto_statistics[RTE_CRYPTO_MAX_DEVS]; + + /* A tsc-based timer responsible for triggering statistics printout */ +-#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 Ghz */ ++#define TIMER_MILLISECOND (rte_get_tsc_hz() / 1000) + #define MAX_TIMER_PERIOD 86400UL /* 1 day max */ +- +-/* default period is 10 seconds */ +-static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; ++#define DEFAULT_TIMER_PERIOD 10UL + + /* Print out statistics on packets dropped */ + static void +@@ -616,12 +614,26 @@ l2fwd_simple_forward(struct rte_mbuf *m, uint16_t portid, + struct l2fwd_crypto_options *options) + { + uint16_t dst_port; ++ uint32_t pad_len; ++ struct rte_ipv4_hdr *ip_hdr; ++ uint32_t ipdata_offset = sizeof(struct rte_ether_hdr); + ++ ip_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(m, char *) + ++ ipdata_offset); + 
dst_port = l2fwd_dst_ports[portid]; + + if (options->mac_updating) + l2fwd_mac_updating(m, dst_port); + ++ if (options->auth_xform.auth.op == RTE_CRYPTO_AUTH_OP_VERIFY) ++ rte_pktmbuf_trim(m, options->auth_xform.auth.digest_length); ++ ++ if (options->cipher_xform.cipher.op == RTE_CRYPTO_CIPHER_OP_DECRYPT) { ++ pad_len = m->pkt_len - rte_be_to_cpu_16(ip_hdr->total_length) - ++ ipdata_offset; ++ rte_pktmbuf_trim(m, pad_len); ++ } ++ + l2fwd_send_packet(m, dst_port); + } + +@@ -865,18 +877,17 @@ l2fwd_main_loop(struct l2fwd_crypto_options *options) + } + + /* if timer is enabled */ +- if (timer_period > 0) { ++ if (options->refresh_period > 0) { + + /* advance the timer */ + timer_tsc += diff_tsc; + + /* if timer has reached its timeout */ + if (unlikely(timer_tsc >= +- (uint64_t)timer_period)) { ++ options->refresh_period)) { + + /* do this only on main core */ +- if (lcore_id == rte_get_main_lcore() +- && options->refresh_period) { ++ if (lcore_id == rte_get_main_lcore()) { + print_stats(); + timer_tsc = 0; + } +@@ -1437,7 +1448,8 @@ l2fwd_crypto_default_options(struct l2fwd_crypto_options *options) + { + options->portmask = 0xffffffff; + options->nb_ports_per_lcore = 1; +- options->refresh_period = 10000; ++ options->refresh_period = DEFAULT_TIMER_PERIOD * ++ TIMER_MILLISECOND * 1000; + options->single_lcore = 0; + options->sessionless = 0; + +@@ -2251,6 +2263,12 @@ initialize_cryptodevs(struct l2fwd_crypto_options *options, unsigned nb_ports, + if (enabled_cdevs[cdev_id] == 0) + continue; + ++ if (check_cryptodev_mask(options, cdev_id) < 0) ++ continue; ++ ++ if (check_capabilities(options, cdev_id) < 0) ++ continue; ++ + retval = rte_cryptodev_socket_id(cdev_id); + + if (retval < 0) { +@@ -2616,7 +2634,7 @@ initialize_ports(struct l2fwd_crypto_options *options) + last_portid = portid; + } + +- l2fwd_enabled_port_mask |= (1 << portid); ++ l2fwd_enabled_port_mask |= (1ULL << portid); + enabled_portcount++; + } + +@@ -2805,5 +2823,8 @@ main(int argc, char **argv) + return -1; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/l2fwd-event/Makefile b/dpdk/examples/l2fwd-event/Makefile +index 384224b24a..e011e31d1e 100644 +--- a/dpdk/examples/l2fwd-event/Makefile ++++ b/dpdk/examples/l2fwd-event/Makefile +@@ -13,8 +13,10 @@ SRCS-y += l2fwd_common.c + SRCS-y += l2fwd_event_generic.c + SRCS-y += l2fwd_event_internal_port.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -25,8 +27,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/l2fwd-event/l2fwd_event_generic.c b/dpdk/examples/l2fwd-event/l2fwd_event_generic.c +index ca7ea11118..3f0ee4523d 100644 +--- a/dpdk/examples/l2fwd-event/l2fwd_event_generic.c ++++ b/dpdk/examples/l2fwd-event/l2fwd_event_generic.c +@@ -40,7 +40,7 @@ l2fwd_event_device_setup_generic(struct l2fwd_resources *rsrc) + ethdev_count++; + } + +- /* Event device configurtion */ ++ /* Event device configuration */ + rte_event_dev_info_get(event_d_id, &dev_info); + + /* Enable implicit release */ +diff --git a/dpdk/examples/l2fwd-event/l2fwd_event_internal_port.c 
b/dpdk/examples/l2fwd-event/l2fwd_event_internal_port.c +index c9a8430d00..0669e07235 100644 +--- a/dpdk/examples/l2fwd-event/l2fwd_event_internal_port.c ++++ b/dpdk/examples/l2fwd-event/l2fwd_event_internal_port.c +@@ -40,7 +40,7 @@ l2fwd_event_device_setup_internal_port(struct l2fwd_resources *rsrc) + ethdev_count++; + } + +- /* Event device configurtion */ ++ /* Event device configuration */ + rte_event_dev_info_get(event_d_id, &dev_info); + + /* Enable implicit release */ +diff --git a/dpdk/examples/l2fwd-event/main.c b/dpdk/examples/l2fwd-event/main.c +index 444ee4e4db..0acfee4c92 100644 +--- a/dpdk/examples/l2fwd-event/main.c ++++ b/dpdk/examples/l2fwd-event/main.c +@@ -716,6 +716,9 @@ main(int argc, char **argv) + printf(" Done\n"); + } + } ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); + printf("Bye...\n"); + + return 0; +diff --git a/dpdk/examples/l2fwd-jobstats/Makefile b/dpdk/examples/l2fwd-jobstats/Makefile +index fcb60b2a33..9a71c68fa8 100644 +--- a/dpdk/examples/l2fwd-jobstats/Makefile ++++ b/dpdk/examples/l2fwd-jobstats/Makefile +@@ -7,8 +7,10 @@ APP = l2fwd-jobstats + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/l2fwd-jobstats/main.c b/dpdk/examples/l2fwd-jobstats/main.c +index 1151769aa9..943548a510 100644 +--- a/dpdk/examples/l2fwd-jobstats/main.c ++++ b/dpdk/examples/l2fwd-jobstats/main.c +@@ -459,7 +459,7 @@ l2fwd_flush_job(__rte_unused struct rte_timer *timer, __rte_unused void *arg) + qconf->next_flush_time[portid] = rte_get_timer_cycles() + drain_tsc; + } + +- /* Pass target to indicate that this job is happy of time interwal ++ /* Pass target to indicate that this job is happy of time interval + * in which it was called. 
*/ + rte_jobstats_finish(&qconf->flush_job, qconf->flush_job.target); + } +@@ -1022,5 +1022,8 @@ main(int argc, char **argv) + return -1; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/l2fwd-keepalive/Makefile b/dpdk/examples/l2fwd-keepalive/Makefile +index 09a891149b..ace9b4687e 100644 +--- a/dpdk/examples/l2fwd-keepalive/Makefile ++++ b/dpdk/examples/l2fwd-keepalive/Makefile +@@ -7,8 +7,10 @@ APP = l2fwd-keepalive + # all source are stored in SRCS-y + SRCS-y := main.c shm.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -21,8 +23,6 @@ static: build/$(APP)-static + + LDFLAGS += -pthread -lrt + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/l2fwd-keepalive/ka-agent/Makefile b/dpdk/examples/l2fwd-keepalive/ka-agent/Makefile +index 8b329a78b1..00d364fcbf 100644 +--- a/dpdk/examples/l2fwd-keepalive/ka-agent/Makefile ++++ b/dpdk/examples/l2fwd-keepalive/ka-agent/Makefile +@@ -9,8 +9,10 @@ SRCS-y := main.c + + CFLAGS += -I.. + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -23,8 +25,6 @@ static: build/$(APP)-static + + LDFLAGS += -lpthread -lrt + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/l2fwd-keepalive/main.c b/dpdk/examples/l2fwd-keepalive/main.c +index e4c2b27933..be6616288f 100644 +--- a/dpdk/examples/l2fwd-keepalive/main.c ++++ b/dpdk/examples/l2fwd-keepalive/main.c +@@ -817,5 +817,9 @@ main(int argc, char **argv) + + if (ka_shm != NULL) + rte_keepalive_shm_cleanup(ka_shm); ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/l2fwd/Makefile b/dpdk/examples/l2fwd/Makefile +index b180deb862..85669a298a 100644 +--- a/dpdk/examples/l2fwd/Makefile ++++ b/dpdk/examples/l2fwd/Makefile +@@ -7,8 +7,10 @@ APP = l2fwd + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + # Add flag to allow experimental API as l2fwd uses rte_ethdev_set_ptype API +diff --git a/dpdk/examples/l2fwd/main.c b/dpdk/examples/l2fwd/main.c +index 3377b08322..23e5e46761 100644 +--- a/dpdk/examples/l2fwd/main.c ++++ b/dpdk/examples/l2fwd/main.c +@@ -434,13 +434,16 @@ enum { + + /* first long only option value must be >= 256, so that we won't + * conflict with short options */ +- CMD_LINE_OPT_MIN_NUM = 256, ++ CMD_LINE_OPT_MAC_UPDATING_NUM = 256, ++ CMD_LINE_OPT_NO_MAC_UPDATING_NUM, + 
CMD_LINE_OPT_PORTMAP_NUM, + }; + + static const struct option lgopts[] = { +- { CMD_LINE_OPT_MAC_UPDATING, no_argument, &mac_updating, 1}, +- { CMD_LINE_OPT_NO_MAC_UPDATING, no_argument, &mac_updating, 0}, ++ { CMD_LINE_OPT_MAC_UPDATING, no_argument, 0, ++ CMD_LINE_OPT_MAC_UPDATING_NUM}, ++ { CMD_LINE_OPT_NO_MAC_UPDATING, no_argument, 0, ++ CMD_LINE_OPT_NO_MAC_UPDATING_NUM}, + { CMD_LINE_OPT_PORTMAP_CONFIG, 1, 0, CMD_LINE_OPT_PORTMAP_NUM}, + {NULL, 0, 0, 0} + }; +@@ -502,6 +505,14 @@ l2fwd_parse_args(int argc, char **argv) + } + break; + ++ case CMD_LINE_OPT_MAC_UPDATING_NUM: ++ mac_updating = 1; ++ break; ++ ++ case CMD_LINE_OPT_NO_MAC_UPDATING_NUM: ++ mac_updating = 0; ++ break; ++ + default: + l2fwd_usage(prgname); + return -1; +@@ -903,6 +914,9 @@ main(int argc, char **argv) + rte_eth_dev_close(portid); + printf(" Done\n"); + } ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); + printf("Bye...\n"); + + return ret; +diff --git a/dpdk/examples/l3fwd-acl/Makefile b/dpdk/examples/l3fwd-acl/Makefile +index 3420ea3a9c..f5d2099d8b 100644 +--- a/dpdk/examples/l3fwd-acl/Makefile ++++ b/dpdk/examples/l3fwd-acl/Makefile +@@ -7,8 +7,10 @@ APP = l3fwd-acl + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/l3fwd-acl/main.c b/dpdk/examples/l3fwd-acl/main.c +index 961594f5fe..148796b1c7 100644 +--- a/dpdk/examples/l3fwd-acl/main.c ++++ b/dpdk/examples/l3fwd-acl/main.c +@@ -790,8 +790,8 @@ send_packets(struct rte_mbuf **m, uint32_t *res, int num) + } + + /* +- * Parses IPV6 address, exepcts the following format: +- * XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX (where X - is a hexedecimal digit). ++ * Parse IPv6 address, expects the following format: ++ * XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX (where X is a hexadecimal digit). + */ + static int + parse_ipv6_addr(const char *in, const char **end, uint32_t v[IPV6_ADDR_U32], +@@ -1993,7 +1993,7 @@ check_all_ports_link_status(uint32_t port_mask) + } + + /* +- * build-up default vaues for dest MACs. ++ * build-up default values for dest MACs. 
+ */ + static void + set_default_dest_mac(void) +@@ -2257,5 +2257,8 @@ main(int argc, char **argv) + return -1; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/l3fwd-graph/Makefile b/dpdk/examples/l3fwd-graph/Makefile +index 6e3d0bca06..4f6ee27272 100644 +--- a/dpdk/examples/l3fwd-graph/Makefile ++++ b/dpdk/examples/l3fwd-graph/Makefile +@@ -7,8 +7,10 @@ APP = l3fwd-graph + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) -DALLOW_EXPERIMENTAL_API + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/l3fwd-graph/main.c b/dpdk/examples/l3fwd-graph/main.c +index 127c5e8dab..75c2e0ef3f 100644 +--- a/dpdk/examples/l3fwd-graph/main.c ++++ b/dpdk/examples/l3fwd-graph/main.c +@@ -1123,6 +1123,9 @@ main(int argc, char **argv) + rte_eth_dev_close(portid); + printf(" Done\n"); + } ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); + printf("Bye...\n"); + + return ret; +diff --git a/dpdk/examples/l3fwd-power/Makefile b/dpdk/examples/l3fwd-power/Makefile +index d69854c9f8..d1eb10e6e1 100644 +--- a/dpdk/examples/l3fwd-power/Makefile ++++ b/dpdk/examples/l3fwd-power/Makefile +@@ -7,8 +7,10 @@ APP = l3fwd-power + # all source are stored in SRCS-y + SRCS-y := main.c perf_core.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/l3fwd-power/main.c b/dpdk/examples/l3fwd-power/main.c +index 995a3b6ad7..9084d4ebeb 100644 +--- a/dpdk/examples/l3fwd-power/main.c ++++ b/dpdk/examples/l3fwd-power/main.c +@@ -428,7 +428,7 @@ signal_exit_now(int sigtype) + + } + +-/* Freqency scale down timer callback */ ++/* Frequency scale down timer callback */ + static void + power_timer_cb(__rte_unused struct rte_timer *tim, + __rte_unused void *arg) +@@ -1716,7 +1716,7 @@ parse_ep_config(const char *q_arg) + int hgh_edpi; + + ep_med_edpi = EMPTY_POLL_MED_THRESHOLD; +- ep_hgh_edpi = EMPTY_POLL_MED_THRESHOLD; ++ ep_hgh_edpi = EMPTY_POLL_HGH_THRESHOLD; + + strlcpy(s, p, sizeof(s)); + +@@ -1739,7 +1739,7 @@ parse_ep_config(const char *q_arg) + if (med_edpi > 0) + ep_med_edpi = med_edpi; + +- if (med_edpi > 0) ++ if (hgh_edpi > 0) + ep_hgh_edpi = hgh_edpi; + + } else { +@@ -2320,7 +2320,7 @@ update_telemetry(__rte_unused struct rte_timer *tim, + ret = rte_metrics_update_values(RTE_METRICS_GLOBAL, telstats_index, + values, RTE_DIM(values)); + if (ret < 0) +- RTE_LOG(WARNING, POWER, "failed to update metrcis\n"); ++ RTE_LOG(WARNING, POWER, "failed to update metrics\n"); + } + + static int +@@ -2464,9 +2464,6 @@ main(int argc, char 
**argv) + uint16_t portid; + const char *ptr_strings[NUM_TELSTATS]; + +- /* catch SIGINT and restore cpufreq governor to ondemand */ +- signal(SIGINT, signal_exit_now); +- + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) +@@ -2474,6 +2471,9 @@ main(int argc, char **argv) + argc -= ret; + argv += ret; + ++ /* catch SIGINT and restore cpufreq governor to ondemand */ ++ signal(SIGINT, signal_exit_now); ++ + /* init RTE timer library to be used late */ + rte_timer_subsystem_init(); + +diff --git a/dpdk/examples/l3fwd/Makefile b/dpdk/examples/l3fwd/Makefile +index 7e70bbd826..fb4f30c172 100644 +--- a/dpdk/examples/l3fwd/Makefile ++++ b/dpdk/examples/l3fwd/Makefile +@@ -8,8 +8,10 @@ APP = l3fwd + SRCS-y := main.c l3fwd_lpm.c l3fwd_em.c l3fwd_event.c + SRCS-y += l3fwd_event_generic.c l3fwd_event_internal_port.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -20,8 +22,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + # Added for 'rte_eth_link_to_str()' +diff --git a/dpdk/examples/l3fwd/l3fwd.h b/dpdk/examples/l3fwd/l3fwd.h +index 2cf06099e0..31fd8723d6 100644 +--- a/dpdk/examples/l3fwd/l3fwd.h ++++ b/dpdk/examples/l3fwd/l3fwd.h +@@ -79,6 +79,10 @@ struct lcore_conf { + + extern volatile bool force_quit; + ++/* RX and TX queue depths */ ++extern uint16_t nb_rxd; ++extern uint16_t nb_txd; ++ + /* ethernet addresses of ports */ + extern uint64_t dest_eth_addr[RTE_MAX_ETHPORTS]; + extern struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +diff --git a/dpdk/examples/l3fwd/l3fwd_common.h b/dpdk/examples/l3fwd/l3fwd_common.h +index 7d83ff641a..8e4c27218f 100644 +--- a/dpdk/examples/l3fwd/l3fwd_common.h ++++ b/dpdk/examples/l3fwd/l3fwd_common.h +@@ -51,7 +51,7 @@ rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) + #endif /* DO_RFC_1812_CHECKS */ + + /* +- * We group consecutive packets with the same destionation port into one burst. ++ * We group consecutive packets with the same destination port into one burst. + * To avoid extra latency this is done together with some other packet + * processing, but after we made a final decision about packet's destination. + * To do this we maintain: +@@ -76,7 +76,7 @@ rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) + + static const struct { + uint64_t pnum; /* prebuild 4 values for pnum[]. */ +- int32_t idx; /* index for new last updated elemnet. */ ++ int32_t idx; /* index for new last updated element. */ + uint16_t lpv; /* add value to the last updated element. */ + } gptbl[GRPSZ] = { + { +@@ -236,6 +236,9 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[], + + /* copy rest of the packets into the TX buffer. 
*/ + len = num - n; ++ if (len == 0) ++ goto exit; ++ + j = 0; + switch (len % FWDSTEP) { + while (j < len) { +@@ -258,6 +261,7 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[], + } + } + ++exit: + qconf->tx_mbufs[port].len = len; + } + +diff --git a/dpdk/examples/l3fwd/l3fwd_event.c b/dpdk/examples/l3fwd/l3fwd_event.c +index 4d31593a0a..aeb2ea43b3 100644 +--- a/dpdk/examples/l3fwd/l3fwd_event.c ++++ b/dpdk/examples/l3fwd/l3fwd_event.c +@@ -43,8 +43,6 @@ l3fwd_eth_dev_port_setup(struct rte_eth_conf *port_conf) + { + struct l3fwd_event_resources *evt_rsrc = l3fwd_get_eventdev_rsrc(); + uint16_t nb_ports = rte_eth_dev_count_avail(); +- uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +- uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + unsigned int nb_lcores = rte_lcore_count(); + struct rte_eth_conf local_port_conf; + struct rte_eth_dev_info dev_info; +diff --git a/dpdk/examples/l3fwd/l3fwd_event_internal_port.c b/dpdk/examples/l3fwd/l3fwd_event_internal_port.c +index 9916a7f556..49512ab1aa 100644 +--- a/dpdk/examples/l3fwd/l3fwd_event_internal_port.c ++++ b/dpdk/examples/l3fwd/l3fwd_event_internal_port.c +@@ -118,6 +118,8 @@ l3fwd_event_port_setup_internal_port(void) + event_p_conf.event_port_cfg |= + RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL; + ++ evt_rsrc->deq_depth = def_p_conf.dequeue_depth; ++ + for (event_p_id = 0; event_p_id < evt_rsrc->evp.nb_ports; + event_p_id++) { + ret = rte_event_port_setup(event_d_id, event_p_id, +diff --git a/dpdk/examples/l3fwd/l3fwd_lpm.c b/dpdk/examples/l3fwd/l3fwd_lpm.c +index 3dcf1fef18..40f3366adc 100644 +--- a/dpdk/examples/l3fwd/l3fwd_lpm.c ++++ b/dpdk/examples/l3fwd/l3fwd_lpm.c +@@ -28,6 +28,7 @@ + #include + + #include "l3fwd.h" ++#include "l3fwd_common.h" + #include "l3fwd_event.h" + + struct ipv4_l3fwd_lpm_route { +@@ -42,7 +43,10 @@ struct ipv6_l3fwd_lpm_route { + uint8_t if_out; + }; + +-/* 198.18.0.0/16 are set aside for RFC2544 benchmarking (RFC5735). */ ++/* ++ * 198.18.0.0/16 are set aside for RFC2544 benchmarking (RFC5735). ++ * 198.18.{0-7}.0/24 = Port {0-7} ++ */ + static const struct ipv4_l3fwd_lpm_route ipv4_l3fwd_lpm_route_array[] = { + {RTE_IPV4(198, 18, 0, 0), 24, 0}, + {RTE_IPV4(198, 18, 1, 0), 24, 1}, +@@ -54,16 +58,19 @@ static const struct ipv4_l3fwd_lpm_route ipv4_l3fwd_lpm_route_array[] = { + {RTE_IPV4(198, 18, 7, 0), 24, 7}, + }; + +-/* 2001:0200::/48 is IANA reserved range for IPv6 benchmarking (RFC5180) */ ++/* ++ * 2001:200::/48 is IANA reserved range for IPv6 benchmarking (RFC5180). 
++ * 2001:200:0:{0-7}::/64 = Port {0-7} ++ */ + static const struct ipv6_l3fwd_lpm_route ipv6_l3fwd_lpm_route_array[] = { +- {{32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 48, 0}, +- {{32, 1, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, 48, 1}, +- {{32, 1, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0}, 48, 2}, +- {{32, 1, 2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0}, 48, 3}, +- {{32, 1, 2, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0}, 48, 4}, +- {{32, 1, 2, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0}, 48, 5}, +- {{32, 1, 2, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0}, 48, 6}, +- {{32, 1, 2, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0}, 48, 7}, ++ {{32, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 64, 0}, ++ {{32, 1, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, 64, 1}, ++ {{32, 1, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0}, 64, 2}, ++ {{32, 1, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0}, 64, 3}, ++ {{32, 1, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0}, 64, 4}, ++ {{32, 1, 2, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0}, 64, 5}, ++ {{32, 1, 2, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0}, 64, 6}, ++ {{32, 1, 2, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0}, 64, 7}, + }; + + #define IPV4_L3FWD_LPM_MAX_RULES 1024 +@@ -266,30 +273,17 @@ lpm_process_event_pkt(const struct lcore_conf *lconf, struct rte_mbuf *mbuf) + + struct rte_ether_hdr *eth_hdr = rte_pktmbuf_mtod(mbuf, + struct rte_ether_hdr *); +-#ifdef DO_RFC_1812_CHECKS +- struct rte_ipv4_hdr *ipv4_hdr; +- if (RTE_ETH_IS_IPV4_HDR(mbuf->packet_type)) { +- /* Handle IPv4 headers.*/ +- ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, +- struct rte_ipv4_hdr *, +- sizeof(struct rte_ether_hdr)); +- +- if (is_valid_ipv4_pkt(ipv4_hdr, mbuf->pkt_len) +- < 0) { +- mbuf->port = BAD_PORT; +- continue; +- } +- /* Update time to live and header checksum */ +- --(ipv4_hdr->time_to_live); +- ++(ipv4_hdr->hdr_checksum); +- } +-#endif ++ + /* dst addr */ + *(uint64_t *)ð_hdr->d_addr = dest_eth_addr[mbuf->port]; + + /* src addr */ + rte_ether_addr_copy(&ports_eth_addr[mbuf->port], + ð_hdr->s_addr); ++ ++ rfc1812_process(rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *, ++ sizeof(struct rte_ether_hdr)), ++ &mbuf->port, mbuf->packet_type); + #endif + return mbuf->port; + } +diff --git a/dpdk/examples/l3fwd/l3fwd_neon.h b/dpdk/examples/l3fwd/l3fwd_neon.h +index 86ac5971d7..e3d33a5229 100644 +--- a/dpdk/examples/l3fwd/l3fwd_neon.h ++++ b/dpdk/examples/l3fwd/l3fwd_neon.h +@@ -64,7 +64,7 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) + + /* + * Group consecutive packets with the same destination port in bursts of 4. +- * Suppose we have array of destionation ports: ++ * Suppose we have array of destination ports: + * dst_port[] = {a, b, c, d,, e, ... } + * dp1 should contain: , dp2: . + * We doing 4 comparisons at once and the result is 4 bit mask. +diff --git a/dpdk/examples/l3fwd/l3fwd_sse.h b/dpdk/examples/l3fwd/l3fwd_sse.h +index bb565ed546..d5a717e18c 100644 +--- a/dpdk/examples/l3fwd/l3fwd_sse.h ++++ b/dpdk/examples/l3fwd/l3fwd_sse.h +@@ -64,7 +64,7 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) + + /* + * Group consecutive packets with the same destination port in bursts of 4. +- * Suppose we have array of destionation ports: ++ * Suppose we have array of destination ports: + * dst_port[] = {a, b, c, d,, e, ... } + * dp1 should contain: , dp2: . + * We doing 4 comparisons at once and the result is 4 bit mask. 
+diff --git a/dpdk/examples/l3fwd/main.c b/dpdk/examples/l3fwd/main.c +index bb49e5faff..8d435a2d82 100644 +--- a/dpdk/examples/l3fwd/main.c ++++ b/dpdk/examples/l3fwd/main.c +@@ -53,9 +53,8 @@ + + #define MAX_LCORE_PARAMS 1024 + +-/* Static global variables used within this file. */ +-static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; +-static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; ++uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; ++uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; + + /**< Ports set in promiscuous mode off by default. */ + static int promiscuous_on; +@@ -278,6 +277,8 @@ print_usage(const char *prgname) + " [-E]" + " [-L]" + " --config (port,queue,lcore)[,(port,queue,lcore)]" ++ " [--rx-queue-size NPKTS]" ++ " [--tx-queue-size NPKTS]" + " [--eth-dest=X,MM:MM:MM:MM:MM:MM]" + " [--enable-jumbo [--max-pkt-len PKTLEN]]" + " [--no-numa]" +@@ -293,6 +294,10 @@ print_usage(const char *prgname) + " -E : Enable exact match\n" + " -L : Enable longest prefix match (default)\n" + " --config (port,queue,lcore): Rx queue configuration\n" ++ " --rx-queue-size NPKTS: Rx queue size in decimal\n" ++ " Default: %d\n" ++ " --tx-queue-size NPKTS: Tx queue size in decimal\n" ++ " Default: %d\n" + " --eth-dest=X,MM:MM:MM:MM:MM:MM: Ethernet destination for port X\n" + " --enable-jumbo: Enable jumbo frames\n" + " --max-pkt-len: Under the premise of enabling jumbo,\n" +@@ -311,7 +316,7 @@ print_usage(const char *prgname) + " --event-eth-rxqs: Number of ethernet RX queues per device.\n" + " Default: 1\n" + " Valid only if --mode=eventdev\n\n", +- prgname); ++ prgname, RTE_TEST_RX_DESC_DEFAULT, RTE_TEST_TX_DESC_DEFAULT); + } + + static int +@@ -454,6 +459,38 @@ parse_mode(const char *optarg) + evt_rsrc->enabled = true; + } + ++static void ++parse_queue_size(const char *queue_size_arg, uint16_t *queue_size, int rx) ++{ ++ char *end = NULL; ++ unsigned long value; ++ ++ /* parse decimal string */ ++ value = strtoul(queue_size_arg, &end, 10); ++ if ((queue_size_arg[0] == '\0') || (end == NULL) || ++ (*end != '\0') || (value == 0)) { ++ if (rx == 1) ++ rte_exit(EXIT_FAILURE, "Invalid rx-queue-size\n"); ++ else ++ rte_exit(EXIT_FAILURE, "Invalid tx-queue-size\n"); ++ ++ return; ++ } ++ ++ if (value > UINT16_MAX) { ++ if (rx == 1) ++ rte_exit(EXIT_FAILURE, "rx-queue-size %lu > %d\n", ++ value, UINT16_MAX); ++ else ++ rte_exit(EXIT_FAILURE, "tx-queue-size %lu > %d\n", ++ value, UINT16_MAX); ++ ++ return; ++ } ++ ++ *queue_size = value; ++} ++ + static void + parse_eventq_sched(const char *optarg) + { +@@ -495,6 +532,8 @@ static const char short_options[] = + ; + + #define CMD_LINE_OPT_CONFIG "config" ++#define CMD_LINE_OPT_RX_QUEUE_SIZE "rx-queue-size" ++#define CMD_LINE_OPT_TX_QUEUE_SIZE "tx-queue-size" + #define CMD_LINE_OPT_ETH_DEST "eth-dest" + #define CMD_LINE_OPT_NO_NUMA "no-numa" + #define CMD_LINE_OPT_IPV6 "ipv6" +@@ -512,6 +551,8 @@ enum { + * conflict with short options */ + CMD_LINE_OPT_MIN_NUM = 256, + CMD_LINE_OPT_CONFIG_NUM, ++ CMD_LINE_OPT_RX_QUEUE_SIZE_NUM, ++ CMD_LINE_OPT_TX_QUEUE_SIZE_NUM, + CMD_LINE_OPT_ETH_DEST_NUM, + CMD_LINE_OPT_NO_NUMA_NUM, + CMD_LINE_OPT_IPV6_NUM, +@@ -526,6 +567,8 @@ enum { + + static const struct option lgopts[] = { + {CMD_LINE_OPT_CONFIG, 1, 0, CMD_LINE_OPT_CONFIG_NUM}, ++ {CMD_LINE_OPT_RX_QUEUE_SIZE, 1, 0, CMD_LINE_OPT_RX_QUEUE_SIZE_NUM}, ++ {CMD_LINE_OPT_TX_QUEUE_SIZE, 1, 0, CMD_LINE_OPT_TX_QUEUE_SIZE_NUM}, + {CMD_LINE_OPT_ETH_DEST, 1, 0, CMD_LINE_OPT_ETH_DEST_NUM}, + {CMD_LINE_OPT_NO_NUMA, 0, 0, CMD_LINE_OPT_NO_NUMA_NUM}, + {CMD_LINE_OPT_IPV6, 0, 0, 
CMD_LINE_OPT_IPV6_NUM}, +@@ -607,6 +650,14 @@ parse_args(int argc, char **argv) + lcore_params = 1; + break; + ++ case CMD_LINE_OPT_RX_QUEUE_SIZE_NUM: ++ parse_queue_size(optarg, &nb_rxd, 1); ++ break; ++ ++ case CMD_LINE_OPT_TX_QUEUE_SIZE_NUM: ++ parse_queue_size(optarg, &nb_txd, 0); ++ break; ++ + case CMD_LINE_OPT_ETH_DEST_NUM: + parse_eth_dest(optarg); + break; +@@ -1315,6 +1366,10 @@ main(int argc, char **argv) + printf(" Done\n"); + } + } ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + printf("Bye...\n"); + + return ret; +diff --git a/dpdk/examples/link_status_interrupt/Makefile b/dpdk/examples/link_status_interrupt/Makefile +index fa608c56a0..c5c342d8e5 100644 +--- a/dpdk/examples/link_status_interrupt/Makefile ++++ b/dpdk/examples/link_status_interrupt/Makefile +@@ -7,8 +7,10 @@ APP = link_status_interrupt + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/link_status_interrupt/main.c b/dpdk/examples/link_status_interrupt/main.c +index f1653b4fb8..a022791673 100644 +--- a/dpdk/examples/link_status_interrupt/main.c ++++ b/dpdk/examples/link_status_interrupt/main.c +@@ -99,9 +99,10 @@ struct lsi_port_statistics { + struct lsi_port_statistics port_statistics[RTE_MAX_ETHPORTS]; + + /* A tsc-based timer responsible for triggering statistics printout */ +-#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 Ghz */ ++#define TIMER_MILLISECOND (rte_get_timer_hz() / 1000) + #define MAX_TIMER_PERIOD 86400 /* 1 day max */ +-static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; /* default period is 10 seconds */ ++#define DEFAULT_TIMER_PERIOD 10UL /* default period is 10 seconds */ ++static int64_t timer_period; + + /* Print out statistics on packets dropped */ + static void +@@ -365,6 +366,8 @@ lsi_parse_args(int argc, char **argv) + {NULL, 0, 0, 0} + }; + ++ timer_period = DEFAULT_TIMER_PERIOD * TIMER_MILLISECOND * 1000; ++ + argvopt = argv; + + while ((opt = getopt_long(argc, argvopt, "p:q:T:", +@@ -731,5 +734,8 @@ main(int argc, char **argv) + return -1; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/meson.build b/dpdk/examples/meson.build +index b9ab24223f..d065a6a08b 100644 +--- a/dpdk/examples/meson.build ++++ b/dpdk/examples/meson.build +@@ -6,8 +6,6 @@ if get_option('default_library') == 'static' + link_whole_libs = dpdk_static_libraries + dpdk_drivers + endif + +-execinfo = cc.find_library('execinfo', required: false) +- + # list of all example apps. Keep 1-3 per line, in alphabetical order. 
+ all_examples = [ + 'bbdev_app', 'bond', +@@ -48,6 +46,12 @@ all_examples = [ + 'vmdq', 'vmdq_dcb', + ] + ++# on install, skip copying all meson.build files ++ex_file_excludes = ['meson.build'] ++foreach ex:all_examples ++ ex_file_excludes += [ex + '/meson.build'] ++endforeach ++ + if get_option('examples') == '' + subdir_done() + endif +@@ -76,7 +80,7 @@ foreach example: examples + cflags = default_cflags + ldflags = default_ldflags + +- ext_deps = [execinfo] ++ ext_deps = [] + includes = [include_directories(example)] + deps = ['eal', 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline'] + subdir(example) +diff --git a/dpdk/examples/multi_process/client_server_mp/mp_client/Makefile b/dpdk/examples/multi_process/client_server_mp/mp_client/Makefile +index bc00a1c875..e694b5659e 100644 +--- a/dpdk/examples/multi_process/client_server_mp/mp_client/Makefile ++++ b/dpdk/examples/multi_process/client_server_mp/mp_client/Makefile +@@ -9,8 +9,10 @@ SRCS-y := client.c + + CFLAGS += -I../shared + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -21,8 +23,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/multi_process/client_server_mp/mp_client/client.c b/dpdk/examples/multi_process/client_server_mp/mp_client/client.c +index 361d90b54b..6d4c246816 100644 +--- a/dpdk/examples/multi_process/client_server_mp/mp_client/client.c ++++ b/dpdk/examples/multi_process/client_server_mp/mp_client/client.c +@@ -268,4 +268,7 @@ main(int argc, char *argv[]) + + need_flush = 1; + } ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); + } +diff --git a/dpdk/examples/multi_process/client_server_mp/mp_server/Makefile b/dpdk/examples/multi_process/client_server_mp/mp_server/Makefile +index d066524b36..39c481171a 100644 +--- a/dpdk/examples/multi_process/client_server_mp/mp_server/Makefile ++++ b/dpdk/examples/multi_process/client_server_mp/mp_server/Makefile +@@ -9,8 +9,10 @@ SRCS-y := main.c init.c args.c + + CFLAGS += -I../shared + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -21,8 +23,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/multi_process/client_server_mp/mp_server/main.c b/dpdk/examples/multi_process/client_server_mp/mp_server/main.c +index b18e12dd4b..89a996eb58 100644 +--- a/dpdk/examples/multi_process/client_server_mp/mp_server/main.c ++++ b/dpdk/examples/multi_process/client_server_mp/mp_server/main.c +@@ -233,7 +233,7 @@ process_packets(uint32_t port_num __rte_unused, + struct rte_mbuf *pkts[], uint16_t rx_count) + { + uint16_t i; +- uint8_t client = 0; ++ static uint8_t client; + + for (i = 0; i < rx_count; i++) { + enqueue_rx_packet(client, pkts[i]); +@@ -304,5 +304,9 @@ 
main(int argc, char *argv[]) + rte_eal_mp_remote_launch(sleep_lcore, NULL, SKIP_MAIN); + + do_packet_forwarding(); ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/multi_process/hotplug_mp/Makefile b/dpdk/examples/multi_process/hotplug_mp/Makefile +index 3122449d62..f72e3442db 100644 +--- a/dpdk/examples/multi_process/hotplug_mp/Makefile ++++ b/dpdk/examples/multi_process/hotplug_mp/Makefile +@@ -7,8 +7,10 @@ APP = hotplug_mp + # all source are stored in SRCS-y + SRCS-y := main.c commands.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/multi_process/hotplug_mp/commands.c b/dpdk/examples/multi_process/hotplug_mp/commands.c +index a8a39d07f7..f24d5eef84 100644 +--- a/dpdk/examples/multi_process/hotplug_mp/commands.c ++++ b/dpdk/examples/multi_process/hotplug_mp/commands.c +@@ -178,7 +178,7 @@ static void cmd_dev_detach_parsed(void *parsed_result, + cmdline_printf(cl, "detached device %s\n", + da.name); + else +- cmdline_printf(cl, "failed to dettach device %s\n", ++ cmdline_printf(cl, "failed to detach device %s\n", + da.name); + } + +diff --git a/dpdk/examples/multi_process/simple_mp/Makefile b/dpdk/examples/multi_process/simple_mp/Makefile +index d03597c4c2..32ab346449 100644 +--- a/dpdk/examples/multi_process/simple_mp/Makefile ++++ b/dpdk/examples/multi_process/simple_mp/Makefile +@@ -7,8 +7,10 @@ APP = simple_mp + # all source are stored in SRCS-y + SRCS-y := main.c mp_commands.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/multi_process/simple_mp/main.c b/dpdk/examples/multi_process/simple_mp/main.c +index 109b8bb45d..27f5b45dc1 100644 +--- a/dpdk/examples/multi_process/simple_mp/main.c ++++ b/dpdk/examples/multi_process/simple_mp/main.c +@@ -4,7 +4,7 @@ + + /* + * This sample application is a simple multi-process application which +- * demostrates sharing of queues and memory pools between processes, and ++ * demonstrates sharing of queues and memory pools between processes, and + * using those queues/pools for communication between the processes. 
+ * + * Application is designed to run with two processes, a primary and a +@@ -121,5 +121,9 @@ main(int argc, char **argv) + cmdline_stdin_exit(cl); + + rte_eal_mp_wait_lcore(); ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/multi_process/symmetric_mp/Makefile b/dpdk/examples/multi_process/symmetric_mp/Makefile +index 45b7214cba..8dc6f56857 100644 +--- a/dpdk/examples/multi_process/symmetric_mp/Makefile ++++ b/dpdk/examples/multi_process/symmetric_mp/Makefile +@@ -7,8 +7,10 @@ APP = symmetric_mp + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/multi_process/symmetric_mp/main.c b/dpdk/examples/multi_process/symmetric_mp/main.c +index ae7f5e0d50..37ef037e82 100644 +--- a/dpdk/examples/multi_process/symmetric_mp/main.c ++++ b/dpdk/examples/multi_process/symmetric_mp/main.c +@@ -3,7 +3,7 @@ + */ + + /* +- * Sample application demostrating how to do packet I/O in a multi-process ++ * Sample application demonstrating how to do packet I/O in a multi-process + * environment. The same code can be run as a primary process and as a + * secondary process, just with a different proc-id parameter in each case + * (apart from the EAL flag to indicate a secondary process). +@@ -472,5 +472,8 @@ main(int argc, char **argv) + + rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MAIN); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/ntb/Makefile b/dpdk/examples/ntb/Makefile +index d35dabc471..2fa9a05823 100644 +--- a/dpdk/examples/ntb/Makefile ++++ b/dpdk/examples/ntb/Makefile +@@ -7,8 +7,10 @@ APP = ntb_fwd + # all source are stored in SRCS-y + SRCS-y := ntb_fwd.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + CFLAGS += -D_FILE_OFFSET_BITS=64 + LDFLAGS += -pthread + +diff --git a/dpdk/examples/ntb/meson.build b/dpdk/examples/ntb/meson.build +index 02be9fc80d..3da983badb 100644 +--- a/dpdk/examples/ntb/meson.build ++++ b/dpdk/examples/ntb/meson.build +@@ -12,11 +12,8 @@ if not is_linux + subdir_done() + endif + +-deps += 'rawdev' ++deps += ['rawdev', 'raw_ntb'] + cflags += ['-D_FILE_OFFSET_BITS=64'] + sources = files( + 'ntb_fwd.c' + ) +-if dpdk_conf.has('RTE_RAW_NTB') +- deps += 'raw_ntb' +-endif +diff --git a/dpdk/examples/ntb/ntb_fwd.c b/dpdk/examples/ntb/ntb_fwd.c +index 54b7f08964..8f42831399 100644 +--- a/dpdk/examples/ntb/ntb_fwd.c ++++ b/dpdk/examples/ntb/ntb_fwd.c +@@ -696,7 +696,7 @@ assign_stream_to_lcores(void) + break; + } + +- /* Print packet forwading config. */ ++ /* Print packet forwarding config. 
*/ + RTE_LCORE_FOREACH_WORKER(lcore_id) { + conf = &fwd_lcore_conf[lcore_id]; + +@@ -1498,5 +1498,8 @@ main(int argc, char **argv) + start_pkt_fwd(); + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/packet_ordering/Makefile b/dpdk/examples/packet_ordering/Makefile +index 09abda91ea..de1a8b9b47 100644 +--- a/dpdk/examples/packet_ordering/Makefile ++++ b/dpdk/examples/packet_ordering/Makefile +@@ -7,8 +7,10 @@ APP = packet_ordering + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/packet_ordering/main.c b/dpdk/examples/packet_ordering/main.c +index 4bea1982d5..ed9c12ae55 100644 +--- a/dpdk/examples/packet_ordering/main.c ++++ b/dpdk/examples/packet_ordering/main.c +@@ -290,7 +290,7 @@ configure_eth_port(uint16_t port_id) + if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) + port_conf.txmode.offloads |= + DEV_TX_OFFLOAD_MBUF_FAST_FREE; +- ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf_default); ++ ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf); + if (ret != 0) + return ret; + +@@ -682,7 +682,7 @@ main(int argc, char **argv) + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid packet_ordering arguments\n"); + +- /* Check if we have enought cores */ ++ /* Check if we have enough cores */ + if (rte_lcore_count() < 3) + rte_exit(EXIT_FAILURE, "Error, This application needs at " + "least 3 logical cores to run:\n" +@@ -776,5 +776,9 @@ main(int argc, char **argv) + } + + print_stats(); ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/performance-thread/common/lthread.c b/dpdk/examples/performance-thread/common/lthread.c +index 3f1f48db43..8ee7a227f5 100644 +--- a/dpdk/examples/performance-thread/common/lthread.c ++++ b/dpdk/examples/performance-thread/common/lthread.c +@@ -177,7 +177,7 @@ lthread_create(struct lthread **new_lt, int lcore_id, + bzero(lt, sizeof(struct lthread)); + lt->root_sched = THIS_SCHED; + +- /* set the function args and exit handlder */ ++ /* set the function args and exit handler */ + _lthread_init(lt, fun, arg, _lthread_exit_handler); + + /* put it in the ready queue */ +@@ -382,7 +382,7 @@ void lthread_exit(void *ptr) + } + + +- /* wait until the joinging thread has collected the exit value */ ++ /* wait until the joining thread has collected the exit value */ + while (lt->join != LT_JOIN_EXIT_VAL_READ) + _reschedule(); + +@@ -408,7 +408,7 @@ int lthread_join(struct lthread *lt, void **ptr) + /* invalid to join a detached thread, or a thread that is joined */ + if ((lt_state & BIT(ST_LT_DETACH)) || (lt->join == LT_JOIN_THREAD_SET)) + return POSIX_ERRNO(EINVAL); +- /* pointer to the joining thread and a poingter to return a value */ ++ /* pointer to the joining thread and a pointer to return a value */ + lt->lt_join = current; + current->lt_exit_ptr = ptr; + /* There is a race between lthread_join() and lthread_exit() +diff --git 
a/dpdk/examples/performance-thread/common/lthread_diag.c b/dpdk/examples/performance-thread/common/lthread_diag.c +index 57760a1e23..b1bdf7a30c 100644 +--- a/dpdk/examples/performance-thread/common/lthread_diag.c ++++ b/dpdk/examples/performance-thread/common/lthread_diag.c +@@ -232,7 +232,7 @@ lthread_sched_stats_display(void) + } + + /* +- * Defafult diagnostic callback ++ * Default diagnostic callback + */ + static uint64_t + _lthread_diag_default_cb(uint64_t time, struct lthread *lt, int diag_event, +diff --git a/dpdk/examples/performance-thread/common/lthread_int.h b/dpdk/examples/performance-thread/common/lthread_int.h +index a352f13b75..1723714b5f 100644 +--- a/dpdk/examples/performance-thread/common/lthread_int.h ++++ b/dpdk/examples/performance-thread/common/lthread_int.h +@@ -108,7 +108,7 @@ enum join_st { + LT_JOIN_EXIT_VAL_READ, /* joining thread has collected ret val */ + }; + +-/* defnition of an lthread stack object */ ++/* definition of an lthread stack object */ + struct lthread_stack { + uint8_t stack[LTHREAD_MAX_STACK_SIZE]; + size_t stack_size; +diff --git a/dpdk/examples/performance-thread/common/lthread_tls.c b/dpdk/examples/performance-thread/common/lthread_tls.c +index 07de6cafab..2151582989 100644 +--- a/dpdk/examples/performance-thread/common/lthread_tls.c ++++ b/dpdk/examples/performance-thread/common/lthread_tls.c +@@ -214,7 +214,7 @@ void _lthread_tls_alloc(struct lthread *lt) + tls->root_sched = (THIS_SCHED); + lt->tls = tls; + +- /* allocate data for TLS varaiables using RTE_PER_LTHREAD macros */ ++ /* allocate data for TLS variables using RTE_PER_LTHREAD macros */ + if (sizeof(void *) < (uint64_t)RTE_PER_LTHREAD_SECTION_SIZE) { + lt->per_lthread_data = + _lthread_objcache_alloc((THIS_SCHED)->per_lthread_cache); +diff --git a/dpdk/examples/performance-thread/l3fwd-thread/Makefile b/dpdk/examples/performance-thread/l3fwd-thread/Makefile +index ca1a5d087e..6a878482a3 100644 +--- a/dpdk/examples/performance-thread/l3fwd-thread/Makefile ++++ b/dpdk/examples/performance-thread/l3fwd-thread/Makefile +@@ -9,10 +9,12 @@ SRCS-y := main.c + + include ../common/common.mk + ++PKGCONF ?= pkg-config ++ + CFLAGS += -DALLOW_EXPERIMENTAL_API + + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -23,7 +25,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config + + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) +diff --git a/dpdk/examples/performance-thread/l3fwd-thread/main.c b/dpdk/examples/performance-thread/l3fwd-thread/main.c +index 4d82fb82ef..d8b2f830b2 100644 +--- a/dpdk/examples/performance-thread/l3fwd-thread/main.c ++++ b/dpdk/examples/performance-thread/l3fwd-thread/main.c +@@ -126,7 +126,7 @@ cb_parse_ptype(__rte_unused uint16_t port, __rte_unused uint16_t queue, + } + + /* +- * When set to zero, simple forwaring path is eanbled. ++ * When set to zero, simple forwarding path is enabled. + * When set to one, optimized forwarding path is enabled. + * Note that LPM optimisation path uses SSE4.1 instructions. + */ +@@ -1529,7 +1529,7 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) + } + + /* +- * We group consecutive packets with the same destionation port into one burst. 
++ * We group consecutive packets with the same destination port into one burst. + * To avoid extra latency this is done together with some other packet + * processing, but after we made a final decision about packet's destination. + * To do this we maintain: +@@ -1554,7 +1554,7 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) + + /* + * Group consecutive packets with the same destination port in bursts of 4. +- * Suppose we have array of destionation ports: ++ * Suppose we have array of destination ports: + * dst_port[] = {a, b, c, d,, e, ... } + * dp1 should contain: , dp2: . + * We doing 4 comparisons at once and the result is 4 bit mask. +@@ -1565,7 +1565,7 @@ port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2) + { + static const struct { + uint64_t pnum; /* prebuild 4 values for pnum[]. */ +- int32_t idx; /* index for new last updated elemnet. */ ++ int32_t idx; /* index for new last updated element. */ + uint16_t lpv; /* add value to the last updated element. */ + } gptbl[GRPSZ] = { + { +@@ -1834,7 +1834,7 @@ process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx, + + /* + * Send packets out, through destination port. +- * Consecuteve pacekts with the same destination port ++ * Consecutive packets with the same destination port + * are already grouped together. + * If destination port for the packet equals BAD_PORT, + * then free the packet without sending it out. +@@ -1885,7 +1885,6 @@ process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx, + static int __rte_noreturn + cpu_load_collector(__rte_unused void *arg) { + unsigned i, j, k; +- uint64_t hits; + uint64_t prev_tsc, diff_tsc, cur_tsc; + uint64_t total[MAX_CPU] = { 0 }; + unsigned min_cpu = MAX_CPU; +@@ -1975,12 +1974,10 @@ cpu_load_collector(__rte_unused void *arg) { + printf("cpu# proc%% poll%% overhead%%\n\n"); + + for (i = min_cpu; i <= max_cpu; i++) { +- hits = 0; + printf("CPU %d:", i); + for (j = 0; j < MAX_CPU_COUNTER; j++) { + printf("%7" PRIu64 "", + cpu_load.hits[j][i] * 100 / cpu_load.counter); +- hits += cpu_load.hits[j][i]; + cpu_load.hits[j][i] = 0; + } + printf("%7" PRIu64 "\n", +@@ -3507,7 +3504,7 @@ main(int argc, char **argv) + + ret = rte_timer_subsystem_init(); + if (ret < 0) +- rte_exit(EXIT_FAILURE, "Failed to initialize timer subystem\n"); ++ rte_exit(EXIT_FAILURE, "Failed to initialize timer subsystem\n"); + + /* pre-init dst MACs for all ports to 02:00:00:00:00:xx */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) { +@@ -3777,5 +3774,8 @@ main(int argc, char **argv) + } + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/performance-thread/pthread_shim/Makefile b/dpdk/examples/performance-thread/pthread_shim/Makefile +index 6b19ff63fe..bf5458e3c7 100644 +--- a/dpdk/examples/performance-thread/pthread_shim/Makefile ++++ b/dpdk/examples/performance-thread/pthread_shim/Makefile +@@ -13,8 +13,10 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API + CFLAGS += -D_GNU_SOURCE + LDFLAGS += "-Wl,--copy-dt-needed-entries" + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -27,8 +29,6 @@ static: build/$(APP)-static + + LDFLAGS += -lpthread + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + 
LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/performance-thread/pthread_shim/main.c b/dpdk/examples/performance-thread/pthread_shim/main.c +index 23e3b5e311..288c70c6cd 100644 +--- a/dpdk/examples/performance-thread/pthread_shim/main.c ++++ b/dpdk/examples/performance-thread/pthread_shim/main.c +@@ -71,7 +71,7 @@ void *helloworld_pthread(void *arg) + print_count++; + + /* yield thread to give opportunity for lock contention */ +- pthread_yield(); ++ sched_yield(); + + /* retrieve arg from TLS */ + uint64_t thread_no = (uint64_t) pthread_getspecific(key); +@@ -258,5 +258,9 @@ int main(int argc, char **argv) + RTE_LCORE_FOREACH_WORKER(lcore_id) { + rte_eal_wait_lcore(lcore_id); + } ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/performance-thread/pthread_shim/pthread_shim.h b/dpdk/examples/performance-thread/pthread_shim/pthread_shim.h +index e90fb15fc1..ce51627a5b 100644 +--- a/dpdk/examples/performance-thread/pthread_shim/pthread_shim.h ++++ b/dpdk/examples/performance-thread/pthread_shim/pthread_shim.h +@@ -41,7 +41,7 @@ + * + * The decision whether to invoke the real library function or the lthread + * function is controlled by a per pthread flag that can be switched +- * on of off by the pthread_override_set() API described below. Typcially ++ * on of off by the pthread_override_set() API described below. Typically + * this should be done as the first action of the initial lthread. + * + * N.B In general it would be poor practice to revert to invoke a real +diff --git a/dpdk/examples/pipeline/Makefile b/dpdk/examples/pipeline/Makefile +index d0a1f02e1c..86bcc00bd2 100644 +--- a/dpdk/examples/pipeline/Makefile ++++ b/dpdk/examples/pipeline/Makefile +@@ -11,8 +11,10 @@ SRCS-y += main.c + SRCS-y += obj.c + SRCS-y += thread.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -23,8 +25,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/pipeline/main.c b/dpdk/examples/pipeline/main.c +index fb57ef31fe..8ea19f9dd5 100644 +--- a/dpdk/examples/pipeline/main.c ++++ b/dpdk/examples/pipeline/main.c +@@ -190,4 +190,7 @@ main(int argc, char **argv) + + conn_poll_for_msg(conn); + } ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); + } +diff --git a/dpdk/examples/ptpclient/Makefile b/dpdk/examples/ptpclient/Makefile +index 9f56a5e7a0..37c32ff873 100644 +--- a/dpdk/examples/ptpclient/Makefile ++++ b/dpdk/examples/ptpclient/Makefile +@@ -7,8 +7,10 @@ APP = ptpclient + # all source are stored in SRCS-y + SRCS-y := ptpclient.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell 
$(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/ptpclient/ptpclient.c b/dpdk/examples/ptpclient/ptpclient.c +index 09968cdfc7..c8fdae9cce 100644 +--- a/dpdk/examples/ptpclient/ptpclient.c ++++ b/dpdk/examples/ptpclient/ptpclient.c +@@ -382,6 +382,7 @@ parse_fup(struct ptpv2_data_slave_ordinary *ptp_data) + struct ptp_header *ptp_hdr; + struct clock_id *client_clkid; + struct ptp_message *ptp_msg; ++ struct delay_req_msg *req_msg; + struct rte_mbuf *created_pkt; + struct tstamp *origin_tstamp; + struct rte_ether_addr eth_multicast = ether_multicast; +@@ -419,7 +420,12 @@ parse_fup(struct ptpv2_data_slave_ordinary *ptp_data) + + created_pkt = rte_pktmbuf_alloc(mbuf_pool); + pkt_size = sizeof(struct rte_ether_hdr) + +- sizeof(struct ptp_message); ++ sizeof(struct delay_req_msg); ++ ++ if (rte_pktmbuf_append(created_pkt, pkt_size) == NULL) { ++ rte_pktmbuf_free(created_pkt); ++ return; ++ } + created_pkt->data_len = pkt_size; + created_pkt->pkt_len = pkt_size; + eth_hdr = rte_pktmbuf_mtod(created_pkt, struct rte_ether_hdr *); +@@ -429,22 +435,22 @@ parse_fup(struct ptpv2_data_slave_ordinary *ptp_data) + rte_ether_addr_copy(ð_multicast, ð_hdr->d_addr); + + eth_hdr->ether_type = htons(PTP_PROTOCOL); +- ptp_msg = (struct ptp_message *) +- (rte_pktmbuf_mtod(created_pkt, char *) + +- sizeof(struct rte_ether_hdr)); +- +- ptp_msg->delay_req.hdr.seq_id = htons(ptp_data->seqID_SYNC); +- ptp_msg->delay_req.hdr.msg_type = DELAY_REQ; +- ptp_msg->delay_req.hdr.ver = 2; +- ptp_msg->delay_req.hdr.control = 1; +- ptp_msg->delay_req.hdr.log_message_interval = 127; +- ptp_msg->delay_req.hdr.message_length = ++ req_msg = rte_pktmbuf_mtod_offset(created_pkt, ++ struct delay_req_msg *, sizeof(struct ++ rte_ether_hdr)); ++ ++ req_msg->hdr.seq_id = htons(ptp_data->seqID_SYNC); ++ req_msg->hdr.msg_type = DELAY_REQ; ++ req_msg->hdr.ver = 2; ++ req_msg->hdr.control = 1; ++ req_msg->hdr.log_message_interval = 127; ++ req_msg->hdr.message_length = + htons(sizeof(struct delay_req_msg)); +- ptp_msg->delay_req.hdr.domain_number = ptp_hdr->domain_number; ++ req_msg->hdr.domain_number = ptp_hdr->domain_number; + + /* Set up clock id. */ + client_clkid = +- &ptp_msg->delay_req.hdr.source_port_id.clock_id; ++ &req_msg->hdr.source_port_id.clock_id; + + client_clkid->id[0] = eth_hdr->s_addr.addr_bytes[0]; + client_clkid->id[1] = eth_hdr->s_addr.addr_bytes[1]; +@@ -603,10 +609,6 @@ lcore_main(void) + unsigned nb_rx; + struct rte_mbuf *m; + +- /* +- * Check that the port is on the same NUMA node as the polling thread +- * for best performance. +- */ + printf("\nCore %u Waiting for SYNC packets. [Ctrl+C to quit]\n", + rte_lcore_id()); + +@@ -785,5 +787,8 @@ main(int argc, char *argv[]) + /* Call lcore_main on the main core only. 
*/ + lcore_main(); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/qos_meter/Makefile b/dpdk/examples/qos_meter/Makefile +index 7a53818296..5558ba939a 100644 +--- a/dpdk/examples/qos_meter/Makefile ++++ b/dpdk/examples/qos_meter/Makefile +@@ -7,8 +7,10 @@ APP = qos_meter + # all source are stored in SRCS-y + SRCS-y := main.c rte_policer.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/qos_meter/main.c b/dpdk/examples/qos_meter/main.c +index f2d9c28828..6e724f3783 100644 +--- a/dpdk/examples/qos_meter/main.c ++++ b/dpdk/examples/qos_meter/main.c +@@ -460,5 +460,8 @@ main(int argc, char **argv) + return -1; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/qos_sched/Makefile b/dpdk/examples/qos_sched/Makefile +index f42406fe1c..0a748f5277 100644 +--- a/dpdk/examples/qos_sched/Makefile ++++ b/dpdk/examples/qos_sched/Makefile +@@ -7,8 +7,10 @@ APP = qos_sched + # all source are stored in SRCS-y + SRCS-y := main.c args.c init.c app_thread.c cfg_file.c cmdline.c stats.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/qos_sched/args.c b/dpdk/examples/qos_sched/args.c +index c369ba9b4d..d8e39957d8 100644 +--- a/dpdk/examples/qos_sched/args.c ++++ b/dpdk/examples/qos_sched/args.c +@@ -11,6 +11,7 @@ + #include + #include + ++#include + #include + #include + #include +@@ -410,13 +411,13 @@ app_parse_args(int argc, char **argv) + + /* check main core index validity */ + for (i = 0; i <= app_main_core; i++) { +- if (app_used_core_mask & (1u << app_main_core)) { ++ if (app_used_core_mask & RTE_BIT64(app_main_core)) { + RTE_LOG(ERR, APP, "Main core index is not configured properly\n"); + app_usage(prgname); + return -1; + } + } +- app_used_core_mask |= 1u << app_main_core; ++ app_used_core_mask |= RTE_BIT64(app_main_core); + + if ((app_used_core_mask != app_eal_core_mask()) || + (app_main_core != rte_get_main_lcore())) { +diff --git a/dpdk/examples/qos_sched/cmdline.c b/dpdk/examples/qos_sched/cmdline.c +index 257b87a7cf..6691b02d89 100644 +--- a/dpdk/examples/qos_sched/cmdline.c ++++ b/dpdk/examples/qos_sched/cmdline.c +@@ -41,7 +41,7 @@ static void cmd_help_parsed(__rte_unused void *parsed_result, + " qavg port X subport Y pipe Z : Show average queue size per pipe.\n" + " qavg port X subport Y pipe Z tc A : Show average queue size per pipe and TC.\n" + " qavg port X subport Y pipe Z tc A q B : Show average queue size of a specific queue.\n" +- " 
qavg [n|period] X : Set number of times and peiod (us).\n\n" ++ " qavg [n|period] X : Set number of times and period (us).\n\n" + ); + + } +diff --git a/dpdk/examples/qos_sched/main.c b/dpdk/examples/qos_sched/main.c +index a6071b991c..dc6a17a646 100644 +--- a/dpdk/examples/qos_sched/main.c ++++ b/dpdk/examples/qos_sched/main.c +@@ -218,5 +218,8 @@ main(int argc, char **argv) + } + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/rxtx_callbacks/Makefile b/dpdk/examples/rxtx_callbacks/Makefile +index a618cdf751..d11e0b4153 100644 +--- a/dpdk/examples/rxtx_callbacks/Makefile ++++ b/dpdk/examples/rxtx_callbacks/Makefile +@@ -7,8 +7,10 @@ APP = rxtx_callbacks + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/rxtx_callbacks/main.c b/dpdk/examples/rxtx_callbacks/main.c +index 35c6c39807..192521c3c6 100644 +--- a/dpdk/examples/rxtx_callbacks/main.c ++++ b/dpdk/examples/rxtx_callbacks/main.c +@@ -329,7 +329,7 @@ main(int argc, char *argv[]) + /* initialize all ports */ + RTE_ETH_FOREACH_DEV(portid) + if (port_init(portid, mbuf_pool) != 0) +- rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n", ++ rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu16"\n", + portid); + + if (rte_lcore_count() > 1) +@@ -338,5 +338,9 @@ main(int argc, char *argv[]) + + /* call lcore_main on main core only */ + lcore_main(); ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/server_node_efd/node/Makefile b/dpdk/examples/server_node_efd/node/Makefile +index 2120de5397..2c93872e5b 100644 +--- a/dpdk/examples/server_node_efd/node/Makefile ++++ b/dpdk/examples/server_node_efd/node/Makefile +@@ -9,8 +9,10 @@ SRCS-y := node.c + + CFLAGS += -I../shared + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -21,8 +23,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/server_node_efd/node/node.c b/dpdk/examples/server_node_efd/node/node.c +index 67a55808bf..e68606e0ca 100644 +--- a/dpdk/examples/server_node_efd/node/node.c ++++ b/dpdk/examples/server_node_efd/node/node.c +@@ -383,4 +383,7 @@ main(int argc, char *argv[]) + + need_flush = 1; + } ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); + } +diff --git a/dpdk/examples/server_node_efd/server/Makefile b/dpdk/examples/server_node_efd/server/Makefile +index 6b5878d8c0..f51ef134dc 100644 +--- a/dpdk/examples/server_node_efd/server/Makefile ++++ b/dpdk/examples/server_node_efd/server/Makefile +@@ -9,8 +9,10 @@ SRCS-y := main.c init.c args.c + + 
CFLAGS += -I../shared + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -21,8 +23,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/server_node_efd/server/main.c b/dpdk/examples/server_node_efd/server/main.c +index 4728960eaf..39b7b6370f 100644 +--- a/dpdk/examples/server_node_efd/server/main.c ++++ b/dpdk/examples/server_node_efd/server/main.c +@@ -334,5 +334,9 @@ main(int argc, char *argv[]) + rte_eal_mp_remote_launch(sleep_lcore, NULL, SKIP_MAIN); + + do_packet_forwarding(); ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/service_cores/Makefile b/dpdk/examples/service_cores/Makefile +index 754333c878..2054677f96 100644 +--- a/dpdk/examples/service_cores/Makefile ++++ b/dpdk/examples/service_cores/Makefile +@@ -7,8 +7,10 @@ APP = service_cores + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/service_cores/main.c b/dpdk/examples/service_cores/main.c +index c7c792810d..e0cb4f9acb 100644 +--- a/dpdk/examples/service_cores/main.c ++++ b/dpdk/examples/service_cores/main.c +@@ -118,7 +118,7 @@ apply_profile(int profile_id) + struct profile *p = &profiles[profile_id]; + const uint8_t core_off = 1; + +- if (p->num_cores > rte_lcore_count() + 1) { ++ if (p->num_cores > rte_lcore_count() - 1) { + printf("insufficent cores to run (%s)", + p->name); + return; +@@ -220,5 +220,8 @@ main(int argc, char **argv) + i = 0; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/skeleton/Makefile b/dpdk/examples/skeleton/Makefile +index 4fa97cb975..4be77631ee 100644 +--- a/dpdk/examples/skeleton/Makefile ++++ b/dpdk/examples/skeleton/Makefile +@@ -7,8 +7,10 @@ APP = basicfwd + # all source are stored in SRCS-y + SRCS-y := basicfwd.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/skeleton/basicfwd.c b/dpdk/examples/skeleton/basicfwd.c +index 4b2b6ab4ff..43b9d17a3c 100644 +--- a/dpdk/examples/skeleton/basicfwd.c ++++ 
b/dpdk/examples/skeleton/basicfwd.c +@@ -122,7 +122,7 @@ lcore_main(void) + * for best performance. + */ + RTE_ETH_FOREACH_DEV(port) +- if (rte_eth_dev_socket_id(port) > 0 && ++ if (rte_eth_dev_socket_id(port) >= 0 && + rte_eth_dev_socket_id(port) != + (int)rte_socket_id()) + printf("WARNING, port %u is on remote NUMA node to " +@@ -205,5 +205,8 @@ main(int argc, char *argv[]) + /* Call lcore_main on the main core only. */ + lcore_main(); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/timer/Makefile b/dpdk/examples/timer/Makefile +index 70b1af9f4b..1c8b8f294e 100644 +--- a/dpdk/examples/timer/Makefile ++++ b/dpdk/examples/timer/Makefile +@@ -7,8 +7,10 @@ APP = timer + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/timer/main.c b/dpdk/examples/timer/main.c +index 5a57e48290..d270ce49dc 100644 +--- a/dpdk/examples/timer/main.c ++++ b/dpdk/examples/timer/main.c +@@ -18,8 +18,7 @@ + #include + #include + +-#define TIMER_RESOLUTION_CYCLES 20000000ULL /* around 10ms at 2 Ghz */ +- ++static uint64_t timer_resolution_cycles; + static struct rte_timer timer0; + static struct rte_timer timer1; + +@@ -66,15 +65,14 @@ lcore_mainloop(__rte_unused void *arg) + + while (1) { + /* +- * Call the timer handler on each core: as we don't +- * need a very precise timer, so only call +- * rte_timer_manage() every ~10ms (at 2Ghz). In a real +- * application, this will enhance performances as +- * reading the HPET timer is not efficient. ++ * Call the timer handler on each core: as we don't need a ++ * very precise timer, so only call rte_timer_manage() ++ * every ~10ms. In a real application, this will enhance ++ * performances as reading the HPET timer is not efficient. 
+ */ +- cur_tsc = rte_rdtsc(); ++ cur_tsc = rte_get_timer_cycles(); + diff_tsc = cur_tsc - prev_tsc; +- if (diff_tsc > TIMER_RESOLUTION_CYCLES) { ++ if (diff_tsc > timer_resolution_cycles) { + rte_timer_manage(); + prev_tsc = cur_tsc; + } +@@ -100,8 +98,10 @@ main(int argc, char **argv) + rte_timer_init(&timer0); + rte_timer_init(&timer1); + +- /* load timer0, every second, on main lcore, reloaded automatically */ + hz = rte_get_timer_hz(); ++ timer_resolution_cycles = hz * 10 / 1000; /* around 10ms */ ++ ++ /* load timer0, every second, on main lcore, reloaded automatically */ + lcore_id = rte_lcore_id(); + rte_timer_reset(&timer0, hz, PERIODICAL, lcore_id, timer0_cb, NULL); + +@@ -117,5 +117,8 @@ main(int argc, char **argv) + /* call it on main lcore too */ + (void) lcore_mainloop(NULL); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/vdpa/Makefile b/dpdk/examples/vdpa/Makefile +index c4b2184ead..369ff331e1 100644 +--- a/dpdk/examples/vdpa/Makefile ++++ b/dpdk/examples/vdpa/Makefile +@@ -8,8 +8,10 @@ APP = vdpa + SRCS-y := main.c + CFLAGS += -DALLOW_EXPERIMENTAL_API + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -20,8 +22,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/vdpa/main.c b/dpdk/examples/vdpa/main.c +index 97e967b9a2..097a267b8c 100644 +--- a/dpdk/examples/vdpa/main.c ++++ b/dpdk/examples/vdpa/main.c +@@ -576,5 +576,8 @@ main(int argc, char *argv[]) + vdpa_sample_quit(); + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/vhost/Makefile b/dpdk/examples/vhost/Makefile +index 8c969caaad..2b88a38fc3 100644 +--- a/dpdk/examples/vhost/Makefile ++++ b/dpdk/examples/vhost/Makefile +@@ -7,8 +7,10 @@ APP = vhost-switch + # all source are stored in SRCS-y + SRCS-y := main.c virtio_net.c ioat.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -21,8 +23,6 @@ static: build/$(APP)-static + + LDFLAGS += -pthread + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/vhost/main.c b/dpdk/examples/vhost/main.c +index 8d8c3038bf..24e37f7ce5 100644 +--- a/dpdk/examples/vhost/main.c ++++ b/dpdk/examples/vhost/main.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -32,6 +33,8 @@ + #define MAX_QUEUES 128 + #endif + ++#define NUM_MBUFS_DEFAULT 0x24000 ++ + /* the maximum number of external ports supported */ + #define MAX_SUP_PORTS 1 + +@@ -59,6 +62,9 @@ + /* Maximum long option length for option parsing. */ + #define MAX_LONG_OPT_SZ 64 + ++/* number of mbufs in all pools - if specified on command-line. 
*/ ++static int total_num_mbufs = NUM_MBUFS_DEFAULT; ++ + /* mask of enabled ports */ + static uint32_t enabled_port_mask = 0; + +@@ -109,7 +115,7 @@ static uint32_t burst_rx_retry_num = BURST_RX_RETRIES; + static char *socket_files; + static int nb_sockets; + +-/* empty vmdq configuration structure. Filled in programatically */ ++/* empty VMDq configuration structure. Filled in programmatically */ + static struct rte_eth_conf vmdq_conf_default = { + .rxmode = { + .mq_mode = ETH_MQ_RX_VMDQ_ONLY, +@@ -117,7 +123,7 @@ static struct rte_eth_conf vmdq_conf_default = { + /* + * VLAN strip is necessary for 1G NIC such as I350, + * this fixes bug of ipv4 forwarding in guest can't +- * forward pakets from one virtio dev to another virtio dev. ++ * forward packets from one virtio dev to another virtio dev. + */ + .offloads = DEV_RX_OFFLOAD_VLAN_STRIP, + }, +@@ -249,6 +255,10 @@ port_init(uint16_t port) + + return retval; + } ++ if (dev_info.max_vmdq_pools == 0) { ++ RTE_LOG(ERR, VHOST_PORT, "Failed to get VMDq info.\n"); ++ return -1; ++ } + + rxconf = &dev_info.default_rxconf; + txconf = &dev_info.default_txconf; +@@ -452,7 +462,7 @@ us_vhost_usage(const char *prgname) + " --nb-devices ND\n" + " -p PORTMASK: Set mask for ports to be used by application\n" + " --vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n" +- " --rx-retry [0|1]: disable/enable(default) retries on rx. Enable retry if destintation queue is full\n" ++ " --rx-retry [0|1]: disable/enable(default) retries on Rx. Enable retry if destination queue is full\n" + " --rx-retry-delay [0-N]: timeout(in usecond) between retries on RX. This makes effect only if retries on rx enabled\n" + " --rx-retry-num [0-N]: the number of retries on rx. This makes effect only if retries on rx enabled\n" + " --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n" +@@ -462,7 +472,8 @@ us_vhost_usage(const char *prgname) + " --tso [0|1] disable/enable TCP segment offload.\n" + " --client register a vhost-user socket as client mode.\n" + " --dma-type register dma type for your vhost async driver. For example \"ioat\" for now.\n" +- " --dmas register dma channel for specific vhost device.\n", ++ " --dmas register dma channel for specific vhost device.\n" ++ " --total-num-mbufs [0-N] set the number of mbufs to be allocated in mbuf pools, the default value is 147456.\n", + prgname); + } + +@@ -490,7 +501,7 @@ us_vhost_parse_args(int argc, char **argv) + {"builtin-net-driver", no_argument, &builtin_net_driver, 1}, + {"dma-type", required_argument, NULL, 0}, + {"dmas", required_argument, NULL, 0}, +- {NULL, 0, 0, 0}, ++ {"total-num-mbufs", required_argument, NULL, 0}, + }; + + /* Parse command line */ +@@ -654,6 +665,21 @@ us_vhost_parse_args(int argc, char **argv) + async_vhost_driver = 1; + } + ++ ++ if (!strncmp(long_option[option_index].name, ++ "total-num-mbufs", MAX_LONG_OPT_SZ)) { ++ ret = parse_num_opt(optarg, INT32_MAX); ++ if (ret == -1) { ++ RTE_LOG(INFO, VHOST_CONFIG, ++ "Invalid argument for total-num-mbufs [0..N]\n"); ++ us_vhost_usage(prgname); ++ return -1; ++ } ++ ++ if (total_num_mbufs < ret) ++ total_num_mbufs = ret; ++ } ++ + break; + + /* Invalid option - print options. 
*/ +@@ -911,33 +937,34 @@ find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m, + return 0; + } + +-static uint16_t +-get_psd_sum(void *l3_hdr, uint64_t ol_flags) +-{ +- if (ol_flags & PKT_TX_IPV4) +- return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); +- else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */ +- return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); +-} +- + static void virtio_tx_offload(struct rte_mbuf *m) + { ++ struct rte_net_hdr_lens hdr_lens; ++ struct rte_ipv4_hdr *ipv4_hdr; ++ struct rte_tcp_hdr *tcp_hdr; ++ uint32_t ptype; + void *l3_hdr; +- struct rte_ipv4_hdr *ipv4_hdr = NULL; +- struct rte_tcp_hdr *tcp_hdr = NULL; +- struct rte_ether_hdr *eth_hdr = +- rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + +- l3_hdr = (char *)eth_hdr + m->l2_len; ++ ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); ++ m->l2_len = hdr_lens.l2_len; ++ m->l3_len = hdr_lens.l3_len; ++ m->l4_len = hdr_lens.l4_len; ++ ++ l3_hdr = rte_pktmbuf_mtod_offset(m, void *, m->l2_len); ++ tcp_hdr = rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *, ++ m->l2_len + m->l3_len); + +- if (m->ol_flags & PKT_TX_IPV4) { ++ m->ol_flags |= PKT_TX_TCP_SEG; ++ if ((ptype & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4) { ++ m->ol_flags |= PKT_TX_IPV4; ++ m->ol_flags |= PKT_TX_IP_CKSUM; + ipv4_hdr = l3_hdr; + ipv4_hdr->hdr_checksum = 0; +- m->ol_flags |= PKT_TX_IP_CKSUM; ++ tcp_hdr->cksum = rte_ipv4_phdr_cksum(l3_hdr, m->ol_flags); ++ } else { /* assume ethertype == RTE_ETHER_TYPE_IPV6 */ ++ m->ol_flags |= PKT_TX_IPV6; ++ tcp_hdr->cksum = rte_ipv6_phdr_cksum(l3_hdr, m->ol_flags); + } +- +- tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + m->l3_len); +- tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags); + } + + static inline void +@@ -1039,7 +1066,7 @@ virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag) + m->vlan_tci = vlan_tag; + } + +- if (m->ol_flags & PKT_TX_TCP_SEG) ++ if (m->ol_flags & PKT_RX_LRO) + virtio_tx_offload(m); + + tx_q->m_table[tx_q->len++] = m; +@@ -1190,7 +1217,7 @@ switch_worker(void *arg __rte_unused) + struct vhost_dev *vdev; + struct mbuf_table *tx_q; + +- RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id); ++ RTE_LOG(INFO, VHOST_DATA, "Processing on Core %u started\n", lcore_id); + + tx_q = &lcore_tx_queue[lcore_id]; + for (i = 0; i < rte_lcore_count(); i++) { +@@ -1234,7 +1261,7 @@ switch_worker(void *arg __rte_unused) + + /* + * Remove a device from the specific data core linked list and from the +- * main linked list. Synchonization occurs through the use of the ++ * main linked list. Synchronization occurs through the use of the + * lcore dev_removal_flag. Device is made volatile here to avoid re-ordering + * of dev->remove=1 which can cause an infinite loop in the rte_pause loop. + */ +@@ -1441,57 +1468,6 @@ sigint_handler(__rte_unused int signum) + exit(0); + } + +-/* +- * While creating an mbuf pool, one key thing is to figure out how +- * many mbuf entries is enough for our use. FYI, here are some +- * guidelines: +- * +- * - Each rx queue would reserve @nr_rx_desc mbufs at queue setup stage +- * +- * - For each switch core (A CPU core does the packet switch), we need +- * also make some reservation for receiving the packets from virtio +- * Tx queue. How many is enough depends on the usage. It's normally +- * a simple calculation like following: +- * +- * MAX_PKT_BURST * max packet size / mbuf size +- * +- * So, we definitely need allocate more mbufs when TSO is enabled. 
+- * +- * - Similarly, for each switching core, we should serve @nr_rx_desc +- * mbufs for receiving the packets from physical NIC device. +- * +- * - We also need make sure, for each switch core, we have allocated +- * enough mbufs to fill up the mbuf cache. +- */ +-static void +-create_mbuf_pool(uint16_t nr_port, uint32_t nr_switch_core, uint32_t mbuf_size, +- uint32_t nr_queues, uint32_t nr_rx_desc, uint32_t nr_mbuf_cache) +-{ +- uint32_t nr_mbufs; +- uint32_t nr_mbufs_per_core; +- uint32_t mtu = 1500; +- +- if (mergeable) +- mtu = 9000; +- if (enable_tso) +- mtu = 64 * 1024; +- +- nr_mbufs_per_core = (mtu + mbuf_size) * MAX_PKT_BURST / +- (mbuf_size - RTE_PKTMBUF_HEADROOM); +- nr_mbufs_per_core += nr_rx_desc; +- nr_mbufs_per_core = RTE_MAX(nr_mbufs_per_core, nr_mbuf_cache); +- +- nr_mbufs = nr_queues * nr_rx_desc; +- nr_mbufs += nr_mbufs_per_core * nr_switch_core; +- nr_mbufs *= nr_port; +- +- mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", nr_mbufs, +- nr_mbuf_cache, 0, mbuf_size, +- rte_socket_id()); +- if (mbuf_pool == NULL) +- rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); +-} +- + /* + * Main function, does initialisation and calls the per-lcore functions. + */ +@@ -1503,7 +1479,7 @@ main(int argc, char *argv[]) + int ret, i; + uint16_t portid; + static pthread_t tid; +- uint64_t flags = 0; ++ uint64_t flags = RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS; + + signal(SIGINT, sigint_handler); + +@@ -1550,8 +1526,11 @@ main(int argc, char *argv[]) + * many queues here. We probably should only do allocation for + * those queues we are going to use. + */ +- create_mbuf_pool(valid_num_ports, rte_lcore_count() - 1, MBUF_DATA_SIZE, +- MAX_QUEUES, RTE_TEST_RX_DESC_DEFAULT, MBUF_CACHE_SIZE); ++ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", total_num_mbufs, ++ MBUF_CACHE_SIZE, 0, MBUF_DATA_SIZE, ++ rte_socket_id()); ++ if (mbuf_pool == NULL) ++ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n"); + + if (vm2vm_mode == VM2VM_HARDWARE) { + /* Enable VT loop back to let L2 switch to do it. 
*/ +@@ -1647,6 +1626,8 @@ main(int argc, char *argv[]) + RTE_LCORE_FOREACH_WORKER(lcore_id) + rte_eal_wait_lcore(lcore_id); + +- return 0; ++ /* clean up the EAL */ ++ rte_eal_cleanup(); + ++ return 0; + } +diff --git a/dpdk/examples/vhost/virtio_net.c b/dpdk/examples/vhost/virtio_net.c +index 8ea6b36d59..28e4b00960 100644 +--- a/dpdk/examples/vhost/virtio_net.c ++++ b/dpdk/examples/vhost/virtio_net.c +@@ -23,6 +23,7 @@ vs_vhost_net_setup(struct vhost_dev *dev) + uint16_t i; + int vid = dev->vid; + struct vhost_queue *queue; ++ int ret; + + RTE_LOG(INFO, VHOST_CONFIG, + "setting builtin vhost-user net driver\n"); +@@ -33,7 +34,12 @@ vs_vhost_net_setup(struct vhost_dev *dev) + else + dev->hdr_len = sizeof(struct virtio_net_hdr); + +- rte_vhost_get_mem_table(vid, &dev->mem); ++ ret = rte_vhost_get_mem_table(vid, &dev->mem); ++ if (ret < 0) { ++ RTE_LOG(ERR, VHOST_CONFIG, "Failed to get " ++ "VM memory layout for device(%d)\n", vid); ++ return; ++ } + + dev->nr_vrings = rte_vhost_get_vring_num(vid); + for (i = 0; i < dev->nr_vrings; i++) { +diff --git a/dpdk/examples/vhost_blk/Makefile b/dpdk/examples/vhost_blk/Makefile +index 792591386e..c6a1f76633 100644 +--- a/dpdk/examples/vhost_blk/Makefile ++++ b/dpdk/examples/vhost_blk/Makefile +@@ -7,8 +7,10 @@ APP = vhost-blk + # all source are stored in SRCS-y + SRCS-y := blk.c vhost_blk.c vhost_blk_compat.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + LDFLAGS += -pthread + + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) +diff --git a/dpdk/examples/vhost_blk/vhost_blk.c b/dpdk/examples/vhost_blk/vhost_blk.c +index bb293d492f..bdefd66f9e 100644 +--- a/dpdk/examples/vhost_blk/vhost_blk.c ++++ b/dpdk/examples/vhost_blk/vhost_blk.c +@@ -893,5 +893,8 @@ int main(int argc, char *argv[]) + while (1) + sleep(1); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/vhost_crypto/Makefile b/dpdk/examples/vhost_crypto/Makefile +index 27abd91998..cc7f2abb90 100644 +--- a/dpdk/examples/vhost_crypto/Makefile ++++ b/dpdk/examples/vhost_crypto/Makefile +@@ -8,8 +8,10 @@ APP = vhost-crypto + SRCS-y := main.c + CFLAGS += -DALLOW_EXPERIMENTAL_API + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -20,8 +22,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/vhost_crypto/main.c b/dpdk/examples/vhost_crypto/main.c +index 29c8f7228d..1b01f6f089 100644 +--- a/dpdk/examples/vhost_crypto/main.c ++++ b/dpdk/examples/vhost_crypto/main.c +@@ -219,7 +219,7 @@ vhost_crypto_parse_args(int argc, char **argv) + + argvopt = argv; + +- while ((opt = getopt_long(argc, argvopt, "s:", ++ while ((opt = getopt_long(argc, argvopt, "", + lgopts, &option_index)) != EOF) { + + switch (opt) { +@@ -455,6 +455,9 @@ free_resource(void) + 
} + + memset(&options, 0, sizeof(options)); ++ ++ /* clean up the EAL */ ++ rte_eal_cleanup(); + } + + int +diff --git a/dpdk/examples/vm_power_manager/Makefile b/dpdk/examples/vm_power_manager/Makefile +index 8ac1180b2f..c462f49fcf 100644 +--- a/dpdk/examples/vm_power_manager/Makefile ++++ b/dpdk/examples/vm_power_manager/Makefile +@@ -1,8 +1,10 @@ + # SPDX-License-Identifier: BSD-3-Clause + # Copyright(c) 2010-2020 Intel Corporation + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -25,8 +27,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/vm_power_manager/channel_monitor.c b/dpdk/examples/vm_power_manager/channel_monitor.c +index 99f81544d7..f5275c867d 100644 +--- a/dpdk/examples/vm_power_manager/channel_monitor.c ++++ b/dpdk/examples/vm_power_manager/channel_monitor.c +@@ -407,7 +407,7 @@ get_pcpu_to_control(struct policy *pol) + + /* + * So now that we're handling virtual and physical cores, we need to +- * differenciate between them when adding them to the branch monitor. ++ * differentiate between them when adding them to the branch monitor. + * Virtual cores need to be converted to physical cores. + */ + if (pol->pkt.core_type == RTE_POWER_CORE_TYPE_VIRTUAL) { +diff --git a/dpdk/examples/vm_power_manager/guest_cli/Makefile b/dpdk/examples/vm_power_manager/guest_cli/Makefile +index 1ee1ca1017..751ca6e185 100644 +--- a/dpdk/examples/vm_power_manager/guest_cli/Makefile ++++ b/dpdk/examples/vm_power_manager/guest_cli/Makefile +@@ -7,8 +7,10 @@ APP = guest_vm_power_mgr + # all source are stored in SRCS-y + SRCS-y := main.c vm_power_cli_guest.c parse.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/vm_power_manager/guest_cli/main.c b/dpdk/examples/vm_power_manager/guest_cli/main.c +index 4e17f7fb90..b8fa65ef15 100644 +--- a/dpdk/examples/vm_power_manager/guest_cli/main.c ++++ b/dpdk/examples/vm_power_manager/guest_cli/main.c +@@ -200,5 +200,8 @@ main(int argc, char **argv) + } + run_cli(NULL); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/vm_power_manager/main.c b/dpdk/examples/vm_power_manager/main.c +index 799d7b9bc3..7d5bf68554 100644 +--- a/dpdk/examples/vm_power_manager/main.c ++++ b/dpdk/examples/vm_power_manager/main.c +@@ -468,5 +468,8 @@ main(int argc, char **argv) + + free(ci->cd); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/vm_power_manager/power_manager.h b/dpdk/examples/vm_power_manager/power_manager.h +index d35f8cbe01..d51039e2c6 100644 +--- 
a/dpdk/examples/vm_power_manager/power_manager.h ++++ b/dpdk/examples/vm_power_manager/power_manager.h +@@ -224,7 +224,7 @@ int power_manager_enable_turbo_core(unsigned int core_num); + int power_manager_disable_turbo_core(unsigned int core_num); + + /** +- * Get the current freuency of the core specified by core_num ++ * Get the current frequency of the core specified by core_num + * + * @param core_num + * The core number to get the current frequency +diff --git a/dpdk/examples/vmdq/Makefile b/dpdk/examples/vmdq/Makefile +index 749ed53c6f..cc976384fa 100644 +--- a/dpdk/examples/vmdq/Makefile ++++ b/dpdk/examples/vmdq/Makefile +@@ -7,8 +7,10 @@ APP = vmdq_app + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/vmdq/main.c b/dpdk/examples/vmdq/main.c +index 3cb890fa2b..b88598427f 100644 +--- a/dpdk/examples/vmdq/main.c ++++ b/dpdk/examples/vmdq/main.c +@@ -61,7 +61,7 @@ static uint32_t num_queues = 8; + static uint32_t num_pools = 8; + static uint8_t rss_enable; + +-/* empty vmdq configuration structure. Filled in programatically */ ++/* empty vmdq configuration structure. Filled in programmatically */ + static const struct rte_eth_conf vmdq_conf_default = { + .rxmode = { + .mq_mode = ETH_MQ_RX_VMDQ_ONLY, +@@ -659,5 +659,8 @@ main(int argc, char *argv[]) + return -1; + } + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/examples/vmdq_dcb/Makefile b/dpdk/examples/vmdq_dcb/Makefile +index 1dd42105d6..a34e7e36d4 100644 +--- a/dpdk/examples/vmdq_dcb/Makefile ++++ b/dpdk/examples/vmdq_dcb/Makefile +@@ -7,8 +7,10 @@ APP = vmdq_dcb_app + # all source are stored in SRCS-y + SRCS-y := main.c + ++PKGCONF ?= pkg-config ++ + # Build using pkg-config variables if possible +-ifneq ($(shell pkg-config --exists libdpdk && echo 0),0) ++ifneq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0) + $(error "no installation of DPDK found") + endif + +@@ -19,8 +21,6 @@ shared: build/$(APP)-shared + static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +-PKGCONF ?= pkg-config +- + PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) + CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) + LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk) +diff --git a/dpdk/examples/vmdq_dcb/main.c b/dpdk/examples/vmdq_dcb/main.c +index 1a74364638..ba992802e9 100644 +--- a/dpdk/examples/vmdq_dcb/main.c ++++ b/dpdk/examples/vmdq_dcb/main.c +@@ -707,5 +707,8 @@ main(int argc, char *argv[]) + /* call on main too */ + (void) lcore_main((void*)i); + ++ /* clean up the EAL */ ++ rte_eal_cleanup(); ++ + return 0; + } +diff --git a/dpdk/kernel/freebsd/meson.build b/dpdk/kernel/freebsd/meson.build +index dc156a43fd..16a4e0b124 100644 +--- a/dpdk/kernel/freebsd/meson.build ++++ b/dpdk/kernel/freebsd/meson.build +@@ -10,7 +10,7 @@ kmods = ['contigmem', 'nic_uio'] + # files from the individual meson.build files, and then use a custom + # target to call make, passing in the values as env parameters. 
+ kmod_cflags = ['-I' + meson.build_root(), +- '-I' + join_paths(meson.source_root(), 'config'), ++ '-I' + join_paths(dpdk_source_root, 'config'), + '-include rte_config.h'] + + # to avoid warnings due to race conditions with creating the dev_if.h, etc. +diff --git a/dpdk/kernel/linux/kni/compat.h b/dpdk/kernel/linux/kni/compat.h +index 5f65640d5e..3a86d12bbc 100644 +--- a/dpdk/kernel/linux/kni/compat.h ++++ b/dpdk/kernel/linux/kni/compat.h +@@ -133,10 +133,19 @@ + + #if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE || \ + (defined(RHEL_RELEASE_CODE) && \ +- RHEL_RELEASE_VERSION(8, 3) <= RHEL_RELEASE_CODE) ++ RHEL_RELEASE_VERSION(8, 3) <= RHEL_RELEASE_CODE) || \ ++ (defined(CONFIG_SUSE_KERNEL) && defined(HAVE_ARG_TX_QUEUE)) + #define HAVE_TX_TIMEOUT_TXQUEUE + #endif + + #if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE + #define HAVE_TSK_IN_GUP + #endif ++ ++#if KERNEL_VERSION(5, 15, 0) <= LINUX_VERSION_CODE ++#define HAVE_ETH_HW_ADDR_SET ++#endif ++ ++#if KERNEL_VERSION(5, 18, 0) > LINUX_VERSION_CODE ++#define HAVE_NETIF_RX_NI ++#endif +diff --git a/dpdk/kernel/linux/kni/kni_dev.h b/dpdk/kernel/linux/kni/kni_dev.h +index c15da311ba..e8633486ee 100644 +--- a/dpdk/kernel/linux/kni/kni_dev.h ++++ b/dpdk/kernel/linux/kni/kni_dev.h +@@ -34,6 +34,9 @@ + /* Default carrier state for created KNI network interfaces */ + extern uint32_t kni_dflt_carrier; + ++/* Request processing support for bifurcated drivers. */ ++extern uint32_t bifurcated_support; ++ + /** + * A structure describing the private information for a kni device. + */ +diff --git a/dpdk/kernel/linux/kni/kni_fifo.h b/dpdk/kernel/linux/kni/kni_fifo.h +index 5c91b55379..1ba5172002 100644 +--- a/dpdk/kernel/linux/kni/kni_fifo.h ++++ b/dpdk/kernel/linux/kni/kni_fifo.h +@@ -41,7 +41,7 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num) + } + + /** +- * Get up to num elements from the fifo. Return the number actully read ++ * Get up to num elements from the FIFO. Return the number actually read + */ + static inline uint32_t + kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num) +diff --git a/dpdk/kernel/linux/kni/kni_misc.c b/dpdk/kernel/linux/kni/kni_misc.c +index 2b464c4381..0df129240b 100644 +--- a/dpdk/kernel/linux/kni/kni_misc.c ++++ b/dpdk/kernel/linux/kni/kni_misc.c +@@ -41,6 +41,10 @@ static uint32_t multiple_kthread_on; + static char *carrier; + uint32_t kni_dflt_carrier; + ++/* Request processing support for bifurcated drivers. */ ++static char *enable_bifurcated; ++uint32_t bifurcated_support; ++ + #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */ + + static int kni_net_id; +@@ -180,13 +184,17 @@ kni_dev_remove(struct kni_dev *dev) + if (!dev) + return -ENODEV; + ++ /* ++ * The memory of kni device is allocated and released together ++ * with net device. Release mbuf before freeing net device. 
++ */ ++ kni_net_release_fifo_phy(dev); ++ + if (dev->net_dev) { + unregister_netdev(dev->net_dev); + free_netdev(dev->net_dev); + } + +- kni_net_release_fifo_phy(dev); +- + return 0; + } + +@@ -216,8 +224,8 @@ kni_release(struct inode *inode, struct file *file) + dev->pthread = NULL; + } + +- kni_dev_remove(dev); + list_del(&dev->list); ++ kni_dev_remove(dev); + } + up_write(&knet->kni_list_lock); + +@@ -396,14 +404,16 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num, + pr_debug("mbuf_size: %u\n", kni->mbuf_size); + + /* if user has provided a valid mac address */ +- if (is_valid_ether_addr(dev_info.mac_addr)) ++ if (is_valid_ether_addr(dev_info.mac_addr)) { ++#ifdef HAVE_ETH_HW_ADDR_SET ++ eth_hw_addr_set(net_dev, dev_info.mac_addr); ++#else + memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN); +- else +- /* +- * Generate random mac address. eth_random_addr() is the +- * newer version of generating mac address in kernel. +- */ +- random_ether_addr(net_dev->dev_addr); ++#endif ++ } else { ++ /* Assign random MAC address. */ ++ eth_hw_addr_random(net_dev); ++ } + + if (dev_info.mtu) + net_dev->mtu = dev_info.mtu; +@@ -469,8 +479,8 @@ kni_ioctl_release(struct net *net, uint32_t ioctl_num, + dev->pthread = NULL; + } + +- kni_dev_remove(dev); + list_del(&dev->list); ++ kni_dev_remove(dev); + ret = 0; + break; + } +@@ -481,10 +491,10 @@ kni_ioctl_release(struct net *net, uint32_t ioctl_num, + return ret; + } + +-static int +-kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param) ++static long ++kni_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param) + { +- int ret = -EINVAL; ++ long ret = -EINVAL; + struct net *net = current->nsproxy->net_ns; + + pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); +@@ -510,8 +520,8 @@ kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param) + return ret; + } + +-static int +-kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num, ++static long ++kni_compat_ioctl(struct file *file, unsigned int ioctl_num, + unsigned long ioctl_param) + { + /* 32 bits app on 64 bits OS to be supported later */ +@@ -524,8 +534,8 @@ static const struct file_operations kni_fops = { + .owner = THIS_MODULE, + .open = kni_open, + .release = kni_release, +- .unlocked_ioctl = (void *)kni_ioctl, +- .compat_ioctl = (void *)kni_compat_ioctl, ++ .unlocked_ioctl = kni_ioctl, ++ .compat_ioctl = kni_compat_ioctl, + }; + + static struct miscdevice kni_misc = { +@@ -568,6 +578,22 @@ kni_parse_carrier_state(void) + return 0; + } + ++static int __init ++kni_parse_bifurcated_support(void) ++{ ++ if (!enable_bifurcated) { ++ bifurcated_support = 0; ++ return 0; ++ } ++ ++ if (strcmp(enable_bifurcated, "on") == 0) ++ bifurcated_support = 1; ++ else ++ return -1; ++ ++ return 0; ++} ++ + static int __init + kni_init(void) + { +@@ -593,6 +619,13 @@ kni_init(void) + else + pr_debug("Default carrier state set to on.\n"); + ++ if (kni_parse_bifurcated_support() < 0) { ++ pr_err("Invalid parameter for bifurcated support\n"); ++ return -EINVAL; ++ } ++ if (bifurcated_support == 1) ++ pr_debug("bifurcated support is enabled.\n"); ++ + #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS + rc = register_pernet_subsys(&kni_net_ops); + #else +@@ -659,3 +692,12 @@ MODULE_PARM_DESC(carrier, + "\t\ton Interfaces will be created with carrier state set to on.\n" + "\t\t" + ); ++ ++module_param(enable_bifurcated, charp, 0644); ++MODULE_PARM_DESC(enable_bifurcated, ++"Enable request processing support for bifurcated drivers, " 
++"which means releasing rtnl_lock before calling userspace callback and " ++"supporting async requests (default=off):\n" ++"\t\ton Enable request processing support for bifurcated drivers.\n" ++"\t\t" ++); +diff --git a/dpdk/kernel/linux/kni/kni_net.c b/dpdk/kernel/linux/kni/kni_net.c +index 4b752083da..779ee3451a 100644 +--- a/dpdk/kernel/linux/kni/kni_net.c ++++ b/dpdk/kernel/linux/kni/kni_net.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -102,16 +103,24 @@ get_data_kva(struct kni_dev *kni, void *pkt_kva) + * It can be called to process the request. + */ + static int +-kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) ++kni_net_process_request(struct net_device *dev, struct rte_kni_request *req) + { ++ struct kni_dev *kni = netdev_priv(dev); + int ret = -1; + void *resp_va; + uint32_t num; + int ret_val; + +- if (!kni || !req) { +- pr_err("No kni instance or request\n"); +- return -EINVAL; ++ ASSERT_RTNL(); ++ ++ if (bifurcated_support) { ++ /* If we need to wait and RTNL mutex is held ++ * drop the mutex and hold reference to keep device ++ */ ++ if (req->async == 0) { ++ dev_hold(dev); ++ rtnl_unlock(); ++ } + } + + mutex_lock(&kni->sync_lock); +@@ -125,6 +134,16 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) + goto fail; + } + ++ if (bifurcated_support) { ++ /* No result available since request is handled ++ * asynchronously. set response to success. ++ */ ++ if (req->async != 0) { ++ req->result = 0; ++ goto async; ++ } ++ } ++ + ret_val = wait_event_interruptible_timeout(kni->wq, + kni_fifo_count(kni->resp_q), 3 * HZ); + if (signal_pending(current) || ret_val <= 0) { +@@ -140,10 +159,17 @@ kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) + } + + memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request)); ++async: + ret = 0; + + fail: + mutex_unlock(&kni->sync_lock); ++ if (bifurcated_support) { ++ if (req->async == 0) { ++ rtnl_lock(); ++ dev_put(dev); ++ } ++ } + return ret; + } + +@@ -155,7 +181,6 @@ kni_net_open(struct net_device *dev) + { + int ret; + struct rte_kni_request req; +- struct kni_dev *kni = netdev_priv(dev); + + netif_start_queue(dev); + if (kni_dflt_carrier == 1) +@@ -168,7 +193,7 @@ kni_net_open(struct net_device *dev) + + /* Setting if_up to non-zero means up */ + req.if_up = 1; +- ret = kni_net_process_request(kni, &req); ++ ret = kni_net_process_request(dev, &req); + + return (ret == 0) ? req.result : ret; + } +@@ -178,7 +203,6 @@ kni_net_release(struct net_device *dev) + { + int ret; + struct rte_kni_request req; +- struct kni_dev *kni = netdev_priv(dev); + + netif_stop_queue(dev); /* can't transmit any more */ + netif_carrier_off(dev); +@@ -188,7 +212,13 @@ kni_net_release(struct net_device *dev) + + /* Setting if_up to 0 means down */ + req.if_up = 0; +- ret = kni_net_process_request(kni, &req); ++ ++ if (bifurcated_support) { ++ /* request async because of the deadlock problem */ ++ req.async = 1; ++ } ++ ++ ret = kni_net_process_request(dev, &req); + + return (ret == 0) ? 
req.result : ret; + } +@@ -223,7 +253,7 @@ kni_fifo_trans_pa2va(struct kni_dev *kni, + break; + + prev_kva = kva; +- kva = pa2kva(kva->next); ++ kva = get_kva(kni, kva->next); + /* Convert physical address to virtual address */ + prev_kva->next = pa2va(prev_kva->next, kva); + } +@@ -400,7 +430,7 @@ kni_net_rx_normal(struct kni_dev *kni) + break; + + prev_kva = kva; +- kva = pa2kva(kva->next); ++ kva = get_kva(kni, kva->next); + data_kva = kva2data_kva(kva); + /* Convert physical address to virtual address */ + prev_kva->next = pa2va(prev_kva->next, kva); +@@ -411,7 +441,11 @@ kni_net_rx_normal(struct kni_dev *kni) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* Call netif interface */ ++#ifdef HAVE_NETIF_RX_NI + netif_rx_ni(skb); ++#else ++ netif_rx(skb); ++#endif + + /* Update statistics */ + dev->stats.rx_bytes += len; +@@ -479,7 +513,7 @@ kni_net_rx_lo_fifo(struct kni_dev *kni) + kni->va[i] = pa2va(kni->pa[i], kva); + + while (kva->next) { +- next_kva = pa2kva(kva->next); ++ next_kva = get_kva(kni, kva->next); + /* Convert physical address to virtual address */ + kva->next = pa2va(kva->next, next_kva); + kva = next_kva; +@@ -643,14 +677,13 @@ kni_net_change_mtu(struct net_device *dev, int new_mtu) + { + int ret; + struct rte_kni_request req; +- struct kni_dev *kni = netdev_priv(dev); + + pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu); + + memset(&req, 0, sizeof(req)); + req.req_id = RTE_KNI_REQ_CHANGE_MTU; + req.new_mtu = new_mtu; +- ret = kni_net_process_request(kni, &req); ++ ret = kni_net_process_request(dev, &req); + if (ret == 0 && req.result == 0) + dev->mtu = new_mtu; + +@@ -661,7 +694,6 @@ static void + kni_net_change_rx_flags(struct net_device *netdev, int flags) + { + struct rte_kni_request req; +- struct kni_dev *kni = netdev_priv(netdev); + + memset(&req, 0, sizeof(req)); + +@@ -683,7 +715,7 @@ kni_net_change_rx_flags(struct net_device *netdev, int flags) + req.promiscusity = 0; + } + +- kni_net_process_request(kni, &req); ++ kni_net_process_request(netdev, &req); + } + + /* +@@ -742,7 +774,6 @@ kni_net_set_mac(struct net_device *netdev, void *p) + { + int ret; + struct rte_kni_request req; +- struct kni_dev *kni; + struct sockaddr *addr = p; + + memset(&req, 0, sizeof(req)); +@@ -752,10 +783,13 @@ kni_net_set_mac(struct net_device *netdev, void *p) + return -EADDRNOTAVAIL; + + memcpy(req.mac_addr, addr->sa_data, netdev->addr_len); ++#ifdef HAVE_ETH_HW_ADDR_SET ++ eth_hw_addr_set(netdev, addr->sa_data); ++#else + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); ++#endif + +- kni = netdev_priv(netdev); +- ret = kni_net_process_request(kni, &req); ++ ret = kni_net_process_request(netdev, &req); + + return (ret == 0 ? 
req.result : ret); + } +diff --git a/dpdk/kernel/linux/kni/meson.build b/dpdk/kernel/linux/kni/meson.build +index 07e0c9dae7..dab4f77df4 100644 +--- a/dpdk/kernel/linux/kni/meson.build ++++ b/dpdk/kernel/linux/kni/meson.build +@@ -1,6 +1,17 @@ + # SPDX-License-Identifier: BSD-3-Clause + # Copyright(c) 2018 Luca Boccassi + ++# For SUSE build check function arguments of ndo_tx_timeout API ++# Ref: https://jira.devtools.intel.com/browse/DPDK-29263 ++kmod_cflags = '' ++file_path = kernel_source_dir + '/include/linux/netdevice.h' ++run_cmd = run_command('grep', 'ndo_tx_timeout', file_path, check: false) ++ ++if run_cmd.stdout().contains('txqueue') == true ++ kmod_cflags = '-DHAVE_ARG_TX_QUEUE' ++endif ++ ++ + kni_mkfile = custom_target('rte_kni_makefile', + output: 'Makefile', + command: ['touch', '@OUTPUT@']) +@@ -16,10 +27,11 @@ custom_target('rte_kni', + command: ['make', '-j4', '-C', kernel_dir + '/build', + 'M=' + meson.current_build_dir(), + 'src=' + meson.current_source_dir(), +- 'MODULE_CFLAGS=-include ' + meson.source_root() + '/config/rte_config.h' + +- ' -I' + meson.source_root() + '/lib/librte_eal/include' + +- ' -I' + meson.source_root() + '/lib/librte_kni' + +- ' -I' + meson.build_root() + ++ ' '.join(['MODULE_CFLAGS=', kmod_cflags,'-include ']) ++ + dpdk_source_root + '/config/rte_config.h' + ++ ' -I' + dpdk_source_root + '/lib/librte_eal/include' + ++ ' -I' + dpdk_source_root + '/lib/librte_kni' + ++ ' -I' + dpdk_build_root + + ' -I' + meson.current_source_dir(), + 'modules'], + depends: kni_mkfile, +diff --git a/dpdk/kernel/linux/meson.build b/dpdk/kernel/linux/meson.build +index 5c864a4653..b23298b1fd 100644 +--- a/dpdk/kernel/linux/meson.build ++++ b/dpdk/kernel/linux/meson.build +@@ -11,13 +11,19 @@ endif + kernel_dir = get_option('kernel_dir') + if kernel_dir == '' + # use default path for native builds +- kernel_version = run_command('uname', '-r').stdout().strip() ++ kernel_version = run_command('uname', '-r', check: true).stdout().strip() + kernel_dir = '/lib/modules/' + kernel_version + endif + ++kernel_source_dir = get_option('kernel_dir') ++if kernel_source_dir == '' ++ # use default path for native builds ++ kernel_source_dir = '/lib/modules/' + kernel_version + '/source' ++endif ++ + # test running make in kernel directory, using "make kernelversion" + make_returncode = run_command('make', '-sC', kernel_dir + '/build', +- 'kernelversion').returncode() ++ 'kernelversion', check: true).returncode() + if make_returncode != 0 + error('Cannot compile kernel modules as requested - are kernel headers installed?') + endif +diff --git a/dpdk/lib/librte_acl/acl.h b/dpdk/lib/librte_acl/acl.h +index 4089ab2a04..f5739a475c 100644 +--- a/dpdk/lib/librte_acl/acl.h ++++ b/dpdk/lib/librte_acl/acl.h +@@ -45,7 +45,7 @@ struct rte_acl_bitset { + * Each transition is 64 bit value with the following format: + * | node_type_specific : 32 | node_type : 3 | node_addr : 29 | + * For all node types except RTE_ACL_NODE_MATCH, node_addr is an index +- * to the start of the node in the transtions array. ++ * to the start of the node in the transitions array. + * Few different node types are used: + * RTE_ACL_NODE_MATCH: + * node_addr value is and index into an array that contains the return value +@@ -66,7 +66,7 @@ struct rte_acl_bitset { + * RTE_ACL_NODE_SINGLE: + * always transitions to the same node regardless of the input value. + * RTE_ACL_NODE_DFA: +- * that node consits of up to 256 transitions. ++ * that node consists of up to 256 transitions. 
+ * In attempt to conserve space all transitions are divided into 4 consecutive + * groups, by 64 transitions per group: + * group64[i] contains transitions[i * 64, .. i * 64 + 63]. +diff --git a/dpdk/lib/librte_acl/acl_bld.c b/dpdk/lib/librte_acl/acl_bld.c +index da10864cd8..2816632803 100644 +--- a/dpdk/lib/librte_acl/acl_bld.c ++++ b/dpdk/lib/librte_acl/acl_bld.c +@@ -12,6 +12,9 @@ + /* number of pointers per alloc */ + #define ACL_PTR_ALLOC 32 + ++/* account for situation when all fields are 8B long */ ++#define ACL_MAX_INDEXES (2 * RTE_ACL_MAX_FIELDS) ++ + /* macros for dividing rule sets heuristics */ + #define NODE_MAX 0x4000 + #define NODE_MIN 0x800 +@@ -80,7 +83,7 @@ struct acl_build_context { + struct tb_mem_pool pool; + struct rte_acl_trie tries[RTE_ACL_MAX_TRIES]; + struct rte_acl_bld_trie bld_tries[RTE_ACL_MAX_TRIES]; +- uint32_t data_indexes[RTE_ACL_MAX_TRIES][RTE_ACL_MAX_FIELDS]; ++ uint32_t data_indexes[RTE_ACL_MAX_TRIES][ACL_MAX_INDEXES]; + + /* memory free lists for nodes and blocks used for node ptrs */ + struct acl_mem_block blocks[MEM_BLOCK_NUM]; +@@ -885,7 +888,7 @@ acl_gen_range_trie(struct acl_build_context *context, + return root; + } + +- /* gather information about divirgent paths */ ++ /* gather information about divergent paths */ + lo_00 = 0; + hi_ff = UINT8_MAX; + for (k = n - 1; k >= 0; k--) { +@@ -988,7 +991,7 @@ build_trie(struct acl_build_context *context, struct rte_acl_build_rule *head, + */ + uint64_t mask; + mask = RTE_ACL_MASKLEN_TO_BITMASK( +- fld->mask_range.u32, ++ fld->mask_range.u64, + rule->config->defs[n].size); + + /* gen a mini-trie for this field */ +@@ -1301,6 +1304,9 @@ acl_build_index(const struct rte_acl_config *config, uint32_t *data_index) + if (last_header != config->defs[n].input_index) { + last_header = config->defs[n].input_index; + data_index[m++] = config->defs[n].offset; ++ if (config->defs[n].size > sizeof(uint32_t)) ++ data_index[m++] = config->defs[n].offset + ++ sizeof(uint32_t); + } + } + +@@ -1487,14 +1493,14 @@ acl_set_data_indexes(struct rte_acl_ctx *ctx) + memcpy(ctx->data_indexes + ofs, ctx->trie[i].data_index, + n * sizeof(ctx->data_indexes[0])); + ctx->trie[i].data_index = ctx->data_indexes + ofs; +- ofs += RTE_ACL_MAX_FIELDS; ++ ofs += ACL_MAX_INDEXES; + } + } + + /* + * Internal routine, performs 'build' phase of trie generation: + * - setups build context. +- * - analizes given set of rules. ++ * - analyzes given set of rules. + * - builds internal tree(s). + */ + static int +@@ -1643,7 +1649,7 @@ rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg) + /* allocate and fill run-time structures. */ + rc = rte_acl_gen(ctx, bcx.tries, bcx.bld_tries, + bcx.num_tries, bcx.cfg.num_categories, +- RTE_ACL_MAX_FIELDS * RTE_DIM(bcx.tries) * ++ ACL_MAX_INDEXES * RTE_DIM(bcx.tries) * + sizeof(ctx->data_indexes[0]), max_size); + if (rc == 0) { + /* set data indexes. */ +diff --git a/dpdk/lib/librte_acl/acl_run_altivec.h b/dpdk/lib/librte_acl/acl_run_altivec.h +index 2de6f27b1f..24a41eec17 100644 +--- a/dpdk/lib/librte_acl/acl_run_altivec.h ++++ b/dpdk/lib/librte_acl/acl_run_altivec.h +@@ -146,7 +146,7 @@ transition4(xmm_t next_input, const uint64_t *trans, + + dfa_ofs = vec_sub(t, r); + +- /* QUAD/SINGLE caluclations. */ ++ /* QUAD/SINGLE calculations. 
*/ + t = (xmm_t)vec_cmpgt((vector signed char)in, (vector signed char)tr_hi); + t = (xmm_t)vec_sel( + vec_sel( +diff --git a/dpdk/lib/librte_acl/acl_run_avx2.h b/dpdk/lib/librte_acl/acl_run_avx2.h +index d06d2e8782..0b8967f22e 100644 +--- a/dpdk/lib/librte_acl/acl_run_avx2.h ++++ b/dpdk/lib/librte_acl/acl_run_avx2.h +@@ -125,7 +125,7 @@ acl_process_matches_avx2x8(const struct rte_acl_ctx *ctx, + /* For each transition: put low 32 into tr_lo and high 32 into tr_hi */ + ACL_TR_HILO(mm256, __m256, t0, t1, lo, hi); + +- /* Keep transitions wth NOMATCH intact. */ ++ /* Keep transitions with NOMATCH intact. */ + *tr_lo = _mm256_blendv_epi8(*tr_lo, lo, matches); + *tr_hi = _mm256_blendv_epi8(*tr_hi, hi, matches); + } +diff --git a/dpdk/lib/librte_acl/acl_run_avx512.c b/dpdk/lib/librte_acl/acl_run_avx512.c +index 3fd1e33c3f..01c4461362 100644 +--- a/dpdk/lib/librte_acl/acl_run_avx512.c ++++ b/dpdk/lib/librte_acl/acl_run_avx512.c +@@ -64,7 +64,7 @@ update_flow_mask(const struct acl_flow_avx512 *flow, uint32_t *fmsk, + } + + /* +- * Resolve matches for multiple categories (LE 8, use 128b instuctions/regs) ++ * Resolve matches for multiple categories (LE 8, use 128b instructions/regs) + */ + static inline void + resolve_mcle8_avx512x1(uint32_t result[], +diff --git a/dpdk/lib/librte_acl/acl_run_avx512_common.h b/dpdk/lib/librte_acl/acl_run_avx512_common.h +index fafaf591e8..fbad74d459 100644 +--- a/dpdk/lib/librte_acl/acl_run_avx512_common.h ++++ b/dpdk/lib/librte_acl/acl_run_avx512_common.h +@@ -303,6 +303,28 @@ _F_(match_check_process)(struct acl_flow_avx512 *flow, uint32_t fm[2], + } + } + ++static inline void ++_F_(reset_flow_vars)(_T_simd di[2], _T_simd idx[2], _T_simd pdata[4], ++ _T_simd tr_lo[2], _T_simd tr_hi[2]) ++{ ++ di[0] = _M_SI_(setzero)(); ++ di[1] = _M_SI_(setzero)(); ++ ++ idx[0] = _M_SI_(setzero)(); ++ idx[1] = _M_SI_(setzero)(); ++ ++ pdata[0] = _M_SI_(setzero)(); ++ pdata[1] = _M_SI_(setzero)(); ++ pdata[2] = _M_SI_(setzero)(); ++ pdata[3] = _M_SI_(setzero)(); ++ ++ tr_lo[0] = _M_SI_(setzero)(); ++ tr_lo[1] = _M_SI_(setzero)(); ++ ++ tr_hi[0] = _M_SI_(setzero)(); ++ tr_hi[1] = _M_SI_(setzero)(); ++} ++ + /* + * Perform search for up to (2 * _N_) flows in parallel. + * Use two sets of metadata, each serves _N_ flows max. +@@ -313,6 +335,8 @@ _F_(search_trie)(struct acl_flow_avx512 *flow) + uint32_t fm[2]; + _T_simd di[2], idx[2], in[2], pdata[4], tr_lo[2], tr_hi[2]; + ++ _F_(reset_flow_vars)(di, idx, pdata, tr_lo, tr_hi); ++ + /* first 1B load */ + _F_(start_flow)(flow, _SIMD_MASK_BIT_, _SIMD_MASK_MAX_, + &pdata[0], &idx[0], &di[0]); +diff --git a/dpdk/lib/librte_acl/acl_run_avx512x16.h b/dpdk/lib/librte_acl/acl_run_avx512x16.h +index da244bc257..ecd40f6834 100644 +--- a/dpdk/lib/librte_acl/acl_run_avx512x16.h ++++ b/dpdk/lib/librte_acl/acl_run_avx512x16.h +@@ -10,7 +10,7 @@ + */ + + /* +- * This implementation uses 512-bit registers(zmm) and instrincts. ++ * This implementation uses 512-bit registers(zmm) and intrinsics. + * So our main SIMD type is 512-bit width and each such variable can + * process sizeof(__m512i) / sizeof(uint32_t) == 16 entries in parallel. + */ +@@ -25,20 +25,20 @@ + #define _F_(x) x##_avx512x16 + + /* +- * Same instrincts have different syntaxis (depending on the bit-width), ++ * Same intrinsics have different syntaxes (depending on the bit-width), + * so to overcome that few macros need to be defined. + */ + +-/* Naming convention for generic epi(packed integers) type instrincts. */ ++/* Naming convention for generic epi(packed integers) type intrinsics. 
*/ + #define _M_I_(x) _mm512_##x + +-/* Naming convention for si(whole simd integer) type instrincts. */ ++/* Naming convention for si(whole simd integer) type intrinsics. */ + #define _M_SI_(x) _mm512_##x##_si512 + +-/* Naming convention for masked gather type instrincts. */ ++/* Naming convention for masked gather type intrinsics. */ + #define _M_MGI_(x) _mm512_##x + +-/* Naming convention for gather type instrincts. */ ++/* Naming convention for gather type intrinsics. */ + #define _M_GI_(name, idx, base, scale) _mm512_##name(idx, base, scale) + + /* num/mask of transitions per SIMD regs */ +@@ -239,7 +239,7 @@ _F_(gather_bytes)(__m512i zero, const __m512i p[2], const uint32_t m[2], + } + + /* +- * Resolve matches for multiple categories (GT 8, use 512b instuctions/regs) ++ * Resolve matches for multiple categories (GT 8, use 512b instructions/regs) + */ + static inline void + resolve_mcgt8_avx512x1(uint32_t result[], +diff --git a/dpdk/lib/librte_acl/acl_run_avx512x8.h b/dpdk/lib/librte_acl/acl_run_avx512x8.h +index 61ac9d1b47..5da2bbfdeb 100644 +--- a/dpdk/lib/librte_acl/acl_run_avx512x8.h ++++ b/dpdk/lib/librte_acl/acl_run_avx512x8.h +@@ -10,7 +10,7 @@ + */ + + /* +- * This implementation uses 256-bit registers(ymm) and instrincts. ++ * This implementation uses 256-bit registers(ymm) and intrinsics. + * So our main SIMD type is 256-bit width and each such variable can + * process sizeof(__m256i) / sizeof(uint32_t) == 8 entries in parallel. + */ +@@ -25,20 +25,20 @@ + #define _F_(x) x##_avx512x8 + + /* +- * Same instrincts have different syntaxis (depending on the bit-width), ++ * Same intrinsics have different syntaxes (depending on the bit-width), + * so to overcome that few macros need to be defined. + */ + +-/* Naming convention for generic epi(packed integers) type instrincts. */ ++/* Naming convention for generic epi(packed integers) type intrinsics. */ + #define _M_I_(x) _mm256_##x + +-/* Naming convention for si(whole simd integer) type instrincts. */ ++/* Naming convention for si(whole simd integer) type intrinsics. */ + #define _M_SI_(x) _mm256_##x##_si256 + +-/* Naming convention for masked gather type instrincts. */ ++/* Naming convention for masked gather type intrinsics. */ + #define _M_MGI_(x) _mm256_m##x + +-/* Naming convention for gather type instrincts. */ ++/* Naming convention for gather type intrinsics. */ + #define _M_GI_(name, idx, base, scale) _mm256_##name(base, idx, scale) + + /* num/mask of transitions per SIMD regs */ +diff --git a/dpdk/lib/librte_acl/meson.build b/dpdk/lib/librte_acl/meson.build +index ee4e229e59..c261101de7 100644 +--- a/dpdk/lib/librte_acl/meson.build ++++ b/dpdk/lib/librte_acl/meson.build +@@ -30,7 +30,7 @@ if dpdk_conf.has('RTE_ARCH_X86') + # compile AVX512 version if: + # we are building 64-bit binary AND binutils can generate proper code + +- if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok.returncode() == 0 ++ if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok + + # compile AVX512 version if either: + # a. 
we have AVX512 supported in minimum instruction set +diff --git a/dpdk/lib/librte_acl/rte_acl_osdep.h b/dpdk/lib/librte_acl/rte_acl_osdep.h +index b2c262dee7..3c1dc402ca 100644 +--- a/dpdk/lib/librte_acl/rte_acl_osdep.h ++++ b/dpdk/lib/librte_acl/rte_acl_osdep.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_ACL_OSDEP_H_ + #define _RTE_ACL_OSDEP_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** + * @file + * +@@ -45,4 +49,8 @@ + #include + #include + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_ACL_OSDEP_H_ */ +diff --git a/dpdk/lib/librte_bbdev/rte_bbdev.c b/dpdk/lib/librte_bbdev/rte_bbdev.c +index 5ba891c232..310de3761d 100644 +--- a/dpdk/lib/librte_bbdev/rte_bbdev.c ++++ b/dpdk/lib/librte_bbdev/rte_bbdev.c +@@ -138,7 +138,7 @@ rte_bbdev_data_alloc(void) + } + + /* +- * Find data alocated for the device or if not found return first unused bbdev ++ * Find data allocated for the device or if not found return first unused bbdev + * data. If all structures are in use and none is used by the device return + * NULL. + */ +diff --git a/dpdk/lib/librte_bbdev/rte_bbdev.h b/dpdk/lib/librte_bbdev/rte_bbdev.h +index 7017124414..7fbc75f572 100644 +--- a/dpdk/lib/librte_bbdev/rte_bbdev.h ++++ b/dpdk/lib/librte_bbdev/rte_bbdev.h +@@ -174,7 +174,7 @@ rte_bbdev_queue_configure(uint16_t dev_id, uint16_t queue_id, + * + * @return + * - 0 on success +- * - negative value on failure - as returned from PMD driver ++ * - negative value on failure - as returned from PMD + */ + __rte_experimental + int +@@ -220,7 +220,7 @@ rte_bbdev_close(uint16_t dev_id); + * + * @return + * - 0 on success +- * - negative value on failure - as returned from PMD driver ++ * - negative value on failure - as returned from PMD + */ + __rte_experimental + int +@@ -236,7 +236,7 @@ rte_bbdev_queue_start(uint16_t dev_id, uint16_t queue_id); + * + * @return + * - 0 on success +- * - negative value on failure - as returned from PMD driver ++ * - negative value on failure - as returned from PMD + */ + __rte_experimental + int +@@ -807,7 +807,7 @@ rte_bbdev_callback_unregister(uint16_t dev_id, enum rte_bbdev_event_type event, + * + * @return + * - 0 on success +- * - negative value on failure - as returned from PMD driver ++ * - negative value on failure - as returned from PMD + */ + __rte_experimental + int +@@ -824,7 +824,7 @@ rte_bbdev_queue_intr_enable(uint16_t dev_id, uint16_t queue_id); + * + * @return + * - 0 on success +- * - negative value on failure - as returned from PMD driver ++ * - negative value on failure - as returned from PMD + */ + __rte_experimental + int +@@ -852,7 +852,7 @@ rte_bbdev_queue_intr_disable(uint16_t dev_id, uint16_t queue_id); + * @return + * - 0 on success + * - ENOTSUP if interrupts are not supported by the identified device +- * - negative value on failure - as returned from PMD driver ++ * - negative value on failure - as returned from PMD + */ + __rte_experimental + int +diff --git a/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h b/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h +index 237e3361d7..b8a5cb4015 100644 +--- a/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h ++++ b/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h +@@ -76,7 +76,7 @@ struct rte_bbdev * + rte_bbdev_get_named_dev(const char *name); + + /** +- * Definitions of all functions exported by a driver through the the generic ++ * Definitions of all functions exported by a driver through the generic + * structure of type *rte_bbdev_ops* supplied in the *rte_bbdev* structure + * associated with a device. 
+ */ +diff --git a/dpdk/lib/librte_bitratestats/rte_bitrate.c b/dpdk/lib/librte_bitratestats/rte_bitrate.c +index 8fd9f47288..1664e4863b 100644 +--- a/dpdk/lib/librte_bitratestats/rte_bitrate.c ++++ b/dpdk/lib/librte_bitratestats/rte_bitrate.c +@@ -55,8 +55,10 @@ rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data) + return -EINVAL; + + return_value = rte_metrics_reg_names(&names[0], RTE_DIM(names)); +- if (return_value >= 0) ++ if (return_value >= 0) { + bitrate_data->id_stats_set = return_value; ++ return 0; ++ } + return return_value; + } + +@@ -78,7 +80,7 @@ rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data, + + ret_code = rte_eth_stats_get(port_id, ð_stats); + if (ret_code != 0) +- return ret_code; ++ return ret_code < 0 ? ret_code : -ret_code; + + port_data = &bitrate_data->port_stats[port_id]; + +diff --git a/dpdk/lib/librte_bpf/bpf_impl.h b/dpdk/lib/librte_bpf/bpf_impl.h +index 03ba0ae112..8f5dd10212 100644 +--- a/dpdk/lib/librte_bpf/bpf_impl.h ++++ b/dpdk/lib/librte_bpf/bpf_impl.h +@@ -2,8 +2,8 @@ + * Copyright(c) 2018 Intel Corporation + */ + +-#ifndef _BPF_H_ +-#define _BPF_H_ ++#ifndef BPF_IMPL_H ++#define BPF_IMPL_H + + #include + #include +@@ -51,4 +51,4 @@ bpf_size(uint32_t bpf_op_sz) + } + #endif + +-#endif /* _BPF_H_ */ ++#endif /* BPF_IMPL_H */ +diff --git a/dpdk/lib/librte_bpf/bpf_jit_x86.c b/dpdk/lib/librte_bpf/bpf_jit_x86.c +index aa22ea78a0..518513376a 100644 +--- a/dpdk/lib/librte_bpf/bpf_jit_x86.c ++++ b/dpdk/lib/librte_bpf/bpf_jit_x86.c +@@ -1245,7 +1245,7 @@ emit_epilog(struct bpf_jit_state *st) + uint32_t i; + int32_t spil, ofs; + +- /* if we allready have an epilog generate a jump to it */ ++ /* if we already have an epilog generate a jump to it */ + if (st->exit.num++ != 0) { + emit_abs_jmp(st, st->exit.off); + return; +diff --git a/dpdk/lib/librte_bpf/bpf_load_elf.c b/dpdk/lib/librte_bpf/bpf_load_elf.c +index 2b11adeb5e..02a5d8ba0d 100644 +--- a/dpdk/lib/librte_bpf/bpf_load_elf.c ++++ b/dpdk/lib/librte_bpf/bpf_load_elf.c +@@ -80,7 +80,7 @@ resolve_xsym(const char *sn, size_t ofs, struct ebpf_insn *ins, size_t ins_sz, + if (type == RTE_BPF_XTYPE_FUNC) { + + /* we don't support multiple functions per BPF module, +- * so treat EBPF_PSEUDO_CALL to extrernal function ++ * so treat EBPF_PSEUDO_CALL to external function + * as an ordinary EBPF_CALL. + */ + if (ins[idx].src_reg == EBPF_PSEUDO_CALL) { +diff --git a/dpdk/lib/librte_bpf/bpf_pkt.c b/dpdk/lib/librte_bpf/bpf_pkt.c +index 6e8248f0d6..701e8e2c62 100644 +--- a/dpdk/lib/librte_bpf/bpf_pkt.c ++++ b/dpdk/lib/librte_bpf/bpf_pkt.c +@@ -169,7 +169,7 @@ bpf_eth_cbh_add(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue) + } + + /* +- * BPF packet processing routinies. ++ * BPF packet processing routines. + */ + + static inline uint32_t +diff --git a/dpdk/lib/librte_bpf/bpf_validate.c b/dpdk/lib/librte_bpf/bpf_validate.c +index 9214f15038..09331258eb 100644 +--- a/dpdk/lib/librte_bpf/bpf_validate.c ++++ b/dpdk/lib/librte_bpf/bpf_validate.c +@@ -661,8 +661,15 @@ eval_alu(struct bpf_verifier *bvf, const struct ebpf_insn *ins) + + op = BPF_OP(ins->code); + ++ /* Allow self-xor as way to zero register */ ++ if (op == BPF_XOR && BPF_SRC(ins->code) == BPF_X && ++ ins->src_reg == ins->dst_reg) { ++ eval_fill_imm(&rs, UINT64_MAX, 0); ++ eval_fill_imm(rd, UINT64_MAX, 0); ++ } ++ + err = eval_defined((op != EBPF_MOV) ? rd : NULL, +- (op != BPF_NEG) ? &rs : NULL); ++ (op != BPF_NEG) ? 
&rs : NULL); + if (err != NULL) + return err; + +@@ -1115,7 +1122,7 @@ eval_jcc(struct bpf_verifier *bvf, const struct ebpf_insn *ins) + eval_jsgt_jsle(trd, trs, frd, frs); + else if (op == EBPF_JSLE) + eval_jsgt_jsle(frd, frs, trd, trs); +- else if (op == EBPF_JLT) ++ else if (op == EBPF_JSLT) + eval_jslt_jsge(trd, trs, frd, frs); + else if (op == EBPF_JSGE) + eval_jslt_jsge(frd, frs, trd, trs); +@@ -1723,7 +1730,7 @@ static const struct bpf_ins_check ins_chk[UINT8_MAX + 1] = { + + /* + * make sure that instruction syntax is valid, +- * and it fields don't violate partciular instrcution type restrictions. ++ * and its fields don't violate particular instruction type restrictions. + */ + static const char * + check_syntax(const struct ebpf_insn *ins) +@@ -1954,7 +1961,7 @@ log_loop(const struct bpf_verifier *bvf) + * First pass goes though all instructions in the set, checks that each + * instruction is a valid one (correct syntax, valid field values, etc.) + * and constructs control flow graph (CFG). +- * Then deapth-first search is performed over the constructed graph. ++ * Then depth-first search is performed over the constructed graph. + * Programs with unreachable instructions and/or loops will be rejected. + */ + static int +@@ -1981,7 +1988,7 @@ validate(struct bpf_verifier *bvf) + + /* + * construct CFG, jcc nodes have to outgoing edges, +- * 'exit' nodes - none, all others nodes have exaclty one ++ * 'exit' nodes - none, all other nodes have exactly one + * outgoing edge. + */ + switch (ins->code) { +@@ -2251,7 +2258,7 @@ evaluate(struct bpf_verifier *bvf) + idx = get_node_idx(bvf, node); + op = ins[idx].code; + +- /* for jcc node make a copy of evaluatoion state */ ++ /* for jcc node make a copy of evaluation state */ + if (node->nb_edge > 1) + rc |= save_eval_state(bvf, node); + +diff --git a/dpdk/lib/librte_compressdev/rte_compressdev_internal.h b/dpdk/lib/librte_compressdev/rte_compressdev_internal.h +index 22ceac66e2..b3b193e3ee 100644 +--- a/dpdk/lib/librte_compressdev/rte_compressdev_internal.h ++++ b/dpdk/lib/librte_compressdev/rte_compressdev_internal.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_COMPRESSDEV_INTERNAL_H_ + #define _RTE_COMPRESSDEV_INTERNAL_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /* rte_compressdev_internal.h + * This file holds Compressdev private data structures. + */ +@@ -18,7 +22,7 @@ + /* Logging Macros */ + extern int compressdev_logtype; + #define COMPRESSDEV_LOG(level, fmt, args...) 
\ +- rte_log(RTE_LOG_ ## level, compressdev_logtype, "%s(): "fmt "\n", \ ++ rte_log(RTE_LOG_ ## level, compressdev_logtype, "%s(): " fmt "\n", \ + __func__, ##args) + + /** +@@ -94,7 +98,7 @@ struct rte_compressdev { + struct rte_compressdev_data { + uint8_t dev_id; + /**< Compress device identifier */ +- uint8_t socket_id; ++ int socket_id; + /**< Socket identifier where memory is allocated */ + char name[RTE_COMPRESSDEV_NAME_MAX_LEN]; + /**< Unique identifier name */ +@@ -111,4 +115,9 @@ struct rte_compressdev_data { + void *dev_private; + /**< PMD-specific private data */ + } __rte_cache_aligned; ++ ++#ifdef __cplusplus ++} ++#endif ++ + #endif +diff --git a/dpdk/lib/librte_compressdev/rte_compressdev_pmd.h b/dpdk/lib/librte_compressdev/rte_compressdev_pmd.h +index 16b6bc6b35..f9a42d1f05 100644 +--- a/dpdk/lib/librte_compressdev/rte_compressdev_pmd.h ++++ b/dpdk/lib/librte_compressdev/rte_compressdev_pmd.h +@@ -64,7 +64,7 @@ struct rte_compressdev * + rte_compressdev_pmd_get_named_dev(const char *name); + + /** +- * Definitions of all functions exported by a driver through the ++ * Definitions of all functions exported by a driver through + * the generic structure of type *comp_dev_ops* supplied in the + * *rte_compressdev* structure associated with a device. + */ +@@ -319,7 +319,7 @@ rte_compressdev_pmd_release_device(struct rte_compressdev *dev); + * PMD assist function to parse initialisation arguments for comp driver + * when creating a new comp PMD device instance. + * +- * PMD driver should set default values for that PMD before calling function, ++ * PMD should set default values for that PMD before calling function, + * these default values will be over-written with successfully parsed values + * from args string. + * +diff --git a/dpdk/lib/librte_cryptodev/rte_crypto.h b/dpdk/lib/librte_cryptodev/rte_crypto.h +index fd5ef3a876..2ba12cff2e 100644 +--- a/dpdk/lib/librte_cryptodev/rte_crypto.h ++++ b/dpdk/lib/librte_cryptodev/rte_crypto.h +@@ -113,15 +113,24 @@ struct rte_crypto_op { + rte_iova_t phys_addr; + /**< physical address of crypto operation */ + ++/* empty structures do not have zero size in C++ leading to compilation errors ++ * with clang about structure/union having different sizes in C and C++. 
++ * While things are clearer with an explicit union, since each field is ++ * zero-sized it's not actually needed, so omit it for C++ ++ */ ++#ifndef __cplusplus + __extension__ + union { ++#endif + struct rte_crypto_sym_op sym[0]; + /**< Symmetric operation parameters */ + + struct rte_crypto_asym_op asym[0]; + /**< Asymmetric operation parameters */ + ++#ifndef __cplusplus + }; /**< operation specific parameters */ ++#endif + }; + + /** +diff --git a/dpdk/lib/librte_cryptodev/rte_crypto_asym.h b/dpdk/lib/librte_cryptodev/rte_crypto_asym.h +index 9c866f553f..d59e05323e 100644 +--- a/dpdk/lib/librte_cryptodev/rte_crypto_asym.h ++++ b/dpdk/lib/librte_cryptodev/rte_crypto_asym.h +@@ -146,10 +146,12 @@ enum rte_crypto_rsa_padding_type { + enum rte_crypto_rsa_priv_key_type { + RTE_RSA_KEY_TYPE_EXP, + /**< RSA private key is an exponent */ +- RTE_RSA_KET_TYPE_QT, ++ RTE_RSA_KEY_TYPE_QT, + /**< RSA private key is in quintuple format + * See rte_crypto_rsa_priv_key_qt + */ ++ RTE_RSA_KET_TYPE_QT = RTE_RSA_KEY_TYPE_QT, ++ /**< Backward-compatible definition of old name */ + }; + + /** +diff --git a/dpdk/lib/librte_cryptodev/rte_crypto_sym.h b/dpdk/lib/librte_cryptodev/rte_crypto_sym.h +index 9d572ec057..406c5c7a5c 100644 +--- a/dpdk/lib/librte_cryptodev/rte_crypto_sym.h ++++ b/dpdk/lib/librte_cryptodev/rte_crypto_sym.h +@@ -963,6 +963,7 @@ rte_crypto_mbuf_to_vec(const struct rte_mbuf *mb, uint32_t ofs, uint32_t len, + /* whole requested data is completed */ + vec[i].len = left; + left = 0; ++ i++; + break; + } + +@@ -972,7 +973,7 @@ rte_crypto_mbuf_to_vec(const struct rte_mbuf *mb, uint32_t ofs, uint32_t len, + } + + RTE_ASSERT(left == 0); +- return i + 1; ++ return i; + } + + +diff --git a/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.c b/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.c +index 0912004127..e342daabc4 100644 +--- a/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.c ++++ b/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.c +@@ -140,6 +140,7 @@ int + rte_cryptodev_pmd_destroy(struct rte_cryptodev *cryptodev) + { + int retval; ++ void *dev_priv = cryptodev->data->dev_private; + + CDEV_LOG_INFO("Closing crypto device %s", cryptodev->device->name); + +@@ -149,7 +150,7 @@ rte_cryptodev_pmd_destroy(struct rte_cryptodev *cryptodev) + return retval; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) +- rte_free(cryptodev->data->dev_private); ++ rte_free(dev_priv); + + + cryptodev->device = NULL; +diff --git a/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.h b/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.h +index 1274436870..6c46acf7c2 100644 +--- a/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.h ++++ b/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.h +@@ -113,7 +113,7 @@ extern struct rte_cryptodev *rte_cryptodevs; + + /** + * Definitions of all functions exported by a driver through the +- * the generic structure of type *crypto_dev_ops* supplied in the ++ * generic structure of type *crypto_dev_ops* supplied in the + * *rte_cryptodev* structure associated with a device. + */ + +@@ -435,7 +435,7 @@ rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev); + * PMD assist function to parse initialisation arguments for crypto driver + * when creating a new crypto PMD device instance. + * +- * PMD driver should set default values for that PMD before calling function, ++ * PMD should set default values for that PMD before calling function, + * these default values will be over-written with successfully parsed values + * from args string. 
+ * +diff --git a/dpdk/lib/librte_distributor/rte_distributor.c b/dpdk/lib/librte_distributor/rte_distributor.c +index 07e385a259..c210cf86bd 100644 +--- a/dpdk/lib/librte_distributor/rte_distributor.c ++++ b/dpdk/lib/librte_distributor/rte_distributor.c +@@ -478,7 +478,7 @@ rte_distributor_process(struct rte_distributor *d, + return 0; + + while (next_idx < num_mbufs) { +- uint16_t matches[RTE_DIST_BURST_SIZE]; ++ uint16_t matches[RTE_DIST_BURST_SIZE] __rte_aligned(128); + unsigned int pkts; + + if ((num_mbufs - next_idx) < RTE_DIST_BURST_SIZE) +diff --git a/dpdk/lib/librte_distributor/rte_distributor_single.c b/dpdk/lib/librte_distributor/rte_distributor_single.c +index f4725b1d0b..e8a13ce980 100644 +--- a/dpdk/lib/librte_distributor/rte_distributor_single.c ++++ b/dpdk/lib/librte_distributor/rte_distributor_single.c +@@ -249,8 +249,7 @@ rte_distributor_process_single(struct rte_distributor_single *d, + * worker given by the bit-position + */ + for (i = 0; i < d->num_workers; i++) +- match |= (!(d->in_flight_tags[i] ^ new_tag) +- << i); ++ match |= ((uint64_t)!(d->in_flight_tags[i] ^ new_tag) << i); + + /* Only turned-on bits are considered as match */ + match &= d->in_flight_bitmask; +diff --git a/dpdk/lib/librte_eal/arm/include/rte_cycles_32.h b/dpdk/lib/librte_eal/arm/include/rte_cycles_32.h +index f79718ce8c..cec4d69e7a 100644 +--- a/dpdk/lib/librte_eal/arm/include/rte_cycles_32.h ++++ b/dpdk/lib/librte_eal/arm/include/rte_cycles_32.h +@@ -30,7 +30,7 @@ extern "C" { + + /** + * This call is easily portable to any architecture, however, +- * it may require a system call and inprecise for some tasks. ++ * it may require a system call and imprecise for some tasks. + */ + static inline uint64_t + __rte_rdtsc_syscall(void) +diff --git a/dpdk/lib/librte_eal/arm/rte_cpuflags.c b/dpdk/lib/librte_eal/arm/rte_cpuflags.c +index e3a53bcece..845770f1e5 100644 +--- a/dpdk/lib/librte_eal/arm/rte_cpuflags.c ++++ b/dpdk/lib/librte_eal/arm/rte_cpuflags.c +@@ -108,7 +108,7 @@ const struct feature_entry rte_cpu_feature_table[] = { + FEAT_DEF(SVEF32MM, REG_HWCAP2, 10) + FEAT_DEF(SVEF64MM, REG_HWCAP2, 11) + FEAT_DEF(SVEBF16, REG_HWCAP2, 12) +- FEAT_DEF(AARCH64, REG_PLATFORM, 1) ++ FEAT_DEF(AARCH64, REG_PLATFORM, 0) + }; + #endif /* RTE_ARCH */ + +diff --git a/dpdk/lib/librte_eal/common/eal_common_dev.c b/dpdk/lib/librte_eal/common/eal_common_dev.c +index 8a3bd3100a..3358f1328a 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_dev.c ++++ b/dpdk/lib/librte_eal/common/eal_common_dev.c +@@ -575,7 +575,7 @@ int + rte_dev_iterator_init(struct rte_dev_iterator *it, + const char *dev_str) + { +- struct rte_devargs devargs; ++ struct rte_devargs devargs = { .bus = NULL }; + struct rte_class *cls = NULL; + struct rte_bus *bus = NULL; + +diff --git a/dpdk/lib/librte_eal/common/eal_common_dynmem.c b/dpdk/lib/librte_eal/common/eal_common_dynmem.c +index 7c5437ddfa..c1e1889f5c 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_dynmem.c ++++ b/dpdk/lib/librte_eal/common/eal_common_dynmem.c +@@ -304,6 +304,10 @@ eal_dynmem_hugepage_init(void) + needed = num_pages - num_pages_alloc; + + pages = malloc(sizeof(*pages) * needed); ++ if (pages == NULL) { ++ RTE_LOG(ERR, EAL, "Failed to malloc pages\n"); ++ return -1; ++ } + + /* do not request exact number of pages */ + cur_pages = eal_memalloc_alloc_seg_bulk(pages, +diff --git a/dpdk/lib/librte_eal/common/eal_common_fbarray.c b/dpdk/lib/librte_eal/common/eal_common_fbarray.c +index d974f3dab7..592ec58594 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_fbarray.c ++++ 
b/dpdk/lib/librte_eal/common/eal_common_fbarray.c +@@ -81,9 +81,8 @@ get_used_mask(void *data, unsigned int elt_sz, unsigned int len) + } + + static int +-resize_and_map(int fd, void *addr, size_t len) ++resize_and_map(int fd, const char *path, void *addr, size_t len) + { +- char path[PATH_MAX]; + void *map_addr; + + if (eal_file_truncate(fd, len)) { +@@ -792,7 +791,7 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len, + if (eal_file_lock(fd, EAL_FLOCK_SHARED, EAL_FLOCK_RETURN)) + goto fail; + +- if (resize_and_map(fd, data, mmap_len)) ++ if (resize_and_map(fd, path, data, mmap_len)) + goto fail; + } + ma->addr = data; +@@ -895,7 +894,7 @@ rte_fbarray_attach(struct rte_fbarray *arr) + if (eal_file_lock(fd, EAL_FLOCK_SHARED, EAL_FLOCK_RETURN)) + goto fail; + +- if (resize_and_map(fd, data, mmap_len)) ++ if (resize_and_map(fd, path, data, mmap_len)) + goto fail; + + /* store our new memory area */ +diff --git a/dpdk/lib/librte_eal/common/eal_common_options.c b/dpdk/lib/librte_eal/common/eal_common_options.c +index 622c7bc429..977ba21c51 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_options.c ++++ b/dpdk/lib/librte_eal/common/eal_common_options.c +@@ -228,9 +228,9 @@ eal_save_args(int argc, char **argv) + return -1; + + for (i = 0; i < argc; i++) { +- eal_args[i] = strdup(argv[i]); + if (strcmp(argv[i], "--") == 0) + break; ++ eal_args[i] = strdup(argv[i]); + } + eal_args[i++] = NULL; /* always finish with NULL */ + +@@ -508,10 +508,14 @@ is_shared_build(void) + } + + while (len >= minlen) { ++ void *handle; ++ + /* check if we have this .so loaded, if so - shared build */ + RTE_LOG(DEBUG, EAL, "Checking presence of .so '%s'\n", soname); +- if (dlopen(soname, RTLD_LAZY | RTLD_NOLOAD) != NULL) { ++ handle = dlopen(soname, RTLD_LAZY | RTLD_NOLOAD); ++ if (handle != NULL) { + RTE_LOG(INFO, EAL, "Detected shared linkage of DPDK\n"); ++ dlclose(handle); + return 1; + } + +@@ -757,10 +761,10 @@ static int + eal_parse_service_corelist(const char *corelist) + { + struct rte_config *cfg = rte_eal_get_configuration(); +- int i, idx = 0; ++ int i; + unsigned count = 0; + char *end = NULL; +- int min, max; ++ uint32_t min, max, idx; + uint32_t taken_lcore_count = 0; + + if (corelist == NULL) +@@ -784,6 +788,8 @@ eal_parse_service_corelist(const char *corelist) + idx = strtoul(corelist, &end, 10); + if (errno || end == NULL) + return -1; ++ if (idx >= RTE_MAX_LCORE) ++ return -1; + while (isblank(*end)) + end++; + if (*end == '-') { +diff --git a/dpdk/lib/librte_eal/common/eal_common_proc.c b/dpdk/lib/librte_eal/common/eal_common_proc.c +index 6d1af3c0e7..b33d58ea0a 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_proc.c ++++ b/dpdk/lib/librte_eal/common/eal_common_proc.c +@@ -35,6 +35,7 @@ + #include "eal_internal_cfg.h" + + static int mp_fd = -1; ++static pthread_t mp_handle_tid; + static char mp_filter[PATH_MAX]; /* Filter for secondary process sockets */ + static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */ + static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER; +@@ -281,8 +282,17 @@ read_msg(struct mp_msg_internal *m, struct sockaddr_un *s) + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + ++retry: + msglen = recvmsg(mp_fd, &msgh, 0); ++ ++ /* zero length message means socket was closed */ ++ if (msglen == 0) ++ return 0; ++ + if (msglen < 0) { ++ if (errno == EINTR) ++ goto retry; ++ + RTE_LOG(ERR, EAL, "recvmsg failed, %s\n", strerror(errno)); + return -1; + } +@@ -310,7 +320,7 @@ read_msg(struct 
mp_msg_internal *m, struct sockaddr_un *s) + RTE_LOG(ERR, EAL, "invalid received data length\n"); + return -1; + } +- return 0; ++ return msglen; + } + + static void +@@ -383,9 +393,14 @@ mp_handle(void *arg __rte_unused) + struct mp_msg_internal msg; + struct sockaddr_un sa; + +- while (1) { +- if (read_msg(&msg, &sa) == 0) +- process_msg(&msg, &sa); ++ while (mp_fd >= 0) { ++ int ret; ++ ++ ret = read_msg(&msg, &sa); ++ if (ret <= 0) ++ break; ++ ++ process_msg(&msg, &sa); + } + + return NULL; +@@ -490,14 +505,11 @@ async_reply_handle_thread_unsafe(void *arg) + struct pending_request *req = (struct pending_request *)arg; + enum async_action action; + struct timespec ts_now; +- struct timeval now; + +- if (gettimeofday(&now, NULL) < 0) { ++ if (clock_gettime(CLOCK_MONOTONIC, &ts_now) < 0) { + RTE_LOG(ERR, EAL, "Cannot get current time\n"); + goto no_trigger; + } +- ts_now.tv_nsec = now.tv_usec * 1000; +- ts_now.tv_sec = now.tv_sec; + + action = process_async_request(req, &ts_now); + +@@ -570,14 +582,11 @@ open_socket_fd(void) + } + + static void +-close_socket_fd(void) ++close_socket_fd(int fd) + { + char path[PATH_MAX]; + +- if (mp_fd < 0) +- return; +- +- close(mp_fd); ++ close(fd); + create_socket_path(peer_name, path, sizeof(path)); + unlink(path); + } +@@ -587,7 +596,6 @@ rte_mp_channel_init(void) + { + char path[PATH_MAX]; + int dir_fd; +- pthread_t mp_handle_tid; + const struct internal_config *internal_conf = + eal_get_internal_configuration(); + +@@ -648,7 +656,16 @@ rte_mp_channel_init(void) + void + rte_mp_channel_cleanup(void) + { +- close_socket_fd(); ++ int fd; ++ ++ if (mp_fd < 0) ++ return; ++ ++ fd = mp_fd; ++ mp_fd = -1; ++ pthread_cancel(mp_handle_tid); ++ pthread_join(mp_handle_tid, NULL); ++ close_socket_fd(fd); + } + + /** +@@ -896,6 +913,7 @@ mp_request_sync(const char *dst, struct rte_mp_msg *req, + struct rte_mp_reply *reply, const struct timespec *ts) + { + int ret; ++ pthread_condattr_t attr; + struct rte_mp_msg msg, *tmp; + struct pending_request pending_req, *exist; + +@@ -904,7 +922,9 @@ mp_request_sync(const char *dst, struct rte_mp_msg *req, + strlcpy(pending_req.dst, dst, sizeof(pending_req.dst)); + pending_req.request = req; + pending_req.reply = &msg; +- pthread_cond_init(&pending_req.sync.cond, NULL); ++ pthread_condattr_init(&attr); ++ pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); ++ pthread_cond_init(&pending_req.sync.cond, &attr); + + exist = find_pending_request(dst, req->name); + if (exist) { +@@ -967,8 +987,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, + int dir_fd, ret = -1; + DIR *mp_dir; + struct dirent *ent; +- struct timeval now; +- struct timespec end; ++ struct timespec now, end; + const struct internal_config *internal_conf = + eal_get_internal_configuration(); + +@@ -987,15 +1006,15 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, + return -1; + } + +- if (gettimeofday(&now, NULL) < 0) { ++ if (clock_gettime(CLOCK_MONOTONIC, &now) < 0) { + RTE_LOG(ERR, EAL, "Failed to get current time\n"); + rte_errno = errno; + goto end; + } + +- end.tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000; ++ end.tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000; + end.tv_sec = now.tv_sec + ts->tv_sec + +- (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000; ++ (now.tv_nsec + ts->tv_nsec) / 1000000000; + + /* for secondary process, send request to the primary process only */ + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { +@@ -1069,7 +1088,7 @@ rte_mp_request_async(struct rte_mp_msg *req, 
const struct timespec *ts, + int dir_fd, ret = 0; + DIR *mp_dir; + struct dirent *ent; +- struct timeval now; ++ struct timespec now; + struct timespec *end; + bool dummy_used = false; + const struct internal_config *internal_conf = +@@ -1086,7 +1105,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts, + return -1; + } + +- if (gettimeofday(&now, NULL) < 0) { ++ if (clock_gettime(CLOCK_MONOTONIC, &now) < 0) { + RTE_LOG(ERR, EAL, "Failed to get current time\n"); + rte_errno = errno; + return -1; +@@ -1108,9 +1127,9 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts, + end = ¶m->end; + reply = ¶m->user_reply; + +- end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000; ++ end->tv_nsec = (now.tv_nsec + ts->tv_nsec) % 1000000000; + end->tv_sec = now.tv_sec + ts->tv_sec + +- (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000; ++ (now.tv_nsec + ts->tv_nsec) / 1000000000; + reply->nb_sent = 0; + reply->nb_received = 0; + reply->msgs = NULL; +diff --git a/dpdk/lib/librte_eal/common/eal_common_thread.c b/dpdk/lib/librte_eal/common/eal_common_thread.c +index 73a055902a..1a52f42a2b 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_thread.c ++++ b/dpdk/lib/librte_eal/common/eal_common_thread.c +@@ -170,25 +170,34 @@ struct rte_thread_ctrl_params { + void *(*start_routine)(void *); + void *arg; + pthread_barrier_t configured; ++ unsigned int refcnt; + }; + ++static void ctrl_params_free(struct rte_thread_ctrl_params *params) ++{ ++ if (__atomic_sub_fetch(¶ms->refcnt, 1, __ATOMIC_ACQ_REL) == 0) { ++ (void)pthread_barrier_destroy(¶ms->configured); ++ free(params); ++ } ++} ++ + static void *ctrl_thread_init(void *arg) + { +- int ret; + struct internal_config *internal_conf = + eal_get_internal_configuration(); + rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; + struct rte_thread_ctrl_params *params = arg; +- void *(*start_routine)(void *) = params->start_routine; ++ void *(*start_routine)(void *); + void *routine_arg = params->arg; + + __rte_thread_init(rte_lcore_id(), cpuset); + +- ret = pthread_barrier_wait(¶ms->configured); +- if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { +- pthread_barrier_destroy(¶ms->configured); +- free(params); +- } ++ pthread_barrier_wait(¶ms->configured); ++ start_routine = params->start_routine; ++ ctrl_params_free(params); ++ ++ if (start_routine == NULL) ++ return NULL; + + return start_routine(routine_arg); + } +@@ -210,14 +219,15 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, + + params->start_routine = start_routine; + params->arg = arg; ++ params->refcnt = 2; + +- pthread_barrier_init(¶ms->configured, NULL, 2); ++ ret = pthread_barrier_init(¶ms->configured, NULL, 2); ++ if (ret != 0) ++ goto fail_no_barrier; + + ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params); +- if (ret != 0) { +- free(params); +- return -ret; +- } ++ if (ret != 0) ++ goto fail_with_barrier; + + if (name != NULL) { + ret = rte_thread_setname(*thread, name); +@@ -227,25 +237,25 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, + } + + ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); +- if (ret) +- goto fail; ++ if (ret != 0) ++ params->start_routine = NULL; + +- ret = pthread_barrier_wait(¶ms->configured); +- if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { +- pthread_barrier_destroy(¶ms->configured); +- free(params); +- } ++ pthread_barrier_wait(¶ms->configured); ++ ctrl_params_free(params); + +- return 0; ++ if (ret != 0) ++ /* start_routine has been set to NULL above; */ ++ /* ctrl 
thread will exit immediately */ ++ pthread_join(*thread, NULL); ++ ++ return -ret; ++ ++fail_with_barrier: ++ (void)pthread_barrier_destroy(¶ms->configured); ++ ++fail_no_barrier: ++ free(params); + +-fail: +- if (PTHREAD_BARRIER_SERIAL_THREAD == +- pthread_barrier_wait(¶ms->configured)) { +- pthread_barrier_destroy(¶ms->configured); +- free(params); +- } +- pthread_cancel(*thread); +- pthread_join(*thread, NULL); + return -ret; + } + +diff --git a/dpdk/lib/librte_eal/common/eal_common_trace_utils.c b/dpdk/lib/librte_eal/common/eal_common_trace_utils.c +index 64f58fb66a..2b55dbec65 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_trace_utils.c ++++ b/dpdk/lib/librte_eal/common/eal_common_trace_utils.c +@@ -104,13 +104,15 @@ trace_session_name_generate(char *trace_dir) + rc = rte_strscpy(trace_dir, eal_get_hugefile_prefix(), + TRACE_PREFIX_LEN); + if (rc == -E2BIG) +- rc = TRACE_PREFIX_LEN; ++ rc = TRACE_PREFIX_LEN - 1; + trace_dir[rc++] = '-'; + + rc = strftime(trace_dir + rc, TRACE_DIR_STR_LEN - rc, + "%Y-%m-%d-%p-%I-%M-%S", tm_result); +- if (rc == 0) ++ if (rc == 0) { ++ errno = ENOSPC; + goto fail; ++ } + + return rc; + fail: +diff --git a/dpdk/lib/librte_eal/common/malloc_heap.c b/dpdk/lib/librte_eal/common/malloc_heap.c +index 5a09247a65..f4e20eab92 100644 +--- a/dpdk/lib/librte_eal/common/malloc_heap.c ++++ b/dpdk/lib/librte_eal/common/malloc_heap.c +@@ -397,7 +397,7 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz, + bool callback_triggered = false; + + alloc_sz = RTE_ALIGN_CEIL(align + elt_size + +- MALLOC_ELEM_TRAILER_LEN, pg_sz); ++ MALLOC_ELEM_OVERHEAD, pg_sz); + n_segs = alloc_sz / pg_sz; + + /* we can't know in advance how many pages we'll need, so we malloc */ +@@ -693,6 +693,26 @@ malloc_heap_alloc_on_heap_id(const char *type, size_t size, + return ret; + } + ++static unsigned int ++malloc_get_numa_socket(void) ++{ ++ const struct internal_config *conf = eal_get_internal_configuration(); ++ unsigned int socket_id = rte_socket_id(); ++ unsigned int idx; ++ ++ if (socket_id != (unsigned int)SOCKET_ID_ANY) ++ return socket_id; ++ ++ /* for control threads, return first socket where memory is available */ ++ for (idx = 0; idx < rte_socket_count(); idx++) { ++ socket_id = rte_socket_id_by_idx(idx); ++ if (conf->socket_mem[socket_id] != 0) ++ return socket_id; ++ } ++ ++ return rte_socket_id_by_idx(0); ++} ++ + void * + malloc_heap_alloc(const char *type, size_t size, int socket_arg, + unsigned int flags, size_t align, size_t bound, bool contig) +diff --git a/dpdk/lib/librte_eal/common/malloc_heap.h b/dpdk/lib/librte_eal/common/malloc_heap.h +index 772736b53f..dfd57e6a56 100644 +--- a/dpdk/lib/librte_eal/common/malloc_heap.h ++++ b/dpdk/lib/librte_eal/common/malloc_heap.h +@@ -37,17 +37,6 @@ struct malloc_heap { + extern "C" { + #endif + +-static inline unsigned +-malloc_get_numa_socket(void) +-{ +- unsigned socket_id = rte_socket_id(); +- +- if (socket_id == (unsigned)SOCKET_ID_ANY) +- return 0; +- +- return socket_id; +-} +- + void * + malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags, + size_t align, size_t bound, bool contig); +diff --git a/dpdk/lib/librte_eal/common/malloc_mp.c b/dpdk/lib/librte_eal/common/malloc_mp.c +index 1f212f8349..f9d558ba64 100644 +--- a/dpdk/lib/librte_eal/common/malloc_mp.c ++++ b/dpdk/lib/librte_eal/common/malloc_mp.c +@@ -170,9 +170,7 @@ handle_sync(const struct rte_mp_msg *msg, const void *peer) + resp->id = req->id; + resp->result = ret == 0 ? 
REQ_RESULT_SUCCESS : REQ_RESULT_FAIL; + +- rte_mp_reply(&reply, peer); +- +- return 0; ++ return rte_mp_reply(&reply, peer); + } + + static int +@@ -188,7 +186,7 @@ handle_alloc_request(const struct malloc_mp_req *m, + void *map_addr; + + alloc_sz = RTE_ALIGN_CEIL(ar->align + ar->elt_size + +- MALLOC_ELEM_TRAILER_LEN, ar->page_sz); ++ MALLOC_ELEM_OVERHEAD, ar->page_sz); + n_segs = alloc_sz / ar->page_sz; + + heap = ar->heap; +diff --git a/dpdk/lib/librte_eal/common/meson.build b/dpdk/lib/librte_eal/common/meson.build +index 39abf7a0a4..7105e8bc4b 100644 +--- a/dpdk/lib/librte_eal/common/meson.build ++++ b/dpdk/lib/librte_eal/common/meson.build +@@ -5,51 +5,16 @@ includes += include_directories('.') + + cflags += [ '-DABI_VERSION="@0@"'.format(abi_version) ] + +-if is_windows +- sources += files( +- 'eal_common_bus.c', +- 'eal_common_class.c', +- 'eal_common_config.c', +- 'eal_common_debug.c', +- 'eal_common_dev.c', +- 'eal_common_devargs.c', +- 'eal_common_dynmem.c', +- 'eal_common_errno.c', +- 'eal_common_fbarray.c', +- 'eal_common_hexdump.c', +- 'eal_common_launch.c', +- 'eal_common_lcore.c', +- 'eal_common_log.c', +- 'eal_common_mcfg.c', +- 'eal_common_memalloc.c', +- 'eal_common_memory.c', +- 'eal_common_memzone.c', +- 'eal_common_options.c', +- 'eal_common_string_fns.c', +- 'eal_common_tailqs.c', +- 'eal_common_thread.c', +- 'eal_common_trace_points.c', +- 'malloc_elem.c', +- 'malloc_heap.c', +- 'rte_malloc.c', +- 'eal_common_timer.c', +- 'rte_service.c', +- ) +- subdir_done() +-endif +- + sources += files( + 'eal_common_bus.c', +- 'eal_common_cpuflags.c', + 'eal_common_class.c', + 'eal_common_config.c', + 'eal_common_debug.c', +- 'eal_common_devargs.c', + 'eal_common_dev.c', ++ 'eal_common_devargs.c', + 'eal_common_errno.c', + 'eal_common_fbarray.c', + 'eal_common_hexdump.c', +- 'eal_common_hypervisor.c', + 'eal_common_launch.c', + 'eal_common_lcore.c', + 'eal_common_log.c', +@@ -58,27 +23,32 @@ sources += files( + 'eal_common_memory.c', + 'eal_common_memzone.c', + 'eal_common_options.c', +- 'eal_common_proc.c', + 'eal_common_string_fns.c', + 'eal_common_tailqs.c', + 'eal_common_thread.c', + 'eal_common_timer.c', +- 'eal_common_trace.c', +- 'eal_common_trace_ctf.c', + 'eal_common_trace_points.c', +- 'eal_common_trace_utils.c', + 'eal_common_uuid.c', +- 'hotplug_mp.c', + 'malloc_elem.c', + 'malloc_heap.c', +- 'malloc_mp.c', +- 'rte_keepalive.c', + 'rte_malloc.c', +- 'rte_random.c', + 'rte_reciprocal.c', + 'rte_service.c', + ) +- +-if is_linux ++if is_linux or is_windows + sources += files('eal_common_dynmem.c') + endif ++if not is_windows ++ sources += files( ++ 'eal_common_cpuflags.c', ++ 'eal_common_hypervisor.c', ++ 'eal_common_proc.c', ++ 'eal_common_trace.c', ++ 'eal_common_trace_ctf.c', ++ 'eal_common_trace_utils.c', ++ 'hotplug_mp.c', ++ 'malloc_mp.c', ++ 'rte_random.c', ++ 'rte_keepalive.c', ++ ) ++endif +diff --git a/dpdk/lib/librte_eal/common/rte_service.c b/dpdk/lib/librte_eal/common/rte_service.c +index bd8fb72e78..e76c2baffc 100644 +--- a/dpdk/lib/librte_eal/common/rte_service.c ++++ b/dpdk/lib/librte_eal/common/rte_service.c +@@ -764,7 +764,9 @@ rte_service_lcore_stop(uint32_t lcore) + return -EALREADY; + + uint32_t i; +- uint64_t service_mask = lcore_states[lcore].service_mask; ++ struct core_state *cs = &lcore_states[lcore]; ++ uint64_t service_mask = cs->service_mask; ++ + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + int32_t enabled = service_mask & (UINT64_C(1) << i); + int32_t service_running = rte_service_runstate_get(i); +@@ -772,6 +774,11 @@ 
rte_service_lcore_stop(uint32_t lcore) + __atomic_load_n(&rte_services[i].num_mapped_cores, + __ATOMIC_RELAXED)); + ++ /* Switch off this core for all services, to ensure that future ++ * calls to may_be_active() know this core is switched off. ++ */ ++ cs->service_active_on_lcore[i] = 0; ++ + /* if the core is mapped, and the service is running, and this + * is the only core that is mapped, the service would cease to + * run if this core stopped, so fail instead. +diff --git a/dpdk/lib/librte_eal/freebsd/eal.c b/dpdk/lib/librte_eal/freebsd/eal.c +index 51478358c7..6f9f12911e 100644 +--- a/dpdk/lib/librte_eal/freebsd/eal.c ++++ b/dpdk/lib/librte_eal/freebsd/eal.c +@@ -521,6 +521,10 @@ eal_parse_args(int argc, char **argv) + goto out; + } + ++ /* eal_log_level_parse() already handled this option */ ++ if (opt == OPT_LOG_LEVEL_NUM) ++ continue; ++ + ret = eal_parse_common_option(opt, optarg, internal_conf); + /* common parser is not happy */ + if (ret < 0) { +@@ -671,6 +675,8 @@ rte_eal_init(int argc, char **argv) + const struct rte_config *config = rte_eal_get_configuration(); + struct internal_config *internal_conf = + eal_get_internal_configuration(); ++ bool has_phys_addr; ++ enum rte_iova_mode iova_mode; + + /* checks if the machine is adequate */ + if (!rte_cpu_is_supported()) { +@@ -712,6 +718,10 @@ rte_eal_init(int argc, char **argv) + + /* FreeBSD always uses legacy memory model */ + internal_conf->legacy_mem = true; ++ if (internal_conf->in_memory) { ++ RTE_LOG(WARNING, EAL, "Warning: ignoring unsupported flag, '%s'\n", OPT_IN_MEMORY); ++ internal_conf->in_memory = false; ++ } + + if (eal_plugins_init() < 0) { + rte_eal_init_alert("Cannot init plugins"); +@@ -767,19 +777,30 @@ rte_eal_init(int argc, char **argv) + return -1; + } + +- /* if no EAL option "--iova-mode=", use bus IOVA scheme */ +- if (internal_conf->iova_mode == RTE_IOVA_DC) { +- /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */ +- enum rte_iova_mode iova_mode = rte_bus_get_iommu_class(); +- +- if (iova_mode == RTE_IOVA_DC) +- iova_mode = RTE_IOVA_PA; +- rte_eal_get_configuration()->iova_mode = iova_mode; +- } else { +- rte_eal_get_configuration()->iova_mode = +- internal_conf->iova_mode; ++ /* ++ * PA are only available for hugepages via contigmem. ++ * If contigmem is inaccessible, rte_eal_hugepage_init() will fail ++ * with a message describing the cause. ++ */ ++ has_phys_addr = internal_conf->no_hugetlbfs == 0; ++ iova_mode = internal_conf->iova_mode; ++ if (iova_mode == RTE_IOVA_PA && !has_phys_addr) { ++ rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available"); ++ rte_errno = EINVAL; ++ return -1; + } +- ++ if (iova_mode == RTE_IOVA_DC) { ++ RTE_LOG(DEBUG, EAL, "Specific IOVA mode is not requested, autodetecting\n"); ++ if (has_phys_addr) { ++ RTE_LOG(DEBUG, EAL, "Selecting IOVA mode according to bus requests\n"); ++ iova_mode = rte_bus_get_iommu_class(); ++ if (iova_mode == RTE_IOVA_DC) ++ iova_mode = RTE_IOVA_PA; ++ } else { ++ iova_mode = RTE_IOVA_VA; ++ } ++ } ++ rte_eal_get_configuration()->iova_mode = iova_mode; + RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n", + rte_eal_iova_mode() == RTE_IOVA_PA ? 
"PA" : "VA"); + +@@ -993,6 +1014,7 @@ int rte_vfio_setup_device(__rte_unused const char *sysfs_base, + __rte_unused int *vfio_dev_fd, + __rte_unused struct vfio_device_info *device_info) + { ++ rte_errno = ENOTSUP; + return -1; + } + +@@ -1000,11 +1022,13 @@ int rte_vfio_release_device(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int fd) + { ++ rte_errno = ENOTSUP; + return -1; + } + + int rte_vfio_enable(__rte_unused const char *modname) + { ++ rte_errno = ENOTSUP; + return -1; + } + +@@ -1020,7 +1044,8 @@ int rte_vfio_noiommu_is_enabled(void) + + int rte_vfio_clear_group(__rte_unused int vfio_group_fd) + { +- return 0; ++ rte_errno = ENOTSUP; ++ return -1; + } + + int +@@ -1028,30 +1053,35 @@ rte_vfio_get_group_num(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int *iommu_group_num) + { ++ rte_errno = ENOTSUP; + return -1; + } + + int + rte_vfio_get_container_fd(void) + { ++ rte_errno = ENOTSUP; + return -1; + } + + int + rte_vfio_get_group_fd(__rte_unused int iommu_group_num) + { ++ rte_errno = ENOTSUP; + return -1; + } + + int + rte_vfio_container_create(void) + { ++ rte_errno = ENOTSUP; + return -1; + } + + int + rte_vfio_container_destroy(__rte_unused int container_fd) + { ++ rte_errno = ENOTSUP; + return -1; + } + +@@ -1059,6 +1089,7 @@ int + rte_vfio_container_group_bind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) + { ++ rte_errno = ENOTSUP; + return -1; + } + +@@ -1066,6 +1097,7 @@ int + rte_vfio_container_group_unbind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) + { ++ rte_errno = ENOTSUP; + return -1; + } + +@@ -1075,6 +1107,7 @@ rte_vfio_container_dma_map(__rte_unused int container_fd, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) + { ++ rte_errno = ENOTSUP; + return -1; + } + +@@ -1084,5 +1117,6 @@ rte_vfio_container_dma_unmap(__rte_unused int container_fd, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) + { ++ rte_errno = ENOTSUP; + return -1; + } +diff --git a/dpdk/lib/librte_eal/freebsd/eal_hugepage_info.c b/dpdk/lib/librte_eal/freebsd/eal_hugepage_info.c +index 408f054f7a..9dbe375bd3 100644 +--- a/dpdk/lib/librte_eal/freebsd/eal_hugepage_info.c ++++ b/dpdk/lib/librte_eal/freebsd/eal_hugepage_info.c +@@ -90,6 +90,10 @@ eal_hugepage_info_init(void) + RTE_LOG(ERR, EAL, "could not open "CONTIGMEM_DEV"\n"); + return -1; + } ++ if (flock(fd, LOCK_EX | LOCK_NB) < 0) { ++ RTE_LOG(ERR, EAL, "could not lock memory. 
Is another DPDK process running?\n"); ++ return -1; ++ } + + if (buffer_size >= 1<<30) + RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dGB\n", +diff --git a/dpdk/lib/librte_eal/freebsd/eal_interrupts.c b/dpdk/lib/librte_eal/freebsd/eal_interrupts.c +index 72eeacbc14..3591eb5725 100644 +--- a/dpdk/lib/librte_eal/freebsd/eal_interrupts.c ++++ b/dpdk/lib/librte_eal/freebsd/eal_interrupts.c +@@ -226,7 +226,7 @@ rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle, + + rte_spinlock_lock(&intr_lock); + +- /* check if the insterrupt source for the fd is existent */ ++ /* check if the interrupt source for the fd is existent */ + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == intr_handle->fd) + break; +@@ -280,7 +280,7 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, + + rte_spinlock_lock(&intr_lock); + +- /* check if the insterrupt source for the fd is existent */ ++ /* check if the interrupt source for the fd is existent */ + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == intr_handle->fd) + break; +diff --git a/dpdk/lib/librte_eal/freebsd/eal_memory.c b/dpdk/lib/librte_eal/freebsd/eal_memory.c +index 78ac142b82..17ab10e0ca 100644 +--- a/dpdk/lib/librte_eal/freebsd/eal_memory.c ++++ b/dpdk/lib/librte_eal/freebsd/eal_memory.c +@@ -446,8 +446,8 @@ memseg_secondary_init(void) + + msl = &mcfg->memsegs[msl_idx]; + +- /* skip empty memseg lists */ +- if (msl->memseg_arr.len == 0) ++ /* skip empty and external memseg lists */ ++ if (msl->memseg_arr.len == 0 || msl->external) + continue; + + if (rte_fbarray_attach(&msl->memseg_arr)) { +diff --git a/dpdk/lib/librte_eal/freebsd/eal_thread.c b/dpdk/lib/librte_eal/freebsd/eal_thread.c +index 1dce9b04f2..bbc3a8e985 100644 +--- a/dpdk/lib/librte_eal/freebsd/eal_thread.c ++++ b/dpdk/lib/librte_eal/freebsd/eal_thread.c +@@ -126,6 +126,8 @@ eal_thread_loop(__rte_unused void *arg) + fct_arg = lcore_config[lcore_id].arg; + ret = lcore_config[lcore_id].f(fct_arg); + lcore_config[lcore_id].ret = ret; ++ lcore_config[lcore_id].f = NULL; ++ lcore_config[lcore_id].arg = NULL; + rte_wmb(); + lcore_config[lcore_id].state = FINISHED; + } +diff --git a/dpdk/lib/librte_eal/freebsd/include/rte_os.h b/dpdk/lib/librte_eal/freebsd/include/rte_os.h +index eeb750cd81..e3c9f80720 100644 +--- a/dpdk/lib/librte_eal/freebsd/include/rte_os.h ++++ b/dpdk/lib/librte_eal/freebsd/include/rte_os.h +@@ -5,15 +5,21 @@ + #ifndef _RTE_OS_H_ + #define _RTE_OS_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** +- * This is header should contain any function/macro definition +- * which are not supported natively or named differently in the +- * freebsd OS. Functions will be added in future releases. ++ * This header should contain any definition ++ * which is not supported natively or named differently in FreeBSD. 
+ */ + + #include + + typedef cpuset_t rte_cpuset_t; ++#define RTE_HAS_CPUSET ++ ++#ifdef RTE_EAL_FREEBSD_CPUSET_LEGACY + #define RTE_CPU_AND(dst, src1, src2) do \ + { \ + cpuset_t tmp; \ +@@ -47,6 +53,23 @@ typedef cpuset_t rte_cpuset_t; + CPU_ANDNOT(&tmp, src); \ + CPU_COPY(&tmp, dst); \ + } while (0) ++#endif /* CPU_NAND */ ++ ++#else /* RTE_EAL_FREEBSD_CPUSET_LEGACY */ ++ ++#define RTE_CPU_AND CPU_AND ++#define RTE_CPU_OR CPU_OR ++#define RTE_CPU_FILL CPU_FILL ++#define RTE_CPU_NOT(dst, src) do { \ ++ cpu_set_t tmp; \ ++ CPU_FILL(&tmp); \ ++ CPU_XOR(dst, src, &tmp); \ ++} while (0) ++ ++#endif /* RTE_EAL_FREEBSD_CPUSET_LEGACY */ ++ ++#ifdef __cplusplus ++} + #endif + + #endif /* _RTE_OS_H_ */ +diff --git a/dpdk/lib/librte_eal/freebsd/meson.build b/dpdk/lib/librte_eal/freebsd/meson.build +index e10fd8a16d..0b7e2ca4a1 100644 +--- a/dpdk/lib/librte_eal/freebsd/meson.build ++++ b/dpdk/lib/librte_eal/freebsd/meson.build +@@ -19,3 +19,14 @@ sources += files( + ) + + deps += ['kvargs', 'telemetry'] ++ ++# test for version of cpuset macros ++cpuset_test_code = ''' ++ #include ++ #include ++ void cpu_test_or(cpuset_t *s) { CPU_OR(s, s, s); } ++''' ++ ++if not cc.compiles(cpuset_test_code, name: 'Detect argument count for CPU_OR') ++ dpdk_conf.set('RTE_EAL_FREEBSD_CPUSET_LEGACY', 1) ++endif +diff --git a/dpdk/lib/librte_eal/include/generic/rte_ticketlock.h b/dpdk/lib/librte_eal/include/generic/rte_ticketlock.h +index c1b8808f51..693c67b517 100644 +--- a/dpdk/lib/librte_eal/include/generic/rte_ticketlock.h ++++ b/dpdk/lib/librte_eal/include/generic/rte_ticketlock.h +@@ -91,13 +91,13 @@ rte_ticketlock_unlock(rte_ticketlock_t *tl) + static inline int + rte_ticketlock_trylock(rte_ticketlock_t *tl) + { +- rte_ticketlock_t old, new; +- old.tickets = __atomic_load_n(&tl->tickets, __ATOMIC_RELAXED); +- new.tickets = old.tickets; +- new.s.next++; +- if (old.s.next == old.s.current) { +- if (__atomic_compare_exchange_n(&tl->tickets, &old.tickets, +- new.tickets, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) ++ rte_ticketlock_t oldl, newl; ++ oldl.tickets = __atomic_load_n(&tl->tickets, __ATOMIC_RELAXED); ++ newl.tickets = oldl.tickets; ++ newl.s.next++; ++ if (oldl.s.next == oldl.s.current) { ++ if (__atomic_compare_exchange_n(&tl->tickets, &oldl.tickets, ++ newl.tickets, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + return 1; + } + +diff --git a/dpdk/lib/librte_eal/include/rte_bitmap.h b/dpdk/lib/librte_eal/include/rte_bitmap.h +index 7c90ef333f..b38d9ac2ba 100644 +--- a/dpdk/lib/librte_eal/include/rte_bitmap.h ++++ b/dpdk/lib/librte_eal/include/rte_bitmap.h +@@ -185,9 +185,8 @@ rte_bitmap_init(uint32_t n_bits, uint8_t *mem, uint32_t mem_size) + size = __rte_bitmap_get_memory_footprint(n_bits, + &array1_byte_offset, &array1_slabs, + &array2_byte_offset, &array2_slabs); +- if (size < mem_size) { ++ if (size > mem_size) + return NULL; +- } + + /* Setup bitmap */ + memset(mem, 0, size); +diff --git a/dpdk/lib/librte_eal/include/rte_bitops.h b/dpdk/lib/librte_eal/include/rte_bitops.h +index 141e8ea730..f50dbe4388 100644 +--- a/dpdk/lib/librte_eal/include/rte_bitops.h ++++ b/dpdk/lib/librte_eal/include/rte_bitops.h +@@ -17,6 +17,10 @@ + #include + #include + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** + * Get the uint64_t value for a specified bit set. 
+ * +@@ -271,4 +275,8 @@ rte_bit_relaxed_test_and_clear64(unsigned int nr, volatile uint64_t *addr) + return val & mask; + } + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_BITOPS_H_ */ +diff --git a/dpdk/lib/librte_eal/include/rte_branch_prediction.h b/dpdk/lib/librte_eal/include/rte_branch_prediction.h +index 854ef9e5dd..0256a9de60 100644 +--- a/dpdk/lib/librte_eal/include/rte_branch_prediction.h ++++ b/dpdk/lib/librte_eal/include/rte_branch_prediction.h +@@ -10,6 +10,10 @@ + #ifndef _RTE_BRANCH_PREDICTION_H_ + #define _RTE_BRANCH_PREDICTION_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** + * Check if a branch is likely to be taken. + * +@@ -38,4 +42,8 @@ + #define unlikely(x) __builtin_expect(!!(x), 0) + #endif /* unlikely */ + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_BRANCH_PREDICTION_H_ */ +diff --git a/dpdk/lib/librte_eal/include/rte_common.h b/dpdk/lib/librte_eal/include/rte_common.h +index 1b630baf16..677b52a2f8 100644 +--- a/dpdk/lib/librte_eal/include/rte_common.h ++++ b/dpdk/lib/librte_eal/include/rte_common.h +@@ -83,6 +83,11 @@ typedef uint16_t unaligned_uint16_t; + */ + #define __rte_packed __attribute__((__packed__)) + ++/** ++ * Macro to mark a type that is not subject to type-based aliasing rules ++ */ ++#define __rte_may_alias __attribute__((__may_alias__)) ++ + /******* Macro to mark functions and fields scheduled for removal *****/ + #define __rte_deprecated __attribute__((__deprecated__)) + #define __rte_deprecated_msg(msg) __attribute__((__deprecated__(msg))) +diff --git a/dpdk/lib/librte_eal/include/rte_compat.h b/dpdk/lib/librte_eal/include/rte_compat.h +index 2718612cce..a7dbe23449 100644 +--- a/dpdk/lib/librte_eal/include/rte_compat.h ++++ b/dpdk/lib/librte_eal/include/rte_compat.h +@@ -6,6 +6,10 @@ + #ifndef _RTE_COMPAT_H_ + #define _RTE_COMPAT_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #ifndef ALLOW_EXPERIMENTAL_API + + #define __rte_experimental \ +@@ -43,4 +47,8 @@ __attribute__((section(".text.internal"))) + + #endif + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_COMPAT_H_ */ +diff --git a/dpdk/lib/librte_eal/include/rte_dev.h b/dpdk/lib/librte_eal/include/rte_dev.h +index 6dd72c11a1..616192cdc4 100644 +--- a/dpdk/lib/librte_eal/include/rte_dev.h ++++ b/dpdk/lib/librte_eal/include/rte_dev.h +@@ -8,7 +8,7 @@ + /** + * @file + * +- * RTE PMD Driver Registration Interface ++ * RTE PMD Registration Interface + * + * This file manages the list of device drivers. + */ +@@ -327,10 +327,6 @@ rte_dev_iterator_next(struct rte_dev_iterator *it); + dev != NULL; \ + dev = rte_dev_iterator_next(it)) + +-#ifdef __cplusplus +-} +-#endif +- + /** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice +@@ -503,4 +499,8 @@ int + rte_dev_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova, + size_t len); + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_DEV_H_ */ +diff --git a/dpdk/lib/librte_eal/include/rte_eal_paging.h b/dpdk/lib/librte_eal/include/rte_eal_paging.h +index ed98e70e9e..c60317d0f5 100644 +--- a/dpdk/lib/librte_eal/include/rte_eal_paging.h ++++ b/dpdk/lib/librte_eal/include/rte_eal_paging.h +@@ -61,7 +61,7 @@ enum rte_map_flags { + __rte_internal + void * + rte_mem_map(void *requested_addr, size_t size, int prot, int flags, +- int fd, size_t offset); ++ int fd, uint64_t offset); + + /** + * OS-independent implementation of POSIX munmap(3). 
+diff --git a/dpdk/lib/librte_eal/include/rte_function_versioning.h b/dpdk/lib/librte_eal/include/rte_function_versioning.h +index 746a1e1992..eb6dd2bc17 100644 +--- a/dpdk/lib/librte_eal/include/rte_function_versioning.h ++++ b/dpdk/lib/librte_eal/include/rte_function_versioning.h +@@ -15,7 +15,7 @@ + + /* + * Provides backwards compatibility when updating exported functions. +- * When a symol is exported from a library to provide an API, it also provides a ++ * When a symbol is exported from a library to provide an API, it also provides a + * calling convention (ABI) that is embodied in its name, return type, + * arguments, etc. On occasion that function may need to change to accommodate + * new functionality, behavior, etc. When that occurs, it is desirable to +diff --git a/dpdk/lib/librte_eal/include/rte_hypervisor.h b/dpdk/lib/librte_eal/include/rte_hypervisor.h +index 5fe719c1d4..1666431ce3 100644 +--- a/dpdk/lib/librte_eal/include/rte_hypervisor.h ++++ b/dpdk/lib/librte_eal/include/rte_hypervisor.h +@@ -5,6 +5,10 @@ + #ifndef RTE_HYPERVISOR_H + #define RTE_HYPERVISOR_H + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** + * @file + * Hypervisor awareness. +@@ -30,4 +34,8 @@ rte_hypervisor_get(void); + const char * + rte_hypervisor_get_name(enum rte_hypervisor id); + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* RTE_HYPERVISOR_H */ +diff --git a/dpdk/lib/librte_eal/include/rte_keepalive.h b/dpdk/lib/librte_eal/include/rte_keepalive.h +index bd25508da8..538fb09095 100644 +--- a/dpdk/lib/librte_eal/include/rte_keepalive.h ++++ b/dpdk/lib/librte_eal/include/rte_keepalive.h +@@ -11,6 +11,10 @@ + #ifndef _KEEPALIVE_H_ + #define _KEEPALIVE_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #include + #include + +@@ -139,4 +143,8 @@ rte_keepalive_register_relay_callback(struct rte_keepalive *keepcfg, + rte_keepalive_relay_callback_t callback, + void *data); + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _KEEPALIVE_H_ */ +diff --git a/dpdk/lib/librte_eal/include/rte_lcore.h b/dpdk/lib/librte_eal/include/rte_lcore.h +index 48b87e253a..a55fd7496d 100644 +--- a/dpdk/lib/librte_eal/include/rte_lcore.h ++++ b/dpdk/lib/librte_eal/include/rte_lcore.h +@@ -185,6 +185,8 @@ __rte_experimental + int + rte_lcore_to_cpu_id(int lcore_id); + ++#ifdef RTE_HAS_CPUSET ++ + /** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. +@@ -199,6 +201,8 @@ __rte_experimental + rte_cpuset_t + rte_lcore_cpuset(unsigned int lcore_id); + ++#endif /* RTE_HAS_CPUSET */ ++ + /** + * Test if an lcore is enabled. + * +@@ -357,6 +361,8 @@ __rte_experimental + void + rte_lcore_dump(FILE *f); + ++#ifdef RTE_HAS_CPUSET ++ + /** + * Set core affinity of the current thread. + * Support both EAL and non-EAL thread and update TLS. +@@ -378,6 +384,8 @@ int rte_thread_set_affinity(rte_cpuset_t *cpusetp); + */ + void rte_thread_get_affinity(rte_cpuset_t *cpusetp); + ++#endif /* RTE_HAS_CPUSET */ ++ + /** + * Set thread names. + * +diff --git a/dpdk/lib/librte_eal/include/rte_malloc.h b/dpdk/lib/librte_eal/include/rte_malloc.h +index 3af64f8761..c8da894af1 100644 +--- a/dpdk/lib/librte_eal/include/rte_malloc.h ++++ b/dpdk/lib/librte_eal/include/rte_malloc.h +@@ -58,7 +58,7 @@ rte_malloc(const char *type, size_t size, unsigned align) + __rte_alloc_size(2); + + /** +- * Allocate zero'ed memory from the heap. ++ * Allocate zeroed memory from the heap. + * + * Equivalent to rte_malloc() except that the memory zone is + * initialised with zeros. 
In NUMA systems, the memory allocated resides on the +@@ -160,7 +160,7 @@ rte_realloc(void *ptr, size_t size, unsigned int align) + __rte_experimental + void * + rte_realloc_socket(void *ptr, size_t size, unsigned int align, int socket) +- __rte_alloc_size(2, 3); ++ __rte_alloc_size(2); + + /** + * This function allocates memory from the huge-page area of memory. The memory +@@ -190,7 +190,7 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket) + __rte_alloc_size(2); + + /** +- * Allocate zero'ed memory from the heap. ++ * Allocate zeroed memory from the heap. + * + * Equivalent to rte_malloc() except that the memory zone is + * initialised with zeros. +diff --git a/dpdk/lib/librte_eal/include/rte_pci_dev_feature_defs.h b/dpdk/lib/librte_eal/include/rte_pci_dev_feature_defs.h +index e12c22081f..c5bb631286 100644 +--- a/dpdk/lib/librte_eal/include/rte_pci_dev_feature_defs.h ++++ b/dpdk/lib/librte_eal/include/rte_pci_dev_feature_defs.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_PCI_DEV_DEFS_H_ + #define _RTE_PCI_DEV_DEFS_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /* interrupt mode */ + enum rte_intr_mode { + RTE_INTR_MODE_NONE = 0, +@@ -13,4 +17,8 @@ enum rte_intr_mode { + RTE_INTR_MODE_MSIX + }; + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_PCI_DEV_DEFS_H_ */ +diff --git a/dpdk/lib/librte_eal/include/rte_pci_dev_features.h b/dpdk/lib/librte_eal/include/rte_pci_dev_features.h +index 6104123d27..ee6e10590c 100644 +--- a/dpdk/lib/librte_eal/include/rte_pci_dev_features.h ++++ b/dpdk/lib/librte_eal/include/rte_pci_dev_features.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_PCI_DEV_FEATURES_H + #define _RTE_PCI_DEV_FEATURES_H + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #include + + #define RTE_INTR_MODE_NONE_NAME "none" +@@ -12,4 +16,8 @@ + #define RTE_INTR_MODE_MSI_NAME "msi" + #define RTE_INTR_MODE_MSIX_NAME "msix" + ++#ifdef __cplusplus ++} ++#endif ++ + #endif +diff --git a/dpdk/lib/librte_eal/include/rte_reciprocal.h b/dpdk/lib/librte_eal/include/rte_reciprocal.h +index 735adb029b..fa1cb4854e 100644 +--- a/dpdk/lib/librte_eal/include/rte_reciprocal.h ++++ b/dpdk/lib/librte_eal/include/rte_reciprocal.h +@@ -29,6 +29,10 @@ + + #include + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + struct rte_reciprocal { + uint32_t m; + uint8_t sh1, sh2; +@@ -89,4 +93,8 @@ rte_reciprocal_divide_u64(uint64_t a, const struct rte_reciprocal_u64 *R) + struct rte_reciprocal rte_reciprocal_value(uint32_t d); + struct rte_reciprocal_u64 rte_reciprocal_value_u64(uint64_t d); + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_RECIPROCAL_H_ */ +diff --git a/dpdk/lib/librte_eal/include/rte_service.h b/dpdk/lib/librte_eal/include/rte_service.h +index ca9950d091..c7d037d862 100644 +--- a/dpdk/lib/librte_eal/include/rte_service.h ++++ b/dpdk/lib/librte_eal/include/rte_service.h +@@ -47,10 +47,7 @@ extern "C" { + #define RTE_SERVICE_CAP_MT_SAFE (1 << 0) + + /** +- * Return the number of services registered. +- * +- * The number of services registered can be passed to *rte_service_get_by_id*, +- * enabling the application to retrieve the specification of each service. ++ * Return the number of services registered. + * + * @return The number of services registered. 
+ */ +diff --git a/dpdk/lib/librte_eal/include/rte_time.h b/dpdk/lib/librte_eal/include/rte_time.h +index 5ad7c8841a..ec25f7b93d 100644 +--- a/dpdk/lib/librte_eal/include/rte_time.h ++++ b/dpdk/lib/librte_eal/include/rte_time.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_TIME_H_ + #define _RTE_TIME_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #include + #include + +@@ -98,4 +102,8 @@ rte_ns_to_timespec(uint64_t nsec) + return ts; + } + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_TIME_H_ */ +diff --git a/dpdk/lib/librte_eal/include/rte_trace_point.h b/dpdk/lib/librte_eal/include/rte_trace_point.h +index e226f073f7..0f8700974f 100644 +--- a/dpdk/lib/librte_eal/include/rte_trace_point.h ++++ b/dpdk/lib/librte_eal/include/rte_trace_point.h +@@ -370,7 +370,7 @@ do { \ + do { \ + if (unlikely(in == NULL)) \ + return; \ +- rte_strscpy(mem, in, __RTE_TRACE_EMIT_STRING_LEN_MAX); \ ++ rte_strscpy((char *)mem, in, __RTE_TRACE_EMIT_STRING_LEN_MAX); \ + mem = RTE_PTR_ADD(mem, __RTE_TRACE_EMIT_STRING_LEN_MAX); \ + } while (0) + +diff --git a/dpdk/lib/librte_eal/include/rte_trace_point_register.h b/dpdk/lib/librte_eal/include/rte_trace_point_register.h +index 4f5c86552d..2e61439940 100644 +--- a/dpdk/lib/librte_eal/include/rte_trace_point_register.h ++++ b/dpdk/lib/librte_eal/include/rte_trace_point_register.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_TRACE_POINT_REGISTER_H_ + #define _RTE_TRACE_POINT_REGISTER_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #ifdef _RTE_TRACE_POINT_H_ + #error for registration, include this file first before + #endif +@@ -42,4 +46,8 @@ do { \ + RTE_STR(in)"[32]", "string_bounded_t"); \ + } while (0) + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_TRACE_POINT_REGISTER_H_ */ +diff --git a/dpdk/lib/librte_eal/include/rte_vfio.h b/dpdk/lib/librte_eal/include/rte_vfio.h +index 20ed8c45a9..7bdb8932b2 100644 +--- a/dpdk/lib/librte_eal/include/rte_vfio.h ++++ b/dpdk/lib/librte_eal/include/rte_vfio.h +@@ -14,6 +14,7 @@ + extern "C" { + #endif + ++#include + #include + + /* +@@ -149,14 +150,13 @@ int rte_vfio_enable(const char *modname); + /** + * Check whether a VFIO-related kmod is enabled. + * +- * This function is only relevant to linux and will return +- * an error on BSD. ++ * This function is only relevant to Linux. + * + * @param modname + * kernel module name. + * + * @return +- * !0 if true. ++ * 1 if true. + * 0 otherwise. + */ + int rte_vfio_is_enabled(const char *modname); +@@ -164,12 +164,12 @@ int rte_vfio_is_enabled(const char *modname); + /** + * Whether VFIO NOIOMMU mode is enabled. + * +- * This function is only relevant to linux and will return +- * an error on BSD. ++ * This function is only relevant to Linux. + * + * @return +- * !0 if true. +- * 0 otherwise. ++ * 1 if true. ++ * 0 if false. ++ * <0 for errors. 
+ */ + int rte_vfio_noiommu_is_enabled(void); + +diff --git a/dpdk/lib/librte_eal/linux/eal.c b/dpdk/lib/librte_eal/linux/eal.c +index 32b48c3de9..5814f9ce69 100644 +--- a/dpdk/lib/librte_eal/linux/eal.c ++++ b/dpdk/lib/librte_eal/linux/eal.c +@@ -561,7 +561,6 @@ eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg) + char * arg[RTE_MAX_NUMA_NODES]; + char *end; + int arg_num, i, len; +- uint64_t total_mem = 0; + + len = strnlen(strval, SOCKET_MEM_STRLEN); + if (len == SOCKET_MEM_STRLEN) { +@@ -593,7 +592,6 @@ eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg) + (arg[i][0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + val <<= 20; +- total_mem += val; + socket_arg[i] = val; + } + +@@ -704,6 +702,10 @@ eal_parse_args(int argc, char **argv) + goto out; + } + ++ /* eal_log_level_parse() already handled this option */ ++ if (opt == OPT_LOG_LEVEL_NUM) ++ continue; ++ + ret = eal_parse_common_option(opt, optarg, internal_conf); + /* common parser is not happy */ + if (ret < 0) { +@@ -1358,7 +1360,11 @@ rte_eal_cleanup(void) + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_memseg_walk(mark_freeable, NULL); ++ + rte_service_finalize(); ++#ifdef VFIO_PRESENT ++ vfio_mp_sync_cleanup(); ++#endif + rte_mp_channel_cleanup(); + rte_trace_save(); + eal_trace_fini(); +diff --git a/dpdk/lib/librte_eal/linux/eal_dev.c b/dpdk/lib/librte_eal/linux/eal_dev.c +index 5c0e752b2d..c1737f4672 100644 +--- a/dpdk/lib/librte_eal/linux/eal_dev.c ++++ b/dpdk/lib/librte_eal/linux/eal_dev.c +@@ -157,6 +157,9 @@ dev_uev_parse(const char *buf, struct rte_dev_event *event, int length) + break; + buf++; + } ++ if (i >= length) ++ break; ++ + /** + * check device uevent from kernel side, no need to check + * uevent from udev. +@@ -223,13 +226,13 @@ dev_uev_handler(__rte_unused void *param) + { + struct rte_dev_event uevent; + int ret; +- char buf[EAL_UEV_MSG_LEN]; ++ char buf[EAL_UEV_MSG_LEN + 1]; + struct rte_bus *bus; + struct rte_device *dev; + const char *busname = ""; + + memset(&uevent, 0, sizeof(struct rte_dev_event)); +- memset(buf, 0, EAL_UEV_MSG_LEN); ++ memset(buf, 0, EAL_UEV_MSG_LEN + 1); + + ret = recv(intr_handle.fd, buf, EAL_UEV_MSG_LEN, MSG_DONTWAIT); + if (ret < 0 && errno == EAGAIN) +diff --git a/dpdk/lib/librte_eal/linux/eal_interrupts.c b/dpdk/lib/librte_eal/linux/eal_interrupts.c +index 1dd994bd1f..455c94068e 100644 +--- a/dpdk/lib/librte_eal/linux/eal_interrupts.c ++++ b/dpdk/lib/librte_eal/linux/eal_interrupts.c +@@ -563,7 +563,7 @@ rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle, + + rte_spinlock_lock(&intr_lock); + +- /* check if the insterrupt source for the fd is existent */ ++ /* check if the interrupt source for the fd is existent */ + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == intr_handle->fd) + break; +@@ -613,7 +613,7 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle, + + rte_spinlock_lock(&intr_lock); + +- /* check if the insterrupt source for the fd is existent */ ++ /* check if the interrupt source for the fd is existent */ + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == intr_handle->fd) + break; +@@ -906,17 +906,14 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) + bytes_read = sizeof(buf.timerfd_num); + break; + #ifdef VFIO_PRESENT ++#ifdef HAVE_VFIO_DEV_REQ_INTERFACE ++ case RTE_INTR_HANDLE_VFIO_REQ: ++#endif + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + 
bytes_read = sizeof(buf.vfio_intr_count); + break; +-#ifdef HAVE_VFIO_DEV_REQ_INTERFACE +- case RTE_INTR_HANDLE_VFIO_REQ: +- bytes_read = 0; +- call = true; +- break; +-#endif + #endif + case RTE_INTR_HANDLE_VDEV: + case RTE_INTR_HANDLE_EXT: +diff --git a/dpdk/lib/librte_eal/linux/eal_log.c b/dpdk/lib/librte_eal/linux/eal_log.c +index 43c8460bfb..2095df74c5 100644 +--- a/dpdk/lib/librte_eal/linux/eal_log.c ++++ b/dpdk/lib/librte_eal/linux/eal_log.c +@@ -27,9 +27,9 @@ console_log_write(__rte_unused void *c, const char *buf, size_t size) + { + ssize_t ret; + +- /* write on stdout */ +- ret = fwrite(buf, 1, size, stdout); +- fflush(stdout); ++ /* write on stderr */ ++ ret = fwrite(buf, 1, size, stderr); ++ fflush(stderr); + + /* Syslog error levels are from 0 to 7, so subtract 1 to convert */ + syslog(rte_log_cur_msg_loglevel() - 1, "%.*s", (int)size, buf); +diff --git a/dpdk/lib/librte_eal/linux/eal_memalloc.c b/dpdk/lib/librte_eal/linux/eal_memalloc.c +index 6dc1b2baec..93e21f1321 100644 +--- a/dpdk/lib/librte_eal/linux/eal_memalloc.c ++++ b/dpdk/lib/librte_eal/linux/eal_memalloc.c +@@ -107,7 +107,7 @@ static struct rte_memseg_list local_memsegs[RTE_MAX_MEMSEG_LISTS]; + + static sigjmp_buf huge_jmpenv; + +-static void __rte_unused huge_sigbus_handler(int signo __rte_unused) ++static void huge_sigbus_handler(int signo __rte_unused) + { + siglongjmp(huge_jmpenv, 1); + } +@@ -116,7 +116,7 @@ static void __rte_unused huge_sigbus_handler(int signo __rte_unused) + * non-static local variable in the stack frame calling sigsetjmp might be + * clobbered by a call to longjmp. + */ +-static int __rte_unused huge_wrap_sigsetjmp(void) ++static int huge_wrap_sigsetjmp(void) + { + return sigsetjmp(huge_jmpenv, 1); + } +@@ -124,7 +124,7 @@ static int __rte_unused huge_wrap_sigsetjmp(void) + static struct sigaction huge_action_old; + static int huge_need_recover; + +-static void __rte_unused ++static void + huge_register_sigbus(void) + { + sigset_t mask; +@@ -139,7 +139,7 @@ huge_register_sigbus(void) + huge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old); + } + +-static void __rte_unused ++static void + huge_recover_sigbus(void) + { + if (huge_need_recover) { +@@ -308,8 +308,8 @@ get_seg_fd(char *path, int buflen, struct hugepage_info *hi, + if (fd < 0) { + fd = open(path, O_CREAT | O_RDWR, 0600); + if (fd < 0) { +- RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", +- __func__, strerror(errno)); ++ RTE_LOG(ERR, EAL, "%s(): open '%s' failed: %s\n", ++ __func__, path, strerror(errno)); + return -1; + } + /* take out a read lock and keep it indefinitely */ +@@ -346,8 +346,8 @@ get_seg_fd(char *path, int buflen, struct hugepage_info *hi, + + fd = open(path, O_CREAT | O_RDWR, 0600); + if (fd < 0) { +- RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", +- __func__, strerror(errno)); ++ RTE_LOG(ERR, EAL, "%s(): open '%s' failed: %s\n", ++ __func__, path, strerror(errno)); + return -1; + } + /* take out a read lock */ +@@ -576,6 +576,8 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, + mmap_flags = MAP_SHARED | MAP_POPULATE | MAP_FIXED; + } + ++ huge_register_sigbus(); ++ + /* + * map the segment, and populate page tables, the kernel fills + * this segment with zeros if it's a new page. 
+@@ -651,6 +653,8 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, + __func__); + #endif + ++ huge_recover_sigbus(); ++ + ms->addr = addr; + ms->hugepage_sz = alloc_sz; + ms->len = alloc_sz; +@@ -664,6 +668,7 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, + mapped: + munmap(addr, alloc_sz); + unmapped: ++ huge_recover_sigbus(); + flags = EAL_RESERVE_FORCE_ADDRESS; + new_addr = eal_get_virtual_area(addr, &alloc_sz, alloc_sz, 0, flags); + if (new_addr != addr) { +@@ -709,7 +714,6 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi, + uint64_t map_offset; + char path[PATH_MAX]; + int fd, ret = 0; +- bool exit_early; + const struct internal_config *internal_conf = + eal_get_internal_configuration(); + +@@ -725,17 +729,8 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi, + + eal_mem_set_dump(ms->addr, ms->len, false); + +- exit_early = false; +- + /* if we're using anonymous hugepages, nothing to be done */ +- if (internal_conf->in_memory && !memfd_create_supported) +- exit_early = true; +- +- /* if we've already unlinked the page, nothing needs to be done */ +- if (!internal_conf->in_memory && internal_conf->hugepage_unlink) +- exit_early = true; +- +- if (exit_early) { ++ if (internal_conf->in_memory && !memfd_create_supported) { + memset(ms, 0, sizeof(*ms)); + return 0; + } +@@ -761,7 +756,7 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi, + /* if we're able to take out a write lock, we're the last one + * holding onto this page. + */ +- if (!internal_conf->in_memory) { ++ if (!internal_conf->in_memory && !internal_conf->hugepage_unlink) { + ret = lock(fd, LOCK_EX); + if (ret >= 0) { + /* no one else is using this page */ +diff --git a/dpdk/lib/librte_eal/linux/eal_memory.c b/dpdk/lib/librte_eal/linux/eal_memory.c +index 03a4f2dd2d..fda6a159d5 100644 +--- a/dpdk/lib/librte_eal/linux/eal_memory.c ++++ b/dpdk/lib/librte_eal/linux/eal_memory.c +@@ -1883,8 +1883,8 @@ memseg_secondary_init(void) + + msl = &mcfg->memsegs[msl_idx]; + +- /* skip empty memseg lists */ +- if (msl->memseg_arr.len == 0) ++ /* skip empty and external memseg lists */ ++ if (msl->memseg_arr.len == 0 || msl->external) + continue; + + if (rte_fbarray_attach(&msl->memseg_arr)) { +diff --git a/dpdk/lib/librte_eal/linux/eal_thread.c b/dpdk/lib/librte_eal/linux/eal_thread.c +index 83c2034b93..8f3c0dafd6 100644 +--- a/dpdk/lib/librte_eal/linux/eal_thread.c ++++ b/dpdk/lib/librte_eal/linux/eal_thread.c +@@ -126,6 +126,8 @@ eal_thread_loop(__rte_unused void *arg) + fct_arg = lcore_config[lcore_id].arg; + ret = lcore_config[lcore_id].f(fct_arg); + lcore_config[lcore_id].ret = ret; ++ lcore_config[lcore_id].f = NULL; ++ lcore_config[lcore_id].arg = NULL; + rte_wmb(); + + /* when a service core returns, it should go directly to WAIT +diff --git a/dpdk/lib/librte_eal/linux/eal_vfio.c b/dpdk/lib/librte_eal/linux/eal_vfio.c +index 050082444e..c80777bc21 100644 +--- a/dpdk/lib/librte_eal/linux/eal_vfio.c ++++ b/dpdk/lib/librte_eal/linux/eal_vfio.c +@@ -70,6 +70,7 @@ static const struct vfio_iommu_type iommu_types[] = { + { + .type_id = RTE_VFIO_TYPE1, + .name = "Type 1", ++ .partial_unmap = false, + .dma_map_func = &vfio_type1_dma_map, + .dma_user_map_func = &vfio_type1_dma_mem_map + }, +@@ -77,6 +78,7 @@ static const struct vfio_iommu_type iommu_types[] = { + { + .type_id = RTE_VFIO_SPAPR, + .name = "sPAPR", ++ .partial_unmap = true, + .dma_map_func = &vfio_spapr_dma_map, + .dma_user_map_func = &vfio_spapr_dma_mem_map + }, +@@ -84,6 +86,7 @@ static const struct vfio_iommu_type 
iommu_types[] = { + { + .type_id = RTE_VFIO_NOIOMMU, + .name = "No-IOMMU", ++ .partial_unmap = true, + .dma_map_func = &vfio_noiommu_dma_map, + .dma_user_map_func = &vfio_noiommu_dma_mem_map + }, +@@ -168,6 +171,10 @@ adjust_map(struct user_mem_map *src, struct user_mem_map *end, + static int + merge_map(struct user_mem_map *left, struct user_mem_map *right) + { ++ /* merge the same maps into one */ ++ if (memcmp(left, right, sizeof(struct user_mem_map)) == 0) ++ goto out; ++ + if (left->addr + left->len != right->addr) + return 0; + if (left->iova + left->len != right->iova) +@@ -175,6 +182,7 @@ merge_map(struct user_mem_map *left, struct user_mem_map *right) + + left->len += right->len; + ++out: + memset(right, 0, sizeof(*right)); + + return 1; +@@ -517,85 +525,49 @@ static void + vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len, + void *arg __rte_unused) + { +- rte_iova_t iova_start, iova_expected; + struct rte_memseg_list *msl; + struct rte_memseg *ms; + size_t cur_len = 0; +- uint64_t va_start; + + msl = rte_mem_virt2memseg_list(addr); + + /* for IOVA as VA mode, no need to care for IOVA addresses */ + if (rte_eal_iova_mode() == RTE_IOVA_VA && msl->external == 0) { + uint64_t vfio_va = (uint64_t)(uintptr_t)addr; +- if (type == RTE_MEM_EVENT_ALLOC) +- vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va, +- len, 1); +- else +- vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va, +- len, 0); ++ uint64_t page_sz = msl->page_sz; ++ ++ /* Maintain granularity of DMA map/unmap to memseg size */ ++ for (; cur_len < len; cur_len += page_sz) { ++ if (type == RTE_MEM_EVENT_ALLOC) ++ vfio_dma_mem_map(default_vfio_cfg, vfio_va, ++ vfio_va, page_sz, 1); ++ else ++ vfio_dma_mem_map(default_vfio_cfg, vfio_va, ++ vfio_va, page_sz, 0); ++ vfio_va += page_sz; ++ } ++ + return; + } + + /* memsegs are contiguous in memory */ + ms = rte_mem_virt2memseg(addr, msl); +- +- /* +- * This memory is not guaranteed to be contiguous, but it still could +- * be, or it could have some small contiguous chunks. Since the number +- * of VFIO mappings is limited, and VFIO appears to not concatenate +- * adjacent mappings, we have to do this ourselves. +- * +- * So, find contiguous chunks, then map them. +- */ +- va_start = ms->addr_64; +- iova_start = iova_expected = ms->iova; + while (cur_len < len) { +- bool new_contig_area = ms->iova != iova_expected; +- bool last_seg = (len - cur_len) == ms->len; +- bool skip_last = false; +- +- /* only do mappings when current contiguous area ends */ +- if (new_contig_area) { +- if (type == RTE_MEM_EVENT_ALLOC) +- vfio_dma_mem_map(default_vfio_cfg, va_start, +- iova_start, +- iova_expected - iova_start, 1); +- else +- vfio_dma_mem_map(default_vfio_cfg, va_start, +- iova_start, +- iova_expected - iova_start, 0); +- va_start = ms->addr_64; +- iova_start = ms->iova; +- } + /* some memory segments may have invalid IOVA */ + if (ms->iova == RTE_BAD_IOVA) { + RTE_LOG(DEBUG, EAL, "Memory segment at %p has bad IOVA, skipping\n", + ms->addr); +- skip_last = true; ++ goto next; + } +- iova_expected = ms->iova + ms->len; ++ if (type == RTE_MEM_EVENT_ALLOC) ++ vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, ++ ms->iova, ms->len, 1); ++ else ++ vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, ++ ms->iova, ms->len, 0); ++next: + cur_len += ms->len; + ++ms; +- +- /* +- * don't count previous segment, and don't attempt to +- * dereference a potentially invalid pointer. 
+- */ +- if (skip_last && !last_seg) { +- iova_expected = iova_start = ms->iova; +- va_start = ms->addr_64; +- } else if (!skip_last && last_seg) { +- /* this is the last segment and we're not skipping */ +- if (type == RTE_MEM_EVENT_ALLOC) +- vfio_dma_mem_map(default_vfio_cfg, va_start, +- iova_start, +- iova_expected - iova_start, 1); +- else +- vfio_dma_mem_map(default_vfio_cfg, va_start, +- iova_start, +- iova_expected - iova_start, 0); +- } + } + } + +@@ -1391,6 +1363,12 @@ vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n", + errno, strerror(errno)); + return -1; ++ } else if (dma_unmap.size != len) { ++ RTE_LOG(ERR, EAL, " unexpected size %"PRIu64" of DMA " ++ "remapping cleared instead of %"PRIu64"\n", ++ (uint64_t)dma_unmap.size, len); ++ rte_errno = EIO; ++ return -1; + } + } + +@@ -1866,6 +1844,12 @@ container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, + /* we're partially unmapping a previously mapped region, so we + * need to split entry into two. + */ ++ if (!vfio_cfg->vfio_iommu_type->partial_unmap) { ++ RTE_LOG(DEBUG, EAL, "DMA partial unmap unsupported\n"); ++ rte_errno = ENOTSUP; ++ ret = -1; ++ goto out; ++ } + if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) { + RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n"); + rte_errno = ENOMEM; +diff --git a/dpdk/lib/librte_eal/linux/eal_vfio.h b/dpdk/lib/librte_eal/linux/eal_vfio.h +index cb2d35fb12..bba5c7afa5 100644 +--- a/dpdk/lib/librte_eal/linux/eal_vfio.h ++++ b/dpdk/lib/librte_eal/linux/eal_vfio.h +@@ -103,7 +103,7 @@ struct vfio_group { + typedef int (*vfio_dma_func_t)(int); + + /* Custom memory region DMA mapping function prototype. +- * Takes VFIO container fd, virtual address, phisical address, length and ++ * Takes VFIO container fd, virtual address, physical address, length and + * operation type (0 to unmap 1 for map) as a parameters. + * Returns 0 on success, -1 on error. + **/ +@@ -113,6 +113,7 @@ typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova, + struct vfio_iommu_type { + int type_id; + const char *name; ++ bool partial_unmap; + vfio_dma_user_func_t dma_user_map_func; + vfio_dma_func_t dma_map_func; + }; +@@ -132,6 +133,7 @@ int + vfio_has_supported_extensions(int vfio_container_fd); + + int vfio_mp_sync_setup(void); ++void vfio_mp_sync_cleanup(void); + + #define EAL_VFIO_MP "eal_vfio_mp_sync" + +diff --git a/dpdk/lib/librte_eal/linux/eal_vfio_mp_sync.c b/dpdk/lib/librte_eal/linux/eal_vfio_mp_sync.c +index a2accfab3a..d12bbaee64 100644 +--- a/dpdk/lib/librte_eal/linux/eal_vfio_mp_sync.c ++++ b/dpdk/lib/librte_eal/linux/eal_vfio_mp_sync.c +@@ -120,4 +120,12 @@ vfio_mp_sync_setup(void) + return 0; + } + ++void ++vfio_mp_sync_cleanup(void) ++{ ++ if (rte_eal_process_type() != RTE_PROC_PRIMARY) ++ return; ++ ++ rte_mp_action_unregister(EAL_VFIO_MP); ++} + #endif +diff --git a/dpdk/lib/librte_eal/linux/include/rte_os.h b/dpdk/lib/librte_eal/linux/include/rte_os.h +index 218d4fa86e..748c4c7602 100644 +--- a/dpdk/lib/librte_eal/linux/include/rte_os.h ++++ b/dpdk/lib/librte_eal/linux/include/rte_os.h +@@ -5,15 +5,20 @@ + #ifndef _RTE_OS_H_ + #define _RTE_OS_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** +- * This is header should contain any function/macro definition +- * which are not supported natively or named differently in the +- * linux OS. Functions will be added in future releases. 
++ * This header should contain any definition ++ * which is not supported natively or named differently in Linux. + */ + + #include + ++#ifdef CPU_SETSIZE /* may require _GNU_SOURCE */ + typedef cpu_set_t rte_cpuset_t; ++#define RTE_HAS_CPUSET + #define RTE_CPU_AND(dst, src1, src2) CPU_AND(dst, src1, src2) + #define RTE_CPU_OR(dst, src1, src2) CPU_OR(dst, src1, src2) + #define RTE_CPU_FILL(set) do \ +@@ -29,5 +34,10 @@ typedef cpu_set_t rte_cpuset_t; + RTE_CPU_FILL(&tmp); \ + CPU_XOR(dst, &tmp, src); \ + } while (0) ++#endif ++ ++#ifdef __cplusplus ++} ++#endif + + #endif /* _RTE_OS_H_ */ +diff --git a/dpdk/lib/librte_eal/ppc/include/rte_memcpy.h b/dpdk/lib/librte_eal/ppc/include/rte_memcpy.h +index c2a1f356d5..6f388c0234 100644 +--- a/dpdk/lib/librte_eal/ppc/include/rte_memcpy.h ++++ b/dpdk/lib/librte_eal/ppc/include/rte_memcpy.h +@@ -1,6 +1,6 @@ + /* + * SPDX-License-Identifier: BSD-3-Clause +- * Copyright (C) IBM Corporation 2014. ++ * Copyright (C) IBM Corporation 2014,2021 + */ + + #ifndef _RTE_MEMCPY_PPC_64_H_ +@@ -18,11 +18,16 @@ extern "C" { + + #include "generic/rte_memcpy.h" + +-#if (GCC_VERSION >= 90000 && GCC_VERSION < 90400) ++#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 90000) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Warray-bounds" + #endif + ++#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 100000) ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wstringop-overflow" ++#endif ++ + static inline void + rte_mov16(uint8_t *dst, const uint8_t *src) + { +@@ -198,7 +203,11 @@ rte_memcpy_func(void *dst, const void *src, size_t n) + return ret; + } + +-#if (GCC_VERSION >= 90000 && GCC_VERSION < 90400) ++#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 100000) ++#pragma GCC diagnostic pop ++#endif ++ ++#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 90000) + #pragma GCC diagnostic pop + #endif + +diff --git a/dpdk/lib/librte_eal/unix/eal_file.c b/dpdk/lib/librte_eal/unix/eal_file.c +index 1b26475ba4..ec554e0096 100644 +--- a/dpdk/lib/librte_eal/unix/eal_file.c ++++ b/dpdk/lib/librte_eal/unix/eal_file.c +@@ -4,6 +4,7 @@ + + #include + #include ++#include + #include + + #include +diff --git a/dpdk/lib/librte_eal/unix/eal_unix_memory.c b/dpdk/lib/librte_eal/unix/eal_unix_memory.c +index ec7156df96..68ae93bd6e 100644 +--- a/dpdk/lib/librte_eal/unix/eal_unix_memory.c ++++ b/dpdk/lib/librte_eal/unix/eal_unix_memory.c +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -24,14 +25,14 @@ + + static void * + mem_map(void *requested_addr, size_t size, int prot, int flags, +- int fd, size_t offset) ++ int fd, uint64_t offset) + { + void *virt = mmap(requested_addr, size, prot, flags, fd, offset); + if (virt == MAP_FAILED) { + RTE_LOG(DEBUG, EAL, +- "Cannot mmap(%p, 0x%zx, 0x%x, 0x%x, %d, 0x%zx): %s\n", +- requested_addr, size, prot, flags, fd, offset, +- strerror(errno)); ++ "Cannot mmap(%p, 0x%zx, 0x%x, 0x%x, %d, 0x%"PRIx64"): %s\n", ++ requested_addr, size, prot, flags, fd, offset, ++ strerror(errno)); + rte_errno = errno; + return NULL; + } +@@ -106,7 +107,7 @@ mem_rte_to_sys_prot(int prot) + + void * + rte_mem_map(void *requested_addr, size_t size, int prot, int flags, +- int fd, size_t offset) ++ int fd, uint64_t offset) + { + int sys_flags = 0; + int sys_prot; +diff --git a/dpdk/lib/librte_eal/version.map b/dpdk/lib/librte_eal/version.map +index 354c068f31..fe065a41d9 100644 +--- a/dpdk/lib/librte_eal/version.map ++++ b/dpdk/lib/librte_eal/version.map +@@ -159,7 +159,6 @@ DPDK_21 { + 
rte_service_component_unregister; + rte_service_dump; + rte_service_finalize; +- rte_service_get_by_id; + rte_service_get_by_name; + rte_service_get_count; + rte_service_get_name; +@@ -199,6 +198,7 @@ DPDK_21 { + rte_uuid_is_null; + rte_uuid_parse; + rte_uuid_unparse; ++ rte_version; + rte_vfio_clear_group; + rte_vfio_container_create; + rte_vfio_container_destroy; +diff --git a/dpdk/lib/librte_eal/windows/eal.c b/dpdk/lib/librte_eal/windows/eal.c +index 1e5f6576f0..892c69356d 100644 +--- a/dpdk/lib/librte_eal/windows/eal.c ++++ b/dpdk/lib/librte_eal/windows/eal.c +@@ -149,6 +149,10 @@ eal_parse_args(int argc, char **argv) + return -1; + } + ++ /* eal_log_level_parse() already handled this option */ ++ if (opt == OPT_LOG_LEVEL_NUM) ++ continue; ++ + ret = eal_parse_common_option(opt, optarg, internal_conf); + /* common parser is not happy */ + if (ret < 0) { +@@ -252,6 +256,7 @@ rte_eal_cleanup(void) + struct internal_config *internal_conf = + eal_get_internal_configuration(); + ++ eal_mem_virt2iova_cleanup(); + eal_cleanup_config(internal_conf); + return 0; + } +@@ -264,6 +269,8 @@ rte_eal_init(int argc, char **argv) + const struct rte_config *config = rte_eal_get_configuration(); + struct internal_config *internal_conf = + eal_get_internal_configuration(); ++ bool has_phys_addr; ++ enum rte_iova_mode iova_mode; + int ret; + + rte_eal_log_init(NULL, 0); +@@ -310,18 +317,59 @@ rte_eal_init(int argc, char **argv) + internal_conf->memory = MEMSIZE_IF_NO_HUGE_PAGE; + } + ++ if (rte_eal_intr_init() < 0) { ++ rte_eal_init_alert("Cannot init interrupt-handling thread"); ++ return -1; ++ } ++ ++ if (rte_eal_timer_init() < 0) { ++ rte_eal_init_alert("Cannot init TSC timer"); ++ rte_errno = EFAULT; ++ return -1; ++ } ++ ++ bscan = rte_bus_scan(); ++ if (bscan < 0) { ++ rte_eal_init_alert("Cannot scan the buses"); ++ rte_errno = ENODEV; ++ return -1; ++ } ++ + if (eal_mem_win32api_init() < 0) { + rte_eal_init_alert("Cannot access Win32 memory management"); + rte_errno = ENOTSUP; + return -1; + } + ++ has_phys_addr = true; + if (eal_mem_virt2iova_init() < 0) { + /* Non-fatal error if physical addresses are not required. */ +- RTE_LOG(WARNING, EAL, "Cannot access virt2phys driver, " ++ RTE_LOG(DEBUG, EAL, "Cannot access virt2phys driver, " + "PA will not be available\n"); ++ has_phys_addr = false; + } + ++ iova_mode = internal_conf->iova_mode; ++ if (iova_mode == RTE_IOVA_PA && !has_phys_addr) { ++ rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available"); ++ rte_errno = EINVAL; ++ return -1; ++ } ++ if (iova_mode == RTE_IOVA_DC) { ++ RTE_LOG(DEBUG, EAL, "Specific IOVA mode is not requested, autodetecting\n"); ++ if (has_phys_addr) { ++ RTE_LOG(DEBUG, EAL, "Selecting IOVA mode according to bus requests\n"); ++ iova_mode = rte_bus_get_iommu_class(); ++ if (iova_mode == RTE_IOVA_DC) ++ iova_mode = RTE_IOVA_PA; ++ } else { ++ iova_mode = RTE_IOVA_VA; ++ } ++ } ++ RTE_LOG(DEBUG, EAL, "Selected IOVA mode '%s'\n", ++ iova_mode == RTE_IOVA_PA ? 
"PA" : "VA"); ++ rte_eal_get_configuration()->iova_mode = iova_mode; ++ + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone"); + rte_errno = ENODEV; +@@ -346,27 +394,9 @@ rte_eal_init(int argc, char **argv) + return -1; + } + +- if (rte_eal_intr_init() < 0) { +- rte_eal_init_alert("Cannot init interrupt-handling thread"); +- return -1; +- } +- +- if (rte_eal_timer_init() < 0) { +- rte_eal_init_alert("Cannot init TSC timer"); +- rte_errno = EFAULT; +- return -1; +- } +- + __rte_thread_init(config->main_lcore, + &lcore_config[config->main_lcore].cpuset); + +- bscan = rte_bus_scan(); +- if (bscan < 0) { +- rte_eal_init_alert("Cannot init PCI"); +- rte_errno = ENODEV; +- return -1; +- } +- + RTE_LCORE_FOREACH_WORKER(i) { + + /* +@@ -416,6 +446,7 @@ rte_vfio_container_dma_map(__rte_unused int container_fd, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) + { ++ rte_errno = ENOTSUP; + return -1; + } + +@@ -425,5 +456,6 @@ rte_vfio_container_dma_unmap(__rte_unused int container_fd, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) + { ++ rte_errno = ENOTSUP; + return -1; + } +diff --git a/dpdk/lib/librte_eal/windows/eal_alarm.c b/dpdk/lib/librte_eal/windows/eal_alarm.c +index f5bf88715a..e5dc54efb8 100644 +--- a/dpdk/lib/librte_eal/windows/eal_alarm.c ++++ b/dpdk/lib/librte_eal/windows/eal_alarm.c +@@ -91,6 +91,12 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg) + LARGE_INTEGER deadline; + int ret; + ++ if (cb_fn == NULL) { ++ RTE_LOG(ERR, EAL, "NULL callback\n"); ++ ret = -EINVAL; ++ goto exit; ++ } ++ + /* Calculate deadline ASAP, unit of measure = 100ns. */ + GetSystemTimePreciseAsFileTime(&ft); + deadline.LowPart = ft.dwLowDateTime; +@@ -180,6 +186,12 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) + bool executing; + + removed = 0; ++ ++ if (cb_fn == NULL) { ++ RTE_LOG(ERR, EAL, "NULL callback\n"); ++ return -EINVAL; ++ } ++ + do { + executing = false; + +diff --git a/dpdk/lib/librte_eal/windows/eal_hugepages.c b/dpdk/lib/librte_eal/windows/eal_hugepages.c +index 44dae985e5..83a3d0ffc6 100644 +--- a/dpdk/lib/librte_eal/windows/eal_hugepages.c ++++ b/dpdk/lib/librte_eal/windows/eal_hugepages.c +@@ -1,3 +1,7 @@ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright (c) 2020 Dmitry Kozlyuk ++ */ ++ + #include + #include + #include +diff --git a/dpdk/lib/librte_eal/windows/eal_memalloc.c b/dpdk/lib/librte_eal/windows/eal_memalloc.c +index d8cae3ebc1..690a8d8243 100644 +--- a/dpdk/lib/librte_eal/windows/eal_memalloc.c ++++ b/dpdk/lib/librte_eal/windows/eal_memalloc.c +@@ -18,7 +18,7 @@ eal_memalloc_get_seg_fd(int list_idx, int seg_idx) + RTE_SET_USED(list_idx); + RTE_SET_USED(seg_idx); + EAL_LOG_NOT_IMPLEMENTED(); +- return -1; ++ return -ENOTSUP; + } + + int +@@ -29,7 +29,7 @@ eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset) + RTE_SET_USED(seg_idx); + RTE_SET_USED(offset); + EAL_LOG_NOT_IMPLEMENTED(); +- return -1; ++ return -ENOTSUP; + } + + static int +@@ -100,16 +100,11 @@ alloc_seg(struct rte_memseg *ms, void *requested_addr, int socket_id, + */ + *(volatile int *)addr = *(volatile int *)addr; + +- /* Only try to obtain IOVA if it's available, so that applications +- * that do not need IOVA can use this allocator. 
+- */ +- if (rte_eal_using_phys_addrs()) { +- iova = rte_mem_virt2iova(addr); +- if (iova == RTE_BAD_IOVA) { +- RTE_LOG(DEBUG, EAL, +- "Cannot get IOVA of allocated segment\n"); +- goto error; +- } ++ iova = rte_mem_virt2iova(addr); ++ if (iova == RTE_BAD_IOVA) { ++ RTE_LOG(DEBUG, EAL, ++ "Cannot get IOVA of allocated segment\n"); ++ goto error; + } + + /* Only "Ex" function can handle hugepages. */ +@@ -434,7 +429,7 @@ eal_memalloc_sync_with_primary(void) + { + /* No multi-process support. */ + EAL_LOG_NOT_IMPLEMENTED(); +- return -1; ++ return -ENOTSUP; + } + + int +diff --git a/dpdk/lib/librte_eal/windows/eal_memory.c b/dpdk/lib/librte_eal/windows/eal_memory.c +index 2cf5a5e649..2fd37d9708 100644 +--- a/dpdk/lib/librte_eal/windows/eal_memory.c ++++ b/dpdk/lib/librte_eal/windows/eal_memory.c +@@ -198,6 +198,13 @@ eal_mem_virt2iova_init(void) + return ret; + } + ++void ++eal_mem_virt2iova_cleanup(void) ++{ ++ if (virt2phys_device != INVALID_HANDLE_VALUE) ++ CloseHandle(virt2phys_device); ++} ++ + phys_addr_t + rte_mem_virt2phy(const void *virt) + { +@@ -218,19 +225,17 @@ rte_mem_virt2phy(const void *virt) + return phys.QuadPart; + } + +-/* Windows currently only supports IOVA as PA. */ + rte_iova_t + rte_mem_virt2iova(const void *virt) + { + phys_addr_t phys; + +- if (virt2phys_device == INVALID_HANDLE_VALUE) +- return RTE_BAD_IOVA; ++ if (rte_eal_iova_mode() == RTE_IOVA_VA) ++ return (rte_iova_t)virt; + + phys = rte_mem_virt2phy(virt); + if (phys == RTE_BAD_PHYS_ADDR) + return RTE_BAD_IOVA; +- + return (rte_iova_t)phys; + } + +@@ -508,7 +513,7 @@ eal_mem_set_dump(void *virt, size_t size, bool dump) + + void * + rte_mem_map(void *requested_addr, size_t size, int prot, int flags, +- int fd, size_t offset) ++ int fd, uint64_t offset) + { + HANDLE file_handle = INVALID_HANDLE_VALUE; + HANDLE mapping_handle = INVALID_HANDLE_VALUE; +diff --git a/dpdk/lib/librte_eal/windows/eal_thread.c b/dpdk/lib/librte_eal/windows/eal_thread.c +index 908e726d16..b40498c0d6 100644 +--- a/dpdk/lib/librte_eal/windows/eal_thread.c ++++ b/dpdk/lib/librte_eal/windows/eal_thread.c +@@ -110,6 +110,8 @@ eal_thread_loop(void *arg __rte_unused) + fct_arg = lcore_config[lcore_id].arg; + ret = lcore_config[lcore_id].f(fct_arg); + lcore_config[lcore_id].ret = ret; ++ lcore_config[lcore_id].f = NULL; ++ lcore_config[lcore_id].arg = NULL; + rte_wmb(); + + /* when a service core returns, it should go directly to WAIT +@@ -130,12 +132,17 @@ eal_thread_create(pthread_t *thread) + + th = CreateThread(NULL, 0, + (LPTHREAD_START_ROUTINE)(ULONG_PTR)eal_thread_loop, +- NULL, 0, (LPDWORD)thread); ++ NULL, CREATE_SUSPENDED, (LPDWORD)thread); + if (!th) + return -1; + +- SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS); +- SetThreadPriority(th, THREAD_PRIORITY_TIME_CRITICAL); ++ SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); ++ SetThreadPriority(th, THREAD_PRIORITY_NORMAL); ++ ++ if (ResumeThread(th) == (DWORD)-1) { ++ (void)CloseHandle(th); ++ return -1; ++ } + + return 0; + } +diff --git a/dpdk/lib/librte_eal/windows/eal_windows.h b/dpdk/lib/librte_eal/windows/eal_windows.h +index 478accc1b9..289e8054bf 100644 +--- a/dpdk/lib/librte_eal/windows/eal_windows.h ++++ b/dpdk/lib/librte_eal/windows/eal_windows.h +@@ -63,7 +63,7 @@ unsigned int eal_socket_numa_node(unsigned int socket_id); + * @param arg + * Argument to the called function. + * @return +- * 0 on success, netagive error code on failure. ++ * 0 on success, negative error code on failure. 
+ */ + int eal_intr_thread_schedule(void (*func)(void *arg), void *arg); + +@@ -74,6 +74,11 @@ int eal_intr_thread_schedule(void (*func)(void *arg), void *arg); + */ + int eal_mem_virt2iova_init(void); + ++/** ++ * Cleanup resources used for virtual to physical address translation. ++ */ ++void eal_mem_virt2iova_cleanup(void); ++ + /** + * Locate Win32 memory management routines in system libraries. + * +diff --git a/dpdk/lib/librte_eal/windows/include/dirent.h b/dpdk/lib/librte_eal/windows/include/dirent.h +index 869a598378..b522424403 100644 +--- a/dpdk/lib/librte_eal/windows/include/dirent.h ++++ b/dpdk/lib/librte_eal/windows/include/dirent.h +@@ -440,7 +440,7 @@ opendir(const char *dirname) + * display correctly on console. The problem can be fixed in two ways: + * (1) change the character set of console to 1252 using chcp utility + * and use Lucida Console font, or (2) use _cprintf function when +- * writing to console. The _cprinf() will re-encode ANSI strings to the ++ * writing to console. The _cprintf() will re-encode ANSI strings to the + * console code page so many non-ASCII characters will display correctly. + */ + static struct dirent* +@@ -579,7 +579,7 @@ dirent_mbstowcs_s( + wcstr[n] = 0; + } + +- /* Length of resuting multi-byte string WITH zero ++ /* Length of resulting multi-byte string WITH zero + *terminator + */ + if (pReturnValue) +@@ -658,7 +658,4 @@ dirent_set_errno(int error) + #endif + } + +-#ifdef __cplusplus +-} +-#endif + #endif /*DIRENT_H*/ +diff --git a/dpdk/lib/librte_eal/windows/include/fnmatch.h b/dpdk/lib/librte_eal/windows/include/fnmatch.h +index 142753c356..c6b226bd5d 100644 +--- a/dpdk/lib/librte_eal/windows/include/fnmatch.h ++++ b/dpdk/lib/librte_eal/windows/include/fnmatch.h +@@ -26,14 +26,14 @@ extern "C" { + #define FNM_PREFIX_DIRS 0x20 + + /** +- * This function is used for searhing a given string source ++ * This function is used for searching a given string source + * with the given regular expression pattern. 
+ * + * @param pattern +- * regular expression notation decribing the pattern to match ++ * regular expression notation describing the pattern to match + * + * @param string +- * source string to searcg for the pattern ++ * source string to search for the pattern + * + * @param flag + * containing information about the pattern +diff --git a/dpdk/lib/librte_eal/windows/include/meson.build b/dpdk/lib/librte_eal/windows/include/meson.build +index b3534b025f..5fb1962ac7 100644 +--- a/dpdk/lib/librte_eal/windows/include/meson.build ++++ b/dpdk/lib/librte_eal/windows/include/meson.build +@@ -5,6 +5,5 @@ includes += include_directories('.') + + headers += files( + 'rte_os.h', +- 'rte_virt2phys.h', + 'rte_windows.h', + ) +diff --git a/dpdk/lib/librte_eal/windows/include/pthread.h b/dpdk/lib/librte_eal/windows/include/pthread.h +index fb11a07ce6..27fd2cca52 100644 +--- a/dpdk/lib/librte_eal/windows/include/pthread.h ++++ b/dpdk/lib/librte_eal/windows/include/pthread.h +@@ -35,12 +35,12 @@ typedef CRITICAL_SECTION pthread_mutex_t; + typedef SYNCHRONIZATION_BARRIER pthread_barrier_t; + + #define pthread_barrier_init(barrier, attr, count) \ +- InitializeSynchronizationBarrier(barrier, count, -1) ++ !InitializeSynchronizationBarrier(barrier, count, -1) + #define pthread_barrier_wait(barrier) EnterSynchronizationBarrier(barrier, \ + SYNCHRONIZATION_BARRIER_FLAGS_BLOCK_ONLY) + #define pthread_barrier_destroy(barrier) \ +- DeleteSynchronizationBarrier(barrier) +-#define pthread_cancel(thread) TerminateThread((HANDLE) thread, 0) ++ !DeleteSynchronizationBarrier(barrier) ++#define pthread_cancel(thread) !TerminateThread((HANDLE) thread, 0) + + /* pthread function overrides */ + #define pthread_self() \ +@@ -137,12 +137,18 @@ pthread_create(void *threadid, const void *threadattr, void *threadfunc, + hThread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)threadfunc, + args, 0, (LPDWORD)threadid); + if (hThread) { +- SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS); +- SetThreadPriority(hThread, THREAD_PRIORITY_TIME_CRITICAL); ++ SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); ++ SetThreadPriority(hThread, THREAD_PRIORITY_NORMAL); + } + return ((hThread != NULL) ? 0 : E_FAIL); + } + ++static inline int ++pthread_detach(__rte_unused pthread_t thread) ++{ ++ return 0; ++} ++ + static inline int + pthread_join(__rte_unused pthread_t thread, + __rte_unused void **value_ptr) +diff --git a/dpdk/lib/librte_eal/windows/include/rte_os.h b/dpdk/lib/librte_eal/windows/include/rte_os.h +index 7ef38ff06c..f0512f20a6 100644 +--- a/dpdk/lib/librte_eal/windows/include/rte_os.h ++++ b/dpdk/lib/librte_eal/windows/include/rte_os.h +@@ -6,9 +6,8 @@ + #define _RTE_OS_H_ + + /** +- * This is header should contain any function/macro definition +- * which are not supported natively or named differently in the +- * Windows OS. It must not include Windows-specific headers. ++ * This header should contain any definition ++ * which is not supported natively or named differently in Windows. 
+ */ + + #include +diff --git a/dpdk/lib/librte_eal/windows/include/rte_windows.h b/dpdk/lib/librte_eal/windows/include/rte_windows.h +index b82af34f6d..422c989966 100644 +--- a/dpdk/lib/librte_eal/windows/include/rte_windows.h ++++ b/dpdk/lib/librte_eal/windows/include/rte_windows.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_WINDOWS_H_ + #define _RTE_WINDOWS_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** + * @file Windows-specific facilities + * +@@ -42,4 +46,8 @@ + RTE_FMT_HEAD(__VA_ARGS__,) "\n", GetLastError(), \ + RTE_FMT_TAIL(__VA_ARGS__,))) + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_WINDOWS_H_ */ +diff --git a/dpdk/lib/librte_eal/windows/include/sched.h b/dpdk/lib/librte_eal/windows/include/sched.h +index fbe07f742c..bc31cc8465 100644 +--- a/dpdk/lib/librte_eal/windows/include/sched.h ++++ b/dpdk/lib/librte_eal/windows/include/sched.h +@@ -28,6 +28,7 @@ extern "C" { + typedef struct _rte_cpuset_s { + long long _bits[_NUM_SETS(CPU_SETSIZE)]; + } rte_cpuset_t; ++#define RTE_HAS_CPUSET + + #define CPU_SET(b, s) ((s)->_bits[_WHICH_SET(b)] |= (1LL << _WHICH_BIT(b))) + +@@ -48,7 +49,7 @@ count_cpu(rte_cpuset_t *s) + unsigned int _i; + int count = 0; + +- for (_i = 0; _i < _NUM_SETS(CPU_SETSIZE); _i++) ++ for (_i = 0; _i < CPU_SETSIZE; _i++) + if (CPU_ISSET(_i, s) != 0LL) + count++; + return count; +diff --git a/dpdk/lib/librte_eal/windows/meson.build b/dpdk/lib/librte_eal/windows/meson.build +index 3b2faf29eb..557e3c04a5 100644 +--- a/dpdk/lib/librte_eal/windows/meson.build ++++ b/dpdk/lib/librte_eal/windows/meson.build +@@ -22,3 +22,13 @@ sources += files( + ) + + dpdk_conf.set10('RTE_EAL_NUMA_AWARE_HUGEPAGES', true) ++ ++ext_deps += [ ++ cc.find_library('dbghelp'), ++ cc.find_library('setupapi'), ++ cc.find_library('ws2_32'), ++] ++if is_ms_linker ++ # Contrary to docs, VirtualAlloc2() is exported by mincore.lib. ++ ext_deps += cc.find_library('mincore') ++endif +diff --git a/dpdk/lib/librte_eal/x86/include/rte_atomic.h b/dpdk/lib/librte_eal/x86/include/rte_atomic.h +index 915afd9d27..f2ee1a9ce9 100644 +--- a/dpdk/lib/librte_eal/x86/include/rte_atomic.h ++++ b/dpdk/lib/librte_eal/x86/include/rte_atomic.h +@@ -60,7 +60,7 @@ extern "C" { + * Basic idea is to use lock prefixed add with some dummy memory location + * as the destination. From their experiments 128B(2 cache lines) below + * current stack pointer looks like a good candidate. +- * So below we use that techinque for rte_smp_mb() implementation. ++ * So below we use that technique for rte_smp_mb() implementation. + */ + + static __rte_always_inline void +diff --git a/dpdk/lib/librte_eal/x86/include/rte_memcpy.h b/dpdk/lib/librte_eal/x86/include/rte_memcpy.h +index 79f381dd9b..b678b5c942 100644 +--- a/dpdk/lib/librte_eal/x86/include/rte_memcpy.h ++++ b/dpdk/lib/librte_eal/x86/include/rte_memcpy.h +@@ -45,6 +45,52 @@ extern "C" { + static __rte_always_inline void * + rte_memcpy(void *dst, const void *src, size_t n); + ++/** ++ * Copy bytes from one location to another, ++ * locations should not overlap. ++ * Use with n <= 15. 
++ */ ++static __rte_always_inline void * ++rte_mov15_or_less(void *dst, const void *src, size_t n) ++{ ++ /** ++ * Use the following structs to avoid violating C standard ++ * alignment requirements and to avoid strict aliasing bugs ++ */ ++ struct rte_uint64_alias { ++ uint64_t val; ++ } __rte_packed __rte_may_alias; ++ struct rte_uint32_alias { ++ uint32_t val; ++ } __rte_packed __rte_may_alias; ++ struct rte_uint16_alias { ++ uint16_t val; ++ } __rte_packed __rte_may_alias; ++ ++ void *ret = dst; ++ if (n & 8) { ++ ((struct rte_uint64_alias *)dst)->val = ++ ((const struct rte_uint64_alias *)src)->val; ++ src = (const uint64_t *)src + 1; ++ dst = (uint64_t *)dst + 1; ++ } ++ if (n & 4) { ++ ((struct rte_uint32_alias *)dst)->val = ++ ((const struct rte_uint32_alias *)src)->val; ++ src = (const uint32_t *)src + 1; ++ dst = (uint32_t *)dst + 1; ++ } ++ if (n & 2) { ++ ((struct rte_uint16_alias *)dst)->val = ++ ((const struct rte_uint16_alias *)src)->val; ++ src = (const uint16_t *)src + 1; ++ dst = (uint16_t *)dst + 1; ++ } ++ if (n & 1) ++ *(uint8_t *)dst = *(const uint8_t *)src; ++ return ret; ++} ++ + #if defined __AVX512F__ && defined RTE_MEMCPY_AVX512 + + #define ALIGNMENT_MASK 0x3F +@@ -171,8 +217,6 @@ rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n) + static __rte_always_inline void * + rte_memcpy_generic(void *dst, const void *src, size_t n) + { +- uintptr_t dstu = (uintptr_t)dst; +- uintptr_t srcu = (uintptr_t)src; + void *ret = dst; + size_t dstofss; + size_t bits; +@@ -181,24 +225,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) + * Copy less than 16 bytes + */ + if (n < 16) { +- if (n & 0x01) { +- *(uint8_t *)dstu = *(const uint8_t *)srcu; +- srcu = (uintptr_t)((const uint8_t *)srcu + 1); +- dstu = (uintptr_t)((uint8_t *)dstu + 1); +- } +- if (n & 0x02) { +- *(uint16_t *)dstu = *(const uint16_t *)srcu; +- srcu = (uintptr_t)((const uint16_t *)srcu + 1); +- dstu = (uintptr_t)((uint16_t *)dstu + 1); +- } +- if (n & 0x04) { +- *(uint32_t *)dstu = *(const uint32_t *)srcu; +- srcu = (uintptr_t)((const uint32_t *)srcu + 1); +- dstu = (uintptr_t)((uint32_t *)dstu + 1); +- } +- if (n & 0x08) +- *(uint64_t *)dstu = *(const uint64_t *)srcu; +- return ret; ++ return rte_mov15_or_less(dst, src, n); + } + + /** +@@ -303,8 +330,8 @@ rte_mov16(uint8_t *dst, const uint8_t *src) + { + __m128i xmm0; + +- xmm0 = _mm_loadu_si128((const __m128i *)src); +- _mm_storeu_si128((__m128i *)dst, xmm0); ++ xmm0 = _mm_loadu_si128((const __m128i *)(const void *)src); ++ _mm_storeu_si128((__m128i *)(void *)dst, xmm0); + } + + /** +@@ -316,8 +343,8 @@ rte_mov32(uint8_t *dst, const uint8_t *src) + { + __m256i ymm0; + +- ymm0 = _mm256_loadu_si256((const __m256i *)src); +- _mm256_storeu_si256((__m256i *)dst, ymm0); ++ ymm0 = _mm256_loadu_si256((const __m256i *)(const void *)src); ++ _mm256_storeu_si256((__m256i *)(void *)dst, ymm0); + } + + /** +@@ -354,16 +381,24 @@ rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n) + __m256i ymm0, ymm1, ymm2, ymm3; + + while (n >= 128) { +- ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32)); ++ ymm0 = _mm256_loadu_si256((const __m256i *)(const void *) ++ ((const uint8_t *)src + 0 * 32)); + n -= 128; +- ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32)); +- ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32)); +- ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32)); ++ ymm1 = _mm256_loadu_si256((const __m256i *)(const void *) ++ ((const uint8_t *)src + 
1 * 32)); ++ ymm2 = _mm256_loadu_si256((const __m256i *)(const void *) ++ ((const uint8_t *)src + 2 * 32)); ++ ymm3 = _mm256_loadu_si256((const __m256i *)(const void *) ++ ((const uint8_t *)src + 3 * 32)); + src = (const uint8_t *)src + 128; +- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0); +- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1); +- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2); +- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3); ++ _mm256_storeu_si256((__m256i *)(void *) ++ ((uint8_t *)dst + 0 * 32), ymm0); ++ _mm256_storeu_si256((__m256i *)(void *) ++ ((uint8_t *)dst + 1 * 32), ymm1); ++ _mm256_storeu_si256((__m256i *)(void *) ++ ((uint8_t *)dst + 2 * 32), ymm2); ++ _mm256_storeu_si256((__m256i *)(void *) ++ ((uint8_t *)dst + 3 * 32), ymm3); + dst = (uint8_t *)dst + 128; + } + } +@@ -371,8 +406,6 @@ rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n) + static __rte_always_inline void * + rte_memcpy_generic(void *dst, const void *src, size_t n) + { +- uintptr_t dstu = (uintptr_t)dst; +- uintptr_t srcu = (uintptr_t)src; + void *ret = dst; + size_t dstofss; + size_t bits; +@@ -381,25 +414,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) + * Copy less than 16 bytes + */ + if (n < 16) { +- if (n & 0x01) { +- *(uint8_t *)dstu = *(const uint8_t *)srcu; +- srcu = (uintptr_t)((const uint8_t *)srcu + 1); +- dstu = (uintptr_t)((uint8_t *)dstu + 1); +- } +- if (n & 0x02) { +- *(uint16_t *)dstu = *(const uint16_t *)srcu; +- srcu = (uintptr_t)((const uint16_t *)srcu + 1); +- dstu = (uintptr_t)((uint16_t *)dstu + 1); +- } +- if (n & 0x04) { +- *(uint32_t *)dstu = *(const uint32_t *)srcu; +- srcu = (uintptr_t)((const uint32_t *)srcu + 1); +- dstu = (uintptr_t)((uint32_t *)dstu + 1); +- } +- if (n & 0x08) { +- *(uint64_t *)dstu = *(const uint64_t *)srcu; +- } +- return ret; ++ return rte_mov15_or_less(dst, src, n); + } + + /** +@@ -496,8 +511,8 @@ rte_mov16(uint8_t *dst, const uint8_t *src) + { + __m128i xmm0; + +- xmm0 = _mm_loadu_si128((const __m128i *)(const __m128i *)src); +- _mm_storeu_si128((__m128i *)dst, xmm0); ++ xmm0 = _mm_loadu_si128((const __m128i *)(const void *)src); ++ _mm_storeu_si128((__m128i *)(void *)dst, xmm0); + } + + /** +@@ -581,25 +596,25 @@ rte_mov256(uint8_t *dst, const uint8_t *src) + __extension__ ({ \ + size_t tmp; \ + while (len >= 128 + 16 - offset) { \ +- xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ ++ xmm0 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 0 * 16)); \ + len -= 128; \ +- xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \ +- xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \ +- xmm3 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 3 * 16)); \ +- xmm4 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 4 * 16)); \ +- xmm5 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 5 * 16)); \ +- xmm6 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 6 * 16)); \ +- xmm7 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 7 * 16)); \ +- xmm8 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 8 * 16)); \ ++ xmm1 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 1 * 16)); \ ++ xmm2 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 2 * 16)); \ ++ xmm3 = 
_mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 3 * 16)); \ ++ xmm4 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 4 * 16)); \ ++ xmm5 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 5 * 16)); \ ++ xmm6 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 6 * 16)); \ ++ xmm7 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 7 * 16)); \ ++ xmm8 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 8 * 16)); \ + src = (const uint8_t *)src + 128; \ +- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ +- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ +- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \ +- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \ +- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \ +- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \ +- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \ +- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \ ++ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ ++ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ ++ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \ ++ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \ ++ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \ ++ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \ ++ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \ ++ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \ + dst = (uint8_t *)dst + 128; \ + } \ + tmp = len; \ +@@ -609,13 +624,13 @@ __extension__ ({ + dst = (uint8_t *)dst + tmp; \ + if (len >= 32 + 16 - offset) { \ + while (len >= 32 + 16 - offset) { \ +- xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ ++ xmm0 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 0 * 16)); \ + len -= 32; \ +- xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \ +- xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \ ++ xmm1 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 1 * 16)); \ ++ xmm2 = _mm_loadu_si128((const __m128i *)(const void *)((const uint8_t *)src - offset + 2 * 16)); \ + src = (const uint8_t *)src + 32; \ +- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ +- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ ++ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ ++ _mm_storeu_si128((__m128i *)(void *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ + dst = (uint8_t *)dst + 32; \ + } \ + tmp = len; \ 
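The small-copy helper rte_mov15_or_less() added earlier in this rte_memcpy.h hunk reads and writes through packed, may_alias wrapper structs so the compiler neither assumes natural alignment nor applies strict-aliasing optimizations to the byte-wise copy; the (const void *)/(void *) intermediate casts in the SSE/AVX paths above serve a similar warning-suppression purpose. A reduced sketch of the wrapper-struct trick for a single 4-byte copy is below, assuming a GCC/Clang toolchain for the attribute syntax and using local names rather than the __rte_packed/__rte_may_alias macros.

    #include <stdio.h>
    #include <stdint.h>

    /* Packed + may_alias: the compiler emits an unaligned-safe access and
     * does not assume the pointer really addresses a uint32_t object. */
    struct u32_alias {
        uint32_t val;
    } __attribute__((__packed__, __may_alias__));

    static void copy4(void *dst, const void *src)
    {
        ((struct u32_alias *)dst)->val = ((const struct u32_alias *)src)->val;
    }

    int main(void)
    {
        /* Deliberately misaligned 4-byte destination inside a byte buffer. */
        unsigned char buf[8] = {0};
        uint32_t v = 0xdeadbeef;

        copy4(buf + 1, &v);
        printf("%02x %02x %02x %02x\n", buf[1], buf[2], buf[3], buf[4]);
        return 0;
    }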
+@@ -664,8 +679,6 @@ static __rte_always_inline void * + rte_memcpy_generic(void *dst, const void *src, size_t n) + { + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; +- uintptr_t dstu = (uintptr_t)dst; +- uintptr_t srcu = (uintptr_t)src; + void *ret = dst; + size_t dstofss; + size_t srcofs; +@@ -674,25 +687,7 @@ rte_memcpy_generic(void *dst, const void *src, size_t n) + * Copy less than 16 bytes + */ + if (n < 16) { +- if (n & 0x01) { +- *(uint8_t *)dstu = *(const uint8_t *)srcu; +- srcu = (uintptr_t)((const uint8_t *)srcu + 1); +- dstu = (uintptr_t)((uint8_t *)dstu + 1); +- } +- if (n & 0x02) { +- *(uint16_t *)dstu = *(const uint16_t *)srcu; +- srcu = (uintptr_t)((const uint16_t *)srcu + 1); +- dstu = (uintptr_t)((uint16_t *)dstu + 1); +- } +- if (n & 0x04) { +- *(uint32_t *)dstu = *(const uint32_t *)srcu; +- srcu = (uintptr_t)((const uint32_t *)srcu + 1); +- dstu = (uintptr_t)((uint32_t *)dstu + 1); +- } +- if (n & 0x08) { +- *(uint64_t *)dstu = *(const uint64_t *)srcu; +- } +- return ret; ++ return rte_mov15_or_less(dst, src, n); + } + + /** +@@ -810,27 +805,9 @@ rte_memcpy_aligned(void *dst, const void *src, size_t n) + { + void *ret = dst; + +- /* Copy size <= 16 bytes */ ++ /* Copy size < 16 bytes */ + if (n < 16) { +- if (n & 0x01) { +- *(uint8_t *)dst = *(const uint8_t *)src; +- src = (const uint8_t *)src + 1; +- dst = (uint8_t *)dst + 1; +- } +- if (n & 0x02) { +- *(uint16_t *)dst = *(const uint16_t *)src; +- src = (const uint16_t *)src + 1; +- dst = (uint16_t *)dst + 1; +- } +- if (n & 0x04) { +- *(uint32_t *)dst = *(const uint32_t *)src; +- src = (const uint32_t *)src + 1; +- dst = (uint32_t *)dst + 1; +- } +- if (n & 0x08) +- *(uint64_t *)dst = *(const uint64_t *)src; +- +- return ret; ++ return rte_mov15_or_less(dst, src, n); + } + + /* Copy 16 <= size <= 32 bytes */ +@@ -874,6 +851,8 @@ rte_memcpy(void *dst, const void *src, size_t n) + return rte_memcpy_generic(dst, src, n); + } + ++#undef ALIGNMENT_MASK ++ + #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 100000) + #pragma GCC diagnostic pop + #endif +diff --git a/dpdk/lib/librte_eal/x86/rte_cpuflags.c b/dpdk/lib/librte_eal/x86/rte_cpuflags.c +index a96312ff7f..2fc9e84666 100644 +--- a/dpdk/lib/librte_eal/x86/rte_cpuflags.c ++++ b/dpdk/lib/librte_eal/x86/rte_cpuflags.c +@@ -100,12 +100,12 @@ const struct feature_entry rte_cpu_feature_table[] = { + FEAT_DEF(ENERGY_EFF, 0x00000006, 0, RTE_REG_ECX, 3) + + FEAT_DEF(FSGSBASE, 0x00000007, 0, RTE_REG_EBX, 0) +- FEAT_DEF(BMI1, 0x00000007, 0, RTE_REG_EBX, 2) ++ FEAT_DEF(BMI1, 0x00000007, 0, RTE_REG_EBX, 3) + FEAT_DEF(HLE, 0x00000007, 0, RTE_REG_EBX, 4) + FEAT_DEF(AVX2, 0x00000007, 0, RTE_REG_EBX, 5) +- FEAT_DEF(SMEP, 0x00000007, 0, RTE_REG_EBX, 6) +- FEAT_DEF(BMI2, 0x00000007, 0, RTE_REG_EBX, 7) +- FEAT_DEF(ERMS, 0x00000007, 0, RTE_REG_EBX, 8) ++ FEAT_DEF(SMEP, 0x00000007, 0, RTE_REG_EBX, 7) ++ FEAT_DEF(BMI2, 0x00000007, 0, RTE_REG_EBX, 8) ++ FEAT_DEF(ERMS, 0x00000007, 0, RTE_REG_EBX, 9) + FEAT_DEF(INVPCID, 0x00000007, 0, RTE_REG_EBX, 10) + FEAT_DEF(RTM, 0x00000007, 0, RTE_REG_EBX, 11) + FEAT_DEF(AVX512F, 0x00000007, 0, RTE_REG_EBX, 16) +diff --git a/dpdk/lib/librte_efd/rte_efd.c b/dpdk/lib/librte_efd/rte_efd.c +index 77f46809f8..ae9fb43404 100644 +--- a/dpdk/lib/librte_efd/rte_efd.c ++++ b/dpdk/lib/librte_efd/rte_efd.c +@@ -1165,7 +1165,7 @@ rte_efd_update(struct rte_efd_table * const table, const unsigned int socket_id, + { + uint32_t chunk_id = 0, group_id = 0, bin_id = 0; + uint8_t new_bin_choice = 0; +- struct efd_online_group_entry entry; ++ struct 
efd_online_group_entry entry = {{0}}; + + int status = efd_compute_update(table, socket_id, key, value, + &chunk_id, &group_id, &bin_id, +diff --git a/dpdk/lib/librte_ethdev/rte_dev_info.h b/dpdk/lib/librte_ethdev/rte_dev_info.h +index 7a6b61fdb7..cacb989ced 100644 +--- a/dpdk/lib/librte_ethdev/rte_dev_info.h ++++ b/dpdk/lib/librte_ethdev/rte_dev_info.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_DEV_INFO_H_ + #define _RTE_DEV_INFO_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #include + + /* +@@ -48,4 +52,8 @@ struct rte_eth_dev_module_info { + #define RTE_ETH_MODULE_SFF_8436_LEN 256 + #define RTE_ETH_MODULE_SFF_8436_MAX_LEN 640 + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_DEV_INFO_H_ */ +diff --git a/dpdk/lib/librte_ethdev/rte_ethdev.c b/dpdk/lib/librte_ethdev/rte_ethdev.c +index ecd46ac01f..4b59854c12 100644 +--- a/dpdk/lib/librte_ethdev/rte_ethdev.c ++++ b/dpdk/lib/librte_ethdev/rte_ethdev.c +@@ -254,7 +254,9 @@ rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs_str) + } + + /* Convert bus args to new syntax for use with new API dev_iterate. */ +- if (strcmp(iter->bus->name, "vdev") == 0) { ++ if ((strcmp(iter->bus->name, "vdev") == 0) || ++ (strcmp(iter->bus->name, "fslmc") == 0) || ++ (strcmp(iter->bus->name, "dpaa_bus") == 0)) { + bus_param_key = "name"; + } else if (strcmp(iter->bus->name, "pci") == 0) { + bus_param_key = "addr"; +@@ -716,10 +718,13 @@ rte_eth_dev_owner_delete(const uint64_t owner_id) + rte_spinlock_lock(ð_dev_shared_data->ownership_lock); + + if (eth_is_valid_owner_id(owner_id)) { +- for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) +- if (rte_eth_devices[port_id].data->owner.id == owner_id) +- memset(&rte_eth_devices[port_id].data->owner, 0, ++ for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) { ++ struct rte_eth_dev_data *data = ++ rte_eth_devices[port_id].data; ++ if (data != NULL && data->owner.id == owner_id) ++ memset(&data->owner, 0, + sizeof(struct rte_eth_dev_owner)); ++ } + RTE_ETHDEV_LOG(NOTICE, + "All port owners owned by %016"PRIx64" identifier have removed\n", + owner_id); +@@ -834,6 +839,17 @@ rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id) + return -ENODEV; + } + ++struct rte_eth_dev * ++rte_eth_dev_get_by_name(const char *name) ++{ ++ uint16_t pid; ++ ++ if (rte_eth_dev_get_port_by_name(name, &pid)) ++ return NULL; ++ ++ return &rte_eth_devices[pid]; ++} ++ + static int + eth_err(uint16_t port_id, int ret) + { +@@ -1769,8 +1785,9 @@ rte_eth_dev_stop(uint16_t port_id) + return 0; + } + +- dev->data->dev_started = 0; + ret = (*dev->dev_ops->dev_stop)(dev); ++ if (ret == 0) ++ dev->data->dev_started = 0; + rte_ethdev_trace_stop(port_id, ret); + + return ret; +@@ -1812,6 +1829,18 @@ rte_eth_dev_close(uint16_t port_id) + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + ++ /* ++ * Secondary process needs to close device to release process private ++ * resources. But secondary process should not be obliged to wait ++ * for device stop before closing ethdev. 
++ */ ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY && ++ dev->data->dev_started) { ++ RTE_ETHDEV_LOG(ERR, "Cannot close started device (port %u)\n", ++ port_id); ++ return -EINVAL; ++ } ++ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_close, -ENOTSUP); + *lasterr = (*dev->dev_ops->dev_close)(dev); + if (*lasterr != 0) +@@ -3148,7 +3177,8 @@ rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats, + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); +- ++ if (xstats == NULL && n > 0) ++ return -EINVAL; + dev = &rte_eth_devices[port_id]; + + nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); +@@ -3165,7 +3195,7 @@ rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats, + * xstats struct. + */ + xcount = (*dev->dev_ops->xstats_get)(dev, +- xstats ? xstats + count : NULL, ++ (n > count) ? xstats + count : NULL, + (n > count) ? n - count : 0); + + if (xcount < 0) +@@ -3792,6 +3822,7 @@ rte_eth_dev_rss_reta_update(uint16_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) + { ++ enum rte_eth_rx_mq_mode mq_mode; + struct rte_eth_dev *dev; + int ret; + +@@ -3809,6 +3840,12 @@ rte_eth_dev_rss_reta_update(uint16_t port_id, + if (ret < 0) + return ret; + ++ mq_mode = dev->data->dev_conf.rxmode.mq_mode; ++ if (!(mq_mode & ETH_MQ_RX_RSS_FLAG)) { ++ RTE_ETHDEV_LOG(ERR, "Multi-queue RSS mode isn't enabled.\n"); ++ return -ENOTSUP; ++ } ++ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_update, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->reta_update)(dev, reta_conf, + reta_size)); +@@ -3841,6 +3878,7 @@ rte_eth_dev_rss_hash_update(uint16_t port_id, + { + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info = { .flow_type_rss_offloads = 0, }; ++ enum rte_eth_rx_mq_mode mq_mode; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); +@@ -3860,6 +3898,13 @@ rte_eth_dev_rss_hash_update(uint16_t port_id, + dev_info.flow_type_rss_offloads); + return -EINVAL; + } ++ ++ mq_mode = dev->data->dev_conf.rxmode.mq_mode; ++ if (!(mq_mode & ETH_MQ_RX_RSS_FLAG)) { ++ RTE_ETHDEV_LOG(ERR, "Multi-queue RSS mode isn't enabled.\n"); ++ return -ENOTSUP; ++ } ++ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev, + rss_conf)); +@@ -5258,6 +5303,8 @@ rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info) + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); ++ if (info == NULL) ++ return -EINVAL; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg, -ENOTSUP); +@@ -5282,6 +5329,8 @@ rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info) + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); ++ if (info == NULL) ++ return -EINVAL; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom, -ENOTSUP); +@@ -5294,6 +5343,8 @@ rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info) + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); ++ if (info == NULL) ++ return -EINVAL; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_eeprom, -ENOTSUP); +@@ -5307,6 +5358,8 @@ rte_eth_dev_get_module_info(uint16_t port_id, + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); ++ if (modinfo == NULL) ++ return -EINVAL; + + dev = &rte_eth_devices[port_id]; + 
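The rte_eth_dev_rss_reta_update()/rte_eth_dev_rss_hash_update() hunks above now return -ENOTSUP unless the port was configured with an RSS multi-queue Rx mode. The fragment below sketches the corresponding application-side configuration under the 20.11-era API names; port_id, queue counts and the choice of ETH_RSS_IP are placeholders, and error handling is trimmed, so this is not a complete program.

    #include <rte_ethdev.h>

    static int enable_rss(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
    {
        struct rte_eth_conf conf = {
            .rxmode = { .mq_mode = ETH_MQ_RX_RSS },  /* required by the check above */
            .rx_adv_conf = { .rss_conf = { .rss_hf = ETH_RSS_IP } },
        };

        /* Configure the port with an RSS Rx mode *before* touching the
         * RETA or the hash configuration, otherwise -ENOTSUP is returned. */
        return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
    }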
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_module_info, -ENOTSUP); +@@ -5320,6 +5373,8 @@ rte_eth_dev_get_module_eeprom(uint16_t port_id, + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); ++ if (info == NULL || info->data == NULL || info->length == 0) ++ return -EINVAL; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_module_eeprom, -ENOTSUP); +@@ -5595,6 +5650,8 @@ eth_dev_add_port_queue_stats(struct rte_tel_data *d, uint64_t *q_stats, + { + int q; + struct rte_tel_data *q_data = rte_tel_data_alloc(); ++ if (q_data == NULL) ++ return; + rte_tel_data_start_array(q_data, RTE_TEL_U64_VAL); + for (q = 0; q < RTE_ETHDEV_QUEUE_STAT_CNTRS; q++) + rte_tel_data_add_array_u64(q_data, q_stats[q]); +@@ -5688,6 +5745,7 @@ eth_dev_handle_port_xstats(const char *cmd __rte_unused, + for (i = 0; i < num_xstats; i++) + rte_tel_data_add_dict_u64(d, xstat_names[i].name, + eth_xstats[i].value); ++ free(eth_xstats); + return 0; + } + +diff --git a/dpdk/lib/librte_ethdev/rte_ethdev.h b/dpdk/lib/librte_ethdev/rte_ethdev.h +index f5f8919186..5e8331da1c 100644 +--- a/dpdk/lib/librte_ethdev/rte_ethdev.h ++++ b/dpdk/lib/librte_ethdev/rte_ethdev.h +@@ -74,7 +74,7 @@ + * rte_eth_rx_queue_setup()), it must call rte_eth_dev_stop() first to stop the + * device and then do the reconfiguration before calling rte_eth_dev_start() + * again. The transmit and receive functions should not be invoked when the +- * device is stopped. ++ * device or the queue is stopped. + * + * Please note that some configuration is not stored between calls to + * rte_eth_dev_stop()/rte_eth_dev_start(). The following configuration will +@@ -1502,7 +1502,7 @@ struct rte_eth_rxseg_capa { + * device, etc... + */ + struct rte_eth_dev_info { +- struct rte_device *device; /** Generic device information */ ++ struct rte_device *device; /**< Generic device information */ + const char *driver_name; /**< Device Driver name. */ + unsigned int if_index; /**< Index to bound host interface, or 0 if none. + Use if_indextoname() to translate into an interface name. */ +@@ -1516,8 +1516,8 @@ struct rte_eth_dev_info { + uint16_t max_rx_queues; /**< Maximum number of RX queues. */ + uint16_t max_tx_queues; /**< Maximum number of TX queues. */ + uint32_t max_mac_addrs; /**< Maximum number of MAC addresses. */ +- uint32_t max_hash_mac_addrs; + /** Maximum number of hash MAC addresses for MTA and UTA. */ ++ uint32_t max_hash_mac_addrs; + uint16_t max_vfs; /**< Maximum number of VFs. */ + uint16_t max_vmdq_pools; /**< Maximum number of VMDq pools. */ + struct rte_eth_rxseg_capa rx_seg_capa; /**< Segmentation capability.*/ +@@ -1561,6 +1561,13 @@ struct rte_eth_dev_info { + void *reserved_ptrs[2]; /**< Reserved for future fields */ + }; + ++/** ++ * RX/TX queue states ++ */ ++#define RTE_ETH_QUEUE_STATE_STOPPED 0 ++#define RTE_ETH_QUEUE_STATE_STARTED 1 ++#define RTE_ETH_QUEUE_STATE_HAIRPIN 2 ++ + /** + * Ethernet device RX queue information structure. + * Used to retrieve information about configured queue. +@@ -2155,7 +2162,7 @@ rte_eth_dev_is_removed(uint16_t port_id); + * The configuration structure also contains the pointer to the array + * of the receiving buffer segment descriptions, see rx_seg and rx_nseg + * fields, this extended configuration might be used by split offloads like +- * RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT. If mp_pool is not NULL, ++ * RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT. If mb_pool is not NULL, + * the extended configuration fields must be set to NULL and zero. 
+ * @param mb_pool + * The pointer to the memory pool from which to allocate *rte_mbuf* network +@@ -2321,7 +2328,7 @@ int rte_eth_tx_hairpin_queue_setup + * - (-EINVAL) if bad parameter. + * - (-ENODEV) if *port_id* invalid + * - (-ENOTSUP) if hardware doesn't support. +- * - Others detailed errors from PMD drivers. ++ * - Others detailed errors from PMDs. + */ + __rte_experimental + int rte_eth_hairpin_get_peer_ports(uint16_t port_id, uint16_t *peer_ports, +@@ -2347,7 +2354,7 @@ int rte_eth_hairpin_get_peer_ports(uint16_t port_id, uint16_t *peer_ports, + * - (-ENODEV) if Tx port ID is invalid. + * - (-EBUSY) if device is not in started state. + * - (-ENOTSUP) if hardware doesn't support. +- * - Others detailed errors from PMD drivers. ++ * - Others detailed errors from PMDs. + */ + __rte_experimental + int rte_eth_hairpin_bind(uint16_t tx_port, uint16_t rx_port); +@@ -2374,7 +2381,7 @@ int rte_eth_hairpin_bind(uint16_t tx_port, uint16_t rx_port); + * - (-ENODEV) if Tx port ID is invalid. + * - (-EBUSY) if device is in stopped state. + * - (-ENOTSUP) if hardware doesn't support. +- * - Others detailed errors from PMD drivers. ++ * - Others detailed errors from PMDs. + */ + __rte_experimental + int rte_eth_hairpin_unbind(uint16_t tx_port, uint16_t rx_port); +@@ -2417,7 +2424,7 @@ int rte_eth_dev_is_valid_port(uint16_t port_id); + * - -ENODEV: if *port_id* is invalid. + * - -EINVAL: The queue_id out of range or belong to hairpin. + * - -EIO: if device is removed. +- * - -ENOTSUP: The function not supported in PMD driver. ++ * - -ENOTSUP: The function not supported in PMD. + */ + int rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id); + +@@ -2435,7 +2442,7 @@ int rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id); + * - -ENODEV: if *port_id* is invalid. + * - -EINVAL: The queue_id out of range or belong to hairpin. + * - -EIO: if device is removed. +- * - -ENOTSUP: The function not supported in PMD driver. ++ * - -ENOTSUP: The function not supported in PMD. + */ + int rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id); + +@@ -2454,7 +2461,7 @@ int rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id); + * - -ENODEV: if *port_id* is invalid. + * - -EINVAL: The queue_id out of range or belong to hairpin. + * - -EIO: if device is removed. +- * - -ENOTSUP: The function not supported in PMD driver. ++ * - -ENOTSUP: The function not supported in PMD. + */ + int rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id); + +@@ -2472,7 +2479,7 @@ int rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id); + * - -ENODEV: if *port_id* is invalid. + * - -EINVAL: The queue_id out of range or belong to hairpin. + * - -EIO: if device is removed. +- * - -ENOTSUP: The function not supported in PMD driver. ++ * - -ENOTSUP: The function not supported in PMD. + */ + int rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id); + +@@ -2674,7 +2681,7 @@ int rte_eth_allmulticast_get(uint16_t port_id); + * Link information written back. + * @return + * - (0) if successful. +- * - (-ENOTSUP) if the function is not supported in PMD driver. ++ * - (-ENOTSUP) if the function is not supported in PMD. + * - (-ENODEV) if *port_id* invalid. + */ + int rte_eth_link_get(uint16_t port_id, struct rte_eth_link *link); +@@ -2689,7 +2696,7 @@ int rte_eth_link_get(uint16_t port_id, struct rte_eth_link *link); + * Link information written back. + * @return + * - (0) if successful. 
+- * - (-ENOTSUP) if the function is not supported in PMD driver. ++ * - (-ENOTSUP) if the function is not supported in PMD. + * - (-ENODEV) if *port_id* invalid. + */ + int rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *link); +@@ -2814,9 +2821,13 @@ int rte_eth_xstats_get_names(uint16_t port_id, + * @param xstats + * A pointer to a table of structure of type *rte_eth_xstat* + * to be filled with device statistics ids and values. +- * This parameter can be set to NULL if n is 0. ++ * This parameter can be set to NULL if and only if n is 0. + * @param n + * The size of the xstats array (number of elements). ++ * If lower than the required number of elements, the function returns ++ * the required number of elements. ++ * If equal to zero, the xstats must be NULL, the function returns the ++ * required number of elements. + * @return + * - A positive value lower or equal to n: success. The return value + * is the number of entries filled in the stats table. +@@ -2835,21 +2846,23 @@ int rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats, + * @param port_id + * The port identifier of the Ethernet device. + * @param xstats_names +- * An rte_eth_xstat_name array of at least *size* elements to +- * be filled. If set to NULL, the function returns the required number +- * of elements. +- * @param ids +- * IDs array given by app to retrieve specific statistics ++ * Array to be filled in with names of requested device statistics. ++ * Must not be NULL if @p ids are specified (not NULL). + * @param size +- * The size of the xstats_names array (number of elements). ++ * Number of elements in @p xstats_names array (if not NULL) and in ++ * @p ids array (if not NULL). Must be 0 if both array pointers are NULL. ++ * @param ids ++ * IDs array given by app to retrieve specific statistics. May be NULL to ++ * retrieve names of all available statistics or, if @p xstats_names is ++ * NULL as well, just the number of available statistics. + * @return + * - A positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. +- * - A positive value higher than size: error, the given statistics table ++ * - A positive value higher than size: success. The given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. +- * - A negative value on error (invalid port id). ++ * - A negative value on error. + */ + int + rte_eth_xstats_get_names_by_id(uint16_t port_id, +@@ -2862,22 +2875,23 @@ rte_eth_xstats_get_names_by_id(uint16_t port_id, + * @param port_id + * The port identifier of the Ethernet device. + * @param ids +- * A pointer to an ids array passed by application. This tells which +- * statistics values function should retrieve. This parameter +- * can be set to NULL if size is 0. In this case function will retrieve +- * all available statistics. ++ * IDs array given by app to retrieve specific statistics. May be NULL to ++ * retrieve all available statistics or, if @p values is NULL as well, ++ * just the number of available statistics. + * @param values +- * A pointer to a table to be filled with device statistics values. ++ * Array to be filled in with requested device statistics. ++ * Must not be NULL if ids are specified (not NULL). + * @param size +- * The size of the ids array (number of elements). 
++ * Number of elements in @p values array (if not NULL) and in @p ids ++ * array (if not NULL). Must be 0 if both array pointers are NULL. + * @return + * - A positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. +- * - A positive value higher than size: error, the given statistics table ++ * - A positive value higher than size: success: The given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. +- * - A negative value on error (invalid port id). ++ * - A negative value on error. + */ + int rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids, + uint64_t *values, unsigned int size); +@@ -4348,6 +4362,7 @@ int rte_eth_tx_burst_mode_get(uint16_t port_id, uint16_t queue_id, + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. ++ * - (-EINVAL) if bad parameter. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +@@ -4378,6 +4393,7 @@ int rte_eth_dev_get_eeprom_length(uint16_t port_id); + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. ++ * - (-EINVAL) if bad parameter. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. +@@ -4395,6 +4411,7 @@ int rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info); + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. ++ * - (-EINVAL) if bad parameter. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. +@@ -4414,6 +4431,7 @@ int rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info); + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. ++ * - (-EINVAL) if bad parameter. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. +@@ -4438,6 +4456,7 @@ rte_eth_dev_get_module_info(uint16_t port_id, + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. ++ * - (-EINVAL) if bad parameter. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +diff --git a/dpdk/lib/librte_ethdev/rte_ethdev_driver.h b/dpdk/lib/librte_ethdev/rte_ethdev_driver.h +index 0eacfd8425..6764ffb854 100644 +--- a/dpdk/lib/librte_ethdev/rte_ethdev_driver.h ++++ b/dpdk/lib/librte_ethdev/rte_ethdev_driver.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_ETHDEV_DRIVER_H_ + #define _RTE_ETHDEV_DRIVER_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** + * @file + * +@@ -73,7 +77,7 @@ typedef int (*eth_is_removed_t)(struct rte_eth_dev *dev); + * @retval -E_RTE_SECONDARY + * Function was called from a secondary process instance and not supported. + * @retval -ETIMEDOUT +- * Attempt to enable promiscuos mode failed because of timeout. ++ * Attempt to enable promiscuous mode failed because of timeout. + * @retval -EAGAIN + * Failed to enable promiscuous mode. + */ +@@ -98,7 +102,7 @@ typedef int (*eth_promiscuous_enable_t)(struct rte_eth_dev *dev); + * @retval -E_RTE_SECONDARY + * Function was called from a secondary process instance and not supported. 
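The reworked rte_eth_xstats_get() documentation above spells out the sizing contract: calling with xstats == NULL and n == 0 returns the number of available statistics, and a positive return larger than n means the table was too small. The usual way to consume that contract is the query-then-allocate pattern sketched below; this is a fragment assuming an already initialized port_id, with error handling abbreviated.

    #include <errno.h>
    #include <stdlib.h>
    #include <rte_ethdev.h>

    static int dump_xstats(uint16_t port_id)
    {
        int n = rte_eth_xstats_get(port_id, NULL, 0);   /* query count only */
        struct rte_eth_xstat *xstats;
        int ret;

        if (n <= 0)
            return n;
        xstats = calloc(n, sizeof(*xstats));
        if (xstats == NULL)
            return -ENOMEM;
        ret = rte_eth_xstats_get(port_id, xstats, n);   /* fill the table */
        /* ... use xstats[0 .. ret-1].id / .value here ... */
        free(xstats);
        return ret;
    }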
+ * @retval -ETIMEDOUT +- * Attempt to disable promiscuos mode failed because of timeout. ++ * Attempt to disable promiscuous mode failed because of timeout. + * @retval -EAGAIN + * Failed to disable promiscuous mode. + */ +@@ -919,13 +923,6 @@ struct eth_dev_ops { + /**< Disconnect the hairpin queues of a pair from each other. */ + }; + +-/** +- * RX/TX queue states +- */ +-#define RTE_ETH_QUEUE_STATE_STOPPED 0 +-#define RTE_ETH_QUEUE_STATE_STARTED 1 +-#define RTE_ETH_QUEUE_STATE_HAIRPIN 2 +- + /** + * @internal + * Check if the selected Rx queue is hairpin queue. +@@ -1326,6 +1323,24 @@ rte_eth_hairpin_queue_peer_bind(uint16_t cur_port, uint16_t cur_queue, + struct rte_hairpin_peer_info *peer_info, + uint32_t direction); + ++/** ++ * @internal ++ * Get rte_eth_dev from device name. The device name should be specified ++ * as below: ++ * - PCIe address (Domain:Bus:Device.Function), for example 0000:2:00.0 ++ * - SoC device name, for example fsl-gmac0 ++ * - vdev dpdk name, for example net_[pcap0|null0|tap0] ++ * ++ * @param name ++ * PCI address or name of the device ++ * @return ++ * - rte_eth_dev if successful ++ * - NULL on failure ++ */ ++__rte_internal ++struct rte_eth_dev* ++rte_eth_dev_get_by_name(const char *name); ++ + /** + * @internal + * Reset the current queue state and configuration to disconnect (unbind) it +diff --git a/dpdk/lib/librte_ethdev/rte_ethdev_pci.h b/dpdk/lib/librte_ethdev/rte_ethdev_pci.h +index bf715896ae..d015697e21 100644 +--- a/dpdk/lib/librte_ethdev/rte_ethdev_pci.h ++++ b/dpdk/lib/librte_ethdev/rte_ethdev_pci.h +@@ -6,6 +6,10 @@ + #ifndef _RTE_ETHDEV_PCI_H_ + #define _RTE_ETHDEV_PCI_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #include + #include + #include +@@ -46,8 +50,9 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, + } + + static inline int +-eth_dev_pci_specific_init(struct rte_eth_dev *eth_dev, void *bus_device) { +- struct rte_pci_device *pci_dev = bus_device; ++eth_dev_pci_specific_init(struct rte_eth_dev *eth_dev, void *bus_device) ++{ ++ struct rte_pci_device *pci_dev = (struct rte_pci_device *)bus_device; + + if (!pci_dev) + return -ENODEV; +@@ -151,6 +156,16 @@ rte_eth_dev_pci_generic_remove(struct rte_pci_device *pci_dev, + if (!eth_dev) + return 0; + ++ /* ++ * In secondary process, a released eth device can be found by its name ++ * in shared memory. ++ * If the state of the eth device is RTE_ETH_DEV_UNUSED, it means the ++ * eth device has been released. 
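rte_eth_dev_get_by_name(), declared above as an internal (driver-facing) helper, resolves a device name such as a PCI address or a vdev name to its rte_eth_dev. A minimal sketch of how a PMD-side caller might use it is given below; the wrapper function, its error handling and the assumption that the caller runs in driver context are illustrative only.

    #include <errno.h>
    #include <rte_ethdev_driver.h>

    /* Driver-internal context: look an ethdev up by name and report its
     * port id. Returns a negative value when the name is unknown. */
    static int port_id_from_name(const char *name, uint16_t *port_id)
    {
        struct rte_eth_dev *dev = rte_eth_dev_get_by_name(name);

        if (dev == NULL)
            return -ENODEV;
        *port_id = dev->data->port_id;
        return 0;
    }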
++ */ ++ if (rte_eal_process_type() == RTE_PROC_SECONDARY && ++ eth_dev->state == RTE_ETH_DEV_UNUSED) ++ return 0; ++ + if (dev_uninit) { + ret = dev_uninit(eth_dev); + if (ret) +@@ -161,4 +176,8 @@ rte_eth_dev_pci_generic_remove(struct rte_pci_device *pci_dev, + return 0; + } + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_ETHDEV_PCI_H_ */ +diff --git a/dpdk/lib/librte_ethdev/rte_ethdev_vdev.h b/dpdk/lib/librte_ethdev/rte_ethdev_vdev.h +index 4ba3f28964..96a710d96a 100644 +--- a/dpdk/lib/librte_ethdev/rte_ethdev_vdev.h ++++ b/dpdk/lib/librte_ethdev/rte_ethdev_vdev.h +@@ -6,6 +6,10 @@ + #ifndef _RTE_ETHDEV_VDEV_H_ + #define _RTE_ETHDEV_VDEV_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #include + #include + #include +@@ -52,4 +56,8 @@ rte_eth_vdev_allocate(struct rte_vdev_device *dev, size_t private_data_size) + return eth_dev; + } + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_ETHDEV_VDEV_H_ */ +diff --git a/dpdk/lib/librte_ethdev/rte_flow.h b/dpdk/lib/librte_ethdev/rte_flow.h +index 0977a78270..8f9e82ce2d 100644 +--- a/dpdk/lib/librte_ethdev/rte_flow.h ++++ b/dpdk/lib/librte_ethdev/rte_flow.h +@@ -1385,14 +1385,14 @@ static const struct rte_flow_item_meta rte_flow_item_meta_mask = { + */ + struct rte_flow_item_gtp_psc { + uint8_t pdu_type; /**< PDU type. */ +- uint8_t qfi; /**< QoS flow identifier. */ ++ uint8_t qfi; /**< PPP, RQI, QoS flow identifier. */ + }; + + /** Default mask for RTE_FLOW_ITEM_TYPE_GTP_PSC. */ + #ifndef __cplusplus + static const struct rte_flow_item_gtp_psc + rte_flow_item_gtp_psc_mask = { +- .qfi = 0x3f, ++ .qfi = 0xff, + }; + #endif + +@@ -2819,7 +2819,7 @@ rte_flow_dynf_metadata_set(struct rte_mbuf *m, uint32_t v) + *RTE_FLOW_DYNF_METADATA(m) = v; + } + +-/* ++/** + * Definition of a single action. + * + * A list of actions is terminated by a END action. +@@ -3088,7 +3088,7 @@ enum rte_flow_conv_op { + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * @return +- * 0 on success, a nagative value otherwise. ++ * 0 on success, a negative value otherwise. + */ + __rte_experimental + int +@@ -3449,7 +3449,7 @@ rte_flow_conv(enum rte_flow_conv_op op, + * + * RTE_ETH_EVENT_FLOW_AGED event will be triggered when at least one new aged + * out flow was detected after the last call to rte_flow_get_aged_flows. +- * This function can be called to get the aged flows usynchronously from the ++ * This function can be called to get the aged flows asynchronously from the + * event callback or synchronously regardless the event. + * This is not safe to call rte_flow_get_aged_flows function with other flow + * functions from multiple threads simultaneously. +diff --git a/dpdk/lib/librte_ethdev/version.map b/dpdk/lib/librte_ethdev/version.map +index d3f5410806..d3f9baf23c 100644 +--- a/dpdk/lib/librte_ethdev/version.map ++++ b/dpdk/lib/librte_ethdev/version.map +@@ -251,6 +251,7 @@ INTERNAL { + rte_eth_dev_callback_process; + rte_eth_dev_create; + rte_eth_dev_destroy; ++ rte_eth_dev_get_by_name; + rte_eth_dev_is_rx_hairpin_queue; + rte_eth_dev_is_tx_hairpin_queue; + rte_eth_dev_probing_finish; +diff --git a/dpdk/lib/librte_eventdev/rte_event_crypto_adapter.c b/dpdk/lib/librte_eventdev/rte_event_crypto_adapter.c +index b04312128a..ca58945a84 100644 +--- a/dpdk/lib/librte_eventdev/rte_event_crypto_adapter.c ++++ b/dpdk/lib/librte_eventdev/rte_event_crypto_adapter.c +@@ -861,6 +861,7 @@ rte_event_crypto_adapter_queue_pair_add(uint8_t id, + * b. 
OP_NEW mode -> SW Dequeue + */ + if ((cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW && ++ !(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD) && + adapter->mode == RTE_EVENT_CRYPTO_ADAPTER_OP_FORWARD) || + (!(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW) && + !(cap & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_FWD) && +diff --git a/dpdk/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/dpdk/lib/librte_eventdev/rte_event_eth_rx_adapter.c +index 3c73046551..89213297db 100644 +--- a/dpdk/lib/librte_eventdev/rte_event_eth_rx_adapter.c ++++ b/dpdk/lib/librte_eventdev/rte_event_eth_rx_adapter.c +@@ -1284,12 +1284,11 @@ rxa_create_intr_thread(struct rte_event_eth_rx_adapter *rx_adapter) + + err = rte_ctrl_thread_create(&rx_adapter->rx_intr_thread, thread_name, + NULL, rxa_intr_thread, rx_adapter); +- if (!err) { +- rte_thread_setname(rx_adapter->rx_intr_thread, thread_name); ++ if (!err) + return 0; +- } + + RTE_EDEV_LOG_ERR("Failed to create interrupt thread err = %d\n", err); ++ rte_free(rx_adapter->epoll_events); + error: + rte_ring_free(rx_adapter->intr_ring); + rx_adapter->intr_ring = NULL; +@@ -2240,6 +2239,11 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint16_t eth_dev_id, + rx_adapter->eth_rx_poll = rx_poll; + rx_adapter->wrr_sched = rx_wrr; + rx_adapter->wrr_len = nb_wrr; ++ /* ++ * reset next poll start position (wrr_pos) to avoid buffer ++ * overrun when wrr_len is reduced in case of queue delete ++ */ ++ rx_adapter->wrr_pos = 0; + rx_adapter->num_intr_vec += num_intr_vec; + + if (dev_info->nb_dev_queues == 0) { +diff --git a/dpdk/lib/librte_eventdev/rte_event_eth_tx_adapter.c b/dpdk/lib/librte_eventdev/rte_event_eth_tx_adapter.c +index cc27bbca32..f2b7d36657 100644 +--- a/dpdk/lib/librte_eventdev/rte_event_eth_tx_adapter.c ++++ b/dpdk/lib/librte_eventdev/rte_event_eth_tx_adapter.c +@@ -224,7 +224,7 @@ txa_service_data_init(void) + if (txa_service_data_array == NULL) { + txa_service_data_array = + txa_memzone_array_get("txa_service_data_array", +- sizeof(int), ++ sizeof(*txa_service_data_array), + RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE); + if (txa_service_data_array == NULL) + return -ENOMEM; +@@ -286,7 +286,6 @@ txa_service_conf_cb(uint8_t __rte_unused id, uint8_t dev_id, + return ret; + } + +- pc->event_port_cfg = 0; + ret = rte_event_port_setup(dev_id, port_id, pc); + if (ret) { + RTE_EDEV_LOG_ERR("failed to setup event port %u\n", +@@ -761,10 +760,8 @@ txa_service_queue_add(uint8_t id, + + rte_spinlock_lock(&txa->tx_lock); + +- if (txa_service_is_queue_added(txa, eth_dev, tx_queue_id)) { +- rte_spinlock_unlock(&txa->tx_lock); +- return 0; +- } ++ if (txa_service_is_queue_added(txa, eth_dev, tx_queue_id)) ++ goto ret_unlock; + + ret = txa_service_queue_array_alloc(txa, eth_dev->data->port_id); + if (ret) +@@ -776,6 +773,8 @@ txa_service_queue_add(uint8_t id, + + tdi = &txa->txa_ethdev[eth_dev->data->port_id]; + tqi = txa_service_queue(txa, eth_dev->data->port_id, tx_queue_id); ++ if (tqi == NULL) ++ goto err_unlock; + + txa_retry = &tqi->txa_retry; + txa_retry->id = txa->id; +@@ -791,6 +790,10 @@ txa_service_queue_add(uint8_t id, + tdi->nb_queues++; + txa->nb_queues++; + ++ret_unlock: ++ rte_spinlock_unlock(&txa->tx_lock); ++ return 0; ++ + err_unlock: + if (txa->nb_queues == 0) { + txa_service_queue_array_free(txa, +@@ -799,7 +802,7 @@ txa_service_queue_add(uint8_t id, + } + + rte_spinlock_unlock(&txa->tx_lock); +- return 0; ++ return -1; + } + + static int +@@ -819,7 +822,7 @@ txa_service_queue_del(uint8_t id, + uint16_t i, q, nb_queues; + int ret 
= 0; + +- nb_queues = txa->nb_queues; ++ nb_queues = txa->txa_ethdev[port_id].nb_queues; + if (nb_queues == 0) + return 0; + +@@ -842,9 +845,10 @@ txa_service_queue_del(uint8_t id, + + txa = txa_service_id_to_data(id); + ++ rte_spinlock_lock(&txa->tx_lock); + tqi = txa_service_queue(txa, port_id, tx_queue_id); + if (tqi == NULL || !tqi->added) +- return 0; ++ goto ret_unlock; + + tb = tqi->tx_buf; + tqi->added = 0; +@@ -854,6 +858,9 @@ txa_service_queue_del(uint8_t id, + txa->txa_ethdev[port_id].nb_queues--; + + txa_service_queue_array_free(txa, port_id); ++ ++ret_unlock: ++ rte_spinlock_unlock(&txa->tx_lock); + return 0; + } + +diff --git a/dpdk/lib/librte_eventdev/rte_event_ring.h b/dpdk/lib/librte_eventdev/rte_event_ring.h +index c0861b0ec2..0b9aefb000 100644 +--- a/dpdk/lib/librte_eventdev/rte_event_ring.h ++++ b/dpdk/lib/librte_eventdev/rte_event_ring.h +@@ -14,6 +14,10 @@ + #ifndef _RTE_EVENT_RING_ + #define _RTE_EVENT_RING_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #include + + #include +@@ -266,4 +270,9 @@ rte_event_ring_get_capacity(const struct rte_event_ring *r) + { + return rte_ring_get_capacity(&r->r); + } ++ ++#ifdef __cplusplus ++} ++#endif ++ + #endif +diff --git a/dpdk/lib/librte_eventdev/rte_event_timer_adapter.c b/dpdk/lib/librte_eventdev/rte_event_timer_adapter.c +index 4c5e49ea3b..64b0f7ed0f 100644 +--- a/dpdk/lib/librte_eventdev/rte_event_timer_adapter.c ++++ b/dpdk/lib/librte_eventdev/rte_event_timer_adapter.c +@@ -493,7 +493,7 @@ event_buffer_flush(struct event_buffer *bufp, uint8_t dev_id, uint8_t port_id, + + RTE_ASSERT(head_idx < EVENT_BUFFER_SZ && tail_idx < EVENT_BUFFER_SZ); + +- /* Determine the largest contigous run we can attempt to enqueue to the ++ /* Determine the largest contiguous run we can attempt to enqueue to the + * event device. 
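The Tx adapter hunks above rework txa_service_queue_add()/txa_service_queue_del() so that every exit path releases txa->tx_lock exactly once, funnelling early returns through ret_unlock/err_unlock labels. The standalone sketch below shows the same single-unlock-path idiom with a plain pthread mutex; the function name and the "already added" condition are invented for the example.

    #include <stdio.h>
    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int added;

    static int queue_add(int want_failure)
    {
        int ret = 0;

        pthread_mutex_lock(&lock);
        if (added)                 /* nothing to do, but still unlock once */
            goto out;
        if (want_failure) {        /* error path shares the same unlock */
            ret = -1;
            goto out;
        }
        added = 1;
    out:
        pthread_mutex_unlock(&lock);
        return ret;
    }

    int main(void)
    {
        printf("first add:  %d\n", queue_add(0));
        printf("second add: %d\n", queue_add(0));
        printf("forced err: %d\n", queue_add(1));
        return 0;
    }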
+ */ + if (head_idx > tail_idx) +diff --git a/dpdk/lib/librte_eventdev/rte_event_timer_adapter.h b/dpdk/lib/librte_eventdev/rte_event_timer_adapter.h +index d2ebcb0909..6919b1de90 100644 +--- a/dpdk/lib/librte_eventdev/rte_event_timer_adapter.h ++++ b/dpdk/lib/librte_eventdev/rte_event_timer_adapter.h +@@ -665,4 +665,8 @@ rte_event_timer_cancel_burst(const struct rte_event_timer_adapter *adapter, + return adapter->cancel_burst(adapter, evtims, nb_evtims); + } + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* __RTE_EVENT_TIMER_ADAPTER_H__ */ +diff --git a/dpdk/lib/librte_eventdev/rte_eventdev.h b/dpdk/lib/librte_eventdev/rte_eventdev.h +index ce1fc2ce0f..bec8f3c0c9 100644 +--- a/dpdk/lib/librte_eventdev/rte_eventdev.h ++++ b/dpdk/lib/librte_eventdev/rte_eventdev.h +@@ -1380,7 +1380,7 @@ __rte_event_enqueue_burst(uint8_t dev_id, uint8_t port_id, + return 0; + } + #endif +- rte_eventdev_trace_enq_burst(dev_id, port_id, ev, nb_events, fn); ++ rte_eventdev_trace_enq_burst(dev_id, port_id, ev, nb_events, (void *)fn); + /* + * Allow zero cost non burst mode routine invocation if application + * requests nb_events as const one +diff --git a/dpdk/lib/librte_eventdev/rte_eventdev_pmd.h b/dpdk/lib/librte_eventdev/rte_eventdev_pmd.h +index 7eb9a77393..31d11f6f02 100644 +--- a/dpdk/lib/librte_eventdev/rte_eventdev_pmd.h ++++ b/dpdk/lib/librte_eventdev/rte_eventdev_pmd.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_EVENTDEV_PMD_H_ + #define _RTE_EVENTDEV_PMD_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** @file + * RTE Event PMD APIs + * +@@ -147,7 +151,7 @@ rte_event_pmd_is_valid_dev(uint8_t dev_id) + + /** + * Definitions of all functions exported by a driver through the +- * the generic structure of type *event_dev_ops* supplied in the ++ * generic structure of type *event_dev_ops* supplied in the + * *rte_eventdev* structure associated with a device. + */ + +@@ -1142,4 +1146,8 @@ rte_event_pmd_release(struct rte_eventdev *eventdev); + } + #endif + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_EVENTDEV_PMD_H_ */ +diff --git a/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h b/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h +index 5f238bf496..e3b5e6e86e 100644 +--- a/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h ++++ b/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_EVENTDEV_PMD_PCI_H_ + #define _RTE_EVENTDEV_PMD_PCI_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** @file + * RTE Eventdev PCI PMD APIs + * +diff --git a/dpdk/lib/librte_eventdev/rte_eventdev_pmd_vdev.h b/dpdk/lib/librte_eventdev/rte_eventdev_pmd_vdev.h +index 8c64a06743..ff79d82530 100644 +--- a/dpdk/lib/librte_eventdev/rte_eventdev_pmd_vdev.h ++++ b/dpdk/lib/librte_eventdev/rte_eventdev_pmd_vdev.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_EVENTDEV_PMD_VDEV_H_ + #define _RTE_EVENTDEV_PMD_VDEV_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** @file + * RTE Eventdev VDEV PMD APIs + * +diff --git a/dpdk/lib/librte_fib/meson.build b/dpdk/lib/librte_fib/meson.build +index 18eadcc56c..2438a2791c 100644 +--- a/dpdk/lib/librte_fib/meson.build ++++ b/dpdk/lib/librte_fib/meson.build +@@ -8,7 +8,7 @@ deps += ['rib'] + + # compile AVX512 version if: + # we are building 64-bit binary AND binutils can generate proper code +-if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok.returncode() == 0 ++if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok + # compile AVX512 version if either: + # a. we have AVX512F supported in minimum instruction set baseline + # b. 
it's not minimum instruction set, but supported by compiler +diff --git a/dpdk/lib/librte_fib/rte_fib.c b/dpdk/lib/librte_fib/rte_fib.c +index b354d4bfd0..07e45e272a 100644 +--- a/dpdk/lib/librte_fib/rte_fib.c ++++ b/dpdk/lib/librte_fib/rte_fib.c +@@ -40,10 +40,10 @@ EAL_REGISTER_TAILQ(rte_fib_tailq) + struct rte_fib { + char name[RTE_FIB_NAMESIZE]; + enum rte_fib_type type; /**< Type of FIB struct */ +- struct rte_rib *rib; /**< RIB helper datastruct */ ++ struct rte_rib *rib; /**< RIB helper datastructure */ + void *dp; /**< pointer to the dataplane struct*/ +- rte_fib_lookup_fn_t lookup; /**< fib lookup function */ +- rte_fib_modify_fn_t modify; /**< modify fib datastruct */ ++ rte_fib_lookup_fn_t lookup; /**< FIB lookup function */ ++ rte_fib_modify_fn_t modify; /**< modify FIB datastructure */ + uint64_t def_nh; + }; + +diff --git a/dpdk/lib/librte_fib/rte_fib.h b/dpdk/lib/librte_fib/rte_fib.h +index acad20963c..88b238596f 100644 +--- a/dpdk/lib/librte_fib/rte_fib.h ++++ b/dpdk/lib/librte_fib/rte_fib.h +@@ -197,7 +197,7 @@ rte_fib_lookup_bulk(struct rte_fib *fib, uint32_t *ips, + * FIB object handle + * @return + * Pointer on the dataplane struct on success +- * NULL othervise ++ * NULL otherwise + */ + __rte_experimental + void * +@@ -210,7 +210,7 @@ rte_fib_get_dp(struct rte_fib *fib); + * FIB object handle + * @return + * Pointer on the RIB on success +- * NULL othervise ++ * NULL otherwise + */ + __rte_experimental + struct rte_rib * +diff --git a/dpdk/lib/librte_fib/rte_fib6.c b/dpdk/lib/librte_fib/rte_fib6.c +index 44cc0c954d..68d2138ea8 100644 +--- a/dpdk/lib/librte_fib/rte_fib6.c ++++ b/dpdk/lib/librte_fib/rte_fib6.c +@@ -40,10 +40,10 @@ EAL_REGISTER_TAILQ(rte_fib6_tailq) + struct rte_fib6 { + char name[FIB6_NAMESIZE]; + enum rte_fib6_type type; /**< Type of FIB struct */ +- struct rte_rib6 *rib; /**< RIB helper datastruct */ ++ struct rte_rib6 *rib; /**< RIB helper datastructure */ + void *dp; /**< pointer to the dataplane struct*/ +- rte_fib6_lookup_fn_t lookup; /**< fib lookup function */ +- rte_fib6_modify_fn_t modify; /**< modify fib datastruct */ ++ rte_fib6_lookup_fn_t lookup; /**< FIB lookup function */ ++ rte_fib6_modify_fn_t modify; /**< modify FIB datastructure */ + uint64_t def_nh; + }; + +diff --git a/dpdk/lib/librte_fib/rte_fib6.h b/dpdk/lib/librte_fib/rte_fib6.h +index 0e193b8e7b..7d0c2022a1 100644 +--- a/dpdk/lib/librte_fib/rte_fib6.h ++++ b/dpdk/lib/librte_fib/rte_fib6.h +@@ -192,7 +192,7 @@ rte_fib6_lookup_bulk(struct rte_fib6 *fib, + * FIB6 object handle + * @return + * Pointer on the dataplane struct on success +- * NULL othervise ++ * NULL otherwise + */ + __rte_experimental + void * +@@ -205,7 +205,7 @@ rte_fib6_get_dp(struct rte_fib6 *fib); + * FIB object handle + * @return + * Pointer on the RIB6 on success +- * NULL othervise ++ * NULL otherwise + */ + __rte_experimental + struct rte_rib6 * +diff --git a/dpdk/lib/librte_flow_classify/rte_flow_classify.c b/dpdk/lib/librte_flow_classify/rte_flow_classify.c +index 639b0051f5..d5bcb35e1d 100644 +--- a/dpdk/lib/librte_flow_classify/rte_flow_classify.c ++++ b/dpdk/lib/librte_flow_classify/rte_flow_classify.c +@@ -579,12 +579,12 @@ rte_flow_classify_table_entry_delete(struct rte_flow_classifier *cls, + &rule->u.key.key_del, + &rule->key_found, + &rule->entry); +- ++ if (ret == 0) ++ free(rule); + return ret; + } + } + } +- free(rule); + return ret; + } + +diff --git a/dpdk/lib/librte_graph/graph_stats.c b/dpdk/lib/librte_graph/graph_stats.c +index 125e08d732..aa70929dc3 100644 +--- 
a/dpdk/lib/librte_graph/graph_stats.c ++++ b/dpdk/lib/librte_graph/graph_stats.c +@@ -119,8 +119,8 @@ stats_mem_init(struct cluster *cluster, + cluster_node_size = RTE_ALIGN(cluster_node_size, RTE_CACHE_LINE_SIZE); + + stats = realloc(NULL, sz); +- memset(stats, 0, sz); + if (stats) { ++ memset(stats, 0, sz); + stats->fn = fn; + stats->cluster_node_size = cluster_node_size; + stats->max_nodes = 0; +@@ -165,6 +165,7 @@ stats_mem_populate(struct rte_graph_cluster_stats **stats_in, + stats = realloc(stats, stats->sz + stats->cluster_node_size); + if (stats == NULL) + SET_ERR_JMP(ENOMEM, err, "Realloc failed"); ++ *stats_in = NULL; + + /* Clear the new struct cluster_node area */ + cluster = RTE_PTR_ADD(stats, stats->sz), +@@ -174,7 +175,7 @@ stats_mem_populate(struct rte_graph_cluster_stats **stats_in, + cluster->stat.hz = rte_get_timer_hz(); + node = graph_node_id_to_ptr(graph, id); + if (node == NULL) +- SET_ERR_JMP(ENOENT, err, "Failed to find node %s in graph %s", ++ SET_ERR_JMP(ENOENT, free, "Failed to find node %s in graph %s", + graph_node->node->name, graph->name); + cluster->nodes[cluster->nb_nodes++] = node; + +@@ -183,6 +184,8 @@ stats_mem_populate(struct rte_graph_cluster_stats **stats_in, + *stats_in = stats; + + return 0; ++free: ++ free(stats); + err: + return -rte_errno; + } +diff --git a/dpdk/lib/librte_graph/rte_graph_worker.h b/dpdk/lib/librte_graph/rte_graph_worker.h +index eef77f732a..0c0b9c095a 100644 +--- a/dpdk/lib/librte_graph/rte_graph_worker.h ++++ b/dpdk/lib/librte_graph/rte_graph_worker.h +@@ -155,7 +155,7 @@ rte_graph_walk(struct rte_graph *graph) + * +-----+ <= cir_start + mask + */ + while (likely(head != graph->tail)) { +- node = RTE_PTR_ADD(graph, cir_start[(int32_t)head++]); ++ node = (struct rte_node *)RTE_PTR_ADD(graph, cir_start[(int32_t)head++]); + RTE_ASSERT(node->fence == RTE_GRAPH_FENCE); + objs = node->objs; + rte_prefetch0(objs); +diff --git a/dpdk/lib/librte_gro/rte_gro.c b/dpdk/lib/librte_gro/rte_gro.c +index 8ca4da67e9..7a788523ad 100644 +--- a/dpdk/lib/librte_gro/rte_gro.c ++++ b/dpdk/lib/librte_gro/rte_gro.c +@@ -33,6 +33,7 @@ static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = { + + #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ + ((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP) && \ ++ ((ptype & RTE_PTYPE_L4_FRAG) != RTE_PTYPE_L4_FRAG) && \ + (RTE_ETH_IS_TUNNEL_PKT(ptype) == 0)) + + #define IS_IPV4_UDP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ +@@ -41,6 +42,7 @@ static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = { + + #define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ + ((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \ ++ ((ptype & RTE_PTYPE_L4_FRAG) != RTE_PTYPE_L4_FRAG) && \ + ((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \ + RTE_PTYPE_TUNNEL_VXLAN) && \ + ((ptype & RTE_PTYPE_INNER_L4_TCP) == \ +diff --git a/dpdk/lib/librte_hash/rte_thash.h b/dpdk/lib/librte_hash/rte_thash.h +index 061efa2ae1..c9f1e2c392 100644 +--- a/dpdk/lib/librte_hash/rte_thash.h ++++ b/dpdk/lib/librte_hash/rte_thash.h +@@ -8,20 +8,16 @@ + /** + * @file + * +- * toeplitz hash functions. +- */ +- +-#ifdef __cplusplus +-extern "C" { +-#endif +- +-/** + * Software implementation of the Toeplitz hash function used by RSS. 
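The graph_stats.c hunk above moves the memset() behind the NULL check, so the buffer returned by realloc() is only cleared once the allocation is known to have succeeded, and later failure paths release the block instead of leaking it. A minimal sketch of that grow-and-check pattern follows; the element type and sizes are placeholders, and freeing the old block on failure is this example's choice rather than a claim about the DPDK code.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int *grow(int *old, size_t old_n, size_t new_n)
    {
        int *p = realloc(old, new_n * sizeof(*p));

        if (p == NULL) {
            free(old);          /* old block is still valid; release it */
            return NULL;
        }
        /* Only touch the memory once we know the allocation succeeded. */
        memset(p + old_n, 0, (new_n - old_n) * sizeof(*p));
        return p;
    }

    int main(void)
    {
        int *v = grow(NULL, 0, 4);

        if (v == NULL)
            return 1;
        v = grow(v, 4, 8);
        printf("grown: %p\n", (void *)v);
        free(v);
        return 0;
    }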
+ * Can be used either for packet distribution on single queue NIC + * or for simulating of RSS computation on specific NIC (for example + * after GRE header decapsulating) + */ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #include + #include + #include +diff --git a/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c b/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c +index e9de335ae2..2e7739d027 100644 +--- a/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c ++++ b/dpdk/lib/librte_ip_frag/rte_ipv4_fragmentation.c +@@ -23,10 +23,10 @@ + #define IPV4_HDR_FO_ALIGN (1 << RTE_IPV4_HDR_FO_SHIFT) + + static inline void __fill_ipv4hdr_frag(struct rte_ipv4_hdr *dst, +- const struct rte_ipv4_hdr *src, uint16_t len, uint16_t fofs, +- uint16_t dofs, uint32_t mf) ++ const struct rte_ipv4_hdr *src, uint16_t header_len, ++ uint16_t len, uint16_t fofs, uint16_t dofs, uint32_t mf) + { +- rte_memcpy(dst, src, sizeof(*dst)); ++ rte_memcpy(dst, src, header_len); + fofs = (uint16_t)(fofs + (dofs >> RTE_IPV4_HDR_FO_SHIFT)); + fofs = (uint16_t)(fofs | mf << RTE_IPV4_HDR_MF_SHIFT); + dst->fragment_offset = rte_cpu_to_be_16(fofs); +@@ -74,7 +74,7 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, + struct rte_ipv4_hdr *in_hdr; + uint32_t out_pkt_pos, in_seg_data_pos; + uint32_t more_in_segs; +- uint16_t fragment_offset, flag_offset, frag_size; ++ uint16_t fragment_offset, flag_offset, frag_size, header_len; + uint16_t frag_bytes_remaining; + + /* +@@ -86,14 +86,22 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, + unlikely(mtu_size < RTE_ETHER_MIN_MTU)) + return -EINVAL; + ++ in_hdr = rte_pktmbuf_mtod(pkt_in, struct rte_ipv4_hdr *); ++ header_len = (in_hdr->version_ihl & RTE_IPV4_HDR_IHL_MASK) * ++ RTE_IPV4_IHL_MULTIPLIER; ++ ++ /* Check IP header length */ ++ if (unlikely(pkt_in->data_len < header_len) || ++ unlikely(mtu_size < header_len)) ++ return -EINVAL; ++ + /* + * Ensure the IP payload length of all fragments is aligned to a + * multiple of 8 bytes as per RFC791 section 2.3. 
+ */ +- frag_size = RTE_ALIGN_FLOOR((mtu_size - sizeof(struct rte_ipv4_hdr)), ++ frag_size = RTE_ALIGN_FLOOR((mtu_size - header_len), + IPV4_HDR_FO_ALIGN); + +- in_hdr = rte_pktmbuf_mtod(pkt_in, struct rte_ipv4_hdr *); + flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset); + + /* If Don't Fragment flag is set */ +@@ -102,11 +110,11 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, + + /* Check that pkts_out is big enough to hold all fragments */ + if (unlikely(frag_size * nb_pkts_out < +- (uint16_t)(pkt_in->pkt_len - sizeof(struct rte_ipv4_hdr)))) ++ (uint16_t)(pkt_in->pkt_len - header_len))) + return -EINVAL; + + in_seg = pkt_in; +- in_seg_data_pos = sizeof(struct rte_ipv4_hdr); ++ in_seg_data_pos = header_len; + out_pkt_pos = 0; + fragment_offset = 0; + +@@ -124,8 +132,8 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, + } + + /* Reserve space for the IP header that will be built later */ +- out_pkt->data_len = sizeof(struct rte_ipv4_hdr); +- out_pkt->pkt_len = sizeof(struct rte_ipv4_hdr); ++ out_pkt->data_len = header_len; ++ out_pkt->pkt_len = header_len; + frag_bytes_remaining = frag_size; + + out_seg_prev = out_pkt; +@@ -176,14 +184,14 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, + + out_hdr = rte_pktmbuf_mtod(out_pkt, struct rte_ipv4_hdr *); + +- __fill_ipv4hdr_frag(out_hdr, in_hdr, ++ __fill_ipv4hdr_frag(out_hdr, in_hdr, header_len, + (uint16_t)out_pkt->pkt_len, + flag_offset, fragment_offset, more_in_segs); + + fragment_offset = (uint16_t)(fragment_offset + +- out_pkt->pkt_len - sizeof(struct rte_ipv4_hdr)); ++ out_pkt->pkt_len - header_len); + +- out_pkt->l3_len = sizeof(struct rte_ipv4_hdr); ++ out_pkt->l3_len = header_len; + + /* Write the fragment to the output list */ + pkts_out[out_pkt_pos] = out_pkt; +diff --git a/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c b/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c +index 69666c8b82..4a89a5f536 100644 +--- a/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c ++++ b/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c +@@ -80,7 +80,7 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp) + + /* + * Process new mbuf with fragment of IPV4 packet. +- * Incoming mbuf should have it's l2_len/l3_len fields setuped correclty. ++ * Incoming mbuf should have it's l2_len/l3_len fields setup correctly. + * @param tbl + * Table where to lookup/add the fragmented packet. + * @param mb +diff --git a/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c +index 5d67336f2d..a512c90955 100644 +--- a/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c ++++ b/dpdk/lib/librte_ip_frag/rte_ipv6_fragmentation.c +@@ -90,7 +90,7 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, + + /* + * Ensure the IP payload length of all fragments (except the +- * the last fragment) are a multiple of 8 bytes per RFC2460. ++ * last fragment) are a multiple of 8 bytes per RFC2460. 
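/*
 * Editorial note (not part of the patch): the rte_ipv4_fragmentation.c hunks
 * above stop assuming a fixed 20-byte IPv4 header and derive the real header
 * length from the IHL field instead, so packets carrying IP options are
 * fragmented correctly. A standalone sketch of that calculation (plain C,
 * no DPDK types; the macro names are illustrative):
 */
#include <stdint.h>

#define IPV4_IHL_MASK       0x0f
#define IPV4_IHL_MULTIPLIER 4       /* IHL counts 32-bit words */

/* First header byte holds the version (high nibble) and IHL (low nibble). */
static inline uint16_t
ipv4_header_len(uint8_t version_ihl)
{
	return (uint16_t)((version_ihl & IPV4_IHL_MASK) * IPV4_IHL_MULTIPLIER);
}

/*
 * Example: version_ihl = 0x46 (IPv4, IHL = 6) gives a 24-byte header; with a
 * 1500-byte MTU that leaves 1476 payload bytes, rounded down to a multiple
 * of 8 for the fragment offset field: 1472 bytes of payload per fragment.
 */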
+ */ + + frag_size = mtu_size - sizeof(struct rte_ipv6_hdr) - +diff --git a/dpdk/lib/librte_ipsec/esp_inb.c b/dpdk/lib/librte_ipsec/esp_inb.c +index 2b1df6a032..846fc0ea71 100644 +--- a/dpdk/lib/librte_ipsec/esp_inb.c ++++ b/dpdk/lib/librte_ipsec/esp_inb.c +@@ -415,7 +415,7 @@ trs_process_check(struct rte_mbuf *mb, struct rte_mbuf **ml, + + /* + * packet checks for tunnel mode: +- * - same as for trasnport mode ++ * - same as for transport mode + * - esp tail next proto contains expected for that SA value + */ + static inline int32_t +@@ -501,7 +501,7 @@ trs_process_step3(struct rte_mbuf *mb) + static inline void + tun_process_step3(struct rte_mbuf *mb, uint64_t txof_msk, uint64_t txof_val) + { +- /* reset mbuf metatdata: L2/L3 len, packet type */ ++ /* reset mbuf metadata: L2/L3 len, packet type */ + mb->packet_type = RTE_PTYPE_UNKNOWN; + mb->tx_offload = (mb->tx_offload & txof_msk) | txof_val; + +diff --git a/dpdk/lib/librte_ipsec/esp_outb.c b/dpdk/lib/librte_ipsec/esp_outb.c +index 1e181cf2ce..0bf3cd6bd4 100644 +--- a/dpdk/lib/librte_ipsec/esp_outb.c ++++ b/dpdk/lib/librte_ipsec/esp_outb.c +@@ -525,7 +525,7 @@ cpu_outb_trs_pkt_prepare(const struct rte_ipsec_session *ss, + + /* + * process outbound packets for SA with ESN support, +- * for algorithms that require SQN.hibits to be implictly included ++ * for algorithms that require SQN.hibits to be implicitly included + * into digest computation. + * In that case we have to move ICV bytes back to their proper place. + */ +diff --git a/dpdk/lib/librte_ipsec/ipsec_sad.c b/dpdk/lib/librte_ipsec/ipsec_sad.c +index 3f9533c80a..531e1e323c 100644 +--- a/dpdk/lib/librte_ipsec/ipsec_sad.c ++++ b/dpdk/lib/librte_ipsec/ipsec_sad.c +@@ -62,7 +62,7 @@ EAL_REGISTER_TAILQ(rte_ipsec_sad_tailq) + * Inserts a rule into an appropriate hash table, + * updates the value for a given SPI in SPI_ONLY hash table + * reflecting presence of more specific rule type in two LSBs. +- * Updates a counter that reflects the number of rules whith the same SPI. ++ * Updates a counter that reflects the number of rules with the same SPI. + */ + static inline int + add_specific(struct rte_ipsec_sad *sad, const void *key, +diff --git a/dpdk/lib/librte_ipsec/rte_ipsec_group.h b/dpdk/lib/librte_ipsec/rte_ipsec_group.h +index ea3bdfad95..530cd92eef 100644 +--- a/dpdk/lib/librte_ipsec/rte_ipsec_group.h ++++ b/dpdk/lib/librte_ipsec/rte_ipsec_group.h +@@ -49,10 +49,10 @@ rte_ipsec_ses_from_crypto(const struct rte_crypto_op *cop) + + if (cop->sess_type == RTE_CRYPTO_OP_SECURITY_SESSION) { + ss = cop->sym[0].sec_session; +- return (void *)(uintptr_t)ss->opaque_data; ++ return (struct rte_ipsec_session *)(uintptr_t)ss->opaque_data; + } else if (cop->sess_type == RTE_CRYPTO_OP_WITH_SESSION) { + cs = cop->sym[0].session; +- return (void *)(uintptr_t)cs->opaque_data; ++ return (struct rte_ipsec_session *)(uintptr_t)cs->opaque_data; + } + return NULL; + } +diff --git a/dpdk/lib/librte_ipsec/rte_ipsec_sad.h b/dpdk/lib/librte_ipsec/rte_ipsec_sad.h +index b65d295831..a3ae57df7e 100644 +--- a/dpdk/lib/librte_ipsec/rte_ipsec_sad.h ++++ b/dpdk/lib/librte_ipsec/rte_ipsec_sad.h +@@ -153,7 +153,7 @@ rte_ipsec_sad_destroy(struct rte_ipsec_sad *sad); + * @param keys + * Array of keys to be looked up in the SAD + * @param sa +- * Pointer assocoated with the keys. ++ * Pointer associated with the keys. 
+ * If the lookup for the given key failed, then corresponding sa + * will be NULL + * @param n +diff --git a/dpdk/lib/librte_ipsec/sa.c b/dpdk/lib/librte_ipsec/sa.c +index e59189d215..f49b3ec15d 100644 +--- a/dpdk/lib/librte_ipsec/sa.c ++++ b/dpdk/lib/librte_ipsec/sa.c +@@ -126,7 +126,7 @@ ipsec_sa_size(uint64_t type, uint32_t *wnd_sz, uint32_t *nb_bucket) + /* + * RFC 4303 recommends 64 as minimum window size. + * there is no point to use ESN mode without SQN window, +- * so make sure we have at least 64 window when ESN is enalbed. ++ * so make sure we have at least 64 window when ESN is enabled. + */ + wsz = ((type & RTE_IPSEC_SATP_ESN_MASK) == + RTE_IPSEC_SATP_ESN_DISABLE) ? +diff --git a/dpdk/lib/librte_ipsec/sa.h b/dpdk/lib/librte_ipsec/sa.h +index 1bffe751f5..b8ce4e9581 100644 +--- a/dpdk/lib/librte_ipsec/sa.h ++++ b/dpdk/lib/librte_ipsec/sa.h +@@ -116,7 +116,7 @@ struct rte_ipsec_sa { + * In case of SA handled by multiple threads *sqn* cacheline + * could be shared by multiple cores. + * To minimise performance impact, we try to locate in a separate +- * place from other frequently accesed data. ++ * place from other frequently accessed data. + */ + union { + uint64_t outb; +diff --git a/dpdk/lib/librte_kni/rte_kni.c b/dpdk/lib/librte_kni/rte_kni.c +index 837d0217d2..17e4487306 100644 +--- a/dpdk/lib/librte_kni/rte_kni.c ++++ b/dpdk/lib/librte_kni/rte_kni.c +@@ -514,6 +514,8 @@ kni_config_promiscusity(uint16_t port_id, uint8_t to_on) + static int + kni_config_allmulticast(uint16_t port_id, uint8_t to_on) + { ++ int ret; ++ + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_LOG(ERR, KNI, "Invalid port id %d\n", port_id); + return -EINVAL; +@@ -523,11 +525,16 @@ kni_config_allmulticast(uint16_t port_id, uint8_t to_on) + port_id, to_on); + + if (to_on) +- rte_eth_allmulticast_enable(port_id); ++ ret = rte_eth_allmulticast_enable(port_id); + else +- rte_eth_allmulticast_disable(port_id); ++ ret = rte_eth_allmulticast_disable(port_id); ++ if (ret != 0) ++ RTE_LOG(ERR, KNI, ++ "Failed to %s allmulticast mode for port %u: %s\n", ++ to_on ? "enable" : "disable", port_id, ++ rte_strerror(-ret)); + +- return 0; ++ return ret; + } + + int +@@ -591,8 +598,11 @@ rte_kni_handle_request(struct rte_kni *kni) + break; + } + +- /* Construct response mbuf and put it back to resp_q */ +- ret = kni_fifo_put(kni->resp_q, (void **)&req, 1); ++ /* if needed, construct response buffer and put it back to resp_q */ ++ if (!req->async) ++ ret = kni_fifo_put(kni->resp_q, (void **)&req, 1); ++ else ++ ret = 1; + if (ret != 1) { + RTE_LOG(ERR, KNI, "Fail to put the muf back to resp_q\n"); + return -1; /* It is an error of can't putting the mbuf back */ +@@ -674,8 +684,9 @@ kni_allocate_mbufs(struct rte_kni *kni) + return; + } + +- allocq_free = (kni->alloc_q->read - kni->alloc_q->write - 1) +- & (MAX_MBUF_BURST_NUM - 1); ++ allocq_free = kni_fifo_free_count(kni->alloc_q); ++ allocq_free = (allocq_free > MAX_MBUF_BURST_NUM) ? 
++ MAX_MBUF_BURST_NUM : allocq_free; + for (i = 0; i < allocq_free; i++) { + pkts[i] = rte_pktmbuf_alloc(kni->pktmbuf_pool); + if (unlikely(pkts[i] == NULL)) { +diff --git a/dpdk/lib/librte_kni/rte_kni_common.h b/dpdk/lib/librte_kni/rte_kni_common.h +index ffb3182731..8d3ee0fa4f 100644 +--- a/dpdk/lib/librte_kni/rte_kni_common.h ++++ b/dpdk/lib/librte_kni/rte_kni_common.h +@@ -6,6 +6,10 @@ + #ifndef _RTE_KNI_COMMON_H_ + #define _RTE_KNI_COMMON_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #ifdef __KERNEL__ + #include + #include +@@ -48,6 +52,7 @@ struct rte_kni_request { + uint8_t promiscusity;/**< 1: promisc mode enable, 0: disable */ + uint8_t allmulti; /**< 1: all-multicast mode enable, 0: disable */ + }; ++ int32_t async : 1; /**< 1: request is asynchronous */ + int32_t result; /**< Result for processing request */ + } __attribute__((__packed__)); + +@@ -135,4 +140,8 @@ struct rte_kni_device_info { + #define RTE_KNI_IOCTL_CREATE _IOWR(0, 2, struct rte_kni_device_info) + #define RTE_KNI_IOCTL_RELEASE _IOWR(0, 3, struct rte_kni_device_info) + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_KNI_COMMON_H_ */ +diff --git a/dpdk/lib/librte_kvargs/rte_kvargs.h b/dpdk/lib/librte_kvargs/rte_kvargs.h +index eff598e08b..5b2e164287 100644 +--- a/dpdk/lib/librte_kvargs/rte_kvargs.h ++++ b/dpdk/lib/librte_kvargs/rte_kvargs.h +@@ -145,7 +145,7 @@ int rte_kvargs_process(const struct rte_kvargs *kvlist, + * The rte_kvargs structure + * @param key_match + * The key that should match, or NULL to count all associations +- ++ * + * @return + * The number of entries + */ +diff --git a/dpdk/lib/librte_lpm/rte_lpm6.c b/dpdk/lib/librte_lpm/rte_lpm6.c +index 37baabb26d..73768fc956 100644 +--- a/dpdk/lib/librte_lpm/rte_lpm6.c ++++ b/dpdk/lib/librte_lpm/rte_lpm6.c +@@ -80,7 +80,7 @@ struct rte_lpm6_rule { + /** Rules tbl entry key. */ + struct rte_lpm6_rule_key { + uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */ +- uint8_t depth; /**< Rule depth. */ ++ uint32_t depth; /**< Rule depth. */ + }; + + /* Header of tbl8 */ +@@ -259,6 +259,8 @@ rte_lpm6_create(const char *name, int socket_id, + lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list); + + RTE_BUILD_BUG_ON(sizeof(struct rte_lpm6_tbl_entry) != sizeof(uint32_t)); ++ RTE_BUILD_BUG_ON(sizeof(struct rte_lpm6_rule_key) % ++ sizeof(uint32_t) != 0); + + /* Check user arguments. 
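/*
 * Editorial note (not part of the patch): widening the rte_lpm6 rule-key
 * depth from uint8_t to uint32_t makes the key size a whole number of 32-bit
 * words, which is what the new RTE_BUILD_BUG_ON in the hunk above asserts at
 * compile time. A plain-C sketch of the same compile-time check using C11
 * static_assert (the struct below is illustrative, not the DPDK definition):
 */
#include <assert.h>
#include <stdint.h>

#define IPV6_ADDR_SIZE 16

struct rule_key {
	uint8_t  ip[IPV6_ADDR_SIZE];
	uint32_t depth;     /* was uint8_t, giving a 17-byte key */
};

/* The key must occupy a whole number of 32-bit words. */
static_assert(sizeof(struct rule_key) % sizeof(uint32_t) == 0,
	      "rule key size must be a multiple of 4 bytes");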
*/ + if ((name == NULL) || (socket_id < -1) || (config == NULL) || +diff --git a/dpdk/lib/librte_mbuf/rte_mbuf.c b/dpdk/lib/librte_mbuf/rte_mbuf.c +index 7d09ee2939..6d3eb73ced 100644 +--- a/dpdk/lib/librte_mbuf/rte_mbuf.c ++++ b/dpdk/lib/librte_mbuf/rte_mbuf.c +@@ -129,10 +129,10 @@ rte_pktmbuf_free_pinned_extmem(void *addr, void *opaque) + + rte_mbuf_ext_refcnt_set(m->shinfo, 1); + m->ol_flags = EXT_ATTACHED_MBUF; +- if (m->next != NULL) { ++ if (m->next != NULL) + m->next = NULL; ++ if (m->nb_segs != 1) + m->nb_segs = 1; +- } + rte_mbuf_raw_free(m); + } + +@@ -680,6 +680,9 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len) + fprintf(f, " pkt_len=%u, ol_flags=%#"PRIx64", nb_segs=%u, port=%u", + m->pkt_len, m->ol_flags, m->nb_segs, m->port); + ++ if (m->ol_flags & (PKT_RX_QINQ | PKT_TX_QINQ)) ++ fprintf(f, ", vlan_tci_outer=%u", m->vlan_tci_outer); ++ + if (m->ol_flags & (PKT_RX_VLAN | PKT_TX_VLAN)) + fprintf(f, ", vlan_tci=%u", m->vlan_tci); + +diff --git a/dpdk/lib/librte_mbuf/rte_mbuf.h b/dpdk/lib/librte_mbuf/rte_mbuf.h +index c4c9ebfaa0..bcd8b743a7 100644 +--- a/dpdk/lib/librte_mbuf/rte_mbuf.h ++++ b/dpdk/lib/librte_mbuf/rte_mbuf.h +@@ -1340,10 +1340,10 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m) + return NULL; + } + +- if (m->next != NULL) { ++ if (m->next != NULL) + m->next = NULL; ++ if (m->nb_segs != 1) + m->nb_segs = 1; +- } + + return m; + +@@ -1357,10 +1357,10 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m) + return NULL; + } + +- if (m->next != NULL) { ++ if (m->next != NULL) + m->next = NULL; ++ if (m->nb_segs != 1) + m->nb_segs = 1; +- } + rte_mbuf_refcnt_set(m, 1); + + return m; +@@ -1450,7 +1450,7 @@ rte_pktmbuf_clone(struct rte_mbuf *md, struct rte_mempool *mp); + * set of mbufs. The private data are is not copied. + * + * @param m +- * The packet mbuf to be copiedd. ++ * The packet mbuf to be copied. + * @param mp + * The mempool from which the "clone" mbufs are allocated. + * @param offset +diff --git a/dpdk/lib/librte_mbuf/rte_mbuf_core.h b/dpdk/lib/librte_mbuf/rte_mbuf_core.h +index 9d1609336a..b23764d102 100644 +--- a/dpdk/lib/librte_mbuf/rte_mbuf_core.h ++++ b/dpdk/lib/librte_mbuf/rte_mbuf_core.h +@@ -8,7 +8,7 @@ + + /** + * @file +- * This file contains definion of RTE mbuf structure itself, ++ * This file contains definition of RTE mbuf structure itself, + * packet offload flags and some related macros. + * For majority of DPDK entities, it is not recommended to include + * this file directly, use include instead. +@@ -496,7 +496,12 @@ struct rte_mbuf { + * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC flag. + */ + uint16_t refcnt; +- uint16_t nb_segs; /**< Number of segments. */ ++ ++ /** ++ * Number of segments. Only valid for the first segment of an mbuf ++ * chain. ++ */ ++ uint16_t nb_segs; + + /** Input port (16 bits to support more than 256 virtual ports). + * The event eth Tx adapter uses this field to specify the output port. +@@ -592,7 +597,11 @@ struct rte_mbuf { + /* second cache line - fields only used in slow path or on TX */ + RTE_MARKER cacheline1 __rte_cache_min_aligned; + +- struct rte_mbuf *next; /**< Next segment of scattered packet. */ ++ /** ++ * Next segment of scattered packet. Must be NULL in the last segment or ++ * in case of non-segmented packet. ++ */ ++ struct rte_mbuf *next; + + /* fields to support TX offloads */ + RTE_STD_C11 +@@ -710,7 +719,7 @@ struct rte_mbuf_ext_shared_info { + * The type to cast the result into. 
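/*
 * Editorial note (not part of the patch): the rte_mbuf hunks above split the
 * combined "if (m->next != NULL)" reset so that 'next' and 'nb_segs' are each
 * written back only when they differ from their single-segment defaults,
 * which avoids needless stores on the hot free path. A generic sketch of the
 * idea (the struct is a stand-in, not struct rte_mbuf):
 */
#include <stddef.h>
#include <stdint.h>

struct seg {
	struct seg *next;   /* must be NULL in the last (or only) segment */
	uint16_t nb_segs;   /* only meaningful in the first segment */
};

static inline void
seg_reset(struct seg *s)
{
	/*
	 * Write each field only if it is dirty; unconditional stores would
	 * dirty the cache line even for already-clean single-segment buffers.
	 */
	if (s->next != NULL)
		s->next = NULL;
	if (s->nb_segs != 1)
		s->nb_segs = 1;
}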
+ */ + #define rte_pktmbuf_mtod_offset(m, t, o) \ +- ((t)((char *)(m)->buf_addr + (m)->data_off + (o))) ++ ((t)(void *)((char *)(m)->buf_addr + (m)->data_off + (o))) + + /** + * A macro that points to the start of the data in the mbuf. +diff --git a/dpdk/lib/librte_mbuf/rte_mbuf_dyn.c b/dpdk/lib/librte_mbuf/rte_mbuf_dyn.c +index 7d5e942bf0..0c463f5818 100644 +--- a/dpdk/lib/librte_mbuf/rte_mbuf_dyn.c ++++ b/dpdk/lib/librte_mbuf/rte_mbuf_dyn.c +@@ -115,8 +115,10 @@ init_shared_mem(void) + } else { + mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME); + } +- if (mz == NULL) ++ if (mz == NULL) { ++ RTE_LOG(ERR, MBUF, "Failed to get mbuf dyn shared memory\n"); + return -1; ++ } + + shm = mz->addr; + +@@ -496,6 +498,10 @@ rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params, + { + int ret; + ++ if (params->flags != 0) { ++ rte_errno = EINVAL; ++ return -1; ++ } + if (req >= RTE_SIZEOF_FIELD(struct rte_mbuf, ol_flags) * CHAR_BIT && + req != UINT_MAX) { + rte_errno = EINVAL; +@@ -525,7 +531,11 @@ void rte_mbuf_dyn_dump(FILE *out) + size_t i; + + rte_mcfg_tailq_write_lock(); +- init_shared_mem(); ++ if (shm == NULL && init_shared_mem() < 0) { ++ rte_mcfg_tailq_write_unlock(); ++ return; ++ } ++ + fprintf(out, "Reserved fields:\n"); + mbuf_dynfield_list = RTE_TAILQ_CAST( + mbuf_dynfield_tailq.head, mbuf_dynfield_list); +diff --git a/dpdk/lib/librte_mempool/rte_mempool.h b/dpdk/lib/librte_mempool/rte_mempool.h +index c551cf733a..9c990a5593 100644 +--- a/dpdk/lib/librte_mempool/rte_mempool.h ++++ b/dpdk/lib/librte_mempool/rte_mempool.h +@@ -112,10 +112,11 @@ struct rte_mempool_objsz { + /* "MP_" */ + #define RTE_MEMPOOL_MZ_FORMAT RTE_MEMPOOL_MZ_PREFIX "%s" + +-#define MEMPOOL_PG_SHIFT_MAX (sizeof(uintptr_t) * CHAR_BIT - 1) ++#define MEMPOOL_PG_SHIFT_MAX \ ++ RTE_DEPRECATED(MEMPOOL_PG_SHIFT_MAX) (sizeof(uintptr_t) * CHAR_BIT - 1) + +-/** Mempool over one chunk of physically continuous memory */ +-#define MEMPOOL_PG_NUM_DEFAULT 1 ++/** Deprecated. 
Mempool over one chunk of physically continuous memory */ ++#define MEMPOOL_PG_NUM_DEFAULT RTE_DEPRECATED(MEMPOOL_PG_NUM_DEFAULT) 1 + + #ifndef RTE_MEMPOOL_ALIGN + /** +diff --git a/dpdk/lib/librte_metrics/rte_metrics_telemetry.h b/dpdk/lib/librte_metrics/rte_metrics_telemetry.h +index 5dbb32ca0c..ce9408ab10 100644 +--- a/dpdk/lib/librte_metrics/rte_metrics_telemetry.h ++++ b/dpdk/lib/librte_metrics/rte_metrics_telemetry.h +@@ -13,6 +13,9 @@ + #ifndef _RTE_METRICS_TELEMETRY_H_ + #define _RTE_METRICS_TELEMETRY_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif + + enum rte_telemetry_stats_type { + PORT_STATS = 0, +@@ -60,4 +63,8 @@ __rte_experimental + int32_t + rte_metrics_tel_extract_data(struct telemetry_encode_param *ep, json_t *data); + ++#ifdef __cplusplus ++} ++#endif ++ + #endif +diff --git a/dpdk/lib/librte_net/rte_ether.h b/dpdk/lib/librte_net/rte_ether.h +index 060b63fc9b..0baceb2c4a 100644 +--- a/dpdk/lib/librte_net/rte_ether.h ++++ b/dpdk/lib/librte_net/rte_ether.h +@@ -358,7 +358,7 @@ static inline int rte_vlan_insert(struct rte_mbuf **m) + return -EINVAL; + + oh = rte_pktmbuf_mtod(*m, struct rte_ether_hdr *); +- nh = (struct rte_ether_hdr *) ++ nh = (struct rte_ether_hdr *)(void *) + rte_pktmbuf_prepend(*m, sizeof(struct rte_vlan_hdr)); + if (nh == NULL) + return -ENOSPC; +diff --git a/dpdk/lib/librte_net/rte_gtp.h b/dpdk/lib/librte_net/rte_gtp.h +index 104384cc53..95f0822a16 100644 +--- a/dpdk/lib/librte_net/rte_gtp.h ++++ b/dpdk/lib/librte_net/rte_gtp.h +@@ -37,9 +37,9 @@ struct rte_gtp_hdr { + /** GTP header length */ + #define RTE_ETHER_GTP_HLEN \ + (sizeof(struct rte_udp_hdr) + sizeof(struct rte_gtp_hdr)) +-/* GTP next protocal type */ +-#define RTE_GTP_TYPE_IPV4 0x40 /**< GTP next protocal type IPv4 */ +-#define RTE_GTP_TYPE_IPV6 0x60 /**< GTP next protocal type IPv6 */ ++/* GTP next protocol type */ ++#define RTE_GTP_TYPE_IPV4 0x40 /**< GTP next protocol type IPv4 */ ++#define RTE_GTP_TYPE_IPV6 0x60 /**< GTP next protocol type IPv6 */ + /* GTP destination port number */ + #define RTE_GTPC_UDP_PORT 2123 /**< GTP-C UDP destination port */ + #define RTE_GTPU_UDP_PORT 2152 /**< GTP-U UDP destination port */ +diff --git a/dpdk/lib/librte_net/rte_ip.h b/dpdk/lib/librte_net/rte_ip.h +index 212ff2c4fd..52ef3c8cc1 100644 +--- a/dpdk/lib/librte_net/rte_ip.h ++++ b/dpdk/lib/librte_net/rte_ip.h +@@ -134,29 +134,18 @@ rte_ipv4_hdr_len(const struct rte_ipv4_hdr *ipv4_hdr) + static inline uint32_t + __rte_raw_cksum(const void *buf, size_t len, uint32_t sum) + { +- /* workaround gcc strict-aliasing warning */ +- uintptr_t ptr = (uintptr_t)buf; ++ /* extend strict-aliasing rules */ + typedef uint16_t __attribute__((__may_alias__)) u16_p; +- const u16_p *u16_buf = (const u16_p *)ptr; +- +- while (len >= (sizeof(*u16_buf) * 4)) { +- sum += u16_buf[0]; +- sum += u16_buf[1]; +- sum += u16_buf[2]; +- sum += u16_buf[3]; +- len -= sizeof(*u16_buf) * 4; +- u16_buf += 4; +- } +- while (len >= sizeof(*u16_buf)) { ++ const u16_p *u16_buf = (const u16_p *)buf; ++ const u16_p *end = u16_buf + len / sizeof(*u16_buf); ++ ++ for (; u16_buf != end; ++u16_buf) + sum += *u16_buf; +- len -= sizeof(*u16_buf); +- u16_buf += 1; +- } + +- /* if length is in odd bytes */ +- if (len == 1) { ++ /* if length is odd, keeping it byte order independent */ ++ if (unlikely(len % 2)) { + uint16_t left = 0; +- *(uint8_t *)&left = *(const uint8_t *)u16_buf; ++ *(unsigned char *)&left = *(const unsigned char *)end; + sum += left; + } + +@@ -384,7 +373,7 @@ rte_ipv4_udptcp_cksum(const struct rte_ipv4_hdr *ipv4_hdr, const 
void *l4_hdr) + */ + struct rte_ipv6_hdr { + rte_be32_t vtc_flow; /**< IP version, traffic class & flow label. */ +- rte_be16_t payload_len; /**< IP packet length - includes header size */ ++ rte_be16_t payload_len; /**< IP payload size, including ext. headers */ + uint8_t proto; /**< Protocol, next header. */ + uint8_t hop_limits; /**< Hop limits. */ + uint8_t src_addr[16]; /**< IP address of source host. */ +@@ -444,15 +433,15 @@ rte_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags) + /** + * Process the IPv6 UDP or TCP checksum. + * +- * The IPv4 header should not contains options. The layer 4 checksum +- * must be set to 0 in the packet by the caller. ++ * The IPv6 header must not be followed by extension headers. The layer 4 ++ * checksum must be set to 0 in the L4 header by the caller. + * + * @param ipv6_hdr + * The pointer to the contiguous IPv6 header. + * @param l4_hdr + * The pointer to the beginning of the L4 header. + * @return +- * The complemented checksum to set in the IP packet. ++ * The complemented checksum to set in the L4 header. + */ + static inline uint16_t + rte_ipv6_udptcp_cksum(const struct rte_ipv6_hdr *ipv6_hdr, const void *l4_hdr) +diff --git a/dpdk/lib/librte_net/rte_net.h b/dpdk/lib/librte_net/rte_net.h +index 434435ffa2..42639bc154 100644 +--- a/dpdk/lib/librte_net/rte_net.h ++++ b/dpdk/lib/librte_net/rte_net.h +@@ -125,11 +125,22 @@ rte_net_intel_cksum_flags_prepare(struct rte_mbuf *m, uint64_t ol_flags) + * Mainly it is required to avoid fragmented headers check if + * no offloads are requested. + */ +- if (!(ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK | PKT_TX_TCP_SEG))) ++ if (!(ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK | PKT_TX_TCP_SEG | ++ PKT_TX_OUTER_IP_CKSUM))) + return 0; + +- if (ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6)) ++ if (ol_flags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IPV6)) { + inner_l3_offset += m->outer_l2_len + m->outer_l3_len; ++ /* ++ * prepare outer IPv4 header checksum by setting it to 0, ++ * in order to be computed by hardware NICs. ++ */ ++ if (ol_flags & PKT_TX_OUTER_IP_CKSUM) { ++ ipv4_hdr = rte_pktmbuf_mtod_offset(m, ++ struct rte_ipv4_hdr *, m->outer_l2_len); ++ ipv4_hdr->hdr_checksum = 0; ++ } ++ } + + /* + * Check if headers are fragmented. +diff --git a/dpdk/lib/librte_node/ethdev_rx_priv.h b/dpdk/lib/librte_node/ethdev_rx_priv.h +index efcd66dd48..76cf77d603 100644 +--- a/dpdk/lib/librte_node/ethdev_rx_priv.h ++++ b/dpdk/lib/librte_node/ethdev_rx_priv.h +@@ -71,7 +71,7 @@ struct ethdev_rx_node_main *ethdev_rx_get_node_data_get(void); + * + * Get the Ethernet Rx node. + * +- * @retrun ++ * @return + * Pointer to the Ethernet Rx node. + */ + struct rte_node_register *ethdev_rx_node_get(void); +diff --git a/dpdk/lib/librte_node/ethdev_tx_priv.h b/dpdk/lib/librte_node/ethdev_tx_priv.h +index 586bff44a7..9ced520bd2 100644 +--- a/dpdk/lib/librte_node/ethdev_tx_priv.h ++++ b/dpdk/lib/librte_node/ethdev_tx_priv.h +@@ -50,7 +50,7 @@ struct ethdev_tx_node_main *ethdev_tx_node_data_get(void); + * + * Get the Ethernet Tx node. + * +- * @retrun ++ * @return + * Pointer to the Ethernet Tx node. + */ + struct rte_node_register *ethdev_tx_node_get(void); +diff --git a/dpdk/lib/librte_node/ip4_rewrite_priv.h b/dpdk/lib/librte_node/ip4_rewrite_priv.h +index 80f0abdc94..036469746b 100644 +--- a/dpdk/lib/librte_node/ip4_rewrite_priv.h ++++ b/dpdk/lib/librte_node/ip4_rewrite_priv.h +@@ -53,7 +53,7 @@ struct ip4_rewrite_node_main { + * + * Get the ipv4 rewrite node. 
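/*
 * Editorial note (not part of the patch): the __rte_raw_cksum() rewrite a few
 * hunks above replaces the unrolled loop with a plain 16-bit walk and handles
 * an odd trailing byte in a byte-order independent way, by placing it in the
 * low-addressed byte of a zeroed 16-bit word. A standalone sketch of the same
 * one's-complement sum (simplified; assumes a 2-byte aligned buffer):
 */
#include <stddef.h>
#include <stdint.h>

static uint16_t
csum16(const void *buf, size_t len)
{
	const uint16_t *p = buf;
	uint32_t sum = 0;

	for (; len > 1; len -= 2)
		sum += *p++;

	if (len == 1) {             /* odd length: pad with an implicit zero */
		uint16_t last = 0;

		*(unsigned char *)&last = *(const unsigned char *)p;
		sum += last;
	}

	/* Fold the carries back into 16 bits (one's-complement addition). */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;
}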
+ * +- * @retrun ++ * @return + * Pointer to the ipv4 rewrite node. + */ + struct rte_node_register *ip4_rewrite_node_get(void); +diff --git a/dpdk/lib/librte_pipeline/rte_swx_pipeline.c b/dpdk/lib/librte_pipeline/rte_swx_pipeline.c +index eaaed7a0a9..4a96b77e28 100644 +--- a/dpdk/lib/librte_pipeline/rte_swx_pipeline.c ++++ b/dpdk/lib/librte_pipeline/rte_swx_pipeline.c +@@ -263,9 +263,11 @@ enum instruction_type { + * dst = src + * dst = HMEF, src = HMEFTI + */ +- INSTR_MOV, /* dst = MEF, src = MEFT */ +- INSTR_MOV_S, /* (dst, src) = (MEF, H) or (dst, src) = (H, MEFT) */ +- INSTR_MOV_I, /* dst = HMEF, src = I */ ++ INSTR_MOV, /* dst = MEF, src = MEFT */ ++ INSTR_MOV_MH, /* dst = MEF, src = H */ ++ INSTR_MOV_HM, /* dst = H, src = MEFT */ ++ INSTR_MOV_HH, /* dst = H, src = H */ ++ INSTR_MOV_I, /* dst = HMEF, src = I */ + + /* dma h.header t.field + * memcpy(h.header, t.field, sizeof(h.header)) +@@ -319,25 +321,31 @@ enum instruction_type { + * dst &= src + * dst = HMEF, src = HMEFTI + */ +- INSTR_ALU_AND, /* dst = MEF, src = MEFT */ +- INSTR_ALU_AND_S, /* (dst, src) = (MEF, H) or (dst, src) = (H, MEFT) */ +- INSTR_ALU_AND_I, /* dst = HMEF, src = I */ ++ INSTR_ALU_AND, /* dst = MEF, src = MEFT */ ++ INSTR_ALU_AND_MH, /* dst = MEF, src = H */ ++ INSTR_ALU_AND_HM, /* dst = H, src = MEFT */ ++ INSTR_ALU_AND_HH, /* dst = H, src = H */ ++ INSTR_ALU_AND_I, /* dst = HMEF, src = I */ + + /* or dst src + * dst |= src + * dst = HMEF, src = HMEFTI + */ +- INSTR_ALU_OR, /* dst = MEF, src = MEFT */ +- INSTR_ALU_OR_S, /* (dst, src) = (MEF, H) or (dst, src) = (H, MEFT) */ +- INSTR_ALU_OR_I, /* dst = HMEF, src = I */ ++ INSTR_ALU_OR, /* dst = MEF, src = MEFT */ ++ INSTR_ALU_OR_MH, /* dst = MEF, src = H */ ++ INSTR_ALU_OR_HM, /* dst = H, src = MEFT */ ++ INSTR_ALU_OR_HH, /* dst = H, src = H */ ++ INSTR_ALU_OR_I, /* dst = HMEF, src = I */ + + /* xor dst src + * dst ^= src + * dst = HMEF, src = HMEFTI + */ +- INSTR_ALU_XOR, /* dst = MEF, src = MEFT */ +- INSTR_ALU_XOR_S, /* (dst, src) = (MEF, H) or (dst, src) = (H, MEFT) */ +- INSTR_ALU_XOR_I, /* dst = HMEF, src = I */ ++ INSTR_ALU_XOR, /* dst = MEF, src = MEFT */ ++ INSTR_ALU_XOR_MH, /* dst = MEF, src = H */ ++ INSTR_ALU_XOR_HM, /* dst = H, src = MEFT */ ++ INSTR_ALU_XOR_HH, /* dst = H, src = H */ ++ INSTR_ALU_XOR_I, /* dst = HMEF, src = I */ + + /* shl dst src + * dst <<= src +@@ -406,41 +414,45 @@ enum instruction_type { + INSTR_JMP_ACTION_MISS, + + /* jmpeq LABEL a b +- * Jump is a is equal to b ++ * Jump if a is equal to b + * a = HMEFT, b = HMEFTI + */ +- INSTR_JMP_EQ, /* (a, b) = (MEFT, MEFT) or (a, b) = (H, H) */ +- INSTR_JMP_EQ_S, /* (a, b) = (MEFT, H) or (a, b) = (H, MEFT) */ +- INSTR_JMP_EQ_I, /* (a, b) = (MEFT, I) or (a, b) = (H, I) */ ++ INSTR_JMP_EQ, /* a = MEFT, b = MEFT */ ++ INSTR_JMP_EQ_MH, /* a = MEFT, b = H */ ++ INSTR_JMP_EQ_HM, /* a = H, b = MEFT */ ++ INSTR_JMP_EQ_HH, /* a = H, b = H */ ++ INSTR_JMP_EQ_I, /* (a, b) = (MEFT, I) or (a, b) = (H, I) */ + + /* jmpneq LABEL a b +- * Jump is a is not equal to b ++ * Jump if a is not equal to b + * a = HMEFT, b = HMEFTI + */ +- INSTR_JMP_NEQ, /* (a, b) = (MEFT, MEFT) or (a, b) = (H, H) */ +- INSTR_JMP_NEQ_S, /* (a, b) = (MEFT, H) or (a, b) = (H, MEFT) */ +- INSTR_JMP_NEQ_I, /* (a, b) = (MEFT, I) or (a, b) = (H, I) */ ++ INSTR_JMP_NEQ, /* a = MEFT, b = MEFT */ ++ INSTR_JMP_NEQ_MH, /* a = MEFT, b = H */ ++ INSTR_JMP_NEQ_HM, /* a = H, b = MEFT */ ++ INSTR_JMP_NEQ_HH, /* a = H, b = H */ ++ INSTR_JMP_NEQ_I, /* (a, b) = (MEFT, I) or (a, b) = (H, I) */ + + /* jmplt LABEL a b + * Jump if a is less than b 
+ * a = HMEFT, b = HMEFTI + */ +- INSTR_JMP_LT, /* a = MEF, b = MEF */ +- INSTR_JMP_LT_MH, /* a = MEF, b = H */ +- INSTR_JMP_LT_HM, /* a = H, b = MEF */ ++ INSTR_JMP_LT, /* a = MEFT, b = MEFT */ ++ INSTR_JMP_LT_MH, /* a = MEFT, b = H */ ++ INSTR_JMP_LT_HM, /* a = H, b = MEFT */ + INSTR_JMP_LT_HH, /* a = H, b = H */ +- INSTR_JMP_LT_MI, /* a = MEF, b = I */ ++ INSTR_JMP_LT_MI, /* a = MEFT, b = I */ + INSTR_JMP_LT_HI, /* a = H, b = I */ + + /* jmpgt LABEL a b + * Jump if a is greater than b + * a = HMEFT, b = HMEFTI + */ +- INSTR_JMP_GT, /* a = MEF, b = MEF */ +- INSTR_JMP_GT_MH, /* a = MEF, b = H */ +- INSTR_JMP_GT_HM, /* a = H, b = MEF */ ++ INSTR_JMP_GT, /* a = MEFT, b = MEFT */ ++ INSTR_JMP_GT_MH, /* a = MEFT, b = H */ ++ INSTR_JMP_GT_HM, /* a = H, b = MEFT */ + INSTR_JMP_GT_HH, /* a = H, b = H */ +- INSTR_JMP_GT_MI, /* a = MEF, b = I */ ++ INSTR_JMP_GT_MI, /* a = MEFT, b = I */ + INSTR_JMP_GT_HI, /* a = H, b = I */ + + /* return +@@ -673,7 +685,7 @@ struct thread { + + #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + +-#define ALU_S(thread, ip, operator) \ ++#define ALU_MH(thread, ip, operator) \ + { \ + uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id]; \ + uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset]; \ +@@ -691,8 +703,6 @@ struct thread { + *dst64_ptr = (dst64 & ~dst64_mask) | (result & dst64_mask); \ + } + +-#define ALU_MH ALU_S +- + #define ALU_HM(thread, ip, operator) \ + { \ + uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id]; \ +@@ -713,6 +723,25 @@ struct thread { + *dst64_ptr = (dst64 & ~dst64_mask) | result; \ + } + ++#define ALU_HM_FAST(thread, ip, operator) \ ++{ \ ++ uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id]; \ ++ uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset]; \ ++ uint64_t dst64 = *dst64_ptr; \ ++ uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits); \ ++ uint64_t dst = dst64 & dst64_mask; \ ++ \ ++ uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id]; \ ++ uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset]; \ ++ uint64_t src64 = *src64_ptr; \ ++ uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->alu.src.n_bits); \ ++ uint64_t src = hton64(src64 & src64_mask) >> (64 - (ip)->alu.dst.n_bits); \ ++ \ ++ uint64_t result = dst operator src; \ ++ \ ++ *dst64_ptr = (dst64 & ~dst64_mask) | result; \ ++} ++ + #define ALU_HH(thread, ip, operator) \ + { \ + uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id]; \ +@@ -732,12 +761,31 @@ struct thread { + *dst64_ptr = (dst64 & ~dst64_mask) | result; \ + } + ++#define ALU_HH_FAST(thread, ip, operator) \ ++{ \ ++ uint8_t *dst_struct = (thread)->structs[(ip)->alu.dst.struct_id]; \ ++ uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->alu.dst.offset]; \ ++ uint64_t dst64 = *dst64_ptr; \ ++ uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->alu.dst.n_bits); \ ++ uint64_t dst = dst64 & dst64_mask; \ ++ \ ++ uint8_t *src_struct = (thread)->structs[(ip)->alu.src.struct_id]; \ ++ uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->alu.src.offset]; \ ++ uint64_t src64 = *src64_ptr; \ ++ uint64_t src = (src64 << (64 - (ip)->alu.src.n_bits)) >> (64 - (ip)->alu.dst.n_bits); \ ++ \ ++ uint64_t result = dst operator src; \ ++ \ ++ *dst64_ptr = (dst64 & ~dst64_mask) | result; \ ++} ++ + #else + +-#define ALU_S ALU + #define ALU_MH ALU + #define ALU_HM ALU ++#define ALU_HM_FAST ALU + #define ALU_HH ALU ++#define ALU_HH_FAST ALU + + #endif + +@@ -800,7 +848,7 @@ struct thread { + + #if RTE_BYTE_ORDER == 
RTE_LITTLE_ENDIAN + +-#define MOV_S(thread, ip) \ ++#define MOV_MH(thread, ip) \ + { \ + uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id]; \ + uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset]; \ +@@ -815,9 +863,44 @@ struct thread { + *dst64_ptr = (dst64 & ~dst64_mask) | (src & dst64_mask); \ + } + ++#define MOV_HM(thread, ip) \ ++{ \ ++ uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id]; \ ++ uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset]; \ ++ uint64_t dst64 = *dst64_ptr; \ ++ uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits); \ ++ \ ++ uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id]; \ ++ uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset]; \ ++ uint64_t src64 = *src64_ptr; \ ++ uint64_t src64_mask = UINT64_MAX >> (64 - (ip)->mov.src.n_bits); \ ++ uint64_t src = src64 & src64_mask; \ ++ \ ++ src = hton64(src) >> (64 - (ip)->mov.dst.n_bits); \ ++ *dst64_ptr = (dst64 & ~dst64_mask) | src; \ ++} ++ ++#define MOV_HH(thread, ip) \ ++{ \ ++ uint8_t *dst_struct = (thread)->structs[(ip)->mov.dst.struct_id]; \ ++ uint64_t *dst64_ptr = (uint64_t *)&dst_struct[(ip)->mov.dst.offset]; \ ++ uint64_t dst64 = *dst64_ptr; \ ++ uint64_t dst64_mask = UINT64_MAX >> (64 - (ip)->mov.dst.n_bits); \ ++ \ ++ uint8_t *src_struct = (thread)->structs[(ip)->mov.src.struct_id]; \ ++ uint64_t *src64_ptr = (uint64_t *)&src_struct[(ip)->mov.src.offset]; \ ++ uint64_t src64 = *src64_ptr; \ ++ \ ++ uint64_t src = src64 << (64 - (ip)->mov.src.n_bits); \ ++ src = src >> (64 - (ip)->mov.dst.n_bits); \ ++ *dst64_ptr = (dst64 & ~dst64_mask) | src; \ ++} ++ + #else + +-#define MOV_S MOV ++#define MOV_MH MOV ++#define MOV_HM MOV ++#define MOV_HH MOV + + #endif + +@@ -852,7 +935,7 @@ struct thread { + + #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + +-#define JMP_CMP_S(thread, ip, operator) \ ++#define JMP_CMP_MH(thread, ip, operator) \ + { \ + uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id]; \ + uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset]; \ +@@ -868,8 +951,6 @@ struct thread { + (thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1); \ + } + +-#define JMP_CMP_MH JMP_CMP_S +- + #define JMP_CMP_HM(thread, ip, operator) \ + { \ + uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id]; \ +@@ -901,12 +982,27 @@ struct thread { + (thread)->ip = (a operator b) ? (ip)->jmp.ip : ((thread)->ip + 1); \ + } + ++#define JMP_CMP_HH_FAST(thread, ip, operator) \ ++{ \ ++ uint8_t *a_struct = (thread)->structs[(ip)->jmp.a.struct_id]; \ ++ uint64_t *a64_ptr = (uint64_t *)&a_struct[(ip)->jmp.a.offset]; \ ++ uint64_t a64 = *a64_ptr; \ ++ uint64_t a = a64 << (64 - (ip)->jmp.a.n_bits); \ ++ \ ++ uint8_t *b_struct = (thread)->structs[(ip)->jmp.b.struct_id]; \ ++ uint64_t *b64_ptr = (uint64_t *)&b_struct[(ip)->jmp.b.offset]; \ ++ uint64_t b64 = *b64_ptr; \ ++ uint64_t b = b64 << (64 - (ip)->jmp.b.n_bits); \ ++ \ ++ (thread)->ip = (a operator b) ? 
(ip)->jmp.ip : ((thread)->ip + 1); \ ++} ++ + #else + +-#define JMP_CMP_S JMP_CMP + #define JMP_CMP_MH JMP_CMP + #define JMP_CMP_HM JMP_CMP + #define JMP_CMP_HH JMP_CMP ++#define JMP_CMP_HH_FAST JMP_CMP + + #endif + +@@ -2280,10 +2376,14 @@ instruction_is_jmp(struct instruction *instr) + case INSTR_JMP_ACTION_HIT: + case INSTR_JMP_ACTION_MISS: + case INSTR_JMP_EQ: +- case INSTR_JMP_EQ_S: ++ case INSTR_JMP_EQ_MH: ++ case INSTR_JMP_EQ_HM: ++ case INSTR_JMP_EQ_HH: + case INSTR_JMP_EQ_I: + case INSTR_JMP_NEQ: +- case INSTR_JMP_NEQ_S: ++ case INSTR_JMP_NEQ_MH: ++ case INSTR_JMP_NEQ_HM: ++ case INSTR_JMP_NEQ_HH: + case INSTR_JMP_NEQ_I: + case INSTR_JMP_LT: + case INSTR_JMP_LT_MH: +@@ -3208,13 +3308,16 @@ instr_mov_translate(struct rte_swx_pipeline *p, + fdst = struct_field_parse(p, NULL, dst, &dst_struct_id); + CHECK(fdst, EINVAL); + +- /* MOV or MOV_S. */ ++ /* MOV, MOV_MH, MOV_HM or MOV_HH. */ + fsrc = struct_field_parse(p, action, src, &src_struct_id); + if (fsrc) { + instr->type = INSTR_MOV; +- if ((dst[0] == 'h' && src[0] != 'h') || +- (dst[0] != 'h' && src[0] == 'h')) +- instr->type = INSTR_MOV_S; ++ if (dst[0] != 'h' && src[0] == 'h') ++ instr->type = INSTR_MOV_MH; ++ if (dst[0] == 'h' && src[0] != 'h') ++ instr->type = INSTR_MOV_HM; ++ if (dst[0] == 'h' && src[0] == 'h') ++ instr->type = INSTR_MOV_HH; + + instr->mov.dst.struct_id = (uint8_t)dst_struct_id; + instr->mov.dst.n_bits = fdst->n_bits; +@@ -3256,15 +3359,45 @@ instr_mov_exec(struct rte_swx_pipeline *p) + } + + static inline void +-instr_mov_s_exec(struct rte_swx_pipeline *p) ++instr_mov_mh_exec(struct rte_swx_pipeline *p) + { + struct thread *t = &p->threads[p->thread_id]; + struct instruction *ip = t->ip; + +- TRACE("[Thread %2u] mov (s)\n", ++ TRACE("[Thread %2u] mov (mh)\n", + p->thread_id); + +- MOV_S(t, ip); ++ MOV_MH(t, ip); ++ ++ /* Thread. */ ++ thread_ip_inc(p); ++} ++ ++static inline void ++instr_mov_hm_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] mov (hm)\n", ++ p->thread_id); ++ ++ MOV_HM(t, ip); ++ ++ /* Thread. */ ++ thread_ip_inc(p); ++} ++ ++static inline void ++instr_mov_hh_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] mov (hh)\n", ++ p->thread_id); ++ ++ MOV_HH(t, ip); + + /* Thread. 
*/ + thread_ip_inc(p); +@@ -3475,9 +3608,9 @@ instr_alu_add_translate(struct rte_swx_pipeline *p, + fsrc = struct_field_parse(p, action, src, &src_struct_id); + if (fsrc) { + instr->type = INSTR_ALU_ADD; +- if (dst[0] == 'h' && src[0] == 'm') ++ if (dst[0] == 'h' && src[0] != 'h') + instr->type = INSTR_ALU_ADD_HM; +- if (dst[0] == 'm' && src[0] == 'h') ++ if (dst[0] != 'h' && src[0] == 'h') + instr->type = INSTR_ALU_ADD_MH; + if (dst[0] == 'h' && src[0] == 'h') + instr->type = INSTR_ALU_ADD_HH; +@@ -3528,9 +3661,9 @@ instr_alu_sub_translate(struct rte_swx_pipeline *p, + fsrc = struct_field_parse(p, action, src, &src_struct_id); + if (fsrc) { + instr->type = INSTR_ALU_SUB; +- if (dst[0] == 'h' && src[0] == 'm') ++ if (dst[0] == 'h' && src[0] != 'h') + instr->type = INSTR_ALU_SUB_HM; +- if (dst[0] == 'm' && src[0] == 'h') ++ if (dst[0] != 'h' && src[0] == 'h') + instr->type = INSTR_ALU_SUB_MH; + if (dst[0] == 'h' && src[0] == 'h') + instr->type = INSTR_ALU_SUB_HH; +@@ -3658,9 +3791,9 @@ instr_alu_shl_translate(struct rte_swx_pipeline *p, + fsrc = struct_field_parse(p, action, src, &src_struct_id); + if (fsrc) { + instr->type = INSTR_ALU_SHL; +- if (dst[0] == 'h' && src[0] == 'm') ++ if (dst[0] == 'h' && src[0] != 'h') + instr->type = INSTR_ALU_SHL_HM; +- if (dst[0] == 'm' && src[0] == 'h') ++ if (dst[0] != 'h' && src[0] == 'h') + instr->type = INSTR_ALU_SHL_MH; + if (dst[0] == 'h' && src[0] == 'h') + instr->type = INSTR_ALU_SHL_HH; +@@ -3711,9 +3844,9 @@ instr_alu_shr_translate(struct rte_swx_pipeline *p, + fsrc = struct_field_parse(p, action, src, &src_struct_id); + if (fsrc) { + instr->type = INSTR_ALU_SHR; +- if (dst[0] == 'h' && src[0] == 'm') ++ if (dst[0] == 'h' && src[0] != 'h') + instr->type = INSTR_ALU_SHR_HM; +- if (dst[0] == 'm' && src[0] == 'h') ++ if (dst[0] != 'h' && src[0] == 'h') + instr->type = INSTR_ALU_SHR_MH; + if (dst[0] == 'h' && src[0] == 'h') + instr->type = INSTR_ALU_SHR_HH; +@@ -3760,13 +3893,16 @@ instr_alu_and_translate(struct rte_swx_pipeline *p, + fdst = struct_field_parse(p, NULL, dst, &dst_struct_id); + CHECK(fdst, EINVAL); + +- /* AND or AND_S. */ ++ /* AND, AND_MH, AND_HM, AND_HH. */ + fsrc = struct_field_parse(p, action, src, &src_struct_id); + if (fsrc) { + instr->type = INSTR_ALU_AND; +- if ((dst[0] == 'h' && src[0] != 'h') || +- (dst[0] != 'h' && src[0] == 'h')) +- instr->type = INSTR_ALU_AND_S; ++ if (dst[0] != 'h' && src[0] == 'h') ++ instr->type = INSTR_ALU_AND_MH; ++ if (dst[0] == 'h' && src[0] != 'h') ++ instr->type = INSTR_ALU_AND_HM; ++ if (dst[0] == 'h' && src[0] == 'h') ++ instr->type = INSTR_ALU_AND_HH; + + instr->alu.dst.struct_id = (uint8_t)dst_struct_id; + instr->alu.dst.n_bits = fdst->n_bits; +@@ -3810,13 +3946,16 @@ instr_alu_or_translate(struct rte_swx_pipeline *p, + fdst = struct_field_parse(p, NULL, dst, &dst_struct_id); + CHECK(fdst, EINVAL); + +- /* OR or OR_S. */ ++ /* OR, OR_MH, OR_HM, OR_HH. 
*/ + fsrc = struct_field_parse(p, action, src, &src_struct_id); + if (fsrc) { + instr->type = INSTR_ALU_OR; +- if ((dst[0] == 'h' && src[0] != 'h') || +- (dst[0] != 'h' && src[0] == 'h')) +- instr->type = INSTR_ALU_OR_S; ++ if (dst[0] != 'h' && src[0] == 'h') ++ instr->type = INSTR_ALU_OR_MH; ++ if (dst[0] == 'h' && src[0] != 'h') ++ instr->type = INSTR_ALU_OR_HM; ++ if (dst[0] == 'h' && src[0] == 'h') ++ instr->type = INSTR_ALU_OR_HH; + + instr->alu.dst.struct_id = (uint8_t)dst_struct_id; + instr->alu.dst.n_bits = fdst->n_bits; +@@ -3860,13 +3999,16 @@ instr_alu_xor_translate(struct rte_swx_pipeline *p, + fdst = struct_field_parse(p, NULL, dst, &dst_struct_id); + CHECK(fdst, EINVAL); + +- /* XOR or XOR_S. */ ++ /* XOR, XOR_MH, XOR_HM, XOR_HH. */ + fsrc = struct_field_parse(p, action, src, &src_struct_id); + if (fsrc) { + instr->type = INSTR_ALU_XOR; +- if ((dst[0] == 'h' && src[0] != 'h') || +- (dst[0] != 'h' && src[0] == 'h')) +- instr->type = INSTR_ALU_XOR_S; ++ if (dst[0] != 'h' && src[0] == 'h') ++ instr->type = INSTR_ALU_XOR_MH; ++ if (dst[0] == 'h' && src[0] != 'h') ++ instr->type = INSTR_ALU_XOR_HM; ++ if (dst[0] == 'h' && src[0] == 'h') ++ instr->type = INSTR_ALU_XOR_HH; + + instr->alu.dst.struct_id = (uint8_t)dst_struct_id; + instr->alu.dst.n_bits = fdst->n_bits; +@@ -4268,15 +4410,45 @@ instr_alu_and_exec(struct rte_swx_pipeline *p) + } + + static inline void +-instr_alu_and_s_exec(struct rte_swx_pipeline *p) ++instr_alu_and_mh_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] and (mh)\n", p->thread_id); ++ ++ /* Structs. */ ++ ALU_MH(t, ip, &); ++ ++ /* Thread. */ ++ thread_ip_inc(p); ++} ++ ++static inline void ++instr_alu_and_hm_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] and (hm)\n", p->thread_id); ++ ++ /* Structs. */ ++ ALU_HM_FAST(t, ip, &); ++ ++ /* Thread. */ ++ thread_ip_inc(p); ++} ++ ++static inline void ++instr_alu_and_hh_exec(struct rte_swx_pipeline *p) + { + struct thread *t = &p->threads[p->thread_id]; + struct instruction *ip = t->ip; + +- TRACE("[Thread %2u] and (s)\n", p->thread_id); ++ TRACE("[Thread %2u] and (hh)\n", p->thread_id); + + /* Structs. */ +- ALU_S(t, ip, &); ++ ALU_HH_FAST(t, ip, &); + + /* Thread. */ + thread_ip_inc(p); +@@ -4313,15 +4485,45 @@ instr_alu_or_exec(struct rte_swx_pipeline *p) + } + + static inline void +-instr_alu_or_s_exec(struct rte_swx_pipeline *p) ++instr_alu_or_mh_exec(struct rte_swx_pipeline *p) + { + struct thread *t = &p->threads[p->thread_id]; + struct instruction *ip = t->ip; + +- TRACE("[Thread %2u] or (s)\n", p->thread_id); ++ TRACE("[Thread %2u] or (mh)\n", p->thread_id); + + /* Structs. */ +- ALU_S(t, ip, |); ++ ALU_MH(t, ip, |); ++ ++ /* Thread. */ ++ thread_ip_inc(p); ++} ++ ++static inline void ++instr_alu_or_hm_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] or (hm)\n", p->thread_id); ++ ++ /* Structs. */ ++ ALU_HM_FAST(t, ip, |); ++ ++ /* Thread. */ ++ thread_ip_inc(p); ++} ++ ++static inline void ++instr_alu_or_hh_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] or (hh)\n", p->thread_id); ++ ++ /* Structs. */ ++ ALU_HH_FAST(t, ip, |); + + /* Thread. 
*/ + thread_ip_inc(p); +@@ -4358,15 +4560,45 @@ instr_alu_xor_exec(struct rte_swx_pipeline *p) + } + + static inline void +-instr_alu_xor_s_exec(struct rte_swx_pipeline *p) ++instr_alu_xor_mh_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] xor (mh)\n", p->thread_id); ++ ++ /* Structs. */ ++ ALU_MH(t, ip, ^); ++ ++ /* Thread. */ ++ thread_ip_inc(p); ++} ++ ++static inline void ++instr_alu_xor_hm_exec(struct rte_swx_pipeline *p) + { + struct thread *t = &p->threads[p->thread_id]; + struct instruction *ip = t->ip; + +- TRACE("[Thread %2u] xor (s)\n", p->thread_id); ++ TRACE("[Thread %2u] xor (hm)\n", p->thread_id); + + /* Structs. */ +- ALU_S(t, ip, ^); ++ ALU_HM_FAST(t, ip, ^); ++ ++ /* Thread. */ ++ thread_ip_inc(p); ++} ++ ++static inline void ++instr_alu_xor_hh_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] xor (hh)\n", p->thread_id); ++ ++ /* Structs. */ ++ ALU_HH_FAST(t, ip, ^); + + /* Thread. */ + thread_ip_inc(p); +@@ -4794,13 +5026,16 @@ instr_jmp_eq_translate(struct rte_swx_pipeline *p, + fa = struct_field_parse(p, action, a, &a_struct_id); + CHECK(fa, EINVAL); + +- /* JMP_EQ or JMP_EQ_S. */ ++ /* JMP_EQ, JMP_EQ_MH, JMP_EQ_HM, JMP_EQ_HH. */ + fb = struct_field_parse(p, action, b, &b_struct_id); + if (fb) { + instr->type = INSTR_JMP_EQ; +- if ((a[0] == 'h' && b[0] != 'h') || +- (a[0] != 'h' && b[0] == 'h')) +- instr->type = INSTR_JMP_EQ_S; ++ if (a[0] != 'h' && b[0] == 'h') ++ instr->type = INSTR_JMP_EQ_MH; ++ if (a[0] == 'h' && b[0] != 'h') ++ instr->type = INSTR_JMP_EQ_HM; ++ if (a[0] == 'h' && b[0] == 'h') ++ instr->type = INSTR_JMP_EQ_HH; + instr->jmp.ip = NULL; /* Resolved later. */ + + instr->jmp.a.struct_id = (uint8_t)a_struct_id; +@@ -4848,13 +5083,16 @@ instr_jmp_neq_translate(struct rte_swx_pipeline *p, + fa = struct_field_parse(p, action, a, &a_struct_id); + CHECK(fa, EINVAL); + +- /* JMP_NEQ or JMP_NEQ_S. */ ++ /* JMP_NEQ, JMP_NEQ_MH, JMP_NEQ_HM, JMP_NEQ_HH. */ + fb = struct_field_parse(p, action, b, &b_struct_id); + if (fb) { + instr->type = INSTR_JMP_NEQ; +- if ((a[0] == 'h' && b[0] != 'h') || +- (a[0] != 'h' && b[0] == 'h')) +- instr->type = INSTR_JMP_NEQ_S; ++ if (a[0] != 'h' && b[0] == 'h') ++ instr->type = INSTR_JMP_NEQ_MH; ++ if (a[0] == 'h' && b[0] != 'h') ++ instr->type = INSTR_JMP_NEQ_HM; ++ if (a[0] == 'h' && b[0] == 'h') ++ instr->type = INSTR_JMP_NEQ_HH; + instr->jmp.ip = NULL; /* Resolved later. 
*/ + + instr->jmp.a.struct_id = (uint8_t)a_struct_id; +@@ -4906,9 +5144,9 @@ instr_jmp_lt_translate(struct rte_swx_pipeline *p, + fb = struct_field_parse(p, action, b, &b_struct_id); + if (fb) { + instr->type = INSTR_JMP_LT; +- if (a[0] == 'h' && b[0] == 'm') ++ if (a[0] == 'h' && b[0] != 'h') + instr->type = INSTR_JMP_LT_HM; +- if (a[0] == 'm' && b[0] == 'h') ++ if (a[0] != 'h' && b[0] == 'h') + instr->type = INSTR_JMP_LT_MH; + if (a[0] == 'h' && b[0] == 'h') + instr->type = INSTR_JMP_LT_HH; +@@ -4963,9 +5201,9 @@ instr_jmp_gt_translate(struct rte_swx_pipeline *p, + fb = struct_field_parse(p, action, b, &b_struct_id); + if (fb) { + instr->type = INSTR_JMP_GT; +- if (a[0] == 'h' && b[0] == 'm') ++ if (a[0] == 'h' && b[0] != 'h') + instr->type = INSTR_JMP_GT_HM; +- if (a[0] == 'm' && b[0] == 'h') ++ if (a[0] != 'h' && b[0] == 'h') + instr->type = INSTR_JMP_GT_MH; + if (a[0] == 'h' && b[0] == 'h') + instr->type = INSTR_JMP_GT_HH; +@@ -5089,14 +5327,36 @@ instr_jmp_eq_exec(struct rte_swx_pipeline *p) + } + + static inline void +-instr_jmp_eq_s_exec(struct rte_swx_pipeline *p) ++instr_jmp_eq_mh_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] jmpeq (mh)\n", p->thread_id); ++ ++ JMP_CMP_MH(t, ip, ==); ++} ++ ++static inline void ++instr_jmp_eq_hm_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] jmpeq (hm)\n", p->thread_id); ++ ++ JMP_CMP_HM(t, ip, ==); ++} ++ ++static inline void ++instr_jmp_eq_hh_exec(struct rte_swx_pipeline *p) + { + struct thread *t = &p->threads[p->thread_id]; + struct instruction *ip = t->ip; + +- TRACE("[Thread %2u] jmpeq (s)\n", p->thread_id); ++ TRACE("[Thread %2u] jmpeq (hh)\n", p->thread_id); + +- JMP_CMP_S(t, ip, ==); ++ JMP_CMP_HH_FAST(t, ip, ==); + } + + static inline void +@@ -5122,14 +5382,36 @@ instr_jmp_neq_exec(struct rte_swx_pipeline *p) + } + + static inline void +-instr_jmp_neq_s_exec(struct rte_swx_pipeline *p) ++instr_jmp_neq_mh_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] jmpneq (mh)\n", p->thread_id); ++ ++ JMP_CMP_MH(t, ip, !=); ++} ++ ++static inline void ++instr_jmp_neq_hm_exec(struct rte_swx_pipeline *p) ++{ ++ struct thread *t = &p->threads[p->thread_id]; ++ struct instruction *ip = t->ip; ++ ++ TRACE("[Thread %2u] jmpneq (hm)\n", p->thread_id); ++ ++ JMP_CMP_HM(t, ip, !=); ++} ++ ++static inline void ++instr_jmp_neq_hh_exec(struct rte_swx_pipeline *p) + { + struct thread *t = &p->threads[p->thread_id]; + struct instruction *ip = t->ip; + +- TRACE("[Thread %2u] jmpneq (s)\n", p->thread_id); ++ TRACE("[Thread %2u] jmpneq (hh)\n", p->thread_id); + +- JMP_CMP_S(t, ip, !=); ++ JMP_CMP_HH_FAST(t, ip, !=); + } + + static inline void +@@ -5633,7 +5915,7 @@ instr_label_check(struct instruction_data *instruction_data, + continue; + + for (j = i + 1; j < n_instructions; j++) +- CHECK(strcmp(label, data[j].label), EINVAL); ++ CHECK(strcmp(label, instruction_data[j].label), EINVAL); + } + + /* Get users for each instruction label. 
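/*
 * Editorial note (not part of the patch): the SWX pipeline hunks above split
 * the old generic "_S" (swap) instruction variants into explicit MH, HM and
 * HH forms. Header fields are kept in network byte order while metadata is in
 * host order, so each source/destination combination needs its own
 * conversion, and the header-to-header case can skip the byte swap entirely
 * (hence the *_FAST macros). A simplified sketch for a 16-bit field of equal
 * width on both sides:
 */
#include <arpa/inet.h>      /* htons()/ntohs() */
#include <stdint.h>

static inline uint16_t mov_mh(uint16_t hdr_be) { return ntohs(hdr_be); } /* meta   <- header */
static inline uint16_t mov_hm(uint16_t meta)   { return htons(meta); }   /* header <- meta   */
static inline uint16_t mov_hh(uint16_t hdr_be) { return hdr_be; }        /* header <- header, no swap */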
*/ +@@ -6054,7 +6336,9 @@ static instr_exec_t instruction_table[] = { + [INSTR_HDR_INVALIDATE] = instr_hdr_invalidate_exec, + + [INSTR_MOV] = instr_mov_exec, +- [INSTR_MOV_S] = instr_mov_s_exec, ++ [INSTR_MOV_MH] = instr_mov_mh_exec, ++ [INSTR_MOV_HM] = instr_mov_hm_exec, ++ [INSTR_MOV_HH] = instr_mov_hh_exec, + [INSTR_MOV_I] = instr_mov_i_exec, + + [INSTR_DMA_HT] = instr_dma_ht_exec, +@@ -6086,15 +6370,21 @@ static instr_exec_t instruction_table[] = { + [INSTR_ALU_CKSUB_FIELD] = instr_alu_cksub_field_exec, + + [INSTR_ALU_AND] = instr_alu_and_exec, +- [INSTR_ALU_AND_S] = instr_alu_and_s_exec, ++ [INSTR_ALU_AND_MH] = instr_alu_and_mh_exec, ++ [INSTR_ALU_AND_HM] = instr_alu_and_hm_exec, ++ [INSTR_ALU_AND_HH] = instr_alu_and_hh_exec, + [INSTR_ALU_AND_I] = instr_alu_and_i_exec, + + [INSTR_ALU_OR] = instr_alu_or_exec, +- [INSTR_ALU_OR_S] = instr_alu_or_s_exec, ++ [INSTR_ALU_OR_MH] = instr_alu_or_mh_exec, ++ [INSTR_ALU_OR_HM] = instr_alu_or_hm_exec, ++ [INSTR_ALU_OR_HH] = instr_alu_or_hh_exec, + [INSTR_ALU_OR_I] = instr_alu_or_i_exec, + + [INSTR_ALU_XOR] = instr_alu_xor_exec, +- [INSTR_ALU_XOR_S] = instr_alu_xor_s_exec, ++ [INSTR_ALU_XOR_MH] = instr_alu_xor_mh_exec, ++ [INSTR_ALU_XOR_HM] = instr_alu_xor_hm_exec, ++ [INSTR_ALU_XOR_HH] = instr_alu_xor_hh_exec, + [INSTR_ALU_XOR_I] = instr_alu_xor_i_exec, + + [INSTR_ALU_SHL] = instr_alu_shl_exec, +@@ -6124,11 +6414,15 @@ static instr_exec_t instruction_table[] = { + [INSTR_JMP_ACTION_MISS] = instr_jmp_action_miss_exec, + + [INSTR_JMP_EQ] = instr_jmp_eq_exec, +- [INSTR_JMP_EQ_S] = instr_jmp_eq_s_exec, ++ [INSTR_JMP_EQ_MH] = instr_jmp_eq_mh_exec, ++ [INSTR_JMP_EQ_HM] = instr_jmp_eq_hm_exec, ++ [INSTR_JMP_EQ_HH] = instr_jmp_eq_hh_exec, + [INSTR_JMP_EQ_I] = instr_jmp_eq_i_exec, + + [INSTR_JMP_NEQ] = instr_jmp_neq_exec, +- [INSTR_JMP_NEQ_S] = instr_jmp_neq_s_exec, ++ [INSTR_JMP_NEQ_MH] = instr_jmp_neq_mh_exec, ++ [INSTR_JMP_NEQ_HM] = instr_jmp_neq_hm_exec, ++ [INSTR_JMP_NEQ_HH] = instr_jmp_neq_hh_exec, + [INSTR_JMP_NEQ_I] = instr_jmp_neq_i_exec, + + [INSTR_JMP_LT] = instr_jmp_lt_exec, +diff --git a/dpdk/lib/librte_power/guest_channel.c b/dpdk/lib/librte_power/guest_channel.c +index 2f7507a03c..474dd92998 100644 +--- a/dpdk/lib/librte_power/guest_channel.c ++++ b/dpdk/lib/librte_power/guest_channel.c +@@ -166,6 +166,17 @@ int power_guest_channel_read_msg(void *pkt, + if (pkt_len == 0 || pkt == NULL) + return -1; + ++ if (lcore_id >= RTE_MAX_LCORE) { ++ RTE_LOG(ERR, GUEST_CHANNEL, "Channel(%u) is out of range 0...%d\n", ++ lcore_id, RTE_MAX_LCORE-1); ++ return -1; ++ } ++ ++ if (global_fds[lcore_id] < 0) { ++ RTE_LOG(ERR, GUEST_CHANNEL, "Channel is not connected\n"); ++ return -1; ++ } ++ + fds.fd = global_fds[lcore_id]; + fds.events = POLLIN; + +@@ -179,17 +190,6 @@ int power_guest_channel_read_msg(void *pkt, + return -1; + } + +- if (lcore_id >= RTE_MAX_LCORE) { +- RTE_LOG(ERR, GUEST_CHANNEL, "Channel(%u) is out of range 0...%d\n", +- lcore_id, RTE_MAX_LCORE-1); +- return -1; +- } +- +- if (global_fds[lcore_id] < 0) { +- RTE_LOG(ERR, GUEST_CHANNEL, "Channel is not connected\n"); +- return -1; +- } +- + while (pkt_len > 0) { + ret = read(global_fds[lcore_id], + pkt, pkt_len); +diff --git a/dpdk/lib/librte_power/power_acpi_cpufreq.c b/dpdk/lib/librte_power/power_acpi_cpufreq.c +index 84a9d75207..66dd013427 100644 +--- a/dpdk/lib/librte_power/power_acpi_cpufreq.c ++++ b/dpdk/lib/librte_power/power_acpi_cpufreq.c +@@ -152,6 +152,9 @@ power_set_governor_userspace(struct rte_power_info *pi) + /* Strip off terminating '\n' */ + strtok(buf, "\n"); + ++ /* Save 
the original governor */ ++ rte_strscpy(pi->governor_ori, buf, sizeof(pi->governor_ori)); ++ + /* Check if current governor is userspace */ + if (strncmp(buf, POWER_GOVERNOR_USERSPACE, + sizeof(POWER_GOVERNOR_USERSPACE)) == 0) { +@@ -160,8 +163,6 @@ power_set_governor_userspace(struct rte_power_info *pi) + "already userspace\n", pi->lcore_id); + goto out; + } +- /* Save the original governor */ +- strlcpy(pi->governor_ori, buf, sizeof(pi->governor_ori)); + + /* Write 'userspace' to the governor */ + val = fseek(f, 0, SEEK_SET); +@@ -225,7 +226,7 @@ power_get_available_freqs(struct rte_power_info *pi) + goto out; + } + +- /* Store the available frequncies into power context */ ++ /* Store the available frequencies into power context */ + for (i = 0, pi->nb_freqs = 0; i < count; i++) { + POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id, + i, freqs[i]); +diff --git a/dpdk/lib/librte_power/power_pstate_cpufreq.c b/dpdk/lib/librte_power/power_pstate_cpufreq.c +index edf6328e5f..dff9857713 100644 +--- a/dpdk/lib/librte_power/power_pstate_cpufreq.c ++++ b/dpdk/lib/librte_power/power_pstate_cpufreq.c +@@ -350,6 +350,9 @@ power_set_governor_performance(struct pstate_power_info *pi) + /* Strip off terminating '\n' */ + strtok(buf, "\n"); + ++ /* Save the original governor */ ++ rte_strscpy(pi->governor_ori, buf, sizeof(pi->governor_ori)); ++ + /* Check if current governor is performance */ + if (strncmp(buf, POWER_GOVERNOR_PERF, + sizeof(POWER_GOVERNOR_PERF)) == 0) { +@@ -358,8 +361,6 @@ power_set_governor_performance(struct pstate_power_info *pi) + "already performance\n", pi->lcore_id); + goto out; + } +- /* Save the original governor */ +- strlcpy(pi->governor_ori, buf, sizeof(pi->governor_ori)); + + /* Write 'performance' to the governor */ + val = fseek(f, 0, SEEK_SET); +diff --git a/dpdk/lib/librte_power/rte_power_empty_poll.c b/dpdk/lib/librte_power/rte_power_empty_poll.c +index 975aa92997..8a2d60c576 100644 +--- a/dpdk/lib/librte_power/rte_power_empty_poll.c ++++ b/dpdk/lib/librte_power/rte_power_empty_poll.c +@@ -207,7 +207,7 @@ update_training_stats(struct priority_worker *poll_stats, + static __rte_always_inline uint32_t + update_stats(struct priority_worker *poll_stats) + { +- uint64_t tot_edpi = 0, tot_ppi = 0; ++ uint64_t tot_edpi = 0; + uint32_t j, percent; + + struct priority_worker *s = poll_stats; +@@ -237,7 +237,6 @@ update_stats(struct priority_worker *poll_stats) + + for (j = 0; j < BINS_AV; j++) { + tot_edpi += s->edpi_av[j]; +- tot_ppi += s->ppi_av[j]; + } + + tot_edpi = tot_edpi / BINS_AV; +diff --git a/dpdk/lib/librte_power/rte_power_guest_channel.h b/dpdk/lib/librte_power/rte_power_guest_channel.h +index ed4fbfdcd3..b5de1bd243 100644 +--- a/dpdk/lib/librte_power/rte_power_guest_channel.h ++++ b/dpdk/lib/librte_power/rte_power_guest_channel.h +@@ -119,11 +119,6 @@ struct rte_power_channel_packet_caps_list { + }; + + /** +- * @internal +- * +- * @warning +- * @b EXPERIMENTAL: this API may change without prior notice. +- * + * Send a message contained in pkt over the Virtio-Serial to the host endpoint. + * + * @param pkt +@@ -136,13 +131,10 @@ struct rte_power_channel_packet_caps_list { + * - 0 on success. + * - Negative on error. + */ +-__rte_experimental + int rte_power_guest_channel_send_msg(struct rte_power_channel_packet *pkt, + unsigned int lcore_id); + + /** +- * @internal +- * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. 
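/*
 * Editorial note (not part of the patch): the guest_channel.c hunk above
 * moves the lcore_id range check and the "channel connected" check in front
 * of the first use of global_fds[lcore_id]; previously the array was indexed
 * before the index had been validated. Generic sketch of the check-before-use
 * ordering (MAX_CHANNELS and the fds array are illustrative, not DPDK
 * symbols):
 */
#define MAX_CHANNELS 128

static int fds[MAX_CHANNELS];

static int
channel_fd(unsigned int id)
{
	if (id >= MAX_CHANNELS) /* validate the index first ...     */
		return -1;
	if (fds[id] < 0)        /* ... then the state behind it ... */
		return -1;
	return fds[id];         /* ... and only then use it.        */
}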
+ * +diff --git a/dpdk/lib/librte_power/version.map b/dpdk/lib/librte_power/version.map +index 13f0af3b2d..7e7baafbf1 100644 +--- a/dpdk/lib/librte_power/version.map ++++ b/dpdk/lib/librte_power/version.map +@@ -36,6 +36,4 @@ EXPERIMENTAL { + rte_power_poll_stat_update; + + # added in 21.02 +- rte_power_guest_channel_receive_msg; +- rte_power_guest_channel_send_msg; + }; +diff --git a/dpdk/lib/librte_rawdev/rte_rawdev_pmd.h b/dpdk/lib/librte_rawdev/rte_rawdev_pmd.h +index b1bed13ee2..3b7be57d3e 100644 +--- a/dpdk/lib/librte_rawdev/rte_rawdev_pmd.h ++++ b/dpdk/lib/librte_rawdev/rte_rawdev_pmd.h +@@ -126,7 +126,7 @@ rte_rawdev_pmd_is_valid_dev(uint8_t dev_id) + } + + /** +- * Definitions of all functions exported by a driver through the ++ * Definitions of all functions exported by a driver through + * the generic structure of type *rawdev_ops* supplied in the + * *rte_rawdev* structure associated with a device. + */ +diff --git a/dpdk/lib/librte_rcu/rte_rcu_qsbr.h b/dpdk/lib/librte_rcu/rte_rcu_qsbr.h +index fa2b881bd0..6f09d9c398 100644 +--- a/dpdk/lib/librte_rcu/rte_rcu_qsbr.h ++++ b/dpdk/lib/librte_rcu/rte_rcu_qsbr.h +@@ -367,7 +367,7 @@ rte_rcu_qsbr_thread_offline(struct rte_rcu_qsbr *v, unsigned int thread_id) + + /* The reader can go offline only after the load of the + * data structure is completed. i.e. any load of the +- * data strcture can not move after this store. ++ * data structure can not move after this store. + */ + + __atomic_store_n(&v->qsbr_cnt[thread_id].cnt, +diff --git a/dpdk/lib/librte_regexdev/rte_regexdev.h b/dpdk/lib/librte_regexdev/rte_regexdev.h +index 0001658925..df2312678c 100644 +--- a/dpdk/lib/librte_regexdev/rte_regexdev.h ++++ b/dpdk/lib/librte_regexdev/rte_regexdev.h +@@ -298,14 +298,14 @@ rte_regexdev_get_dev_id(const char *name); + * backtracking positions remembered by any tokens inside the group. + * Example RegEx is `a(?>bc|b)c` if the given patterns are `abc` and `abcc` then + * `a(bc|b)c` matches both where as `a(?>bc|b)c` matches only abcc because +- * atomic groups don't allow backtracing back to `b`. ++ * atomic groups don't allow backtracking back to `b`. + * + * @see struct rte_regexdev_info::regexdev_capa + */ + + #define RTE_REGEXDEV_SUPP_PCRE_BACKTRACKING_CTRL_F (1ULL << 3) + /**< RegEx device support PCRE backtracking control verbs. +- * Some examples of backtracing verbs are (*COMMIT), (*ACCEPT), (*FAIL), ++ * Some examples of backtracking verbs are (*COMMIT), (*ACCEPT), (*FAIL), + * (*SKIP), (*PRUNE). + * + * @see struct rte_regexdev_info::regexdev_capa +@@ -1015,7 +1015,7 @@ rte_regexdev_rule_db_update(uint8_t dev_id, + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Compile local rule set and burn the complied result to the +- * RegEx deive. ++ * RegEx device. + * + * @param dev_id + * RegEx device identifier. +diff --git a/dpdk/lib/librte_rib/rte_rib.c b/dpdk/lib/librte_rib/rte_rib.c +index 6c29e1c49a..1a4b10d728 100644 +--- a/dpdk/lib/librte_rib/rte_rib.c ++++ b/dpdk/lib/librte_rib/rte_rib.c +@@ -73,6 +73,8 @@ is_covered(uint32_t ip1, uint32_t ip2, uint8_t depth) + static inline struct rte_rib_node * + get_nxt_node(struct rte_rib_node *node, uint32_t ip) + { ++ if (node->depth == RIB_MAXDEPTH) ++ return NULL; + return (ip & (1 << (31 - node->depth))) ? 
node->right : node->left; + } + +diff --git a/dpdk/lib/librte_rib/rte_rib6.c b/dpdk/lib/librte_rib/rte_rib6.c +index f6c55ee454..70405113b4 100644 +--- a/dpdk/lib/librte_rib/rte_rib6.c ++++ b/dpdk/lib/librte_rib/rte_rib6.c +@@ -79,20 +79,33 @@ is_covered(const uint8_t ip1[RTE_RIB6_IPV6_ADDR_SIZE], + static inline int + get_dir(const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth) + { +- int i = 0; +- uint8_t p_depth, msk; +- +- for (p_depth = depth; p_depth >= 8; p_depth -= 8) +- i++; +- +- msk = 1 << (7 - p_depth); +- return (ip[i] & msk) != 0; ++ uint8_t index, msk; ++ ++ /* ++ * depth & 127 clamps depth to values that will not ++ * read off the end of ip. ++ * depth is the number of bits deep into ip to traverse, and ++ * is incremented in blocks of 8 (1 byte). This means the last ++ * 3 bits are irrelevant to what the index of ip should be. ++ */ ++ index = (depth & INT8_MAX) / CHAR_BIT; ++ ++ /* ++ * msk is the bitmask used to extract the bit used to decide the ++ * direction of the next step of the binary search. ++ */ ++ msk = 1 << (7 - (depth & 7)); ++ ++ return (ip[index] & msk) != 0; + } + + static inline struct rte_rib6_node * + get_nxt_node(struct rte_rib6_node *node, + const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE]) + { ++ if (node->depth == RIB6_MAXDEPTH) ++ return NULL; ++ + return (get_dir(ip, node->depth)) ? node->right : node->left; + } + +@@ -186,7 +199,7 @@ rte_rib6_lookup_exact(struct rte_rib6 *rib, + } + + /* +- * Traverses on subtree and retreeves more specific routes ++ * Traverses on subtree and retrieves more specific routes + * for a given in args ip/depth prefix + * last = NULL means the first invocation + */ +diff --git a/dpdk/lib/librte_rib/rte_rib6.h b/dpdk/lib/librte_rib/rte_rib6.h +index dbd52928a2..3ff3e593fd 100644 +--- a/dpdk/lib/librte_rib/rte_rib6.h ++++ b/dpdk/lib/librte_rib/rte_rib6.h +@@ -44,12 +44,12 @@ struct rte_rib6_node; + /** RIB configuration structure */ + struct rte_rib6_conf { + /** +- * Size of extension block inside rte_rib_node. ++ * Size of extension block inside rte_rib6_node. + * This space could be used to store additional user + * defined data. + */ + size_t ext_sz; +- /* size of rte_rib_node's pool */ ++ /* size of rte_rib6_node's pool */ + int max_nodes; + }; + +@@ -323,7 +323,7 @@ rte_rib6_create(const char *name, int socket_id, + * Find an existing RIB object and return a pointer to it. + * + * @param name +- * Name of the rib object as passed to rte_rib_create() ++ * Name of the rib object as passed to rte_rib6_create() + * @return + * Pointer to RIB object on success + * NULL otherwise with rte_errno indicating reason for failure. 
+diff --git a/dpdk/lib/librte_ring/rte_ring.c b/dpdk/lib/librte_ring/rte_ring.c +index f17bd966be..6a94a038c4 100644 +--- a/dpdk/lib/librte_ring/rte_ring.c ++++ b/dpdk/lib/librte_ring/rte_ring.c +@@ -75,7 +75,7 @@ rte_ring_get_memsize_elem(unsigned int esize, unsigned int count) + return -EINVAL; + } + +- sz = sizeof(struct rte_ring) + count * esize; ++ sz = sizeof(struct rte_ring) + (ssize_t)count * esize; + sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE); + return sz; + } +@@ -267,7 +267,7 @@ rte_ring_create_elem(const char *name, unsigned int esize, unsigned int count, + + ring_size = rte_ring_get_memsize_elem(esize, count); + if (ring_size < 0) { +- rte_errno = ring_size; ++ rte_errno = -ring_size; + return NULL; + } + +diff --git a/dpdk/lib/librte_ring/rte_ring_c11_mem.h b/dpdk/lib/librte_ring/rte_ring_c11_mem.h +index 0fb73a3371..ae886532ee 100644 +--- a/dpdk/lib/librte_ring/rte_ring_c11_mem.h ++++ b/dpdk/lib/librte_ring/rte_ring_c11_mem.h +@@ -111,7 +111,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp, + * @param is_sc + * Indicates whether multi-consumer path is needed or not + * @param n +- * The number of elements we will want to enqueue, i.e. how far should the ++ * The number of elements we will want to dequeue, i.e. how far should the + * head be moved + * @param behavior + * RTE_RING_QUEUE_FIXED: Dequeue a fixed number of items from a ring +diff --git a/dpdk/lib/librte_ring/rte_ring_core.h b/dpdk/lib/librte_ring/rte_ring_core.h +index 16718ca7f1..25d2244a69 100644 +--- a/dpdk/lib/librte_ring/rte_ring_core.h ++++ b/dpdk/lib/librte_ring/rte_ring_core.h +@@ -12,7 +12,7 @@ + + /** + * @file +- * This file contains definion of RTE ring structure itself, ++ * This file contains definition of RTE ring structure itself, + * init flags and some related macros. 
+ * For majority of DPDK entities, it is not recommended to include + * this file directly, use include or +diff --git a/dpdk/lib/librte_ring/rte_ring_elem.h b/dpdk/lib/librte_ring/rte_ring_elem.h +index 7034d29c07..0057da3597 100644 +--- a/dpdk/lib/librte_ring/rte_ring_elem.h ++++ b/dpdk/lib/librte_ring/rte_ring_elem.h +@@ -112,7 +112,7 @@ __rte_ring_enqueue_elems_32(struct rte_ring *r, const uint32_t size, + unsigned int i; + uint32_t *ring = (uint32_t *)&r[1]; + const uint32_t *obj = (const uint32_t *)obj_table; +- if (likely(idx + n < size)) { ++ if (likely(idx + n <= size)) { + for (i = 0; i < (n & ~0x7); i += 8, idx += 8) { + ring[idx] = obj[i]; + ring[idx + 1] = obj[i + 1]; +@@ -157,7 +157,7 @@ __rte_ring_enqueue_elems_64(struct rte_ring *r, uint32_t prod_head, + uint32_t idx = prod_head & r->mask; + uint64_t *ring = (uint64_t *)&r[1]; + const unaligned_uint64_t *obj = (const unaligned_uint64_t *)obj_table; +- if (likely(idx + n < size)) { ++ if (likely(idx + n <= size)) { + for (i = 0; i < (n & ~0x3); i += 4, idx += 4) { + ring[idx] = obj[i]; + ring[idx + 1] = obj[i + 1]; +@@ -190,7 +190,7 @@ __rte_ring_enqueue_elems_128(struct rte_ring *r, uint32_t prod_head, + uint32_t idx = prod_head & r->mask; + rte_int128_t *ring = (rte_int128_t *)&r[1]; + const rte_int128_t *obj = (const rte_int128_t *)obj_table; +- if (likely(idx + n < size)) { ++ if (likely(idx + n <= size)) { + for (i = 0; i < (n & ~0x1); i += 2, idx += 2) + memcpy((void *)(ring + idx), + (const void *)(obj + i), 32); +@@ -246,7 +246,7 @@ __rte_ring_dequeue_elems_32(struct rte_ring *r, const uint32_t size, + unsigned int i; + uint32_t *ring = (uint32_t *)&r[1]; + uint32_t *obj = (uint32_t *)obj_table; +- if (likely(idx + n < size)) { ++ if (likely(idx + n <= size)) { + for (i = 0; i < (n & ~0x7); i += 8, idx += 8) { + obj[i] = ring[idx]; + obj[i + 1] = ring[idx + 1]; +@@ -291,7 +291,7 @@ __rte_ring_dequeue_elems_64(struct rte_ring *r, uint32_t prod_head, + uint32_t idx = prod_head & r->mask; + uint64_t *ring = (uint64_t *)&r[1]; + unaligned_uint64_t *obj = (unaligned_uint64_t *)obj_table; +- if (likely(idx + n < size)) { ++ if (likely(idx + n <= size)) { + for (i = 0; i < (n & ~0x3); i += 4, idx += 4) { + obj[i] = ring[idx]; + obj[i + 1] = ring[idx + 1]; +@@ -324,7 +324,7 @@ __rte_ring_dequeue_elems_128(struct rte_ring *r, uint32_t prod_head, + uint32_t idx = prod_head & r->mask; + rte_int128_t *ring = (rte_int128_t *)&r[1]; + rte_int128_t *obj = (rte_int128_t *)obj_table; +- if (likely(idx + n < size)) { ++ if (likely(idx + n <= size)) { + for (i = 0; i < (n & ~0x1); i += 2, idx += 2) + memcpy((void *)(obj + i), (void *)(ring + idx), 32); + switch (n & 0x1) { +diff --git a/dpdk/lib/librte_ring/rte_ring_generic.h b/dpdk/lib/librte_ring/rte_ring_generic.h +index 953cdbbd5e..79ae4ecb14 100644 +--- a/dpdk/lib/librte_ring/rte_ring_generic.h ++++ b/dpdk/lib/librte_ring/rte_ring_generic.h +@@ -107,7 +107,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp, + * @param is_sc + * Indicates whether multi-consumer path is needed or not + * @param n +- * The number of elements we will want to enqueue, i.e. how far should the ++ * The number of elements we will want to dequeue, i.e. 
how far should the + * head be moved + * @param behavior + * RTE_RING_QUEUE_FIXED: Dequeue a fixed number of items from a ring +diff --git a/dpdk/lib/librte_sched/rte_red.h b/dpdk/lib/librte_sched/rte_red.h +index 36273cac64..f5843dab1b 100644 +--- a/dpdk/lib/librte_sched/rte_red.h ++++ b/dpdk/lib/librte_sched/rte_red.h +@@ -303,7 +303,7 @@ __rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red) + } + + /** +- * @brief Decides if new packet should be enqeued or dropped in queue non-empty case ++ * @brief Decides if new packet should be enqueued or dropped in queue non-empty case + * + * @param red_cfg [in] config pointer to a RED configuration parameter structure + * @param red [in,out] data pointer to RED runtime data +@@ -361,7 +361,7 @@ rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg, + } + + /** +- * @brief Decides if new packet should be enqeued or dropped ++ * @brief Decides if new packet should be enqueued or dropped + * Updates run time data based on new queue size value. + * Based on new queue average and RED configuration parameters + * gives verdict whether to enqueue or drop the packet. +diff --git a/dpdk/lib/librte_sched/rte_sched.c b/dpdk/lib/librte_sched/rte_sched.c +index 7c56880681..4ba95bc414 100644 +--- a/dpdk/lib/librte_sched/rte_sched.c ++++ b/dpdk/lib/librte_sched/rte_sched.c +@@ -228,7 +228,7 @@ struct rte_sched_port { + int socket; + + /* Timing */ +- uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cyles */ ++ uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cycles */ + uint64_t time_cpu_bytes; /* Current CPU time measured in bytes */ + uint64_t time; /* Current NIC TX time measured in bytes */ + struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */ +@@ -579,7 +579,7 @@ rte_sched_subport_config_qsize(struct rte_sched_subport *subport) + + subport->qsize_add[0] = 0; + +- /* Strict prority traffic class */ ++ /* Strict priority traffic class */ + for (i = 1; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + subport->qsize_add[i] = subport->qsize_add[i-1] + subport->qsize[i-1]; + +@@ -961,9 +961,9 @@ rte_sched_port_config(struct rte_sched_port_params *params) + /* Allocate memory to store the subport profile */ + port->subport_profiles = rte_zmalloc_socket("subport_profile", size2, + RTE_CACHE_LINE_SIZE, params->socket); +- if (port == NULL) { ++ if (port->subport_profiles == NULL) { + RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__); +- ++ rte_free(port); + return NULL; + } + +@@ -1090,6 +1090,7 @@ rte_sched_subport_config(struct rte_sched_port *port, + uint32_t n_subport_pipe_queues, i; + uint32_t size0, size1, bmp_mem_size; + int status; ++ int ret; + + /* Check user parameters */ + if (port == NULL) { +@@ -1101,17 +1102,16 @@ rte_sched_subport_config(struct rte_sched_port *port, + if (subport_id >= port->n_subports_per_port) { + RTE_LOG(ERR, SCHED, + "%s: Incorrect value for subport id\n", __func__); +- +- rte_sched_free_memory(port, n_subports); +- return -EINVAL; ++ ret = -EINVAL; ++ goto out; + } + + if (subport_profile_id >= port->n_max_subport_profiles) { + RTE_LOG(ERR, SCHED, "%s: " + "Number of subport profile exceeds the max limit\n", + __func__); +- rte_sched_free_memory(port, n_subports); +- return -EINVAL; ++ ret = -EINVAL; ++ goto out; + } + + /** Memory is allocated only on first invocation of the api for a +@@ -1127,9 +1127,8 @@ rte_sched_subport_config(struct rte_sched_port *port, + RTE_LOG(NOTICE, SCHED, + "%s: Port scheduler params check failed (%d)\n", + __func__, 
status); +- +- rte_sched_free_memory(port, n_subports); +- return -EINVAL; ++ ret = -EINVAL; ++ goto out; + } + + /* Determine the amount of memory to allocate */ +@@ -1143,9 +1142,8 @@ rte_sched_subport_config(struct rte_sched_port *port, + if (s == NULL) { + RTE_LOG(ERR, SCHED, + "%s: Memory allocation fails\n", __func__); +- +- rte_sched_free_memory(port, n_subports); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto out; + } + + n_subports++; +@@ -1185,12 +1183,11 @@ rte_sched_subport_config(struct rte_sched_port *port, + params->red_params[i][j].min_th, + params->red_params[i][j].max_th, + params->red_params[i][j].maxp_inv) != 0) { +- rte_sched_free_memory(port, n_subports); +- + RTE_LOG(NOTICE, SCHED, + "%s: RED configuration init fails\n", + __func__); +- return -EINVAL; ++ ret = -EINVAL; ++ goto out; + } + } + } +@@ -1238,9 +1235,8 @@ rte_sched_subport_config(struct rte_sched_port *port, + if (s->bmp == NULL) { + RTE_LOG(ERR, SCHED, + "%s: Subport bitmap init error\n", __func__); +- +- rte_sched_free_memory(port, n_subports); +- return -EINVAL; ++ ret = -EINVAL; ++ goto out; + } + + for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) +@@ -1249,7 +1245,6 @@ rte_sched_subport_config(struct rte_sched_port *port, + #ifdef RTE_SCHED_SUBPORT_TC_OV + /* TC oversubscription */ + s->tc_ov_wm_min = port->mtu; +- s->tc_ov_wm = s->tc_ov_wm_max; + s->tc_ov_period_id = 0; + s->tc_ov = 0; + s->tc_ov_n = 0; +@@ -1277,6 +1272,7 @@ rte_sched_subport_config(struct rte_sched_port *port, + #ifdef RTE_SCHED_SUBPORT_TC_OV + s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period, + s->pipe_tc_be_rate_max); ++ s->tc_ov_wm = s->tc_ov_wm_max; + #endif + s->profile = subport_profile_id; + +@@ -1285,6 +1281,11 @@ rte_sched_subport_config(struct rte_sched_port *port, + rte_sched_port_log_subport_profile(port, subport_profile_id); + + return 0; ++ ++out: ++ rte_sched_free_memory(port, n_subports); ++ ++ return ret; + } + + int +@@ -1299,6 +1300,7 @@ rte_sched_pipe_config(struct rte_sched_port *port, + struct rte_sched_pipe_profile *params; + uint32_t n_subports = subport_id + 1; + uint32_t deactivate, profile, i; ++ int ret; + + /* Check user parameters */ + profile = (uint32_t) pipe_profile; +@@ -1313,26 +1315,23 @@ rte_sched_pipe_config(struct rte_sched_port *port, + if (subport_id >= port->n_subports_per_port) { + RTE_LOG(ERR, SCHED, + "%s: Incorrect value for parameter subport id\n", __func__); +- +- rte_sched_free_memory(port, n_subports); +- return -EINVAL; ++ ret = -EINVAL; ++ goto out; + } + + s = port->subports[subport_id]; + if (pipe_id >= s->n_pipes_per_subport_enabled) { + RTE_LOG(ERR, SCHED, + "%s: Incorrect value for parameter pipe id\n", __func__); +- +- rte_sched_free_memory(port, n_subports); +- return -EINVAL; ++ ret = -EINVAL; ++ goto out; + } + + if (!deactivate && profile >= s->n_pipe_profiles) { + RTE_LOG(ERR, SCHED, + "%s: Incorrect value for parameter pipe profile\n", __func__); +- +- rte_sched_free_memory(port, n_subports); +- return -EINVAL; ++ ret = -EINVAL; ++ goto out; + } + + sp = port->subport_profiles + s->profile; +@@ -1406,6 +1405,11 @@ rte_sched_pipe_config(struct rte_sched_port *port, + } + + return 0; ++ ++out: ++ rte_sched_free_memory(port, n_subports); ++ ++ return ret; + } + + int +diff --git a/dpdk/lib/librte_sched/rte_sched.h b/dpdk/lib/librte_sched/rte_sched.h +index c1a772b70c..9ceca22f7e 100644 +--- a/dpdk/lib/librte_sched/rte_sched.h ++++ b/dpdk/lib/librte_sched/rte_sched.h +@@ -330,7 +330,7 @@ rte_sched_subport_pipe_profile_add(struct rte_sched_port *port, + * + * 
Hierarchical scheduler subport bandwidth profile add + * Note that this function is safe to use in runtime for adding new +- * subport bandwidth profile as it doesn't have any impact on hiearchical ++ * subport bandwidth profile as it doesn't have any impact on hierarchical + * structure of the scheduler. + * @param port + * Handle to port scheduler instance +diff --git a/dpdk/lib/librte_sched/rte_sched_common.h b/dpdk/lib/librte_sched/rte_sched_common.h +index 96706df7bd..e4cbbd9077 100644 +--- a/dpdk/lib/librte_sched/rte_sched_common.h ++++ b/dpdk/lib/librte_sched/rte_sched_common.h +@@ -51,10 +51,10 @@ rte_min_pos_4_u16(uint16_t *x) + * gcd(a, b) = gcd(b, a mod b) + * + */ +-static inline uint32_t +-rte_get_gcd(uint32_t a, uint32_t b) ++static inline uint64_t ++rte_get_gcd64(uint64_t a, uint64_t b) + { +- uint32_t c; ++ uint64_t c; + + if (a == 0) + return b; +@@ -76,6 +76,15 @@ rte_get_gcd(uint32_t a, uint32_t b) + return a; + } + ++/* ++ * 32-bit version of Greatest Common Divisor (GCD). ++ */ ++static inline uint32_t ++rte_get_gcd(uint32_t a, uint32_t b) ++{ ++ return rte_get_gcd64(a, b); ++} ++ + /* + * Compute the Lowest Common Denominator (LCD) of two numbers. + * This implementation computes GCD first: +diff --git a/dpdk/lib/librte_stack/meson.build b/dpdk/lib/librte_stack/meson.build +index 8f82a40ec2..88ed276d9a 100644 +--- a/dpdk/lib/librte_stack/meson.build ++++ b/dpdk/lib/librte_stack/meson.build +@@ -6,4 +6,6 @@ headers = files('rte_stack.h', + 'rte_stack_std.h', + 'rte_stack_lf.h', + 'rte_stack_lf_generic.h', +- 'rte_stack_lf_c11.h') ++ 'rte_stack_lf_c11.h', ++ 'rte_stack_lf_stubs.h', ++) +diff --git a/dpdk/lib/librte_stack/rte_stack.c b/dpdk/lib/librte_stack/rte_stack.c +index 8a51fba17f..10d3b2eeb3 100644 +--- a/dpdk/lib/librte_stack/rte_stack.c ++++ b/dpdk/lib/librte_stack/rte_stack.c +@@ -64,9 +64,11 @@ rte_stack_create(const char *name, unsigned int count, int socket_id, + + #ifdef RTE_ARCH_64 + RTE_BUILD_BUG_ON(sizeof(struct rte_stack_lf_head) != 16); +-#else ++#endif ++#if !defined(RTE_STACK_LF_SUPPORTED) + if (flags & RTE_STACK_F_LF) { + STACK_LOG_ERR("Lock-free stack is not supported on your platform\n"); ++ rte_errno = ENOTSUP; + return NULL; + } + #endif +diff --git a/dpdk/lib/librte_stack/rte_stack.h b/dpdk/lib/librte_stack/rte_stack.h +index 395b9ef835..27640f87b2 100644 +--- a/dpdk/lib/librte_stack/rte_stack.h ++++ b/dpdk/lib/librte_stack/rte_stack.h +@@ -89,7 +89,7 @@ struct rte_stack { + + /** + * The stack uses lock-free push and pop functions. This flag is only +- * supported on x86_64 platforms, currently. ++ * supported on x86_64 or arm64 platforms, currently. + */ + #define RTE_STACK_F_LF 0x0001 + +@@ -205,6 +205,7 @@ rte_stack_free_count(struct rte_stack *s) + * - EEXIST - a stack with the same name already exists + * - ENOMEM - insufficient memory to create the stack + * - ENAMETOOLONG - name size exceeds RTE_STACK_NAMESIZE ++ * - ENOTSUP - platform does not support given flags combination. + */ + struct rte_stack * + rte_stack_create(const char *name, unsigned int count, int socket_id, +diff --git a/dpdk/lib/librte_stack/rte_stack_lf.h b/dpdk/lib/librte_stack/rte_stack_lf.h +index eb106e64e6..f2b012cd0e 100644 +--- a/dpdk/lib/librte_stack/rte_stack_lf.h ++++ b/dpdk/lib/librte_stack/rte_stack_lf.h +@@ -13,6 +13,11 @@ + #else + #include "rte_stack_lf_generic.h" + #endif ++ ++/** ++ * Indicates that RTE_STACK_F_LF is supported. 
++ */ ++#define RTE_STACK_LF_SUPPORTED + #endif + + /** +diff --git a/dpdk/lib/librte_stack/rte_stack_lf_generic.h b/dpdk/lib/librte_stack/rte_stack_lf_generic.h +index 4850a05ee7..7fa29cedb2 100644 +--- a/dpdk/lib/librte_stack/rte_stack_lf_generic.h ++++ b/dpdk/lib/librte_stack/rte_stack_lf_generic.h +@@ -128,8 +128,10 @@ __rte_stack_lf_pop_elems(struct rte_stack_lf_list *list, + /* If NULL was encountered, the list was modified while + * traversing it. Retry. + */ +- if (i != num) ++ if (i != num) { ++ old_head = list->head; + continue; ++ } + + new_head.top = tmp; + new_head.cnt = old_head.cnt + 1; +diff --git a/dpdk/lib/librte_table/meson.build b/dpdk/lib/librte_table/meson.build +index d69678386e..657ea7d193 100644 +--- a/dpdk/lib/librte_table/meson.build ++++ b/dpdk/lib/librte_table/meson.build +@@ -30,4 +30,6 @@ deps += ['mbuf', 'port', 'lpm', 'hash', 'acl'] + + if arch_subdir == 'x86' + headers += files('rte_lru_x86.h') ++elif arch_subdir == 'arm' and dpdk_conf.has('RTE_ARCH_ARM64') ++ headers += files('rte_lru_arm64.h') + endif +diff --git a/dpdk/lib/librte_table/rte_swx_table.h b/dpdk/lib/librte_table/rte_swx_table.h +index 5a3137ec53..ec9fba9630 100644 +--- a/dpdk/lib/librte_table/rte_swx_table.h ++++ b/dpdk/lib/librte_table/rte_swx_table.h +@@ -205,7 +205,7 @@ typedef int + * operations into the same table. + * + * The typical reason an implementation may choose to split the table lookup +- * operation into multiple steps is to hide the latency of the inherrent memory ++ * operation into multiple steps is to hide the latency of the inherent memory + * read operations: before a read operation with the source data likely not in + * the CPU cache, the source data prefetch is issued and the table lookup + * operation is postponed in favor of some other unrelated work, which the CPU +diff --git a/dpdk/lib/librte_table/rte_swx_table_em.c b/dpdk/lib/librte_table/rte_swx_table_em.c +index 5f67223060..03b28c4c9d 100644 +--- a/dpdk/lib/librte_table/rte_swx_table_em.c ++++ b/dpdk/lib/librte_table/rte_swx_table_em.c +@@ -280,7 +280,7 @@ table_key_data(struct table *t, uint32_t key_id) + static inline int + bkt_is_empty(struct bucket_extension *bkt) + { +- return (!bkt->sig[0] && !bkt->sig[1] && !bkt->sig[2] && !bkt->sig[2]) ? ++ return (!bkt->sig[0] && !bkt->sig[1] && !bkt->sig[2] && !bkt->sig[3]) ? + 1 : 0; + } + +@@ -337,7 +337,7 @@ bkt_key_install(struct table *t, + /* Key data. */ + bkt_data = table_key_data(t, bkt_key_id); + bkt_data[0] = input->action_id; +- if (t->params.action_data_size) ++ if (t->params.action_data_size && input->action_data) + memcpy(&bkt_data[1], + input->action_data, + t->params.action_data_size); +@@ -358,7 +358,7 @@ bkt_key_data_update(struct table *t, + /* Key data. 
*/ + bkt_data = table_key_data(t, bkt_key_id); + bkt_data[0] = input->action_id; +- if (t->params.action_data_size) ++ if (t->params.action_data_size && input->action_data) + memcpy(&bkt_data[1], + input->action_data, + t->params.action_data_size); +@@ -485,8 +485,6 @@ table_add(void *table, struct rte_swx_table_entry *entry) + CHECK(t, EINVAL); + CHECK(entry, EINVAL); + CHECK(entry->key, EINVAL); +- CHECK((!t->params.action_data_size && !entry->action_data) || +- (t->params.action_data_size && entry->action_data), EINVAL); + + input_sig = hash(entry->key, t->key_mask, t->key_size, 0); + bkt_id = input_sig & (t->n_buckets - 1); +diff --git a/dpdk/lib/librte_table/rte_table_hash_func.h b/dpdk/lib/librte_table/rte_table_hash_func.h +index c4c35cc06a..a962ec2f68 100644 +--- a/dpdk/lib/librte_table/rte_table_hash_func.h ++++ b/dpdk/lib/librte_table/rte_table_hash_func.h +@@ -58,8 +58,8 @@ static inline uint64_t + rte_table_hash_crc_key8(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) + { +- uint64_t *k = key; +- uint64_t *m = mask; ++ uint64_t *k = (uint64_t *)key; ++ uint64_t *m = (uint64_t *)mask; + uint64_t crc0; + + crc0 = rte_crc32_u64(seed, k[0] & m[0]); +@@ -72,8 +72,8 @@ static inline uint64_t + rte_table_hash_crc_key16(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) + { +- uint64_t *k = key; +- uint64_t *m = mask; ++ uint64_t *k = (uint64_t *)key; ++ uint64_t *m = (uint64_t *)mask; + uint64_t k0, crc0, crc1; + + k0 = k[0] & m[0]; +@@ -91,8 +91,8 @@ static inline uint64_t + rte_table_hash_crc_key24(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) + { +- uint64_t *k = key; +- uint64_t *m = mask; ++ uint64_t *k = (uint64_t *)key; ++ uint64_t *m = (uint64_t *)mask; + uint64_t k0, k2, crc0, crc1; + + k0 = k[0] & m[0]; +@@ -113,8 +113,8 @@ static inline uint64_t + rte_table_hash_crc_key32(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) + { +- uint64_t *k = key; +- uint64_t *m = mask; ++ uint64_t *k = (uint64_t *)key; ++ uint64_t *m = (uint64_t *)mask; + uint64_t k0, k2, crc0, crc1, crc2, crc3; + + k0 = k[0] & m[0]; +@@ -139,8 +139,8 @@ static inline uint64_t + rte_table_hash_crc_key40(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) + { +- uint64_t *k = key; +- uint64_t *m = mask; ++ uint64_t *k = (uint64_t *)key; ++ uint64_t *m = (uint64_t *)mask; + uint64_t k0, k2, crc0, crc1, crc2, crc3; + + k0 = k[0] & m[0]; +@@ -165,8 +165,8 @@ static inline uint64_t + rte_table_hash_crc_key48(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) + { +- uint64_t *k = key; +- uint64_t *m = mask; ++ uint64_t *k = (uint64_t *)key; ++ uint64_t *m = (uint64_t *)mask; + uint64_t k0, k2, k5, crc0, crc1, crc2, crc3; + + k0 = k[0] & m[0]; +@@ -192,8 +192,8 @@ static inline uint64_t + rte_table_hash_crc_key56(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) + { +- uint64_t *k = key; +- uint64_t *m = mask; ++ uint64_t *k = (uint64_t *)key; ++ uint64_t *m = (uint64_t *)mask; + uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5; + + k0 = k[0] & m[0]; +@@ -222,8 +222,8 @@ static inline uint64_t + rte_table_hash_crc_key64(void *key, void *mask, __rte_unused uint32_t key_size, + uint64_t seed) + { +- uint64_t *k = key; +- uint64_t *m = mask; ++ uint64_t *k = (uint64_t *)key; ++ uint64_t *m = (uint64_t *)mask; + uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5; + + k0 = k[0] & m[0]; +diff --git a/dpdk/lib/librte_telemetry/rte_telemetry.h 
b/dpdk/lib/librte_telemetry/rte_telemetry.h +index 76172222c9..fb4842a56e 100644 +--- a/dpdk/lib/librte_telemetry/rte_telemetry.h ++++ b/dpdk/lib/librte_telemetry/rte_telemetry.h +@@ -11,8 +11,13 @@ + #ifndef _RTE_TELEMETRY_H_ + #define _RTE_TELEMETRY_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** Maximum number of telemetry callbacks. */ + #define TELEMETRY_MAX_CALLBACKS 64 ++ + /** Maximum length for string used in object. */ + #define RTE_TEL_MAX_STRING_LEN 64 + /** Maximum length of string. */ +@@ -292,6 +297,8 @@ __rte_experimental + int + rte_telemetry_register_cmd(const char *cmd, telemetry_cb fn, const char *help); + ++#ifdef RTE_HAS_CPUSET ++ + /** + * @internal + * Initialize Telemetry. +@@ -314,6 +321,8 @@ int + rte_telemetry_init(const char *runtime_dir, rte_cpuset_t *cpuset, + const char **err_str); + ++#endif /* RTE_HAS_CPUSET */ ++ + /** + * Get a pointer to a container with memory allocated. The container is to be + * used embedded within an existing telemetry dict/array. +@@ -337,4 +346,8 @@ __rte_experimental + void + rte_tel_data_free(struct rte_tel_data *data); + ++#ifdef __cplusplus ++} ++#endif ++ + #endif +diff --git a/dpdk/lib/librte_telemetry/telemetry.c b/dpdk/lib/librte_telemetry/telemetry.c +index b142729da4..9d970d167e 100644 +--- a/dpdk/lib/librte_telemetry/telemetry.c ++++ b/dpdk/lib/librte_telemetry/telemetry.c +@@ -95,8 +95,10 @@ list_commands(const char *cmd __rte_unused, const char *params __rte_unused, + int i; + + rte_tel_data_start_array(d, RTE_TEL_STRING_VAL); ++ rte_spinlock_lock(&callback_sl); + for (i = 0; i < num_callbacks; i++) + rte_tel_data_add_array_string(d, callbacks[i].cmd); ++ rte_spinlock_unlock(&callback_sl); + return 0; + } + +diff --git a/dpdk/lib/librte_telemetry/telemetry_json.h b/dpdk/lib/librte_telemetry/telemetry_json.h +index ad270b9b30..db70690274 100644 +--- a/dpdk/lib/librte_telemetry/telemetry_json.h ++++ b/dpdk/lib/librte_telemetry/telemetry_json.h +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + + /** + * @file +@@ -22,14 +23,14 @@ + /** + * @internal + * Copies a value into a buffer if the buffer has enough available space. +- * Nothing written to buffer if an overflow ocurs. +- * This function is not for use for values larger than 1k. ++ * Nothing written to buffer if an overflow occurs. ++ * This function is not for use for values larger than given buffer length. + */ + __rte_format_printf(3, 4) + static inline int + __json_snprintf(char *buf, const int len, const char *format, ...) 
+ { +- char tmp[1024]; ++ char tmp[len]; + va_list ap; + int ret; + +diff --git a/dpdk/lib/librte_vhost/rte_vdpa.h b/dpdk/lib/librte_vhost/rte_vdpa.h +index 1437f400bf..6ac85d1bbf 100644 +--- a/dpdk/lib/librte_vhost/rte_vdpa.h ++++ b/dpdk/lib/librte_vhost/rte_vdpa.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_VDPA_H_ + #define _RTE_VDPA_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + /** + * @file + * +@@ -183,4 +187,9 @@ rte_vdpa_get_stats(struct rte_vdpa_device *dev, uint16_t qid, + */ + int + rte_vdpa_reset_stats(struct rte_vdpa_device *dev, uint16_t qid); ++ ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_VDPA_H_ */ +diff --git a/dpdk/lib/librte_vhost/rte_vdpa_dev.h b/dpdk/lib/librte_vhost/rte_vdpa_dev.h +index bfada387b0..52f528ff20 100644 +--- a/dpdk/lib/librte_vhost/rte_vdpa_dev.h ++++ b/dpdk/lib/librte_vhost/rte_vdpa_dev.h +@@ -5,6 +5,10 @@ + #ifndef _RTE_VDPA_H_DEV_ + #define _RTE_VDPA_H_DEV_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #include + + #include "rte_vhost.h" +@@ -135,4 +139,8 @@ rte_vhost_host_notifier_ctrl(int vid, uint16_t qid, bool enable); + int + rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m); + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /* _RTE_VDPA_DEV_H_ */ +diff --git a/dpdk/lib/librte_vhost/rte_vhost.h b/dpdk/lib/librte_vhost/rte_vhost.h +index 010f160869..39fe428079 100644 +--- a/dpdk/lib/librte_vhost/rte_vhost.h ++++ b/dpdk/lib/librte_vhost/rte_vhost.h +@@ -21,10 +21,12 @@ + extern "C" { + #endif + ++#ifndef __cplusplus + /* These are not C++-aware. */ + #include + #include + #include ++#endif + + #define RTE_VHOST_USER_CLIENT (1ULL << 0) + #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1) +@@ -36,6 +38,7 @@ extern "C" { + /* support only linear buffers (no chained mbufs) */ + #define RTE_VHOST_USER_LINEARBUF_SUPPORT (1ULL << 6) + #define RTE_VHOST_USER_ASYNC_COPY (1ULL << 7) ++#define RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS (1ULL << 8) + + /* Features. */ + #ifndef VIRTIO_NET_F_GUEST_ANNOUNCE +@@ -760,7 +763,7 @@ rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx, + /** + * Set split inflight descriptor. 
+ * +- * This function save descriptors that has been comsumed in available ++ * This function save descriptors that has been consumed in available + * ring + * + * @param vid +@@ -780,7 +783,7 @@ rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, + /** + * Set packed inflight descriptor and get corresponding inflight entry + * +- * This function save descriptors that has been comsumed ++ * This function save descriptors that has been consumed + * + * @param vid + * vhost device ID +diff --git a/dpdk/lib/librte_vhost/rte_vhost_crypto.h b/dpdk/lib/librte_vhost/rte_vhost_crypto.h +index 8531757285..ef01f94aa5 100644 +--- a/dpdk/lib/librte_vhost/rte_vhost_crypto.h ++++ b/dpdk/lib/librte_vhost/rte_vhost_crypto.h +@@ -5,6 +5,10 @@ + #ifndef _VHOST_CRYPTO_H_ + #define _VHOST_CRYPTO_H_ + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ + #include + + #include +@@ -137,4 +141,8 @@ uint16_t + rte_vhost_crypto_finalize_requests(struct rte_crypto_op **ops, + uint16_t nb_ops, int *callfds, uint16_t *nb_callfds); + ++#ifdef __cplusplus ++} ++#endif ++ + #endif /**< _VHOST_CRYPTO_H_ */ +diff --git a/dpdk/lib/librte_vhost/socket.c b/dpdk/lib/librte_vhost/socket.c +index 0169d36481..a3e5a0160b 100644 +--- a/dpdk/lib/librte_vhost/socket.c ++++ b/dpdk/lib/librte_vhost/socket.c +@@ -42,6 +42,7 @@ struct vhost_user_socket { + bool extbuf; + bool linearbuf; + bool async_copy; ++ bool net_compliant_ol_flags; + + /* + * The "supported_features" indicates the feature bits the +@@ -224,7 +225,8 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) + size = strnlen(vsocket->path, PATH_MAX); + vhost_set_ifname(vid, vsocket->path, size); + +- vhost_set_builtin_virtio_net(vid, vsocket->use_builtin_virtio_net); ++ vhost_setup_virtio_net(vid, vsocket->use_builtin_virtio_net, ++ vsocket->net_compliant_ol_flags); + + vhost_attach_vdpa_device(vid, vsocket->vdpa_dev); + +@@ -241,7 +243,7 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) + dev->async_copy = 1; + } + +- VHOST_LOG_CONFIG(INFO, "new device, handle is %d\n", vid); ++ VHOST_LOG_CONFIG(INFO, "new device, handle is %d, path is %s\n", vid, vsocket->path); + + if (vsocket->notify_ops->new_connection) { + ret = vsocket->notify_ops->new_connection(vid); +@@ -499,7 +501,7 @@ vhost_user_reconnect_init(void) + + ret = pthread_mutex_init(&reconn_list.mutex, NULL); + if (ret < 0) { +- VHOST_LOG_CONFIG(ERR, "failed to initialize mutex"); ++ VHOST_LOG_CONFIG(ERR, "failed to initialize mutex\n"); + return ret; + } + TAILQ_INIT(&reconn_list.head); +@@ -507,10 +509,10 @@ vhost_user_reconnect_init(void) + ret = rte_ctrl_thread_create(&reconn_tid, "vhost_reconn", NULL, + vhost_user_client_reconnect, NULL); + if (ret != 0) { +- VHOST_LOG_CONFIG(ERR, "failed to create reconnect thread"); ++ VHOST_LOG_CONFIG(ERR, "failed to create reconnect thread\n"); + if (pthread_mutex_destroy(&reconn_list.mutex)) { + VHOST_LOG_CONFIG(ERR, +- "failed to destroy reconnect mutex"); ++ "failed to destroy reconnect mutex\n"); + } + } + +@@ -877,6 +879,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags) + vsocket->extbuf = flags & RTE_VHOST_USER_EXTBUF_SUPPORT; + vsocket->linearbuf = flags & RTE_VHOST_USER_LINEARBUF_SUPPORT; + vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY; ++ vsocket->net_compliant_ol_flags = flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS; + + if (vsocket->async_copy && + (flags & (RTE_VHOST_USER_IOMMU_SUPPORT | +@@ -1020,66 +1023,65 @@ rte_vhost_driver_unregister(const char *path) + + for (i = 0; i < 
vhost_user.vsocket_cnt; i++) { + struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; ++ if (strcmp(vsocket->path, path)) ++ continue; + +- if (!strcmp(vsocket->path, path)) { +- pthread_mutex_lock(&vsocket->conn_mutex); +- for (conn = TAILQ_FIRST(&vsocket->conn_list); +- conn != NULL; +- conn = next) { +- next = TAILQ_NEXT(conn, next); +- +- /* +- * If r/wcb is executing, release vsocket's +- * conn_mutex and vhost_user's mutex locks, and +- * try again since the r/wcb may use the +- * conn_mutex and mutex locks. +- */ +- if (fdset_try_del(&vhost_user.fdset, +- conn->connfd) == -1) { +- pthread_mutex_unlock( +- &vsocket->conn_mutex); +- pthread_mutex_unlock(&vhost_user.mutex); +- goto again; +- } +- +- VHOST_LOG_CONFIG(INFO, +- "free connfd = %d for device '%s'\n", +- conn->connfd, path); +- close(conn->connfd); +- vhost_destroy_device(conn->vid); +- TAILQ_REMOVE(&vsocket->conn_list, conn, next); +- free(conn); +- } +- pthread_mutex_unlock(&vsocket->conn_mutex); +- +- if (vsocket->is_server) { +- /* +- * If r/wcb is executing, release vhost_user's +- * mutex lock, and try again since the r/wcb +- * may use the mutex lock. +- */ +- if (fdset_try_del(&vhost_user.fdset, +- vsocket->socket_fd) == -1) { +- pthread_mutex_unlock(&vhost_user.mutex); +- goto again; +- } +- +- close(vsocket->socket_fd); +- unlink(path); +- } else if (vsocket->reconnect) { +- vhost_user_remove_reconnect(vsocket); ++ if (vsocket->is_server) { ++ /* ++ * If r/wcb is executing, release vhost_user's ++ * mutex lock, and try again since the r/wcb ++ * may use the mutex lock. ++ */ ++ if (fdset_try_del(&vhost_user.fdset, vsocket->socket_fd) == -1) { ++ pthread_mutex_unlock(&vhost_user.mutex); ++ goto again; + } ++ } else if (vsocket->reconnect) { ++ vhost_user_remove_reconnect(vsocket); ++ } + +- pthread_mutex_destroy(&vsocket->conn_mutex); +- vhost_user_socket_mem_free(vsocket); ++ pthread_mutex_lock(&vsocket->conn_mutex); ++ for (conn = TAILQ_FIRST(&vsocket->conn_list); ++ conn != NULL; ++ conn = next) { ++ next = TAILQ_NEXT(conn, next); + +- count = --vhost_user.vsocket_cnt; +- vhost_user.vsockets[i] = vhost_user.vsockets[count]; +- vhost_user.vsockets[count] = NULL; +- pthread_mutex_unlock(&vhost_user.mutex); ++ /* ++ * If r/wcb is executing, release vsocket's ++ * conn_mutex and vhost_user's mutex locks, and ++ * try again since the r/wcb may use the ++ * conn_mutex and mutex locks. 
++ */ ++ if (fdset_try_del(&vhost_user.fdset, ++ conn->connfd) == -1) { ++ pthread_mutex_unlock(&vsocket->conn_mutex); ++ pthread_mutex_unlock(&vhost_user.mutex); ++ goto again; ++ } + +- return 0; ++ VHOST_LOG_CONFIG(INFO, ++ "free connfd = %d for device '%s'\n", ++ conn->connfd, path); ++ close(conn->connfd); ++ vhost_destroy_device(conn->vid); ++ TAILQ_REMOVE(&vsocket->conn_list, conn, next); ++ free(conn); + } ++ pthread_mutex_unlock(&vsocket->conn_mutex); ++ ++ if (vsocket->is_server) { ++ close(vsocket->socket_fd); ++ unlink(path); ++ } ++ ++ pthread_mutex_destroy(&vsocket->conn_mutex); ++ vhost_user_socket_mem_free(vsocket); ++ ++ count = --vhost_user.vsocket_cnt; ++ vhost_user.vsockets[i] = vhost_user.vsockets[count]; ++ vhost_user.vsockets[count] = NULL; ++ pthread_mutex_unlock(&vhost_user.mutex); ++ return 0; + } + pthread_mutex_unlock(&vhost_user.mutex); + +@@ -1145,8 +1147,7 @@ rte_vhost_driver_start(const char *path) + &vhost_user.fdset); + if (ret != 0) { + VHOST_LOG_CONFIG(ERR, +- "failed to create fdset handling thread"); +- ++ "failed to create fdset handling thread\n"); + fdset_pipe_uninit(&vhost_user.fdset); + return -1; + } +diff --git a/dpdk/lib/librte_vhost/vhost.c b/dpdk/lib/librte_vhost/vhost.c +index 4de588d752..11704d4a5f 100644 +--- a/dpdk/lib/librte_vhost/vhost.c ++++ b/dpdk/lib/librte_vhost/vhost.c +@@ -598,7 +598,7 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx) + if (dev->virtqueue[i]) + continue; + +- vq = rte_malloc(NULL, sizeof(struct vhost_virtqueue), 0); ++ vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0); + if (vq == NULL) { + VHOST_LOG_CONFIG(ERR, + "Failed to allocate memory for vring:%u.\n", i); +@@ -742,7 +742,7 @@ vhost_set_ifname(int vid, const char *if_name, unsigned int if_len) + } + + void +-vhost_set_builtin_virtio_net(int vid, bool enable) ++vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags) + { + struct virtio_net *dev = get_device(vid); + +@@ -753,6 +753,10 @@ vhost_set_builtin_virtio_net(int vid, bool enable) + dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET; + else + dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET; ++ if (!compliant_ol_flags) ++ dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS; ++ else ++ dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS; + } + + void +@@ -1187,6 +1191,9 @@ rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx, + if (unlikely(!vq->inflight_split)) + return -1; + ++ if (unlikely(idx >= vq->size)) ++ return -1; ++ + vq->inflight_split->last_inflight_io = idx; + return 0; + } +@@ -1258,11 +1265,15 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx) + if (!vq) + return -1; + ++ rte_spinlock_lock(&vq->access_lock); ++ + if (vq_is_packed(dev)) + vhost_vring_call_packed(dev, vq); + else + vhost_vring_call_split(dev, vq); + ++ rte_spinlock_unlock(&vq->access_lock); ++ + return 0; + } + +@@ -1614,6 +1625,11 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id, + ops->transfer_data == NULL)) + return -1; + ++ VHOST_LOG_CONFIG(ERR, "async vhost is not supported by 20.11 LTS, " ++ "as deadlock may occur if this function is called " ++ "inside vhost callback functions."); ++ return -1; ++ + rte_spinlock_lock(&vq->access_lock); + + if (unlikely(vq->async_registered)) { +@@ -1686,31 +1702,26 @@ int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id) + if (vq == NULL) + return ret; + +- ret = 0; +- +- if (!vq->async_registered) +- return ret; +- + if (!rte_spinlock_trylock(&vq->access_lock)) { + VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. 
" + "virt queue busy.\n"); +- return -1; ++ return ret; + } + +- if (vq->async_pkts_inflight_n) { ++ if (!vq->async_registered) { ++ ret = 0; ++ } else if (vq->async_pkts_inflight_n) { + VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. " + "async inflight packets must be completed before unregistration.\n"); +- ret = -1; +- goto out; +- } +- +- vhost_free_async_mem(vq); ++ } else { ++ ret = 0; ++ vhost_free_async_mem(vq); + +- vq->async_ops.transfer_data = NULL; +- vq->async_ops.check_completed_copies = NULL; +- vq->async_registered = false; ++ vq->async_ops.transfer_data = NULL; ++ vq->async_ops.check_completed_copies = NULL; ++ vq->async_registered = false; ++ } + +-out: + rte_spinlock_unlock(&vq->access_lock); + + return ret; +diff --git a/dpdk/lib/librte_vhost/vhost.h b/dpdk/lib/librte_vhost/vhost.h +index 361c9f79b3..92b67a2c6f 100644 +--- a/dpdk/lib/librte_vhost/vhost.h ++++ b/dpdk/lib/librte_vhost/vhost.h +@@ -27,15 +27,17 @@ + #include "rte_vhost_async.h" + + /* Used to indicate that the device is running on a data core */ +-#define VIRTIO_DEV_RUNNING 1 ++#define VIRTIO_DEV_RUNNING ((uint32_t)1 << 0) + /* Used to indicate that the device is ready to operate */ +-#define VIRTIO_DEV_READY 2 ++#define VIRTIO_DEV_READY ((uint32_t)1 << 1) + /* Used to indicate that the built-in vhost net device backend is enabled */ +-#define VIRTIO_DEV_BUILTIN_VIRTIO_NET 4 ++#define VIRTIO_DEV_BUILTIN_VIRTIO_NET ((uint32_t)1 << 2) + /* Used to indicate that the device has its own data path and configured */ +-#define VIRTIO_DEV_VDPA_CONFIGURED 8 ++#define VIRTIO_DEV_VDPA_CONFIGURED ((uint32_t)1 << 3) + /* Used to indicate that the feature negotiation failed */ +-#define VIRTIO_DEV_FEATURES_FAILED 16 ++#define VIRTIO_DEV_FEATURES_FAILED ((uint32_t)1 << 4) ++/* Used to indicate that the virtio_net tx code should fill TX ol_flags */ ++#define VIRTIO_DEV_LEGACY_OL_FLAGS ((uint32_t)1 << 5) + + /* Backend value set by guest. 
*/ + #define VIRTIO_DEV_STOPPED -1 +@@ -331,7 +333,7 @@ struct vring_packed_desc_event { + + struct guest_page { + uint64_t guest_phys_addr; +- uint64_t host_phys_addr; ++ uint64_t host_iova; + uint64_t size; + }; + +@@ -563,6 +565,20 @@ static __rte_always_inline int guest_page_addrcmp(const void *p1, + return 0; + } + ++static __rte_always_inline int guest_page_rangecmp(const void *p1, const void *p2) ++{ ++ const struct guest_page *page1 = (const struct guest_page *)p1; ++ const struct guest_page *page2 = (const struct guest_page *)p2; ++ ++ if (page1->guest_phys_addr >= page2->guest_phys_addr) { ++ if (page1->guest_phys_addr < page2->guest_phys_addr + page2->size) ++ return 0; ++ else ++ return 1; ++ } else ++ return -1; ++} ++ + static __rte_always_inline rte_iova_t + gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa, + uint64_t gpa_size, uint64_t *hpa_size) +@@ -573,20 +589,20 @@ gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa, + + *hpa_size = gpa_size; + if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) { +- key.guest_phys_addr = gpa & ~(dev->guest_pages[0].size - 1); ++ key.guest_phys_addr = gpa; + page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages, +- sizeof(struct guest_page), guest_page_addrcmp); ++ sizeof(struct guest_page), guest_page_rangecmp); + if (page) { + if (gpa + gpa_size <= + page->guest_phys_addr + page->size) { + return gpa - page->guest_phys_addr + +- page->host_phys_addr; ++ page->host_iova; + } else if (gpa < page->guest_phys_addr + + page->size) { + *hpa_size = page->guest_phys_addr + + page->size - gpa; + return gpa - page->guest_phys_addr + +- page->host_phys_addr; ++ page->host_iova; + } + } + } else { +@@ -597,13 +613,13 @@ gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa, + if (gpa + gpa_size <= + page->guest_phys_addr + page->size) { + return gpa - page->guest_phys_addr + +- page->host_phys_addr; ++ page->host_iova; + } else if (gpa < page->guest_phys_addr + + page->size) { + *hpa_size = page->guest_phys_addr + + page->size - gpa; + return gpa - page->guest_phys_addr + +- page->host_phys_addr; ++ page->host_iova; + } + } + } +@@ -672,7 +688,7 @@ int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx); + void vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *dev); + + void vhost_set_ifname(int, const char *if_name, unsigned int if_len); +-void vhost_set_builtin_virtio_net(int vid, bool enable); ++void vhost_setup_virtio_net(int vid, bool enable, bool legacy_ol_flags); + void vhost_enable_extbuf(int vid); + void vhost_enable_linearbuf(int vid); + int vhost_enable_guest_notification(struct virtio_net *dev, +diff --git a/dpdk/lib/librte_vhost/vhost_crypto.c b/dpdk/lib/librte_vhost/vhost_crypto.c +index 6689c52df2..7d1d6a1861 100644 +--- a/dpdk/lib/librte_vhost/vhost_crypto.c ++++ b/dpdk/lib/librte_vhost/vhost_crypto.c +@@ -565,94 +565,57 @@ get_data_ptr(struct vhost_crypto_data_req *vc_req, + return data; + } + +-static __rte_always_inline int +-copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, +- struct vhost_crypto_desc *head, +- struct vhost_crypto_desc **cur_desc, +- uint32_t size, uint32_t max_n_descs) ++static __rte_always_inline uint32_t ++copy_data_from_desc(void *dst, struct vhost_crypto_data_req *vc_req, ++ struct vhost_crypto_desc *desc, uint32_t size) + { +- struct vhost_crypto_desc *desc = *cur_desc; +- uint64_t remain, addr, dlen, len; +- uint32_t to_copy; +- uint8_t *data = dst_data; +- uint8_t *src; +- int left = size; +- +- to_copy = RTE_MIN(desc->len, (uint32_t)left); +- dlen = 
to_copy; +- src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, +- VHOST_ACCESS_RO); +- if (unlikely(!src || !dlen)) +- return -1; ++ uint64_t remain; ++ uint64_t addr; ++ ++ remain = RTE_MIN(desc->len, size); ++ addr = desc->addr; ++ do { ++ uint64_t len; ++ void *src; ++ ++ len = remain; ++ src = IOVA_TO_VVA(void *, vc_req, addr, &len, VHOST_ACCESS_RO); ++ if (unlikely(src == NULL || len == 0)) ++ return 0; + +- rte_memcpy((uint8_t *)data, src, dlen); +- data += dlen; ++ rte_memcpy(dst, src, len); ++ remain -= len; ++ /* cast is needed for 32-bit architecture */ ++ dst = RTE_PTR_ADD(dst, (size_t)len); ++ addr += len; ++ } while (unlikely(remain != 0)); + +- if (unlikely(dlen < to_copy)) { +- remain = to_copy - dlen; +- addr = desc->addr + dlen; ++ return RTE_MIN(desc->len, size); ++} + +- while (remain) { +- len = remain; +- src = IOVA_TO_VVA(uint8_t *, vc_req, addr, &len, +- VHOST_ACCESS_RO); +- if (unlikely(!src || !len)) { +- VC_LOG_ERR("Failed to map descriptor"); +- return -1; +- } + +- rte_memcpy(data, src, len); +- addr += len; +- remain -= len; +- data += len; +- } +- } ++static __rte_always_inline int ++copy_data(void *data, struct vhost_crypto_data_req *vc_req, ++ struct vhost_crypto_desc *head, struct vhost_crypto_desc **cur_desc, ++ uint32_t size, uint32_t max_n_descs) ++{ ++ struct vhost_crypto_desc *desc = *cur_desc; ++ uint32_t left = size; + +- left -= to_copy; ++ do { ++ uint32_t copied; + +- while (desc >= head && desc - head < (int)max_n_descs && left) { +- desc++; +- to_copy = RTE_MIN(desc->len, (uint32_t)left); +- dlen = to_copy; +- src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, +- VHOST_ACCESS_RO); +- if (unlikely(!src || !dlen)) { +- VC_LOG_ERR("Failed to map descriptor"); ++ copied = copy_data_from_desc(data, vc_req, desc, left); ++ if (copied == 0) + return -1; +- } +- +- rte_memcpy(data, src, dlen); +- data += dlen; +- +- if (unlikely(dlen < to_copy)) { +- remain = to_copy - dlen; +- addr = desc->addr + dlen; ++ left -= copied; ++ data = RTE_PTR_ADD(data, copied); ++ } while (left != 0 && ++desc < head + max_n_descs); + +- while (remain) { +- len = remain; +- src = IOVA_TO_VVA(uint8_t *, vc_req, addr, &len, +- VHOST_ACCESS_RO); +- if (unlikely(!src || !len)) { +- VC_LOG_ERR("Failed to map descriptor"); +- return -1; +- } +- +- rte_memcpy(data, src, len); +- addr += len; +- remain -= len; +- data += len; +- } +- } +- +- left -= to_copy; +- } +- +- if (unlikely(left > 0)) { +- VC_LOG_ERR("Incorrect virtio descriptor"); ++ if (unlikely(left != 0)) + return -1; +- } + +- if (unlikely(desc - head == (int)max_n_descs)) ++ if (unlikely(desc == head + max_n_descs)) + *cur_desc = NULL; + else + *cur_desc = desc + 1; +@@ -852,6 +815,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + /* iv */ + if (unlikely(copy_data(iv_data, vc_req, head, &desc, + cipher->para.iv_len, max_n_descs))) { ++ VC_LOG_ERR("Incorrect virtio descriptor"); + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } +@@ -883,6 +847,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), + vc_req, head, &desc, cipher->para.src_data_len, + max_n_descs) < 0)) { ++ VC_LOG_ERR("Incorrect virtio descriptor"); + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } +@@ -1006,6 +971,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + /* iv */ + if (unlikely(copy_data(iv_data, vc_req, head, &desc, + chain->para.iv_len, max_n_descs) < 0)) { ++ 
VC_LOG_ERR("Incorrect virtio descriptor"); + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } +@@ -1037,6 +1003,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), + vc_req, head, &desc, chain->para.src_data_len, + max_n_descs) < 0)) { ++ VC_LOG_ERR("Incorrect virtio descriptor"); + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } +@@ -1121,6 +1088,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, + if (unlikely(copy_data(digest_addr, vc_req, head, &digest_desc, + chain->para.hash_result_len, + max_n_descs) < 0)) { ++ VC_LOG_ERR("Incorrect virtio descriptor"); + ret = VIRTIO_CRYPTO_BADMSG; + goto error_exit; + } +@@ -1337,13 +1305,15 @@ vhost_crypto_finalize_one_request(struct rte_crypto_op *op, + struct rte_mbuf *m_src = op->sym->m_src; + struct rte_mbuf *m_dst = op->sym->m_dst; + struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(m_src); +- struct vhost_virtqueue *vq = vc_req->vq; +- uint16_t used_idx = vc_req->desc_idx, desc_idx; ++ struct vhost_virtqueue *vq; ++ uint16_t used_idx, desc_idx; + + if (unlikely(!vc_req)) { + VC_LOG_ERR("Failed to retrieve vc_req"); + return NULL; + } ++ vq = vc_req->vq; ++ used_idx = vc_req->desc_idx; + + if (old_vq && (vq != old_vq)) + return vq; diff --git a/dpdk/lib/librte_vhost/vhost_user.c b/dpdk/lib/librte_vhost/vhost_user.c -index 45c8ac09da..70d206dcf8 100644 +index 45c8ac09da..25ebc0c37c 100644 --- a/dpdk/lib/librte_vhost/vhost_user.c +++ b/dpdk/lib/librte_vhost/vhost_user.c -@@ -1416,6 +1416,9 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, +@@ -474,8 +474,8 @@ vhost_user_set_vring_num(struct virtio_net **pdev, + } + + /* +- * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the +- * same numa node as the memory of vring descriptor. ++ * Reallocate virtio_dev, vhost_virtqueue and related data structures to ++ * make them on the same numa node as the memory of vring descriptor. + */ + #ifdef RTE_LIBRTE_VHOST_NUMA + static struct virtio_net* +@@ -489,12 +489,16 @@ numa_realloc(struct virtio_net *dev, int index) + struct batch_copy_elem *new_batch_copy_elems; + int ret; + +- if (dev->flags & VIRTIO_DEV_RUNNING) +- return dev; +- + old_dev = dev; + vq = old_vq = dev->virtqueue[index]; + ++ /* ++ * If VQ is ready, it is too late to reallocate, it certainly already ++ * happened anyway on VHOST_USER_SET_VRING_ADRR. 
++ */ ++ if (vq->ready) ++ return dev; ++ + ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc, + MPOL_F_NODE | MPOL_F_ADDR); + +@@ -549,6 +553,9 @@ numa_realloc(struct virtio_net *dev, int index) + rte_free(old_vq); + } + ++ if (dev->flags & VIRTIO_DEV_RUNNING) ++ goto out; ++ + /* check if we need to reallocate dev */ + ret = get_mempolicy(&oldnode, NULL, 0, old_dev, + MPOL_F_NODE | MPOL_F_ADDR); +@@ -558,6 +565,10 @@ numa_realloc(struct virtio_net *dev, int index) + goto out; + } + if (oldnode != newnode) { ++ struct rte_vhost_memory *old_mem; ++ struct guest_page *old_gp; ++ ssize_t mem_size, gp_size; ++ + VHOST_LOG_CONFIG(INFO, + "reallocate dev from %d to %d node\n", + oldnode, newnode); +@@ -569,6 +580,29 @@ numa_realloc(struct virtio_net *dev, int index) + + memcpy(dev, old_dev, sizeof(*dev)); + rte_free(old_dev); ++ ++ mem_size = sizeof(struct rte_vhost_memory) + ++ sizeof(struct rte_vhost_mem_region) * dev->mem->nregions; ++ old_mem = dev->mem; ++ dev->mem = rte_malloc_socket(NULL, mem_size, 0, newnode); ++ if (!dev->mem) { ++ dev->mem = old_mem; ++ goto out; ++ } ++ ++ memcpy(dev->mem, old_mem, mem_size); ++ rte_free(old_mem); ++ ++ gp_size = dev->max_guest_pages * sizeof(*dev->guest_pages); ++ old_gp = dev->guest_pages; ++ dev->guest_pages = rte_malloc_socket(NULL, gp_size, RTE_CACHE_LINE_SIZE, newnode); ++ if (!dev->guest_pages) { ++ dev->guest_pages = old_gp; ++ goto out; ++ } ++ ++ memcpy(dev->guest_pages, old_gp, gp_size); ++ rte_free(old_gp); + } + + out: +@@ -869,7 +903,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev, + + static int + add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, +- uint64_t host_phys_addr, uint64_t size) ++ uint64_t host_iova, uint64_t size) + { + struct guest_page *page, *last_page; + struct guest_page *old_pages; +@@ -890,7 +924,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, + if (dev->nr_guest_pages > 0) { + last_page = &dev->guest_pages[dev->nr_guest_pages - 1]; + /* merge if the two pages are continuous */ +- if (host_phys_addr == last_page->host_phys_addr + ++ if (host_iova == last_page->host_iova + + last_page->size) { + last_page->size += size; + return 0; +@@ -899,7 +933,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, + + page = &dev->guest_pages[dev->nr_guest_pages++]; + page->guest_phys_addr = guest_phys_addr; +- page->host_phys_addr = host_phys_addr; ++ page->host_iova = host_iova; + page->size = size; + + return 0; +@@ -912,14 +946,14 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, + uint64_t reg_size = reg->size; + uint64_t host_user_addr = reg->host_user_addr; + uint64_t guest_phys_addr = reg->guest_phys_addr; +- uint64_t host_phys_addr; ++ uint64_t host_iova; + uint64_t size; + +- host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)host_user_addr); ++ host_iova = rte_mem_virt2iova((void *)(uintptr_t)host_user_addr); + size = page_size - (guest_phys_addr & (page_size - 1)); + size = RTE_MIN(size, reg_size); + +- if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0) ++ if (add_one_guest_page(dev, guest_phys_addr, host_iova, size) < 0) + return -1; + + host_user_addr += size; +@@ -928,9 +962,9 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, + + while (reg_size > 0) { + size = RTE_MIN(reg_size, page_size); +- host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t) ++ host_iova = rte_mem_virt2iova((void *)(uintptr_t) + host_user_addr); +- if (add_one_guest_page(dev, guest_phys_addr, 
host_phys_addr, ++ if (add_one_guest_page(dev, guest_phys_addr, host_iova, + size) < 0) + return -1; + +@@ -962,11 +996,11 @@ dump_guest_pages(struct virtio_net *dev) + VHOST_LOG_CONFIG(INFO, + "guest physical page region %u\n" + "\t guest_phys_addr: %" PRIx64 "\n" +- "\t host_phys_addr : %" PRIx64 "\n" ++ "\t host_iova : %" PRIx64 "\n" + "\t size : %" PRIx64 "\n", + i, + page->guest_phys_addr, +- page->host_phys_addr, ++ page->host_iova, + page->size); + } + } +@@ -1416,6 +1450,9 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, int fd, i, j; void *addr; @@ -899,7 +80268,7 @@ index 45c8ac09da..70d206dcf8 100644 if (msg->size != sizeof(msg->payload.inflight)) { VHOST_LOG_CONFIG(ERR, "invalid get_inflight_fd message size is %d\n", -@@ -1509,6 +1512,9 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, +@@ -1509,6 +1546,9 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, void *addr; int fd, i; @@ -909,7 +80278,59 @@ index 45c8ac09da..70d206dcf8 100644 fd = msg->fds[0]; if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { VHOST_LOG_CONFIG(ERR, -@@ -2652,6 +2658,9 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, +@@ -1876,9 +1916,6 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg, + */ + if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) { + vq->enabled = 1; +- if (dev->notify_ops->vring_state_changed) +- dev->notify_ops->vring_state_changed( +- dev->vid, file.index, 1); + } + + if (vq->ready) { +@@ -1981,6 +2018,8 @@ vhost_user_get_vring_base(struct virtio_net **pdev, + msg->size = sizeof(msg->payload.state); + msg->fd_num = 0; + ++ vhost_user_iotlb_flush_all(vq); ++ + vring_invalidate(dev, vq); + + return RTE_VHOST_MSG_RESULT_REPLY; +@@ -2346,8 +2385,11 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg, + vhost_user_iotlb_cache_insert(vq, imsg->iova, vva, + len, imsg->perm); + +- if (is_vring_iotlb(dev, vq, imsg)) ++ if (is_vring_iotlb(dev, vq, imsg)) { ++ rte_spinlock_lock(&vq->access_lock); + *pdev = dev = translate_ring_addresses(dev, i); ++ rte_spinlock_unlock(&vq->access_lock); ++ } + } + break; + case VHOST_IOTLB_INVALIDATE: +@@ -2360,8 +2402,11 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg, + vhost_user_iotlb_cache_remove(vq, imsg->iova, + imsg->size); + +- if (is_vring_iotlb(dev, vq, imsg)) ++ if (is_vring_iotlb(dev, vq, imsg)) { ++ rte_spinlock_lock(&vq->access_lock); + vring_invalidate(dev, vq); ++ rte_spinlock_unlock(&vq->access_lock); ++ } + } + break; + default: +@@ -2646,12 +2691,16 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, + break; + case VHOST_USER_SET_VRING_NUM: + case VHOST_USER_SET_VRING_BASE: ++ case VHOST_USER_GET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: + vring_idx = msg->payload.state.index; + break; case VHOST_USER_SET_VRING_ADDR: vring_idx = msg->payload.addr.index; break; @@ -919,6 +80340,903 @@ index 45c8ac09da..70d206dcf8 100644 default: return 0; } +@@ -2740,7 +2789,6 @@ vhost_user_msg_handler(int vid, int fd) + return -1; + } + +- ret = 0; + request = msg.request.master; + if (request > VHOST_USER_NONE && request < VHOST_USER_MAX && + vhost_message_str[request]) { +@@ -2861,9 +2909,6 @@ vhost_user_msg_handler(int vid, int fd) + } + } + +- if (unlock_required) +- vhost_user_unlock_all_queue_pairs(dev); +- + /* If message was not handled at this stage, treat it as an error */ + if (!handled) { + VHOST_LOG_CONFIG(ERR, +@@ -2885,9 +2930,11 @@ 
vhost_user_msg_handler(int vid, int fd) + } else if (ret == RTE_VHOST_MSG_RESULT_ERR) { + VHOST_LOG_CONFIG(ERR, + "vhost message handling failed.\n"); +- return -1; ++ ret = -1; ++ goto unlock; + } + ++ ret = 0; + for (i = 0; i < dev->nr_vring; i++) { + struct vhost_virtqueue *vq = dev->virtqueue[i]; + bool cur_ready = vq_is_ready(dev, vq); +@@ -2898,8 +2945,11 @@ vhost_user_msg_handler(int vid, int fd) + } + } + ++unlock: ++ if (unlock_required) ++ vhost_user_unlock_all_queue_pairs(dev); + +- if (!virtio_is_ready(dev)) ++ if (ret != 0 || !virtio_is_ready(dev)) + goto out; + + /* +@@ -2926,7 +2976,7 @@ vhost_user_msg_handler(int vid, int fd) + } + + out: +- return 0; ++ return ret; + } + + static int process_slave_message_reply(struct virtio_net *dev, +diff --git a/dpdk/lib/librte_vhost/virtio_net.c b/dpdk/lib/librte_vhost/virtio_net.c +index 55bfc161b5..dd571b60a0 100644 +--- a/dpdk/lib/librte_vhost/virtio_net.c ++++ b/dpdk/lib/librte_vhost/virtio_net.c +@@ -8,6 +8,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -427,6 +428,16 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) + csum_l4 |= PKT_TX_TCP_CKSUM; + + if (csum_l4) { ++ /* ++ * Pseudo-header checksum must be set as per Virtio spec. ++ * ++ * Note: We don't propagate rte_net_intel_cksum_prepare() ++ * errors, as it would have an impact on performance, and an ++ * error would mean the packet is dropped by the guest instead ++ * of being dropped here. ++ */ ++ rte_net_intel_cksum_prepare(m_buf); ++ + net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; + +@@ -571,10 +582,11 @@ fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, + return -1; + } + +- len += descs[idx].len; ++ dlen = descs[idx].len; ++ len += dlen; + + if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, +- descs[idx].addr, descs[idx].len, ++ descs[idx].addr, dlen, + perm))) { + free_ind_table(idesc); + return -1; +@@ -691,9 +703,10 @@ fill_vec_buf_packed_indirect(struct virtio_net *dev, + return -1; + } + +- *len += descs[i].len; ++ dlen = descs[i].len; ++ *len += dlen; + if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, +- descs[i].addr, descs[i].len, ++ descs[i].addr, dlen, + perm))) + return -1; + } +@@ -714,6 +727,7 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, + bool wrap_counter = vq->avail_wrap_counter; + struct vring_packed_desc *descs = vq->desc_packed; + uint16_t vec_id = *vec_idx; ++ uint64_t dlen; + + if (avail_idx < vq->last_avail_idx) + wrap_counter ^= 1; +@@ -746,11 +760,12 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, + len, perm) < 0)) + return -1; + } else { +- *len += descs[avail_idx].len; ++ dlen = descs[avail_idx].len; ++ *len += dlen; + + if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id, + descs[avail_idx].addr, +- descs[avail_idx].len, ++ dlen, + perm))) + return -1; + } +@@ -831,9 +846,10 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + + hdr_mbuf = m; + hdr_addr = buf_addr; +- if (unlikely(buf_len < dev->vhost_hlen)) ++ if (unlikely(buf_len < dev->vhost_hlen)) { ++ memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); + hdr = &tmp_hdr; +- else ++ } else + hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; + + VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", +@@ -984,7 +1000,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + + uint32_t tlen = 0; + int tvec_idx = 0; +- void 
*hpa; ++ void *host_iova; + + if (unlikely(m == NULL)) { + error = -1; +@@ -1004,9 +1020,10 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + + hdr_mbuf = m; + hdr_addr = buf_addr; +- if (unlikely(buf_len < dev->vhost_hlen)) ++ if (unlikely(buf_len < dev->vhost_hlen)) { ++ memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); + hdr = &tmp_hdr; +- else ++ } else + hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr; + + VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n", +@@ -1074,11 +1091,11 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + cpy_len = RTE_MIN(buf_avail, mbuf_avail); + + while (unlikely(cpy_len && cpy_len >= cpy_threshold)) { +- hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev, ++ host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev, + buf_iova + buf_offset, + cpy_len, &mapped_len); + +- if (unlikely(!hpa || mapped_len < cpy_threshold)) ++ if (unlikely(!host_iova || mapped_len < cpy_threshold)) + break; + + async_fill_vec(src_iovec + tvec_idx, +@@ -1086,7 +1103,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + mbuf_offset), (size_t)mapped_len); + + async_fill_vec(dst_iovec + tvec_idx, +- hpa, (size_t)mapped_len); ++ host_iova, (size_t)mapped_len); + + tlen += (uint32_t)mapped_len; + cpy_len -= (uint32_t)mapped_len; +@@ -1790,14 +1807,17 @@ virtio_net_with_host_offload(struct virtio_net *dev) + return false; + } + +-static void +-parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr) ++static int ++parse_headers(struct rte_mbuf *m, uint8_t *l4_proto) + { + struct rte_ipv4_hdr *ipv4_hdr; + struct rte_ipv6_hdr *ipv6_hdr; +- void *l3_hdr = NULL; + struct rte_ether_hdr *eth_hdr; + uint16_t ethertype; ++ uint16_t data_len = rte_pktmbuf_data_len(m); ++ ++ if (data_len < sizeof(struct rte_ether_hdr)) ++ return -EINVAL; + + eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + +@@ -1805,6 +1825,10 @@ parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr) + ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); + + if (ethertype == RTE_ETHER_TYPE_VLAN) { ++ if (data_len < sizeof(struct rte_ether_hdr) + ++ sizeof(struct rte_vlan_hdr)) ++ goto error; ++ + struct rte_vlan_hdr *vlan_hdr = + (struct rte_vlan_hdr *)(eth_hdr + 1); + +@@ -1812,73 +1836,118 @@ parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr) + ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); + } + +- l3_hdr = (char *)eth_hdr + m->l2_len; +- + switch (ethertype) { + case RTE_ETHER_TYPE_IPV4: +- ipv4_hdr = l3_hdr; +- *l4_proto = ipv4_hdr->next_proto_id; ++ if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr)) ++ goto error; ++ ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, ++ m->l2_len); + m->l3_len = rte_ipv4_hdr_len(ipv4_hdr); +- *l4_hdr = (char *)l3_hdr + m->l3_len; ++ if (data_len < m->l2_len + m->l3_len) ++ goto error; + m->ol_flags |= PKT_TX_IPV4; ++ *l4_proto = ipv4_hdr->next_proto_id; + break; + case RTE_ETHER_TYPE_IPV6: +- ipv6_hdr = l3_hdr; +- *l4_proto = ipv6_hdr->proto; ++ if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr)) ++ goto error; ++ ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, ++ m->l2_len); + m->l3_len = sizeof(struct rte_ipv6_hdr); +- *l4_hdr = (char *)l3_hdr + m->l3_len; + m->ol_flags |= PKT_TX_IPV6; ++ *l4_proto = ipv6_hdr->proto; + break; + default: +- m->l3_len = 0; +- *l4_proto = 0; +- *l4_hdr = NULL; ++ /* a valid L3 header is needed for further L4 parsing */ ++ goto error; ++ } ++ ++ /* both CSUM and GSO need a valid 
L4 header */ ++ switch (*l4_proto) { ++ case IPPROTO_TCP: ++ if (data_len < m->l2_len + m->l3_len + ++ sizeof(struct rte_tcp_hdr)) ++ goto error; ++ break; ++ case IPPROTO_UDP: ++ if (data_len < m->l2_len + m->l3_len + ++ sizeof(struct rte_udp_hdr)) ++ goto error; + break; ++ case IPPROTO_SCTP: ++ if (data_len < m->l2_len + m->l3_len + ++ sizeof(struct rte_sctp_hdr)) ++ goto error; ++ break; ++ default: ++ goto error; + } ++ ++ return 0; ++ ++error: ++ m->l2_len = 0; ++ m->l3_len = 0; ++ m->ol_flags = 0; ++ return -EINVAL; + } + + static __rte_always_inline void +-vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m) ++vhost_dequeue_offload_legacy(struct virtio_net_hdr *hdr, struct rte_mbuf *m) + { +- uint16_t l4_proto = 0; +- void *l4_hdr = NULL; ++ uint8_t l4_proto = 0; + struct rte_tcp_hdr *tcp_hdr = NULL; ++ uint16_t tcp_len; ++ uint16_t data_len = rte_pktmbuf_data_len(m); + +- if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) ++ if (parse_headers(m, &l4_proto) < 0) + return; + +- parse_ethernet(m, &l4_proto, &l4_hdr); + if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { + if (hdr->csum_start == (m->l2_len + m->l3_len)) { + switch (hdr->csum_offset) { + case (offsetof(struct rte_tcp_hdr, cksum)): +- if (l4_proto == IPPROTO_TCP) +- m->ol_flags |= PKT_TX_TCP_CKSUM; ++ if (l4_proto != IPPROTO_TCP) ++ goto error; ++ m->ol_flags |= PKT_TX_TCP_CKSUM; + break; + case (offsetof(struct rte_udp_hdr, dgram_cksum)): +- if (l4_proto == IPPROTO_UDP) +- m->ol_flags |= PKT_TX_UDP_CKSUM; ++ if (l4_proto != IPPROTO_UDP) ++ goto error; ++ m->ol_flags |= PKT_TX_UDP_CKSUM; + break; + case (offsetof(struct rte_sctp_hdr, cksum)): +- if (l4_proto == IPPROTO_SCTP) +- m->ol_flags |= PKT_TX_SCTP_CKSUM; ++ if (l4_proto != IPPROTO_SCTP) ++ goto error; ++ m->ol_flags |= PKT_TX_SCTP_CKSUM; + break; + default: +- break; ++ goto error; + } ++ } else { ++ goto error; + } + } + +- if (l4_hdr && hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { ++ if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: +- tcp_hdr = l4_hdr; ++ if (l4_proto != IPPROTO_TCP) ++ goto error; ++ tcp_hdr = rte_pktmbuf_mtod_offset(m, ++ struct rte_tcp_hdr *, ++ m->l2_len + m->l3_len); ++ tcp_len = (tcp_hdr->data_off & 0xf0) >> 2; ++ if (data_len < m->l2_len + m->l3_len + tcp_len) ++ goto error; + m->ol_flags |= PKT_TX_TCP_SEG; + m->tso_segsz = hdr->gso_size; +- m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; ++ m->l4_len = tcp_len; + break; + case VIRTIO_NET_HDR_GSO_UDP: ++ if (l4_proto != IPPROTO_UDP) ++ goto error; + m->ol_flags |= PKT_TX_UDP_SEG; + m->tso_segsz = hdr->gso_size; + m->l4_len = sizeof(struct rte_udp_hdr); +@@ -1886,6 +1955,100 @@ vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m) + default: + VHOST_LOG_DATA(WARNING, + "unsupported gso type %u.\n", hdr->gso_type); ++ goto error; ++ } ++ } ++ return; ++ ++error: ++ m->l2_len = 0; ++ m->l3_len = 0; ++ m->ol_flags = 0; ++} ++ ++static __rte_always_inline void ++vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m, ++ bool legacy_ol_flags) ++{ ++ struct rte_net_hdr_lens hdr_lens; ++ int l4_supported = 0; ++ uint32_t ptype; ++ ++ if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) ++ return; ++ ++ if (legacy_ol_flags) { ++ vhost_dequeue_offload_legacy(hdr, m); ++ return; ++ } ++ ++ m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; ++ ++ ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); ++ m->packet_type = 
ptype; ++ if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || ++ (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || ++ (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) ++ l4_supported = 1; ++ ++ /* According to Virtio 1.1 spec, the device only needs to look at ++ * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path. ++ * This differs from the processing incoming packets path where the ++ * driver could rely on VIRTIO_NET_HDR_F_DATA_VALID flag set by the ++ * device. ++ * ++ * 5.1.6.2.1 Driver Requirements: Packet Transmission ++ * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and ++ * VIRTIO_NET_HDR_F_RSC_INFO bits in flags. ++ * ++ * 5.1.6.2.2 Device Requirements: Packet Transmission ++ * The device MUST ignore flag bits that it does not recognize. ++ */ ++ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { ++ uint32_t hdrlen; ++ ++ hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; ++ if (hdr->csum_start <= hdrlen && l4_supported != 0) { ++ m->ol_flags |= PKT_RX_L4_CKSUM_NONE; ++ } else { ++ /* Unknown proto or tunnel, do sw cksum. We can assume ++ * the cksum field is in the first segment since the ++ * buffers we provided to the host are large enough. ++ * In case of SCTP, this will be wrong since it's a CRC ++ * but there's nothing we can do. ++ */ ++ uint16_t csum = 0, off; ++ ++ if (rte_raw_cksum_mbuf(m, hdr->csum_start, ++ rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0) ++ return; ++ if (likely(csum != 0xffff)) ++ csum = ~csum; ++ off = hdr->csum_offset + hdr->csum_start; ++ if (rte_pktmbuf_data_len(m) >= off + 1) ++ *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum; ++ } ++ } ++ ++ if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { ++ if (hdr->gso_size == 0) ++ return; ++ ++ switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { ++ case VIRTIO_NET_HDR_GSO_TCPV4: ++ case VIRTIO_NET_HDR_GSO_TCPV6: ++ if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP) ++ break; ++ m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE; ++ m->tso_segsz = hdr->gso_size; ++ break; ++ case VIRTIO_NET_HDR_GSO_UDP: ++ if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP) ++ break; ++ m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE; ++ m->tso_segsz = hdr->gso_size; ++ break; ++ default: + break; + } + } +@@ -1915,30 +2078,28 @@ copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr, + static __rte_always_inline int + copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct buf_vector *buf_vec, uint16_t nr_vec, +- struct rte_mbuf *m, struct rte_mempool *mbuf_pool) ++ struct rte_mbuf *m, struct rte_mempool *mbuf_pool, ++ bool legacy_ol_flags) + { + uint32_t buf_avail, buf_offset; + uint64_t buf_addr, buf_len; + uint32_t mbuf_avail, mbuf_offset; ++ uint32_t hdr_remain = dev->vhost_hlen; + uint32_t cpy_len; + struct rte_mbuf *cur = m, *prev = m; + struct virtio_net_hdr tmp_hdr; + struct virtio_net_hdr *hdr = NULL; +- /* A counter to avoid desc dead loop chain */ +- uint16_t vec_idx = 0; ++ uint16_t vec_idx; + struct batch_copy_elem *batch_copy = vq->batch_copy_elems; + int error = 0; + +- buf_addr = buf_vec[vec_idx].buf_addr; +- buf_len = buf_vec[vec_idx].buf_len; +- +- if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) { +- error = -1; +- goto out; +- } ++ /* ++ * The caller has checked the descriptors chain is larger than the ++ * header size. 
++ */ + + if (virtio_net_with_host_offload(dev)) { +- if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) { ++ if (unlikely(buf_vec[0].buf_len < sizeof(struct virtio_net_hdr))) { + /* + * No luck, the virtio-net header doesn't fit + * in a contiguous virtual area. +@@ -1946,34 +2107,22 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, + copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec); + hdr = &tmp_hdr; + } else { +- hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr); ++ hdr = (struct virtio_net_hdr *)((uintptr_t)buf_vec[0].buf_addr); + } + } + +- /* +- * A virtio driver normally uses at least 2 desc buffers +- * for Tx: the first for storing the header, and others +- * for storing the data. +- */ +- if (unlikely(buf_len < dev->vhost_hlen)) { +- buf_offset = dev->vhost_hlen - buf_len; +- vec_idx++; +- buf_addr = buf_vec[vec_idx].buf_addr; +- buf_len = buf_vec[vec_idx].buf_len; +- buf_avail = buf_len - buf_offset; +- } else if (buf_len == dev->vhost_hlen) { +- if (unlikely(++vec_idx >= nr_vec)) +- goto out; +- buf_addr = buf_vec[vec_idx].buf_addr; +- buf_len = buf_vec[vec_idx].buf_len; ++ for (vec_idx = 0; vec_idx < nr_vec; vec_idx++) { ++ if (buf_vec[vec_idx].buf_len > hdr_remain) ++ break; + +- buf_offset = 0; +- buf_avail = buf_len; +- } else { +- buf_offset = dev->vhost_hlen; +- buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; ++ hdr_remain -= buf_vec[vec_idx].buf_len; + } + ++ buf_addr = buf_vec[vec_idx].buf_addr; ++ buf_len = buf_vec[vec_idx].buf_len; ++ buf_offset = hdr_remain; ++ buf_avail = buf_vec[vec_idx].buf_len - hdr_remain; ++ + PRINT_PACKET(dev, + (uintptr_t)(buf_addr + buf_offset), + (uint32_t)buf_avail, 0); +@@ -2048,7 +2197,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, + m->pkt_len += mbuf_offset; + + if (hdr) +- vhost_dequeue_offload(hdr, m); ++ vhost_dequeue_offload(hdr, m, legacy_ol_flags); + + out: + +@@ -2131,9 +2280,11 @@ virtio_dev_pktmbuf_alloc(struct virtio_net *dev, struct rte_mempool *mp, + return NULL; + } + +-static __rte_noinline uint16_t ++__rte_always_inline ++static uint16_t + virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, +- struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) ++ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count, ++ bool legacy_ol_flags) + { + uint16_t i; + uint16_t free_entries; +@@ -2174,6 +2325,14 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, + + update_shadow_used_ring_split(vq, head_idx, 0); + ++ if (unlikely(buf_len <= dev->vhost_hlen)) { ++ dropped += 1; ++ i++; ++ break; ++ } ++ ++ buf_len -= dev->vhost_hlen; ++ + pkts[i] = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len); + if (unlikely(pkts[i] == NULL)) { + /* +@@ -2193,7 +2352,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, + } + + err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i], +- mbuf_pool); ++ mbuf_pool, legacy_ol_flags); + if (unlikely(err)) { + rte_pktmbuf_free(pkts[i]); + if (!allocerr_warned) { +@@ -2221,6 +2380,24 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, + return (i - dropped); + } + ++__rte_noinline ++static uint16_t ++virtio_dev_tx_split_legacy(struct virtio_net *dev, ++ struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, ++ struct rte_mbuf **pkts, uint16_t count) ++{ ++ return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true); ++} ++ ++__rte_noinline ++static uint16_t ++virtio_dev_tx_split_compliant(struct virtio_net *dev, ++ 
struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool, ++ struct rte_mbuf **pkts, uint16_t count) ++{ ++ return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false); ++} ++ + static __rte_always_inline int + vhost_reserve_avail_batch_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, +@@ -2283,7 +2460,7 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev, + } + + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { +- pkts[i]->pkt_len = descs[avail_idx + i].len - buf_offset; ++ pkts[i]->pkt_len = lens[i] - buf_offset; + pkts[i]->data_len = pkts[i]->pkt_len; + ids[i] = descs[avail_idx + i].id; + } +@@ -2301,7 +2478,8 @@ static __rte_always_inline int + virtio_dev_tx_batch_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, +- struct rte_mbuf **pkts) ++ struct rte_mbuf **pkts, ++ bool legacy_ol_flags) + { + uint16_t avail_idx = vq->last_avail_idx; + uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); +@@ -2325,7 +2503,7 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev, + if (virtio_net_with_host_offload(dev)) { + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + hdr = (struct virtio_net_hdr *)(desc_addrs[i]); +- vhost_dequeue_offload(hdr, pkts[i]); ++ vhost_dequeue_offload(hdr, pkts[i], legacy_ol_flags); + } + } + +@@ -2346,7 +2524,8 @@ vhost_dequeue_single_packed(struct virtio_net *dev, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **pkts, + uint16_t *buf_id, +- uint16_t *desc_count) ++ uint16_t *desc_count, ++ bool legacy_ol_flags) + { + struct buf_vector buf_vec[BUF_VECTOR_MAX]; + uint32_t buf_len; +@@ -2361,6 +2540,11 @@ vhost_dequeue_single_packed(struct virtio_net *dev, + VHOST_ACCESS_RO) < 0)) + return -1; + ++ if (unlikely(buf_len <= dev->vhost_hlen)) ++ return -1; ++ ++ buf_len -= dev->vhost_hlen; ++ + *pkts = virtio_dev_pktmbuf_alloc(dev, mbuf_pool, buf_len); + if (unlikely(*pkts == NULL)) { + if (!allocerr_warned) { +@@ -2373,7 +2557,7 @@ vhost_dequeue_single_packed(struct virtio_net *dev, + } + + err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, *pkts, +- mbuf_pool); ++ mbuf_pool, legacy_ol_flags); + if (unlikely(err)) { + if (!allocerr_warned) { + VHOST_LOG_DATA(ERR, +@@ -2392,14 +2576,15 @@ static __rte_always_inline int + virtio_dev_tx_single_packed(struct virtio_net *dev, + struct vhost_virtqueue *vq, + struct rte_mempool *mbuf_pool, +- struct rte_mbuf **pkts) ++ struct rte_mbuf **pkts, ++ bool legacy_ol_flags) + { + + uint16_t buf_id, desc_count = 0; + int ret; + + ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id, +- &desc_count); ++ &desc_count, legacy_ol_flags); + + if (likely(desc_count > 0)) { + if (virtio_net_is_inorder(dev)) +@@ -2415,12 +2600,14 @@ virtio_dev_tx_single_packed(struct virtio_net *dev, + return ret; + } + +-static __rte_noinline uint16_t ++__rte_always_inline ++static uint16_t + virtio_dev_tx_packed(struct virtio_net *dev, + struct vhost_virtqueue *__rte_restrict vq, + struct rte_mempool *mbuf_pool, + struct rte_mbuf **__rte_restrict pkts, +- uint32_t count) ++ uint32_t count, ++ bool legacy_ol_flags) + { + uint32_t pkt_idx = 0; + uint32_t remained = count; +@@ -2430,7 +2617,8 @@ virtio_dev_tx_packed(struct virtio_net *dev, + + if (remained >= PACKED_BATCH_SIZE) { + if (!virtio_dev_tx_batch_packed(dev, vq, mbuf_pool, +- &pkts[pkt_idx])) { ++ &pkts[pkt_idx], ++ legacy_ol_flags)) { + pkt_idx += PACKED_BATCH_SIZE; + remained -= PACKED_BATCH_SIZE; + continue; +@@ -2438,7 +2626,8 @@ virtio_dev_tx_packed(struct virtio_net *dev, + } + + if 
(virtio_dev_tx_single_packed(dev, vq, mbuf_pool, +- &pkts[pkt_idx])) ++ &pkts[pkt_idx], ++ legacy_ol_flags)) + break; + pkt_idx++; + remained--; +@@ -2455,6 +2644,24 @@ virtio_dev_tx_packed(struct virtio_net *dev, + return pkt_idx; + } + ++__rte_noinline ++static uint16_t ++virtio_dev_tx_packed_legacy(struct virtio_net *dev, ++ struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, ++ struct rte_mbuf **__rte_restrict pkts, uint32_t count) ++{ ++ return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true); ++} ++ ++__rte_noinline ++static uint16_t ++virtio_dev_tx_packed_compliant(struct virtio_net *dev, ++ struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool, ++ struct rte_mbuf **__rte_restrict pkts, uint32_t count) ++{ ++ return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false); ++} ++ + uint16_t + rte_vhost_dequeue_burst(int vid, uint16_t queue_id, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) +@@ -2530,10 +2737,17 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, + count -= 1; + } + +- if (vq_is_packed(dev)) +- count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count); +- else +- count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count); ++ if (vq_is_packed(dev)) { ++ if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) ++ count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count); ++ else ++ count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count); ++ } else { ++ if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS) ++ count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count); ++ else ++ count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count); ++ } + + out: + if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) +diff --git a/dpdk/lib/meson.build b/dpdk/lib/meson.build +index ed00f89146..74dfe88d19 100644 +--- a/dpdk/lib/meson.build ++++ b/dpdk/lib/meson.build +@@ -3,7 +3,7 @@ + + + # process all libraries equally, as far as possible +-# "core" libs first, then others alphebetically as far as possible ++# "core" libs first, then others alphabetically as far as possible + # NOTE: for speed of meson runs, the dependencies in the subdirectories + # sometimes skip deps that would be implied by others, e.g. if mempool is + # given as a dep, no need to mention ring. This is especially true for the +diff --git a/dpdk/license/README b/dpdk/license/README +index 874abaf4cd..79dac86440 100644 +--- a/dpdk/license/README ++++ b/dpdk/license/README +@@ -49,7 +49,7 @@ with SPDX-License-Identifiers. + Any exception to the DPDK IP policies shall be approved by DPDK Tech Board and + DPDK Governing Board. Steps for any exception approval: + 1. Mention the appropriate license identifier form SPDX. If the license is not +- listed in SPDX Licenses. It is the submitters responsibiliity to get it ++ listed in SPDX Licenses. It is the submitters responsibility to get it + first listed. + 2. Get the required approval from the DPDK Technical Board. Technical Board may + advise the author to check alternate means first. If no other alternative +@@ -72,6 +72,6 @@ DPDK project supported licenses are: + URL: http://spdx.org/licenses/GPL-2.0.html#licenseText + DPDK License text: licenses/gpl-2.0.txt + 3. 
GNU Lesser General Public License v2.1 +- SPDX-License-Identifieri: LGPL-2.1 ++ SPDX-License-Identifier: LGPL-2.1 + URL: http://spdx.org/licenses/LGPL-2.1.html#licenseText + DPDK License text: licenses/lgpl-2.1.txt +diff --git a/dpdk/meson.build b/dpdk/meson.build +index 45d974cd2c..290d99cdf9 100644 +--- a/dpdk/meson.build ++++ b/dpdk/meson.build +@@ -5,14 +5,20 @@ project('DPDK', 'C', + # Get version number from file. + # Fallback to "more" for Windows compatibility. + version: run_command(find_program('cat', 'more'), +- files('VERSION')).stdout().strip(), ++ files('VERSION'), check: true).stdout().strip(), + license: 'BSD', +- default_options: ['buildtype=release', 'default_library=static'], ++ default_options: [ ++ 'buildtype=release', ++ 'default_library=static', ++ 'warning_level=2', ++ ], + meson_version: '>= 0.47.1' + ) + + # set up some global vars for compiler, platform, configuration, etc. + cc = meson.get_compiler('c') ++dpdk_source_root = meson.current_source_dir() ++dpdk_build_root = meson.current_build_dir() + dpdk_conf = configuration_data() + dpdk_libraries = [] + dpdk_static_libraries = [] +@@ -61,7 +67,7 @@ subdir('doc') + subdir('examples') + install_subdir('examples', + install_dir: get_option('datadir') + '/dpdk', +- exclude_files: 'meson.build') ++ exclude_files: ex_file_excludes) + + # build kernel modules if enabled + if get_option('enable_kmods') +diff --git a/dpdk/usertools/dpdk-pmdinfo.py b/dpdk/usertools/dpdk-pmdinfo.py +index 3381aa616c..40ef5cec6c 100755 +--- a/dpdk/usertools/dpdk-pmdinfo.py ++++ b/dpdk/usertools/dpdk-pmdinfo.py +@@ -593,7 +593,7 @@ def main(stream=None): + exit(1) + + if args.pdir: +- exit(scan_for_autoload_pmds(args[0])) ++ exit(scan_for_autoload_pmds(args.elf_file)) + + ldlibpath = os.environ.get('LD_LIBRARY_PATH') + if ldlibpath is None: +diff --git a/dpdk/usertools/dpdk-telemetry.py b/dpdk/usertools/dpdk-telemetry.py +index 181859658f..a71f1e9ff9 100755 +--- a/dpdk/usertools/dpdk-telemetry.py ++++ b/dpdk/usertools/dpdk-telemetry.py +@@ -51,13 +51,17 @@ def handle_socket(path): + CMDS = read_socket(sock, output_buf_len, False)["/"] + + # interactive prompt +- text = input('--> ').strip() +- while text != "quit": +- if text.startswith('/'): +- sock.send(text.encode()) +- read_socket(sock, output_buf_len) ++ try: + text = input('--> ').strip() +- sock.close() ++ while text != "quit": ++ if text.startswith('/'): ++ sock.send(text.encode()) ++ read_socket(sock, output_buf_len) ++ text = input('--> ').strip() ++ except EOFError: ++ pass ++ finally: ++ sock.close() + + + def readline_complete(text, state): diff --git a/include/linux/automake.mk b/include/linux/automake.mk index 8f063f482e..f857c7e088 100644 --- a/include/linux/automake.mk diff --git a/SPECS/openvswitch2.16.spec b/SPECS/openvswitch2.16.spec index fd2bf0a..f1e5a6f 100644 --- a/SPECS/openvswitch2.16.spec +++ b/SPECS/openvswitch2.16.spec @@ -57,7 +57,7 @@ Summary: Open vSwitch Group: System Environment/Daemons daemon/database/utilities URL: http://www.openvswitch.org/ Version: 2.16.0 -Release: 97%{?dist} +Release: 98%{?dist} # Nearly all of openvswitch is ASL 2.0. 
The bugtool is LGPLv2+, and the # lib/sflow*.[ch] files are SISSL @@ -699,6 +699,1463 @@ exit 0 %endif %changelog +* Tue Sep 06 2022 Timothy Redaelli - 2.16.0-98 +- Merging 73655c6414 version: 20.11.6 [RH git: 6bbfe5da8f] + Commit list: + 73655c6414 version: 20.11.6 + ef311075d2 net/mlx5: fix Rx queue recovery mechanism + 089e01b375 vhost: fix header spanned across more than two descriptors + 8fff8520f3 vhost: discard too small descriptor chains + 28596f9ebc vhost: prevent async register + ff7d483aaa ethdev: fix RSS update when RSS is disabled + 5a6aadbeec kni: fix build + 01609d56cb net/virtio-user: fix socket non-blocking mode + c5dc4f6eb5 version: 20.11.6-rc1 + 3e61bd97d9 doc: add more instructions for running as non-root + aebfd0d746 net/mlx5: fix MPRQ pool registration + ca78fec981 net/hns3: support backplane media type + 9a7f5c16d0 net/mlx5: destroy indirect actions on port stop + 4a35f74c8a net/mlx5/linux: fix missed Rx packet stats + 153cef1fbf net/mlx5: handle MPRQ incompatibility with external buffers + 38f1f47db1 vdpa/mlx5: workaround var offset within page + 55942e4c4d vdpa/mlx5: fix maximum number of virtqs + d702d0a0bd examples/link_status_interrupt: fix stats refresh rate + 20f57bfbd7 gro: fix identifying fragmented packets + da29560252 service: fix lingering active status + edc80cd334 net/igc: support multi-process + dcf5529145 vhost: add some trailing newline in log messages + ecb5596929 net/vhost: fix deadlock on vring state change + e29fe1799c doc: fix readability in vhost guide + 9d474a9565 net/netvsc: fix vmbus device reference in multi-process + 04e6368ccd app/testpmd: fix supported RSS offload display + d3b24b9ea2 eventdev/eth_tx: fix queue delete + e00e29a3ef doc: fix grammar and parameters in l2fwd-crypto guide + d2384ccb95 doc: fix grammar and formatting in compressdev guide + 0e387937c4 crypto/qat: fix DOCSIS crash + c24e314609 examples/fips_validation: handle empty payload + 4985a6b051 baseband/acc100: remove prefix of internal file + 85c979edf5 test/crypto: fix SNOW3G vector IV format + c263e1d047 test/crypto: fix ZUC vector IV format + c268809192 test/crypto: fix authentication IV for ZUC SGL + 21f394a60d net/bnxt: fix setting forced speed + 9722929756 net/bnxt: allow Tx only or Rx only + 71234d08d2 net/bnxt: fix switch domain allocation + 3f6c57b6a3 examples/distributor: fix distributor on Rx core + a035314c1e net/hns3: fix descriptors check with SVE + 4fef76f744 test: check memory allocation for CRC + ef31c19b76 app/procinfo: show all non-owned ports + 67350d412d test/hash: fix out of bound access + ed1bd718de rib: fix references for IPv6 implementation + eb88adfe30 raw/ioat: fix build when ioat dmadev enabled + 40538d0b04 net/mlx5: fix stack buffer overflow in drop action + f371572a76 net/mlx5: fix RSS expansion for patterns with ICMP item + 06375d5c71 net/mlx5: fix build with clang 14 + 35ab757a02 net/qede: fix build with GCC 12 + d26e008757 net/ice/base: fix build with GCC 12 + b9db1042c3 net/qede: fix build with GCC 13 + ac50182289 net/octeontx: fix port close + c9f6c89d9a ci: enable C++ check for Arm and PPC + 45eb4af456 config: fix C++ cross compiler for Arm and PPC + dbbdee4c12 malloc: fix allocation of almost hugepage size + a0f0c7d9c5 vhost/crypto: fix descriptor processing + 8aa20e45c6 vhost/crypto: fix build with GCC 12 + 338b9cbeb4 vhost: fix missing enqueue pseudo-header calculation + bbba8ef0cd app/testpmd: revert MAC update in checksum forwarding + 9b4779bfac net/txgbe: fix register polling + 50c6ddd642 app/testpmd: fix bonding slave devices 
not released + 35768b682b net/hns3: fix return value for unsupported tuple + ecb5bc37dd net/hns3: remove duplicate definition + 5b5935a922 net/hns3: fix an unreasonable memset + 1adbdac76c test/bonding: fix RSS test when disable RSS + da86d09eac net/bonding: fix RSS inconsistency between ports + a6ae86b161 eventdev/eth_tx: fix adapter creation + b39e829bac test/ipsec: fix performance test + 8d49e2f6d1 test/crypto: fix cipher offset for ZUC + cb03983da4 crypto/scheduler: fix queue pair in scheduler failover + df7022dae8 test/ipsec: fix build with GCC 12 + 5dbe3a6dad common/cpt: fix build with GCC 12 + 0af3e2fef9 baseband/acc100: remove RTE prefix for internal macros + 437389abd9 baseband/acc100: add protection for some negative scenario + 34d98de156 baseband/acc100: update companion PF configure function + 10cdabfc10 eal/x86: drop export of internal alignment macro + a6388c7595 trace: fix init with long file prefix + 3582b40156 app/flow-perf: fix build with GCC 12 + b089558de0 vdpa/ifc: fix build with GCC 12 + 03197727fa net/ice: fix build with GCC 12 + cd10d48dcc net/mlx5: fix LRO validation in Rx setup + c352cf12f3 app/testpmd: fix port status of bonding slave device + 93ca5fcf94 crypto/dpaa2_sec: fix crypto operation pointer + 40b12a0561 net/mlx5: fix RSS hash types adjustment + 145c5b453d net/bnxt: fix tunnel stateless offloads + ee29f5fa6b vhost: fix async access + c9a4ddc1e8 raw/ifpga: remove virtual devices on close + 3590f93137 kni: use dedicated function to set MAC address + 628b8d88c4 kni: use dedicated function to set random MAC address + 59d08006aa app/testpmd: fix multicast address pool leak + 9c1410c9b0 app/testpmd: fix packet segment allocation + 641e8b889e doc: fix formatting and link in BPF library guide + 8f68d6fdc6 bus/fslmc: fix VFIO setup + 30c0b36a01 raw/ifpga: unregister interrupt on close + 14a2726331 examples/dma: fix Tx drop statistics + 6c17d92697 net/iavf: increase reset complete wait count + 4d11ba721d net/ice: fix outer L4 checksum in scalar Rx + 2d9b2c2190 doc: update matching versions in i40e guide + 789223ef41 net/iavf: fix Rx queue interrupt setting + 7ec8629367 net/iavf: fix mbuf release in multi-process + 755ea301db net/iavf: fix queue start exception handling + 5b55b9b1ea net/i40e: fix max frame size config at port level + d731c37a84 net/ice: fix MTU info for DCF + e2660944da net/ice/base: fix getting sched node from ID type + 43eac062d1 net/ixgbe: add option for link up check on pin SDP3 + e0658c3196 net/iavf: fix data path selection + 133ec0ce1c kni: fix build with Linux 5.18 + 8048354bb7 net/mlx5: fix Tx recovery + 27dda79b5d examples/vhost: fix crash when no VMDq + 63005df100 vhost: fix deadlock when message handling failed + 4daae0ccb8 doc: fix vhost multi-queue reconnection + ff874e673a net/bnxt: fix compatibility with some old firmwares + fa4da49c81 ethdev: fix port close in secondary process + ac86ba961e net/nfp: fix disabling VLAN stripping + ef9b301421 net/txgbe: fix max number of queues for SR-IOV + d096c590de ethdev: fix port state when stop + 89f54969e7 net/memif: fix overwriting of head segment + 711575cd7f net/bonding: fix mbuf fast free usage + bdf2967cd5 app/testpmd: do not poll stopped queues + f6060b1689 ethdev: prohibit polling stopped queue + b4f13e0fdb app/testpmd: fix metering and policing command for RFC4115 + 070f2b2b8a app/testpmd: replace hardcoded min mbuf number with macro + fe09e56cb5 ethdev: fix possible null pointer access + 7386b53220 ethdev: fix memory leak in xstats telemetry + 7d17ff9a81 net/axgbe: fix xstats get 
return if xstats is null + 2b88b790c1 net/mvpp2: fix xstats get return if xstats is null + 007dfe4102 net/ipn3ke: fix xstats get return if xstats is null + 17467579f7 net/hns3: fix xstats get return if xstats is null + eb7b05b5de ethdev: clarify null location case in xstats get + cf28773c93 app/testpmd: remove useless pointer checks + 9463f695d7 app/testpmd: perform SW IP checksum for GRO/GSO packets + 3b918bc711 doc: add missing auth algo for IPsec example + 5e4a008e4e drivers/crypto: fix warnings for OpenSSL version + 387d4b3ca7 test/crypto: fix null check for ZUC authentication + b9b994dcf2 examples/ipsec-secgw: fix promiscuous mode option + 78059ea5a5 examples/ipsec-secgw: fix uninitialized memory access + b4797829e7 devtools: fix null test for NUMA systems + 1d5a0d3379 doc: fix API index Markdown syntax + de28f76399 mbuf: dump outer VLAN + 52822a9dd8 rib: fix traversal with /32 route + 3267a37602 acl: fix rules with 8-byte field size + 20ee31283d test: avoid hang if queues are full and Tx fails + 875e84259c eal/freebsd: fix use of newer cpuset macros + 8d8739b082 test/ring: remove excessive inlining + 597718802f examples/l3fwd: fix scalar LPM + 705be73150 eal/x86: fix unaligned access for small memcpy + ca4a019b45 net/bnxt: fix freeing VNIC filters + 03a539bf17 net/bnxt: recheck FW readiness if in reset process + c105904aaa net/bnxt: fix link status when port is stopped + f5cee4b2ca net/bnxt: force PHY update on certain configurations + 214e9d80be net/bnxt: fix speed autonegotiation + adb37db48f net/bnxt: avoid unnecessary endianness conversion + 167d0cbbfd net/bnxt: fix ring group on Rx restart + c69db7f7f7 net/bnxt: fix Rx configuration + 7ded422e25 net/bnxt: fix device capability reporting + ae5516cd7a net/bnxt: remove unused macro + 1dc7ed3e26 net/bnxt: fix reordering in NEON Rx + 472844ca06 app/testpmd: check statistics query before printing + c72194a754 net/hns3: remove unnecessary RSS switch + 21c2dd6011 net/hns3: remove redundant RSS tuple field + 7bce9bbfb8 net/hns3: fix rollback on RSS hash update + 9bf4eb4b1a net/hns3: fix RSS disable + 4997f02390 net/bonding: fix slave stop and remove on port close + 5f9aca077f net/bonding: fix stopping non-active slaves + d18ed4ab86 doc: update matching versions in ice guide + ad18b9f42d net/dpaa: fix event queue detach + 6111eb8b88 vdpa/mlx5: fix dead loop when process interrupted + e8971ff299 vdpa/mlx5: fix interrupt trash that leads to crash + 3763af6f3e vhost: fix missing virtqueue lock protection + 45f14afcf9 net/vhost: fix TSO feature default disablement + 15d4339cfd net/virtio: restore some optimisations with AVX512 + e4729f10d0 net/vhost: fix access to freed memory + 730c2afc40 net/cxgbe: fix Tx queue stuck with mbuf chain coalescing + 063ce7b91c net/cxgbe: fix port ID in Rx mbuf + 5e7c16ee76 net/bonding: fix RSS key config with extended key length + 72144a630e net/nfp: remove unneeded header inclusion + 23bfe0031f test/hash: report non HTM numbers for single thread + c29b05c216 examples/l2fwd-crypto: fix stats refresh rate + 25165ac241 common/dpaax: fix short MAC-I IV calculation for ZUC + 6b38d08f0c crypto/dpaa2_sec: fix operation status for simple FD + 45650937b4 crypto/dpaa_sec: fix secondary process probing + faf1c3baa8 crypto/dpaa2_sec: fix buffer pool ID check + 2a680bbbc7 crypto/dpaa2_sec: fix fle buffer leak + 8856e5ecd7 crypto/dpaa_sec: fix digest size + c1d1c4fdc3 eal: fix C++ include for device event and DMA + ae1ed789dc mem: skip attaching external memory in secondary process + 072f7ee470 test/table: fix buffer 
overflow on lpm entry + c4d51b3f05 net/mlx5: fix Rx/Tx stats concurrency + 8a2529276a net/mlx5: fix GTP handling in header modify action + e8995a2540 net/mlx5: fix Tx when inlining is impossible + a61bd9df25 net/netvsc: fix calculation of checksums based on mbuf flag + 4d6996d25a net/i40e: populate error in flow director parser + 236835c94f net/iavf: fix HW ring scan method selection + faca682333 examples/bond: fix invalid use of trylock + 9526e5c4a5 eal/windows: add missing C++ include guards + 5888f84ccd eal/windows: fix data race when creating threads + 3a9281f20e crypto/ipsec_mb: fix GMAC parameters setting + 0fb580b1c0 crypto/ipsec_mb: fix length and offset settings + abf68ccc8b version: 20.11.5 + 5ae73012c8 dpaa2: fix build with RTE_LIBRTE_IEEE1588 + 652658269f pmdinfogen: fix compilation with Clang 3.4.2 on CentOS 7 + 9b390c4866 Revert "regexdev: fix section attribute of symbols" + e0c907be84 net/cxgbe: remove useless address check + 576842a59a version: 20.11.5-rc1 + fb2e2836bf cryptodev: add backward-compatible enum + 5cb14d7436 raw/ifpga: fix build with optimization + c93d1201cc doc: fix missing note on UIO module in Linux guide + 5bc5bab0dc doc: replace characters for (R) symbol in Linux guide + 1d25333ce0 net/mlx5: fix port matching in sample flow rule + 6b92223d98 net/qede: fix Rx bulk mbuf allocation + 540401728f net/octeontx2: fix flow MCAM priority management + 975469beaa cryptodev: fix clang C++ include + b16e4220e9 compressdev: fix missing space in log macro + eca0c20f67 eal/freebsd: add missing C++ include guards + dd8a852944 examples/l3fwd: fix buffer overflow in Tx + 63f7788b20 app/testpmd: fix show RSS RETA on Windows + 6fc2a8d499 bpf: fix build with some libpcap version on FreeBSD + 63f156544d net/mlx5: fix sample flow action on trusted device + 8c3269273e vhost: fix FD leak with inflight messages + 9bac6ad862 vhost: fix queue number check when setting inflight FD + cc761b30f0 net/mlx5: fix initial link status detection + 76cfc69e0e net/mlx5: fix link status change detection + c885a4871f common/mlx5: add Netlink event helpers + 67cb024a79 raw/ifpga: fix interrupt handle allocation + 5500586d7b examples/l3fwd: make Rx and Tx queue size configurable + 920e858767 examples/l3fwd: share queue size variables + d4b61e1bc9 net/iavf: fix potential out-of-bounds access + 4c3ee24406 net/mlx5: fix MPLS/GRE Verbs spec ordering + 5beedd0836 net/mlx5: remove unused reference counter + 6979a661df net/mlx5: fix modify port action validation + 093fbd470b net/mlx5: fix NIC egress flow mismatch in switchdev mode + 42e5440e3f net/mlx5: fix shared RSS destroy + cfcd67541c net/mlx5: fix next protocol RSS expansion + f26a66edd5 net/mlx5: fix inet IPIP protocol type + 5e42591829 net/bnxt: fix null dereference in session cleanup + be45c834db ethdev: fix doxygen comments for device info struct + 64452c878f regexdev: fix section attribute of symbols + 2087ed6ae6 examples/kni: add missing trailing newline in log + 9b1f77b32c examples/flow_classify: fix failure message + 2ab2cfb2e8 examples/distributor: reduce Tx queue number to 1 + 5a62d76f89 app/pdump: abort on multi-core capture limit + 40252a4876 raw/ifpga: fix monitor thread + 3e06257cf5 raw/ifpga: fix variable initialization in probing + a7b346dd4d examples/vhost: fix launch with physical port + 76e812e3a5 net/ice: fix Tx offload path choice + ee2cc7d349 net/mlx5: fix matcher priority with ICMP or ICMPv6 + 83781fe25a net/mlx5: fix GRE item translation in Verbs + fa1579b29f doc: fix typos and punctuation in flow API guide + 910925bc31 
net/kni: fix config initialization + fbc5f6b5fa net/txgbe: fix queue statistics mapping + 63030f8b7b net/iavf: fix function pointer in multi-process + f0a47837dd net/ixgbe: fix FSP check for X550EM devices + 38dee9e84a net/hns3: increase time waiting for PF reset completion + b27dbae232 net/hns3: fix VF RSS TC mode entry + ff48dd55d6 net/hns3: fix RSS TC mode entry + af3bd943ee net/hns3: remove duplicate macro definition + 52e0989cce compressdev: fix socket ID type + 6b75eeccec app/compress-perf: fix number of queue pairs to setup + 9df88bfefe app/compress-perf: fix socket ID type during init + c30254b5da app/compress-perf: optimize operations pool allocation + a166f7044b app/compress-perf: fix cycle count operations allocation + e4ca3c9295 net/mlx5: workaround ASO memory region creation + 14849b1fa2 examples/qos_sched: fix compile failure + eb0f31af87 net/mlx5: fix sibling device config check + 950554a680 common/octeontx2: update mailbox version to 0xb + 8d6bbbc514 kni: fix freeing order in device release + 3cb68884dd eal/linux: fix illegal memory access in uevent handler + 240dc513c2 distributor: fix potential overflow + ffc6e4ea38 efd: fix uninitialized structure + 5cd3c071a3 test/efd: fix sockets mask size + 3fc564b2d0 app/testpmd: fix build without drivers + 2d1c255f30 net/i40e: fix unintentional integer overflow + 073988b862 doc: correct name of BlueField-2 in mlx5 guide + dc599f85f7 doc: replace broken links in mlx guides + d07cbead1d doc: remove obsolete vector Tx explanations from mlx5 guide + 15f171576e common/mlx5: fix queue pair ack timeout configuration + b94aa34547 net/ena: fix checksum flag for L4 + c73e88da70 net/ena: check memory BAR before initializing LLQ + abbaa0b9d1 net/ena: fix meta descriptor DF flag setup + 5bb3ff7b47 net/ena: fix reset reason being overwritten + b1d55a40d6 net/ena: skip timer if reset is triggered + 39f6adbf54 net/ena: remove unused offload variables + d609358e18 net/ena: remove unused enumeration + 2c0325226c net/txgbe: fix debug logs + a274e8922f app/testpmd: fix GENEVE parsing in checksum mode + 8d5484e7a1 net/mlx5: fix ineffective metadata argument adjustment + 1b7f6d2ad9 net/i40e: enable maximum frame size at port level + 7eaca36d9c net/ice: fix overwriting of LSE bit by DCF + b1b1cd71e9 net/af_xdp: ensure socket is deleted on Rx queue setup error + 92f88226d9 net/af_xdp: add missing trailing newline in logs + 33f2e37561 vhost: fix unsafe vring addresses modifications + 37936eb9ef vhost: fix field naming in guest page struct + 85ec94182b app/testpmd: check starting port is not in bonding + f85bba14cc net/ixgbe: reset security context pointer on close + 3a4dd2649f net/nfb: fix multicast/promiscuous mode switching + aa27fa7968 net/nfb: fix array indexes in deinit functions + 92d7cb4446 crypto/ipsec_mb: fix ZUC operation overwrite + 4bc8222a12 crypto/ipsec_mb: fix ZUC authentication verify + 2eb55ed024 crypto/virtio: fix out-of-bounds access + dceebf9eeb baseband/acc100: avoid out-of-bounds access + ced8638611 examples/l2fwd-crypto: fix port mask overflow + 9d1d70c9c8 doc: fix FIPS guide + 7b5609a5a0 cryptodev: fix RSA key type name + a7aa7d8812 examples/qos_sched: fix core mask overflow + b56b636c57 dma/idxd: configure maximum batch size to high value + 26bebaab55 ethdev: fix cast for C++ compatibility + 20baa98d62 vhost: add missing C++ guards + 581dea537e kni: add missing C++ guards + 9c5dd18e18 eventdev: add missing C++ guards + f85ebc37b1 compressdev: add missing C++ guards + 4fd5a95b49 acl: add missing C++ guards + 8f9c5d8497 metrics: add 
missing C++ guards + c840104e23 ethdev: add missing C++ guards + f749277d08 telemetry: add missing C++ guards + 2c2d7d7af1 eal: add missing C++ guards + 5d76457095 eventdev/eth_tx: fix queue add error code + 40f28803d5 common/mlx5: fix error handling in multi-class probe + e08edd8ac6 net/mlx5: fix memory socket selection in ASO management + b6b775f223 net/mlx5: fix MPRQ stride devargs adjustment + 0d9346a115 net/mlx5: improve stride parameter names + 3adcf39013 common/mlx5: add minimum WQE size for striding RQ + d8fadb1f7e net/nfp: free HW rings memzone on queue release + 8a78f023b7 config: add arch define for Arm + c32be7490e net/octeontx2:: fix base rule merge + 2bc455ec39 kni: update kernel API to set random MAC address + 105b5a4130 raw/ntb: clear all valid doorbell bits on init + cf9be04290 crypto/dpaax_sec: fix auth/cipher xform chain checks + bdd4b322cf compress/octeontx: fix null pointer dereference + 045d6f7c38 net/bnxt: fix ring calculation for representors + c9eb38cd06 net/mlx5: fix inline length for multi-segment TSO + c45f526d8a net/mlx5: fix committed bucket size + 64d94d1e8f net/ice: fix build with 16-byte Rx descriptor + dc92390efe net/ice/base: add profile validation on switch filter + da0833ea1f net/iavf: count continuous DD bits for Arm in flex Rx + 3152179d55 net/iavf: count continuous DD bits for Arm + 813b5994cf net/sfc: demand Tx fast free offload on EF10 simple datapath + 964a78f4be net/sfc: do not push fast free offload to default TxQ config + 3915e71a30 net/memif: remove pointer deference before null check + 468cbff3fe vfio: cleanup the multiprocess sync handle + 184d1f7ae0 ipc: end multiprocess thread during cleanup + b8e818df2a test/mbuf: fix mbuf data content check + 32d2194f4f app/fib: fix division by zero + 1b84be5c00 mem: check allocation in dynamic hugepage init + c7bd2f4354 vhost: fix C++ include + b6dd9d6b2c table: fix C++ include + 97f298e315 ipsec: fix C++ include + 04eb6cecc9 graph: fix C++ include + 48735e1e43 eventdev: fix C++ include + ae0613e133 eal: fix C++ include + 7cabaf23be stack: fix stubs header export + c53cb87b51 regex/mlx5: fix memory allocation check + 0e63db2022 vhost: fix guest to host physical address mapping + 58767a90f4 app/testpmd: fix stack overflow for EEPROM display + aea2c5be01 net/tap: fix to populate FDs in secondary process + eea12b2874 ethdev: add internal function to device struct from name + b040eaa335 app/testpmd: fix bonding mode set + c4a4ba788f net/bonding: fix reference count on mbufs + 78ee1995c9 net/bonding: fix promiscuous and allmulticast state + 8189e99b3a net/ixgbe: check filter init failure + 061f2416ab net/hns3: delete duplicated RSS type + 3f03bc5d20 net/hns3: fix operating queue when TCAM table is invalid + b65114d8a3 net/hns3: fix insecure way to query MAC statistics + 73d4297244 net/hns3: fix RSS key with null + edfefb90ca net/hns3: fix max packet size rollback in PF + 463673460a net/enic: fix dereference before null check + cafb0cbdb4 eal/windows: remove useless C++ include guard + e9734a61da net/dpaa2: remove useless C++ include guard + 6506a4b485 net/cxgbe: remove useless C++ include guard + 8650a0c17c bus/dpaa: fix C++ include guard + d3f8892862 test/mem: fix error check + 2da6c30247 eal/windows: fix error code for not supported API + 19746aaeab ring: fix overflow in memory size calculation + 6e47aebf24 ring: fix error code when creating ring + 6c4c4398a4 ring: optimize corner case for enqueue/dequeue + 25f563d388 doc: fix KNI PMD name typo + 1b61157ec9 kni: fix ioctl signature + 04a29bf8a8 build: 
remove deprecated Meson functions + 3215df7480 build: fix warning about using -Wextra flag + 14421740c4 build: fix warnings when running external commands + 29649b4590 doc: update matching versions in ice guide + 12eaf885f5 net/mlx5: reject jump to root table + a133cd9add net/mlx5: fix mark enabling for Rx + d8090fd8c4 net/virtio-user: check FD flags getting failure + ac0c52244d net/virtio-user: fix resource leak on probing failure + 61144dff74 vdpa/ifc: fix log info mismatch + 9605f71afa net/virtio: fix Tx queue 0 overriden by queue 128 + 16ba91d4f3 vdpa/mlx5: workaround queue stop with traffic + f3cd5320d8 net/hns3: fix using enum as boolean + 90ada5e388 net/bonding: fix RSS with early configure + 1ed391676f net/memif: remove unnecessary Rx interrupt stub + 1044516947 raw/ifpga/base: fix port feature ID + edea3f39dd net/bnxt: fix VF resource allocation strategy + 3e0a066400 net/bnxt: fix memzone allocation per VNIC + 7ee6d43e5e net/bnxt: handle ring cleanup in case of error + e7f6c7629a net/bnxt: fix check for autoneg enablement + f05952e7bf raw/ifpga: fix thread closing + ab9cde2e72 net/ice: fix link up when starting device + 06665489e6 raw/ifpga/base: fix SPI transaction + ff474dde7b net/sfc: validate queue span when parsing flow action RSS + 238f205dd5 net/nfp: remove useless range checks + e977e8ef8e net/nfp: remove duplicated check when setting MAC address + f06710409f net/mlx5: fix maximum packet headers size for TSO + 1d10966a22 net/dpaa2: fix timestamping for IEEE1588 + bab9d520cb net/dpaa2: fix unregistering interrupt handler + b5b90b6ea3 net/cxgbe: fix dangling pointer by mailbox access rework + a5f8244a5f app/testpmd: fix external buffer allocation + a2f86fa32f app/testpmd: fix dereference before null check + 239f57b000 net/bonding: fix mode type mismatch + 8b6401daed net/af_xdp: fix build with -Wunused-function + a5018b1aa6 net/axgbe: use PCI root complex device to distinguish device + a26506bce1 app/testpmd: fix Tx scheduling interval + 8d0afb3f68 net/bonding: fix offloading configuration + e93a5f4479 net/bnxt: check VF representor pointer before access + 31080d4c62 net/bnxt: fix xstats query + 52d79e8244 net/bnxt: fix PAM4 mask setting + 766d7701ba net/bnxt: fix handling of VF configuration change + ef80ca417e net/bnxt: get maximum supported multicast filters count + 893c784eef net/bnxt: add null check for mark table + 5889a24544 net/bnxt: cap maximum number of unicast MAC addresses + fa8cc81989 net/bnxt: fix restoring VLAN filtering after recovery + 7275db81e8 net/bnxt: restore RSS configuration after reset recovery + c612a4bbf3 net/bnxt: fix queue stop operation + 3ea46b608a net/bnxt: fix multicast MAC restore during reset recovery + 5ee96222ab net/bnxt: fix multicast address set + e2c9b9902d net/bnxt: fix xstats names query overrun + 66aa6f9467 net/mlx5: relax headroom assertion + 0374774e88 net/mlx5: fix GRE protocol type translation for Verbs + a5edf85175 net/mlx5: fix RSS expansion with explicit next protocol + 1c64873e8d net/mlx5: fix assertion on flags set in packet mbuf + 24e61aa014 net/ixgbe: add vector Rx parameter check + a02fbcde75 net/ice: fix Tx checksum offload + 8577641fca net/ice: track DCF state of PF + f7b02e1127 net/ice: fix Tx checksum offload capability + 793c820620 net/qede: fix redundant condition in debug code + 4ee719c72e devtools: fix comment detection in forbidden token check + 33a5d155d0 examples/ipsec-secgw: fix default flow rule creation + dff4380aae examples/ipsec-secgw: fix eventdev start sequence + 19524c9cb1 examples/l3fwd: fix Rx burst 
size for event mode + 6bfaf85e5b doc: fix dlb2 guide + eb5a21b54a eal/linux: log hugepage create errors with filename + 0602aa0fd4 dma/idxd: fix paths to driver sysfs directory + 4548934a6d bus/ifpga: remove useless check while browsing devices + 7c66f7f679 doc: remove dependency on findutils on FreeBSD + cf1a7f66a9 maintainers: update for stable branches + 269489433c doc: replace deprecated distutils version parsing + 0b5a6c7b32 fix spelling in comments and strings + b05d183a7f config/ppc: fix build with GCC >= 10 + 4d47f37431 version: 20.11.4 + ebdc786158 common/qat: revert fix queut pairs number + 35fb9c4b93 build: disable Windows warnings for insecure funtions + 395bb64d1b kni: fix build for SLES15-SP3 + c3e1d196e5 table: fix missing headers on ARM64 + 04d8f7c496 eal/common: exclude code unsupported on Windows + bab4318634 version: 20.11.4-rc1 + 16fd24c265 raw/octeontx2_ep: remove unused variable + 66b0d3a2f4 net/mlx5: fix flow shared age action reference counting + 930ac3db1a crypto/octeontx2: fix lookaside IPsec IPv6 + 67992959ee baseband/acc100: fix 4GUL outbound size + 86cbc9786f net/mlx5: fix RSS expansion with EtherType + 460136f414 net/mlx5: fix RSS expansion for L2/L3 VXLAN + fa62ff901f net/mlx5: fix RSS expansion traversal over next nodes + 8ab211780c net/mlx5: fix RSS expansion for explicit graph node + 519154bdd5 net/mlx5: fix RSS expansion for inner tunnel VLAN + d94b467435 doc: fix a typo in EAL guide + ab2ec45d52 net/mlx5: fix devargs validation for multi-class probing + 368163da05 doc: fix typo in coding style + 951ab7b608 doc: capitalise PMD + cea3552ab7 fix PMD wording + ed9a13b0fa remove repeated 'the' in the code + 769cd1d909 net/mlx5: fix GENEVE and VXLAN-GPE flow item matching + 62f37b7a5b net/mlx5: fix GRE flow item matching + 22dc2d42b2 app/testpmd: fix hexadecimal parser with odd length + 585669bf46 doc: fix memif driver acronyms + fed3abcad0 net/memif: allow stopping and closing device + f489ca40fb net/mlx5: fix multi-segment packet wraparound + 14f47af82f net/mlx5: fix flow mark with sampling and metering + 03f92022bb net/mlx4: fix empty Ethernet spec with VLAN + acf010a8e5 net/mlx5: fix metadata and meter split shared tag + f803c82df7 net/bnxt: fix autoneg on PAM4 links + 401a4bc91e doc: remove repeated repeated words + 62b23da816 examples/ptpclient: fix delay request message + e8e74b5804 doc: strip build artefacts for examples file list + 47e5dbb3a6 mbuf: fix dump of dynamic fields and flags + 07ab7b7d9a kni: restrict bifurcated device support + 1f3b7af315 drivers/crypto: fix IPsec TTL decrement option + 2aa2b0f1e5 crypto/ipsec_mb: fix cipher key setting + 7695cdb5e2 common/mlx5: fix user mode register access attribute + 5045cabbec net/mlx5: fix MPLS tunnel outer layer overwrite + c897e773f9 net/mlx5: fix partial inline of fine grain packets + 81836e2c89 app/testpmd: fix tunnel offload validation + 47c838de7c net/failsafe: fix secondary process probe + 14bb775f70 net/bnxt: fix Rx next consumer index in mbuf alloc fail + b0ecc5765f net/mlx5: fix mutex unlock in Tx packet pacing cleanup + 820f954788 net/hns3: optimize Tx performance by mbuf fast free + e628b925e5 net/mlx5: fix GRE protocol type translation + b831cc80b2 net/mlx5: fix GENEVE protocol type translation + 8708f00342 net/mlx5: fix RSS expansion scheme for GRE header + 3d4d2600f3 net/mlx5: add Ethernet header to GENEVE RSS expansion + 5b82df45c1 net/mlx5: fix VXLAN-GPE next protocol translation + 01029bb978 vdpa/mlx5: fix mkey creation check + 0da7f8f9f2 doc: fix Doxygen examples build on FreeBSD + 
d3a4e55ca1 app/flow-perf: fix parsing of invalid option + 9827b33ea1 examples/ntb: fix build dependency + e8a8503d42 config/x86: skip GNU binutils bug check for LLVM + 07e1e6e798 fix spelling in comments and doxygen + 1967878181 examples/multi_process: fix Rx packets distribution + 75af4401a4 examples/l3fwd-power: fix early shutdown + 3a8dc3e8dc test/crypto: remove unnecessary stats retrieval + cd50e6f438 common/cpt: fix KASUMI input length + bc4695a138 test/crypto: fix missing return checks + fa912be2c5 test/crypto: fix data lengths + 139df45885 test/crypto: skip plain text compare for null cipher + 985e9324d9 event/dlb2: fix delayed pop test in selftest + 4f17d46380 eventdev/eth_tx: fix queue delete logic + 0d266c9fe9 examples/performance-thread: remove unused hits count + 9a514a21d8 test/distributor: remove unused counter + 7db7d2e658 net/vmxnet3: fix build with clang 13 + 0c8aa41249 net/qede/base: remove unused message size + e72e4962d5 net/nfp: remove unused message length + 6fdb54e73f net/liquidio: remove unused counter + 415c636992 net/bnxt: remove some unused variables + ae0fe7d799 event/sw: remove unused inflight events count + 22ebe5378e bus/fslmc: remove unused device count + 14e420ac85 net/octeontx: remove unused packet length + 0a475878ef net/hinic/base: remove some unused variables + 878f71050e test/red: fix typo in test description + 0ed3a7333f ethdev: fix typos + 5d3a7aab19 app/testpmd: fix DCB in VT configuration + 40588e9f6f net/mlx5: fix Tx scheduling check + 712ace31c5 net/iavf: fix pointer of meta data + 2f5b7df1e4 net/i40e: fix risk in descriptor read in scalar Rx + fdb91c91e8 doc: describe timestamp limitations for mlx5 + 7ae03fc171 common/mlx5: fix build for zero-length headroom array + 6d132c0f73 net/mlx5: fix RETA update without stopping device + 2a9a0c9d63 net/mlx5: fix tag ID conflict with sample action + 5744208d62 net/mlx5: fix tunnel offload validation + 180fa49727 power: fix build with clang 13 + 3088dda469 net/mlx5: workaround MR creation for flow counter + 2e7f6f9336 vdpa/mlx5: workaround guest MR registrations + 4f691f2c97 vdpa/mlx5: workaround dirty bitmap MR creation + 6108eff3be common/mlx5: create wrapped MR + 2991d7abc2 common/mlx5: glue MR registration with IOVA + dfae8dc0ec net/virtio: fix Tx checksum for tunnel packets + 5de5f15c5b net/bnxt: fix VLAN indication in Rx mbuf + 8ef3bed388 net/mlx5: do not close stdin on error + b99528eb2d net/nfp: cancel delayed LSC work in port close logic + 463cd893e2 net/af_packet: fix ignoring full ring on Tx + ce48d01167 net/ixgbe: fix port initialization if MTU config fails + 6c0517f529 net/iavf: fix multi-process shared data + 1d5d3847af net/hns3: fix interrupt vector freeing + 5ff9c28131 net/hns3: fix residual MAC after setting default MAC + dbf27ac1e7 net/i40e: fix i40evf device initialization + 89a78fde25 app/testpmd: remove unused header file + 4fd42c0a44 net/hns3: simplify queue DMA address arithmetic + 480eecce54 interrupt: fix request notifier interrupt processing + e963a3650b vfio: set errno on unsupported OS + 1fd6329364 vfio: fix FreeBSD documentation + 07e02c596a vfio: fix FreeBSD clear group stub + d2a21b149a kni: check error code of allmulticast mode switch + e5f58ae926 net/mlx5: remove duplicated reference of Tx doorbell + 19c487a7cd common/mlx5: fix UAR allocation diagnostics messages + f368f1bbb5 common/mlx5: remove unreachable branch in UAR allocation + 87e20726cc app/testpmd: remove double dependency on bitrate lib + 3be19a63d4 common/sfc_efx: fix debug compilation control + ac08c1ba97 ethdev: 
fix crash on owner delete + 5ec9795c68 net/i40e: fix forward outer IPv6 VXLAN + b2d0ed96d5 net/mlx5: fix Rx queue memory allocation return value + 63ba41d7ee net/mlx5: fix Altivec Rx + 6385edbe1c common/mlx5: fix flex parser DevX creation routine + 2f2c2b5b7e common/qat: fix queue pairs number + b332923763 examples/ipsec-secgw: move global array from header + 01f69bbd74 test/compress-perf: remove unused variable + c10c8edfa4 examples/fips_validation: fix device start + 13dc08c1ca crypto/qat: fix uncleared cookies after operation + 7bacbc5eb3 crypto/qat: fix status in RSA decryption + 9120474fcc test/crypto: fix max length for raw data path + d5b7c084fc net/txgbe: fix packet statistics + 9e127ea969 net/hns3: unregister MP action on close for secondary + 5bf8d5b2f7 net/hns3: fix multi-process action register and unregister + 69212ae06b net/hns3: fix secondary process reference count + 9488e784fc net/ice: fix flow redirect + e3632cfa14 net/ice: save rule on switch filter creation + 4c5c31b120 net/enic: avoid error message when no advanced filtering + 5927fdf154 net/bnxt: fix firmware version query + 6b4e43eaaa net/i40e: fix 32-bit build + 8d4494154e net/hns3: fix mailbox communication with HW + c4d20e838e net/virtio: fix link update in speed feature + 1c5f0499f5 net/mlx5: fix RSS RETA update + 223d61646c app/testpmd: fix RSS type display + 6a4ec07e26 app/testpmd: fix RSS key length + a5e3534481 doc: update NIC feature matrix for bnxt + e4082697ba net/iavf: fix shared data in multi-process + 0e28edc58a net/ice: fix function pointer in multi-process + 8c4227ddae mem: fix dynamic hugepage mapping in container + e324ec6bc4 malloc: fix allocation with unknown socket ID + 394952356a eal/linux: fix uevent message parsing + 558953d965 eal/linux: remove unused variable for socket memory + 2af8cf0bae eal: fix device iterator when no bus is selected + d2f33a2cbe test/mbuf: fix access to freed memory + d43921f4af test/cmdline: fix memory leak + 7cc4ec4aba eal/freebsd: fix IOVA mode selection + e306e6d9b3 test: fix ring PMD initialisation + d6f5ee8904 net/i40evf: extend the polling times of vf reset + 6b33455411 net/i40e: fix buffer size alignment + 982e2c63d1 common/mlx5: fix physical port name recognition + e567087a43 eal/windows: fix IOVA mode detection and handling + 4272080bb1 rib: fix IPv6 depth mask + 9902d7dcb9 lpm6: fix buffer overflow + 1729b2c337 hash: fix Doxygen comment of Toeplitz file + f1b1f1186a eal: reset lcore task callback and argument + 0cab294bc0 eal/x86: avoid cast-align warning in memcpy functions + b91dcac711 mbuf: avoid cast-align warning in data offset macro + 11895408d9 net: avoid cast-align warning in VLAN insert function + 0f1d36a746 doc: fix default mempool option in guides + d60f7f0105 usertools/pmdinfo: fix plugin auto scan + a0347e7e7e pipeline: fix instruction label check + ff1898349e test/event: fix timer adapter creation test + 1057ce433c app/testpmd: fix packet burst spreading stats + 3d0f003632 ethdev: fix PCI device release in secondary process + e880b1c163 net/virtio: fix avail descriptor ID + 04bd8e7fcb net/virtio: fix indirect descriptor reconnection + abd207e5fa vhost: add sanity check on inflight last index + 2074d20561 vdpa/mlx5: retry VAR allocation during vDPA restart + 3713e9c370 vdpa/mlx5: workaround FW first completion in start + d8ce32c71e net/virtio: fix check scatter on all Rx queues + 136944a2cb net/mlx5: close tools socket with last device + 8ce5c675a8 net/mlx5: fix Rx queue resource cleanup + 829fbd7252 devtools: fix letter case check in commit 
title + 74fb264b05 bpf: allow self-xor operation + edb5fcae31 eventdev/eth_rx: fix WRR buffer overrun + f7fa666eec app/eventdev: fix terminal colour after control-c exit + d95c7a669d mbuf: fix reset on mbuf free + 2eb840f1ae test/hash: fix buffer overflow with jhash + 869a35948e ethdev: forbid closing started device + c95ffe5671 net/i40e: fix risk in descriptor read in NEON Rx + dd5334c4ac net/ice: fix generic build on FreeBSD + 6e46115943 net/mlx5: support more tunnel types + 8a28600abc app/testpmd: add tunnel types + 73250dca43 app/testpmd: fix access to DSCP table entries + 5d3a512994 net/ena: advertise scattered Rx capability + 84a35ce308 net/ena: fix per-queue offload capabilities + d1ccc7019d net/ena: fix offload capabilities verification + ac9f3ffd5d net: fix aliasing in checksum computation + 44cd82b519 doc: fix emulated device names in e1000 guide + 6d32420ce9 net/ice: fix deadlock on flow redirect + 137ea50f60 drivers/net: remove queue xstats auto-fill flag + d19e3bea52 net/txgbe: fix to get interrupt status + 349ac4cf40 app/testpmd: fix hex string parser in flow commands + 1edd186c4f net/softnic: fix useless address check + 2dcdba115e net/enic: fix filter mode detection + 8cb630d9e4 net/mlx5: fix tunneling support query + 15242b3f60 net/mlx5: fix software parsing support query + 7379aa7aa8 net/i40e/base: fix using checksum before check + 610b598aa0 net/i40e/base: fix potentially uninitialized variables + c1977a22f6 net/i40e/base: fix function name in comments + a02cb9ff21 net/i40e/base: fix AOC media type + abc841874f net/i40e/base: fix update link data for X722 + 7dcf2e3db4 net/i40e/base: fix PF reset + 37817b554a net/i40e/base: fix PHY identifiers for 2.5G and 5G adapters + 63e0206bdb net/ixgbe: fix queue release + e69e21048d net/i40e: fix Rx packet statistics + ce17996fc6 net/sfc: update comment about representor support + f74eaa5869 net/sfc: free MAE lock once switch domain is assigned + b391213ff8 app/testpmd: retain all original dev conf when config DCB + 710a47c4c6 net/bonding: fix RSS key length + e0d9039f2e net/bonding: fix dedicated queue mode in vector burst + e788cc2006 app/testpmd: fix txonly forwarding + 4a2204af7b app/testpmd: update forward engine beginning + 4370d53fb5 net/af_xdp: disable secondary process support + 6ec9dcdfed test/bpf: fix undefined behavior with clang + 939685eaec cryptodev: fix multi-segment raw vector processing + ca9b74af25 app/crypto-perf: fix AAD template copy overrun + 386085df05 mempool: deprecate unused physical page defines + 3446f6d200 mbuf: enforce no option for dynamic fields and flags + a8590be363 test/atomic: fix 128-bit atomic test with many cores + 2bd66f32c9 mbuf: fix typo in comment + aa4a2ef3fe telemetry: fix JSON output buffer length + 341804051a eal/freebsd: ignore in-memory option + be9717a4cc bus/vmbus: fix ring buffer mapping in secondary process + c41706bb6e eal/x86: fix some CPU extended features definitions + cc7ded572d test/service: fix race in attr check + 3ee8970823 test/service: fix some comment + cd24395f65 test/event_crypto: fix event crypto metadata write + de983dfa6d examples/fips_validation: fix resetting pointer + 1aab8371d3 examples/fips_validation: remove unused allocation + 6d4f5a1639 eal/windows: do not install virt2phys header + 822f885986 eal/windows: fix CPU cores counting + 449612955b net: fix checksum API documentation + 288a450bb9 net/hns3: fix input parameters of MAC functions + 2adcdc8cc7 net/ixgbe: fix memzone leak on queue re-configure + 4e60a45559 net/i40e: fix memzone leak on queue 
re-configure + 8b56d27644 net/ice: fix memzone leak on queue re-configure + b51d3a4970 net/e1000: fix memzone leak on queue re-configure + 8f825b3b13 ethdev: fix xstats by ID API documentation + b4108eb62c common/dpaax: fix physical address conversion + a7bb99fc86 raw/ifpga/base: fix linking with librt + 1b8fac0312 test/latency: fix loop boundary + a00d38a2e1 bus/vmbus: fix leak on device scan + 3df9064884 net/mlx5: fix flow tables double release + 95cf6a360f net/bnxt: fix tunnel port accounting + 5b3f9bec58 net/bnxt: fix memzone free for Tx and Rx rings + f1c89a7edd net/bnxt: fix Tx queue startup state + ea39d70a50 net/bnxt: fix function driver register/unregister + c9eddf61f5 net/ice: retry getting VF VSI map after failure + 90d7fd5b10 common/iavf: fix ARQ resource leak + eca9795521 net/iavf: fix Rx queue IRQ resource leak + cd90c7a5b2 net/ice: fix double free ACL flow entry + 1900a18518 net/iavf: fix high CPU usage on frequent command + 49a2b0ca69 net/virtio: do not use PMD log type + 758eb05f2b net/virtio: fix Tx completed mbuf leak on device stop + 0da178c94e net/virtio: fix Tx cleanup functions to have same signature + 9786a89ed4 vhost: clean IOTLB cache on vring stop + 192d2f2dcd test/mem: fix memory autotests on FreeBSD + 95934ebba9 eal/freebsd: lock memory device to prevent conflicts + ad4051ca8f usertools: fix handling EOF for telemetry input pipe + 45f9d14e11 bitrate: fix calculation to match API description + 102d6df14d bitrate: fix registration to match API description + 1e697abcfd ring: fix Doxygen comment of internal function + 51a9cd2327 eal: remove Windows-specific list of common files + 8a43d7c5f4 eal/windows: export version function + 196cda0e31 bus/pci: fix unknown NUMA node value on Windows + 7b3cdb7c64 kvargs: fix comments style + edfd68842c net/memif: fix chained mbuf determination + 39f117c3b5 net/mlx5: fix shared RSS destruction + 5d3374aff1 net/mlx5: fix memory leak on context allocation failure + 4649ead943 net/octeontx: fix access to indirect buffers + 474bbafd6f net/iavf: fix mbuf leak + 60974c7c2c net/ice/base: calculate logical PF ID + 8532dae4d6 net/bonding: fix memory leak on closing device + 25533b38a6 test/compress: fix buffer overflow + 8f07dfbc18 examples/ipsec-secgw: fix parsing of flow queue + e78d085e36 stack: fix reload head when pop fails + 6f7c9fde71 vdpa/mlx5: fix large VM memory region registration + bb7f3bc24b sched: get 64-bit greatest common divisor + 2a5a421658 bus/pci: fix unknown NUMA node value on Windows + d593c0569e doc: fix numbers power of 2 in LPM6 guide + 88719f1273 net/iavf: fix Rx queue buffer size alignment + 64734ba6e2 net/i40e/base: fix resource leakage + 012f9cfceb net/iavf: fix mbuf leak + af659df64e net/ice/base: fix PF ID for DCF + 4fc96ab6c9 net/i40e: fix device startup resource release + 30fcdc4f50 net/i40e: fix mbuf leak + c7afc99804 net/octeontx2: fix MTU when PTP is enabled + 92569f9804 net/virtio: fix device configure without jumbo Rx offload + aafb232afb vhost: log socket path on adding connection + e0d08d0c03 net/virtio: fix repeated freeing of virtqueue + e543f89ba5 vhost: fix crash on port deletion + dd6e6e33c7 net/virtio-user: fix Rx interrupts with multi-queue + b00b073569 net/virtio: avoid unneeded link interrupt configuration + b954047654 net/virtio: fix split queue vectorized Rx + 24ae55b075 net/virtio: fix mbuf count on Rx queue setup + 8cbd4cec76 net: fix checksum offload for outer IPv4 + 7e0a1eee89 ethdev: fix typo in Rx queue setup API comment + fcece17c51 eal: fix memory leak when saving arguments + 
64c6120185 examples/service_cores: fix lcore count check + 8192dfc388 test/func_reentrancy: free memzones after test + 91f3769c3f build: propagate Windows system dependencies to pkg-config + 8404c8c99a net/ice: fix performance with writeback policy + d9958c1907 net/ixgbe: fix mbuf leak + a1ebe4da32 net/ixgbe: fix MAC resource leak + 5787cc1187 net/ixgbe: fix queue resource leak + c1723e2d14 net/ixgbe: fix hash handle leak + ed49cafb25 net/sfc: set FDIR bit for flow mark in EF100 Rx + dd8e8fcf1f net/hns3: fix taskqueue pair reset command + 0c355fecc0 net/hns3: fix queue flow action validation + 6b6aacee71 net/pcap: fix resource leakage on port probe + a202064ef3 net/axgbe: fix unreleased lock in I2C transfer + 19acac8cef doc: fix bonding driver name + af6efb8cb2 net/af_xdp: fix zero-copy Tx queue drain + fc300c1c4a net/bnxt: fix double allocation of ring groups + 8516f35456 net/bnxt: fix ring group free + ddc5464088 net/bnxt: check FW capability for VLAN offloads + 16f8bcc3d8 net/bnxt: fix mbuf VLAN in scalar Rx + 9f02c498bf net/ixgbe: fix Rx multicast statistics after reset + c660ad64aa net/iavf: fix overflow in maximum packet length config + 073599bf92 net/ice: fix queue config in DCF + 7722837b52 net/ice: fix deadlock on flow query + e244e8c066 net/ice: fix RXDID default value in DCF + 34a2e17afc net/ice: fix memzone leak after device init failure + 877a05209a net/nfp: fix minimum descriptor sizes + 1f83882dac common/dpaax/caamflib: fix IV for short MAC-I in SNOW3G + 52ed92cfb6 crypto/openssl: fix CCM processing 0 length source + c3672a36e6 config/ppc: ignore GCC 11 psabi warnings + 9102608a1c eal/ppc: ignore GCC 10 stringop-overflow warnings + e4509540be crypto/octeontx2: fix unaligned access to device memory + 6e5dbe1586 app/testpmd: fix dump of Tx offload flags + 3dc611ee15 app/testpmd: fix check without outer checksum + 8f75f8b588 net/bnxt: fix crash after port stop/start + d53332318f app/testpmd: fix Tx retry in flowgen engine + 83ec79465d net/bnxt: update ring group after ring stop start + 4ba7ab1ebd net/mlx5: fix eCPRI matching + f50cec54fd net/mlx5: fix mbuf replenishment check for zipped CQE + 05af857e43 net/txgbe: fix reading SFP module SFF-8472 data + e2eae48793 net/ice: fix max entry number for ACL normal priority + 2cbc3c42d2 net/ice/base: fix typo in comment + 39a30eb884 drivers/net: fix vector Rx comments + 99e95a04c4 drivers/net: fix typo in vector Rx comment + d65672bac2 examples/performance-thread: fix build with clang 12.0.1 + cd9f079d87 net/i40e: support 25G AOC/ACC cables + cfcca69903 version: 20.11.3 + fcdf769a98 test/power: fix CPU frequency when turbo enabled + ede02cfc47 net/mlx5: fix imissed statistics + c5f4e9dd15 version: 20.11.3-rc1 + 62ff84ca2a app/testpmd: fix IPv4 checksum + bf76709d20 bus: clarify log for non-NUMA-aware devices + 53193aeeba net/mlx5: workaround drop action with old kernel + 12cc60e507 doc: update atomic operation deprecation + 78b8978ef7 doc: remove old deprecation notice for sched + cf52376b2d doc: fix spelling + 853a987716 crypto/qat: disable asymmetric crypto on GEN3 + fb63987b6d crypto/octeontx: fix freeing after device release + 151569886c cryptodev: fix freeing after device release + 946df43747 eal/windows: cleanup virt2phys handle + d06cadbbbd eventdev: fix event port setup in Tx adapter + c9c391ea60 app/testpmd: fix Tx checksum calculation for tunnel + 3a0ced629f net/softnic: fix memory leak as profile is freed + c3e2390817 net/softnic: fix null dereference in arguments parsing + d43a754af8 net/memif: fix abstract socket address 
length + 62a186761f net/ena: enable multi-segment in Tx offload flags + 72f6920480 net/mlx5: add Tx scheduling check on queue creation + f46e2c750a net/mlx5: fix timestamp initialization on empty clock queue + 0564825638 net/mlx5: fix flow engine type in function name + 4ac789fddc net/mlx5: fix default queue number in RSS flow rule + 38779aa845 net/mlx5: fix RSS flow rule with L4 mismatch + f67d7c848e net/mlx5: fix queue leaking in hairpin auto bind check + 3003560e24 net/mlx5: fix representor interrupt handler + ad67a31e0a net/iavf: fix Tx threshold check + 13ac5a5101 net/virtio: fix default duplex mode + f21bc78175 net/virtio: fix interrupt handle leak + dc023498c9 vhost: fix crash on reconnect + 47d67fb4bb net/virtio: report maximum MTU in device info + 84a32847c9 net/octeontx2: fix default MCAM allocation size + 2f39890f4a app/testpmd: fix MAC address after port reset + d0eebf4a00 app/testpmd: fix help string for port reset + 36c937ac72 sched: rework configuration failure handling + 41a170fc81 sched: fix profile allocation failure handling + 0cd4f7ee81 net/sfc: fix MAC stats update for stopped device + b84a0ebd17 net/sfc: fix xstats query by unsorted list of IDs + e4ebfdeb0d net/sfc: fix xstats query by ID according to ethdev + 99bcdae014 net/sfc: fix reading adapter state without locking + e5e8e0aa61 net/sfc: fix MAC stats lock in xstats query by ID + 4b44020ca7 net/dpaa: fix headroom in VSP case + b2ac79d89a bus/dpaa: fix freeing in FMAN interface destructor + 66d9de1cde net/ena: trigger reset on Tx prepare failure + f91f60bb47 net/hinic: fix MTU consistency with firmware + 3e6a9fa0e2 net/hinic/base: fix LRO + 2ca9d750c9 net/hinic: increase protection of the VLAN + 4e210bb141 net/hns3: fix Tx prepare after stop + 28a94eea0c net/hns3: fix flow rule list in multi-process + fc0e7a4358 net/hns3: fix timing of clearing interrupt source + 01dcb16186 net/hns3: fix filter parsing comment + 471ed659c1 net/hns3: fix residual MAC address entry + 752b19f91f net/softnic: fix memory leak in arguments parsing + ce81944f3d eal/windows: check callback parameter of alarm functions + 806a18751b net/bnxt: fix null dereference in interrupt handler + 3e8a6d6b1d net/bnxt: remove workaround for default VNIC + 03348e9bec net/mlx5: export PMD-specific API file + d0213e26bb net/mlx5: reject inner ethernet matching in GTP + cf3ae2009c net/mlx5: fix RSS expansion for GTP + 3fd282e728 net/mlx5: fix RoCE LAG bond device probing + 453f8bb235 net/mlx5: fix indirect action modify rollback + a20d4d2506 net/mlx5: fix Rx/Tx queue checks + da1a6d5e46 regex/mlx5: fix redundancy in device removal + 0760fa23a0 net/mlx5: fix overflow in mempool argument + c571fde575 vdpa/mlx5: fix overflow in queue attribute + 4eb4301b07 regex/mlx5: fix size of setup constants + a961df1650 net/virtio: fix Rx scatter offload + e1b663f8b3 vhost: fix lock on device readiness notification + 12e277dee6 net/virtio: fix refill order in packed ring datapath + 2df90802c6 vhost: check header for legacy dequeue offload + db878744bc test/crypto: fix mempool size for session-less + 89d903beb4 crypto/octeontx2: fix lookaside IPsec IV pointer + 48181d181d crypto/octeontx2: fix IPsec session member overlap + 7bbe274958 test/power: fix CPU frequency check for intel_pstate + 1f4a450852 raw/ioat: fix config script queue size calculation + 133edc5f0f distributor: fix 128-bit write alignment + fc9d2f0dbe net/bnxt: clear cached statistics + 3f2f6be303 net/bnxt: fix nested lock during bonding + 5584a03fe9 net/bnxt: fix missing barriers in completion handling + 
f58d25a579 net/octeontx2: fix TM node statistics query + 6595f06986 net/mvpp2: fix configured state dependency + 9c8609a356 net/mvpp2: fix port speed overflow + ef16dc1cc8 net/mlx5: fix typo in vectorized Rx comments + 4c8e04c056 net/mlx5: fix threshold for mbuf replenishment in MPRQ + a6a787bf7d net/mlx5: fix missing RSS expansion of IPv6 frag + 5b34c2ad6f net/mlx5: fix missing RSS expandable items + 2a5c46697b net/mlx5: remove redundant operations in NEON Rx + a5fb806241 app/testpmd: fix offloads for newly attached port + 96ad1e25fc net/softnic: fix connection memory leak + 6807067d71 net/bonding: check flow setting + 2b916ad7eb net/bonding: fix error message on flow verify + e77955f0ed net/bnxt: fix ring allocation and free + 2346170dd3 net/bnxt: detect bad opaque in Rx completion + d375abc717 table: fix bucket empty check + 4b8bd031fe net/hns3: fix Arm SVE build with GCC 8.3 + 2bf923fc8f net/virtio: fix aarch32 build + 39156c3208 net/bnxt: fix aarch32 build + 7590791b9e net/sfc: fix aarch32 build + 2da39fd204 build: support drivers symlink on Windows + fd2f9a4932 doc: fix build on Windows with Meson 0.58 + 97d5862b30 net/octeontx/base: fix debug build with clang + d232a49bf4 net/ixgbe: fix flow entry access after freeing + 7cdf5af809 net/i40e: fix descriptor scan on Arm + ebe009027c net/ice: fix memzone leak when firmware is missing + 386524e3f5 common/mlx5: fix compatibility with OFED port query API + 32b98abf1b common/mlx5: use new port query API if available + 10beb1ace1 net/mlx5: fix MPLS RSS expansion + 29c7cf41a2 net/mlx5: remove unsupported flow item MPLS over IP + d709081562 common/mlx5: fix Netlink receive message buffer size + 0e61040a35 net/mlx5: fix match MPLS over GRE with key + 6452e165df net/mlx5: fix pattern expansion in RSS flow rules + c1ed88d89b net/mlx5: fix r/w lock usage in DMA unmap + ce3be50034 doc: add limitation for ConnectX-4 with L2 in mlx5 guide + 79bd409861 net/mlx5: fix TSO multi-segment inline length + 1e2b9848ab common/mlx5: fix memory region leak + 06e38e2e83 net/mlx5: fix multi-segment inline for the first segments + 98d41069f1 net/bnxt: fix Rx interrupt setting + ec202acb1c net/bnxt: fix scalar Tx completion handling + 8eb3ca3f92 net/bnxt: fix Tx descriptor status implementation + 0e27d70604 net/bnxt: fix ring and context memory allocation + 43bcce71fe net/bnxt: invoke device removal event on recovery failure + 6d2d072cc1 net/bnxt: fix auto-negociation on Whitney+ + 39d5e6fea3 net/bnxt: fix typo in log message + a12b17cb05 net/bnxt: cleanup code + 8a742f542c ipc: stop mp control thread on cleanup + fba329627d crypto/mvsam: fix options parsing + 1a3903c0d3 crypto/mvsam: fix session data reset + bbfd3f227b crypto/mvsam: fix capabilities + 929b5fec9d crypto/mvsam: fix AES-GCM session parameters + a3c1ad39e4 test/crypto: fix typo in ESN case + cc48322720 test/crypto: fix typo in AES case + bb9d13ec90 test/crypto: fix autotest function parameters + a1242338e1 crypto/aesni_gcm: fix performance on some AVX512 CPUs + a5405a2eba test/crypto: fix mbuf reset after null check + 63f28457dc app/crypto-perf: fix out-of-place mempool allocation + 1b7530a088 crypto/qat: fix Arm build with special memcpy + 85c2f7fed5 app/testpmd: change port link speed without stopping all + 55585a5f1a ethdev: fix doc of flow action + 4f3fba99a7 app/testpmd: fix type of FEC mode parsing output + b4d36b226b net/tap: fix Rx checksum flags on TCP packets + 936bdd642b net/tap: fix Rx checksum flags on IP options packets + dccbbfc8ee net/sfc: fix outer L4 checksum Rx + e942ab2144 net/pfe: 
remove unnecessary null check + 2799483e12 net/hns3: fix maximum queues on configuration failure + 2157046ca0 net/hns3: fix VLAN strip log + bf6bd022ef net/hns3: fix fake queue rollback + 14a0af5994 net/hns3: fix delay for waiting to stop Rx/Tx + 24dc86183d net/hns3: increase VF reset retry maximum + 21ec6d9eb2 drivers/net: fix memzone allocations for DMA memory + 55b7e77a7d net/sfc: fix outer match in MAE backend + b19930090a net/sfc: check ID overflow in action port ID + 97fcc59a87 mempool/octeontx2: fix shift calculation + 7009b90679 vdpa/mlx5: fix TSO offload without checksum + cb97272d8e vhost: fix NUMA reallocation with multi-queue + 1b6e6c1754 vhost: fix missing guest pages table NUMA realloc + 2412914cdb vhost: fix missing memory table NUMA realloc + b386dd5b4b net/i40e: fix multi-process shared data + 142e3badc2 net/i40e: fix flow director input set conflict + ef03fc3f01 net/ice: fix overflow in maximum packet length config + 0ac008ccca net/octeontx2: use runtime LSO format indices + b1cb0f2fcb net/octeontx2: fix flow creation limit on CN98xx + b719b89624 test/mbuf: fix virtual address conversion + a2d9e63eec bus/pci: fix leak for unbound devices + c24244d08f examples/l2fwd: fix [no-]mac-updating options + 8f9f2da7e3 app/test: fix IPv6 header initialization + ef0558c8be bus/pci: fix IOVA as VA support for PowerNV + f0847028f3 common/mlx5: fix Netlink port name padding in probing + 6b23ae0cad net/mlx5: fix IPIP multi-tunnel validation + 63c0a9b54e net/mlx5: fix switchdev mode recognition + 82758719aa net/mlx5: fix RSS pattern expansion + b6690ca0b1 tests/eal: fix memory leak + 72bff6df5f tests/cmdline: fix memory leaks + d4a803fd53 rib: fix max depth IPv6 lookup + 36dbd9df87 flow_classify: fix leaking rules on delete + 16d0682d67 kni: fix crash on userspace VA for segmented packets + 3fc6330a65 kni: fix mbuf allocation for kernel side use + 979fecc857 vhost/crypto: check request pointer before dereference + bd03c14aaf devtools: fix file listing in maintainers check + 5ca9e6b50d vfio: add stdbool include + 24b3c18127 doc: fix default burst size in testpmd + 6dfb0b782f doc: fix typo in SPDX tag + ac4a67a5f9 net/iavf: fix scalar Rx + 5f70ea4e97 net/i40e: fix use after free in FDIR release + 1050357ef7 net/ice: fix data path in secondary process + 744e86e089 net/ice: fix data path selection in secondary process + 18db4a4d84 net/i40e: fix raw packet flow director + d5052b1a2e net/iavf: fix handling of unsupported promiscuous + ea3ef0c977 net/ice: fix default RSS key generation + 58fbfecc2a net/iavf: fix RSS key access out of bound + 70b84a4e1b net/bnxt: remove unnecessary comment + 386efec380 net/bnxt: improve probing log message + a2e10ac19d net/bnxt: fix check for PTP support in FW + 92e631ec91 net/bnxt: use common function to free VNIC resource + cb99c42fe0 net/bnxt: set flow error after tunnel redirection free + 2f6cc4ff5f net/bnxt: fix error handling in VNIC prepare + 3d3770a60f net/bnxt: remove unnecessary code + da7d8b7da3 net/bnxt: set flow error when free filter not available + 78f6a49fd4 net/bnxt: fix error messages in VNIC prepare + 47b40639d9 net/bnxt: workaround spurious zero stats in Thor + 70314ce028 net/bnxt: fix Rx burst size constraint + d19b2017fc net/bnxt: check access to possible null pointer + 73b4b37a14 malloc: fix size annotation for NUMA-aware realloc + cd12bf5357 bitmap: fix buffer overrun in bitmap init + 7af7de2a52 graph: fix null dereference in stats + 3d328cf711 graph: fix memory leak in stats + a860247483 version: 20.11.2 + aac916d95c version: 20.11.2-rc2 
+ 8db55d6a07 net/mlx5: fix receiving queue timestamp format + 6b3e11c79d net/ice: fix RSS for L2 packet + f7b699f8e1 event/octeontx2: fix XAQ pool reconfigure + a0481453af event/octeontx2: configure crypto adapter xaq pool + a71455d9f5 event/octeontx2: fix crypto adapter queue pair operations + ffa8fb5f54 build: fix drivers selection without Python + d997326fe3 net/ark: fix leak on thread termination + e11b10e00a net/hns3: fix concurrent interrupt handling + 621a74d828 test/cmdline: silence clang 12 warning + bd41e2bc99 doc: fix runtime options in DLB2 guide + 8ba82ed84d event/dlb2: remove references to deferred scheduling + 4d182a84d5 test: fix build with GCC 11 + 0b1753ac72 net/memif: fix Tx bps statistics for zero-copy + b1ec8ac5ec common/sfc_efx/base: add missing MCDI response length checks + 6f41c82e52 common/sfc_efx/base: limit reported MCDI response length + 83fcaa37fe net/mlx5: fix loopback for Direct Verbs queue + 91f0c38c0d net/hns3: fix link speed when VF device is down + f4a3f4a6b9 net/hns3: fix DCB reconfiguration + 0eafb399ba net/hns3: fix DCB configuration + aeaba9b829 net/hns3: remove meaningless packet buffer rollback + 47af5229ce net/hns3: fix requested FC mode rollback + 6442b97894 net/hns3: fix Rx/Tx queue numbers check + a6967ee4a8 vdpa/mlx5: fix device unplug + 4079bce204 net/vhost: restore pseudo TSO support + d565e160b1 net/mlx5: fix counter offset detection + 570fa795f0 net/mlx5: fix leak when configured repeatedly + 2471e99da3 net/mlx4: fix leak when configured repeatedly + 6156da1041 crypto/zuc: fix build with GCC 11 + 87eec97f52 test/crypto: fix build with GCC 11 + 18918e1884 devtools: fix orphan symbols check with busybox + 934abbb8b8 test: fix division by zero + 99755af674 examples/l3fwd-power: fix empty poll thresholds + 64ac670fa0 test/table: fix build with GCC 11 + 61238b46be test/power: fix turbo test + 4498bac0f5 test/power: fix low frequency test when turbo enabled + c5a5a60d9e test/power: add turbo mode to frequency check + c729d4f125 test/power: fix CPU frequency check + 9a1044da25 test: check flow classifier creation + acf64e9685 examples/skeleton: fix NUMA check of port and core + 12822e56a0 examples/l2fwd-cat: fix NUMA check of port and core + 110a22070f examples/flow_classify: fix NUMA check of port and core + b993ebf7bb examples/rxtx_callbacks: fix port ID format specifier + fc88e04a9a app/crypto-perf: check memory allocation + cfd635034c crypto/qat: fix null authentication request + 010e63f5ee test/crypto: fix return value of a skipped test + 975a9831e1 net/mlx5: fix RSS flow item expansion for NVGRE + cb97d59428 net/mlx5: fix secondary process initialization ordering + 14b6df2399 net/mlx4: fix secondary process initialization ordering + 071855618a net/tap: fix build with GCC 11 + c2155d8297 net/ice/base: fix build with GCC 11 + 84c5ec2505 net/bnx2x: fix build with GCC 11 + 724378c1a1 net/bnx2x: fix build with GCC 11 + a83980d079 net/igc: fix speed configuration + e0305fc830 net/i40e: fix VF RSS configuration + 2e077407fe net/ice: fix VSI array out of bounds access + e5c01fbc22 net/ena: indicate Rx RSS hash presence + 96883cec2a net/mlx5: fix tunnel offload private items location + 6bdf384447 net/enic: enable GENEVE offload via VNIC configuration + 1cf7b4c769 net/ice: fix leak on thread termination + 18151dfdcb app/testpmd: fix tunnel offload flows cleanup + 98f23aad56 net/hns3: fail setting FEC if one bit mode is not supported + a6c272ac76 net/hns3: fix ordering in secondary process initialization + edd8521ace net/hns3: fix secondary process 
request start/stop Rx/Tx + a795428a29 net/hns3: fix mailbox message ID in log + 5244852443 net/hns3: fix TM QCN error event report by MSI-X + ec14e37220 net/txgbe: fix QinQ strip + 33f33e3e7c net/hns3: fix querying flow director counter for out param + 8e72b7a2ee net/hns3: fix VF alive notification after config restore + 6ed898b297 net/hns3: clear hash map on flow director clear + 8f93ec6288 net/hns3: fix log on flow director clear + 9e15a154c5 net/hns3: return error on PCI config write failure + 3387f2c95f net/nfp: fix reporting of RSS capabilities + c9000c686e net/ena: report default ring size + 4a79d25bd1 net/ena: remove endian swap functions + 967dcfb168 net/ena: fix crash with unsupported device argument + a5c0cca39a net/ena: fix parsing of large LLQ header device argument + 1c44277b56 net/ena/base: destroy multiple wait events + 74cc4f7d97 net/ena/base: fix type conversions by explicit casting + 47ed9f3e81 net/ena/base: improve style and comments + 70f1f80edb net/ena: switch memcpy to optimized version + 48af30a4c3 net/mlx5/linux: fix firmware version + 106f00833d net/mlx5: fix RSS flow item expansion for GRE key + 6db31e3304 net/ice/base: fix memory allocation wrapper + d8351b6c9f app/eventdev: fix lcore parsing skipping last core + d4138b565d event/dpaa2: remove unused macros + b7ab9f121d power: fix sanity checks for guest channel read + 5d30751003 doc: remove PDF requirements + 14e975dea9 test/timer: check memzone allocation + 3ce8842e18 examples/timer: fix time interval + 764a01e145 ipc: use monotonic clock + 45c0d2e47e raw/skeleton: add missing check after setting attribute + 3aadd33dd2 eal: fix memory mapping on 32-bit target + 3cb6827a32 eal: fix leak in shared lib mode detection + 5be4837391 bus/fslmc: remove unused debug macro + 07d17061ab test/crypto: copy offset data to OOP destination buffer + e5bf617fa8 crypto/dpaa2_sec: fix close and uninit functions + 39b13992e9 crypto/dpaa_sec: affine the thread portal affinity + 8038030f29 test/crypto: fix auth-cipher compare length in OOP + afe3a7f202 compress/qat: enable compression on GEN3 + 95fd32f696 common/qat: increase IM buffer size for GEN3 + 6856433bed app/bbdev: fix HARQ error messages + 90ca87dd69 app/bbdev: check memory allocation + 2194792b3a eal: fix service core list parsing + 689d3a982b ipc: check malloc sync reply result + 89bbedc455 raw/ntb: check memory allocations + 1a62a37afe raw/ntb: check SPAD user index + 10ddae775c examples: fix pkg-config override + 8ff559890a regex/octeontx2: remove unused include directory + a0a21cb0c7 net/bnxt: prevent device access in error state + 00b2343239 net/bnxt: fix mismatched type comparison in Rx + e00127b777 net/bnxt: check PCI config read + 91d4a1731a net/bnxt: fix mismatched type comparison in MAC restore + 1cfcaccb0b net/bnxt: fix single PF per port check + 85b0241b6b net/bnxt: fix dynamic VNIC count + a7375b06e8 net/bnxt: fix Rx timestamp when FIFO pending bit is set + 699e70a0b6 net/bnxt: refactor multi-queue Rx configuration + ca60f84823 vhost: fix offload flags in Rx path + 96c209b1a6 net/virtio: fix vectorized Rx queue rearm + b6659faacc telemetry: fix race on callbacks list + 31619530c8 test/distributor: fix burst flush on worker quit + 8e681713b3 test/distributor: fix worker notification in burst mode + 8064f75d9b ethdev: add missing buses in device iterator + 8e6d9cfe2d net/hns3: increase readability in logs + ce498169d4 net/hns3: remove unused VMDq code + 5d84f2b422 net/hns3: remove read when enabling TM QCN error event + 29da2a8b3d net/hns3: fix vector Rx burst 
limitation + 88be45dbba net/bnxt: drop unused attribute + 15cf480f1a net/sfc: fix mark support in EF100 native Rx datapath + 40c046441f net/i40e: fix primary MAC type when starting port + 38b01e7fa3 net/iavf: fix primary MAC type when starting port + 5c4358f054 raw/ifpga: fix device name format + cb6f74724d net/mlx5: fix flow age event triggering + 053299cf1d net/hns3: remove unused macros + 2f0e34a575 net/hns3: fix time delta calculation + 589600a03a net/hns3: log time delta in decimal format + 1f6d0c6f6c app/testpmd: verify DCB config during forward config + 313bd50dab app/testpmd: fix DCB re-configuration + a3da207349 app/testpmd: fix DCB forwarding configuration + 95d258ab14 app/testpmd: fix forward lcores number for DCB + 9874e06dc5 net/kni: warn on stop failure + 3caffc5447 net/tap: check ioctl on restore + 6af34d2abd app/testpmd: fix division by zero on socket memory dump + 180a37c3ef net/hns3: fix link speed when port is down + 3502d412a0 net/hns3: fix link status when port is stopped + 47b2b44b9f net/mlx5: fix probing device in legacy bonding mode + 1c43f69982 net/mlx4: fix buffer leakage on device close + 04475743c8 net/mlx5: remove drop queue function prototypes + 8965b66896 net/bnxt: use prefix on global function + 52d2337eac net/bnxt: remove unused function parameters + 5396d57fc3 net/bnxt: remove unnecessary forward declarations + 0f0469489b net/virtio: fix getline memory leakage + 24743b77a7 vhost: fix redundant vring status change notification + b0a7aab7d8 vhost: fix queue initialization + 351bc34645 net/e1000: fix flow error message object + 5d2c2857f4 common/iavf: fix duplicated offload bit + dc22d3cfab net/iavf: fix VF to PF command failure handling + e488fd804d net/ice: fix fast mbuf freeing + c7db959613 net/i40e: remove redundant VSI check in Tx queue setup + 35e133887c net/i40e: fix negative VEB index + 3085e8ab3e common/sfc_efx/base: fix dereferencing null pointer + 551f3198b8 net/hns3: fix handling link update + adefa69ef3 net/bonding: fix socket ID check + 8d04d026a7 doc: fix formatting in testpmd guide + 9886a1aed2 app/testpmd: fix segment number check + bef47e0c78 net/hns3: fix typos on comments + 58155c9ecf net/tap: fix interrupt vector array size + 6ca567cc48 app/testpmd: fix max queue number for Tx offloads + eaee68d852 test/kni: check init result + 3b949ee57a test/kni: fix a comment + 448c880b6e net/bonding: fix leak on remove + 8abec7f317 net/hns3: remove unused mailbox macro and struct + b012ce6634 net/hns3: fix processing link status message on PF + b47af28ffd net/hns3: fix mailbox error message + 3ab9cfbc6a drivers/net: fix FW version query + b65e812dd2 net/kni: check init result + bfaab1571e doc: fix multiport syntax in nfp guide + 7abfd667da power: save original ACPI governor always + 2c6016c807 bpf: fix JSLT validation + a5fd2098d6 acl: fix build with GCC 11 + efdd260f6e eventdev: fix memory leakage on thread creation failure + 78dfdbc90f eventdev: remove redundant thread name setting + d173cc85c9 app/eventdev: fix overflow in lcore list parsing + 8ad8d124a5 test/mempool: fix object initializer + f532cbbedb mbuf: check shared memory before dumping dynamic space + 5f90abb232 eal/arm64: fix platform register bit + 35ad25c5b1 raw/ioat: fix script for configuring small number of queues + 1c96bfbc4c config/ppc: reduce number of cores and NUMA nodes + cf948fe9c5 stack: allow lock-free only on relevant architectures + ad11991368 version: 20.11.2-rc1 + 06ed5a2729 app: fix exit messages + 3dce6da8bc bus/pci: support I/O port operations with musl + 8aeb5c3538 
net/bnxt: fix ring count calculation for Thor + d225df791f regex/mlx5: support timestamp format + 1607156dcd net/mlx5: support timestamp format + 2ceb5afbf2 doc: fix build with Sphinx 4 + 7fbddcaa0e net/bnxt: fix PTP support for Thor + 05d828f0b6 net/bnxt: fix Rx queue count + 189a17d086 net/bnxt: fix Rx descriptor status + 51fef82607 net/sfc: fix outer rule rollback on error + 1524a5460b net/hns3: fix verification of NEON support + c6bc1e8980 net/hns3: fix timing in mailbox + 41b2680227 net/hns3: fix VF handling LSC event in secondary process + 58d8850553 net/hns3: fix possible mismatched response of mailbox + 595ee1c60a net/virtio: fix interrupt unregistering for listening socket + 442a4977a9 net/iavf: fix wrong Tx context descriptor + 6a6af19db5 net/hns3: fix setting default MAC address in bonding of VF + d24915f072 net/hns3: fix mbuf leakage + 67fe9bc80f ci: catch coredumps + 0f9f25a1d1 ci: ignore APT update failure in GitHub Actions + a93e472781 ci: fix package installation in GitHub Actions + 2f415afb56 ci: enable v21 ABI checks + bb63ff4aa1 ci: hook to GitHub Actions + 92ef3bbc7a kni: fix kernel deadlock with bifurcated device + d4d212786a kni: refactor user request processing + 0cf8916e01 kni: support async user request + 20bbce05a8 doc: fix names of UIO drivers + 7d3e01ef10 net/e1000/base: fix timeout for shadow RAM write + bd0291a99a net/i40e: fix flow director for common pctypes + abb66bf463 net/ice: fix disabling promiscuous mode + 32698b8d32 net/hns3: remove unused macro + 8d6377e172 net/igc: fix Rx packet size + 608d69a62f net/bnxt: fix health check alarm cancellation + 8ffdca19f3 net/bnxt: fix resource cleanup + 9231460d33 net/bonding: fix adding itself as its slave + ba1a310e04 net/hns3: fix flow director lock + 5eaa5abcd8 net/hns3: fix VMDq mode check + cdffbfb77f net/hns3: fix DCB mode check + 2782c994db net/hns3: remove redundant mailbox response + 5c61f0e991 doc: fix matching versions in ice guide + bd0494c495 net/hns3: remove unused macros + f81a67f4b8 doc: fix HiSilicon copyright syntax + 1eea3b7a27 examples/ethtool: remove unused parsing + aa94d640eb examples: add eal cleanup to examples + 8347ba1646 test/power: round CPU frequency to check + 2266f65a23 test/power: add delay before checking CPU frequency + 323d01cf8d test/bpf: fix error message + 79273b13a0 common/dpaax: fix possible null pointer access + 1bcde22826 sched: fix traffic class oversubscription parameter + 9ffa1fee27 ip_frag: fix fragmenting IPv4 packet with header option + b6b219b87c test: check thread creation + 7b3cbf5315 test/cmdline: fix inputs array + c62ae18b64 examples/l3fwd: fix LPM IPv6 subnets + 512393d56e examples/ptpclient: remove wrong comment + 51ca4146bb pipeline: fix endianness conversions + 6b76e06108 vfio: fix duplicated user mem map + 879dc56ebd eventdev: fix case to initiate crypto adapter service + 86ec5aeef9 net/iavf: fix lack of MAC type when set MAC address + 674d4cb4de net/i40e: fix lack of MAC type when set MAC address + 6ac6d7e852 net/hns3: fix flow control mode + 20a6184f88 net/hns3: remove VLAN/QinQ ptypes from support list + dc7d063415 net/hns3: fix missing outer L4 UDP flag for VXLAN + f20ccda35a net/hns3: fix use of command status enumeration + a0cffe358d net/ice: fix crash in AVX512 + b59be07a77 net/iavf: fix crash in AVX512 + 5ecc163bf9 ethdev: update flow item GTP QFI definition + 42c05be1fe app/testpmd: fix bitmap of link speeds when force speed + be168e4fec net/ixgbe: fix Rx errors statistics for UDP checksum + 8e59d734a2 net/mlx5: fix resource release for mirror 
flow + 00d769c518 net/mlx4: fix RSS action with null hash key + fb455a8129 net/mlx5: fix redundant flow after RSS expansion + 1cb30b35c9 net/ice: fix illegal access when removing MAC filter + ac002ace6e net/e1000: fix max Rx packet size + 9c9d25bce1 common/sfc_efx/base: fix indication of MAE encap support + 41111ae623 net/hns3: fix configure FEC when concurrent with reset + 33c960aad1 net/hns3: fix queue state when concurrent with reset + da8413c234 net/hns3: fix timing in resetting queues + 94bb3ef988 net/hns3: fix some packet types + 60065a5a61 test: fix TCP header initialization + 3d8025a121 buildtools: fix all drivers disabled on Windows + acbb986965 crypto/qat: fix offset for out-of-place scatter-gather + ce88f40d2c examples/l2fwd-crypto: fix packet length while decryption + 9c07408cef examples/l2fwd-crypto: skip masked devices + 96d2d64bf1 crypto/octeontx: fix session-less mode + 9e520a5a7a eal: add C++ include guard for reciprocal header + 45b58e4ac4 raw/octeontx2_dma: assign PCI device in DPI VF + d2d434683d test/trace: fix race on collected perf data + cf45856b8b license: fix typos + eb30365e9e event/octeontx2: fix device reconfigure for single slot + dc7e8df00f app/eventdev: fix timeout accuracy + 5b91f48bc8 test/event: fix timeout accuracy + ba0aecb72a eal/windows: fix return codes of pthread shim layer + 2e83b42a99 app/flow-perf: fix encap/decap actions + 2b03f8ca36 net/enic: fix flow initialization error handling + 44bd01347f net/hns3: delete redundant blank line + 7764ed833a net/hns3: support get device version when dump register + d0897ad2fb net/hns3: fix VF mailbox head field + bb24098f63 net/hns3: fix flow counter value + 654aeb2619 net/hns3: fix flow control exception + 73db182cb0 net/hns3: fix rollback after setting PVID failure + c718e751f4 net/hns3: fix FLR miss detection + 539f3b7802 net/hns3: fix copyright date + a096b11ef5 ethdev: validate input in EEPROM info + 963fcacd95 ethdev: validate input in register info + 2e6638d897 ethdev: validate input in module EEPROM dump + ec4d4d10f3 vhost: fix initialization of async temporary header + de93dc0502 vhost: fix initialization of temporary header + d43a7f6be8 net/bnxt: fix configuring LRO + bc53c25124 net/bnxt: fix double free in port start failure + f30dc57e78 net/mlx5: fix drop action for Direct Rules/Verbs + b52bd28182 net/mlx5: fix missing shared RSS hash types + 43af55f5bf net/mlx5: fix shared inner RSS + 81c1c91037 net/i40e: fix flow director config after flow validate + 5a4a7bb830 doc: update recommended versions for i40e + b1fab4dc0d net/e1000: fix Rx error counter for bad length + fa6df61bf2 net/igc: fix Rx error counter for bad length + 13e3e5e93c net/ena: fix releasing Tx ring mbufs + 7151983d32 net/hns3: update HiSilicon copyright syntax + fdd1a58762 net/hns3: fix MTU config complexity + 1324beafba eal: fix hang in control thread creation + ea6d0d6561 eal: fix race in control thread creation + 5a324f3e4d app/testpmd: fix usage text + 7b581788ee app/regex: fix usage text + 3ad213dfa7 eal: fix evaluation of log level option + 4fc1996894 test: proceed if timer subsystem already initialized + 92805a55da drivers: fix log level after loading + 6baf95a98d service: clean references to removed symbol + e0a41b8c47 mem: fix freeing segments in --huge-unlink mode + 8ee0fdee90 power: do not skip saving original P-state governor + 4370808640 doc: fix sphinx rtd theme import in GHA + 8d3f8b347f vdpa/mlx5: fix virtq cleaning + 42ed69a37b examples/vhost_crypto: remove unused short option + 429dd55035 vhost: fix batch dequeue 
potential buffer overflow + 358cba78c0 vhost: fix packed ring potential buffer overflow + 25d53e1eb8 vhost: fix split ring potential buffer overflow + 1716e66878 examples/vhost: check memory table query + 42457347e2 vdpa/ifc: check PCI config read + f90c6e9d41 net/mlx5: fix using flow tunnel before null check + 7f55ac318c net/ixgbe: fix RSS RETA being reset after port start + 843b7caa8e net/iavf: fix TSO max segment size + eefc6b16bc net/igc: fix Rx RSS hash offload capability + 8235f3fdfd net/i40e: announce request queue capability in PF + ddb17b5462 net/iavf: fix packet length parsing in AVX512 + 7f8de73228 net/i40e: fix parsing packet type for NEON + 38824647ed app/testpmd: fix Tx/Rx descriptor query error log + cd33dd08a7 net/sfc: fix error path inconsistency + de81fe259a net/hinic: fix crash in secondary process + a1b0bff2d5 net/hns3: fix long task queue pairs reset time + 15b6974306 net/hns3: fix link update when failed to get link info + 7537fafbc7 net/hns3: fix Tx checksum for UDP packets with special port + a7bf8336a2 net/hns3: fix processing Tx offload flags + e29b92b998 net/hns3: fix reporting undefined speed + 530d228d3e net/mlx5: support RSS expansion for IPv6 GRE + 2291013139 net/mlx5: fix flow actions index in cache + 162f87fce1 net/ice/base: fix memory allocation for MAC addresses + 7b7af2fd73 net/ice: fix RSS hash update + fe1461f8b6 net/i40e: fix input set field mask + 74fc31b48f net/qede: accept bigger RSS table + 50650a3ab2 net/qede: reduce log verbosity + 75f780af8b net/bnxt: fix memory allocation for command response + 34972cfd96 net/bnxt: check kvargs parsing + b1f8911ea3 net/bnxt: fix handling of null flow mask + 28e5e8b722 net/bnxt: fix Tx length hint threshold + b21daacf92 net/bnxt: fix Rx buffer posting + 6260a81282 net/bnxt: fix timesync when PTP is not supported + 23f07e5fdc net/bnxt: fix link state operations + 88f2faa23c net/bnxt: fix RSS context cleanup + 832c6464ce net/bnxt: fix PCI write check + 2b68ea0468 net/bnxt: fix Tx timestamp init + 763cb5c708 app/testpmd: fix NVGRE encap configuration + 516cebd9dd common/sfc_efx: remove GENEVE from supported tunnels + b22415b70a net/ark: refactor Rx buffer recovery + f6beb20cd9 net/ark: update packet director initial state + d1b41662e0 test: fix autotest handling of skipped tests + 7c0c441826 pipeline: fix instruction translation + a8c9a82b54 examples/packet_ordering: fix port configuration + 67becbfe57 table: fix actions with different data size + 19bb5235f0 examples/bbdev: fix header include for musl + ba7c3d6bef app/testpmd: fix build with musl + 0999e13c7a event/dlb: fix header includes for musl + c8f64e248d net/igc: remove use of uint type + 6bee873fbb net/cxgbe: remove use of uint type + fc4664ada5 bus/dpaa: fix build with musl + a01c94a0c3 bus/dpaa: fix 64-bit arch detection + a4553bb9d2 common/dpaax/caamflib: fix build with musl + efa745cbb2 eal: fix build with musl + 9c3bb2603c build: remove redundant _GNU_SOURCE definitions + 14702af2e5 build: detect execinfo library on Linux + 1555f48f85 buildtools: fix build with busybox + 838da36866 eal: fix comment of OS-specific header files + cf10220946 net/mlx5: fix Rx metadata leftovers + cd5184145e vdpa/mlx5: support timestamp format + 7ddf9eaed9 common/mlx5: add timestamp format support to DevX + 604068df87 net/ice: check some functions return + 9a7fd13201 app/testpmd: check MAC address query + 211541b614 net/bnxt: fix Rx and Tx timestamps + 7c69b27093 net/bnxt: fix xstats get + f061789e7c net/bnxt: mute some failure logs + 15d81d5f9e net/bnxt: fix HWRM and FW 
incompatibility handling + 274ff8673a net/bnxt: fix VF info allocation + 3d051e75a4 net/bnxt: fix device readiness check + 2d5c161605 net/bnxt: fix FW readiness check during recovery + 3fd1f9e8ec net/bnxt: fix firmware fatal error handling + 86e9785dbd net/bnxt: fix queues per VNIC + 74451465f7 net/bnxt: fix VNIC configuration + 262c0649f5 net/bnxt: remove unused macro + 7c078fd0e9 net: fix comment in IPv6 header + a7e79f5d2c bus/pci: fix Windows kernel driver categories + 75c0d71c9f bus/pci: skip probing some Windows NDIS devices + d68bec0b17 eal/windows: fix default thread priority + 87af5c7023 eal/windows: add missing SPDX license tag + d43987787c log/linux: make default output stderr + f59f4e98e4 build: exclude meson files from examples installation + b66b37a2b5 net/octeontx2: fix VLAN filter + 6f1a03c53e net/mlx5: fix Rx segmented packets on mbuf starvation + d0de930b7b net/i40e: fix IPv4 fragment offload + 9461039311 net/i40evf: fix packet loss for X722 + 0d32ae0c6f net/ice/base: cleanup filter list on error + 8d88d38931 net/ice/base: fix uninitialized struct + d4ef2f169e net/ice/base: fix payload indicator on ptype + e3486cbde8 net/e1000: remove MTU setting limitation + f9b6f0ca8b net/igc: remove MTU setting limitation + f27ac1c009 net/ice: fix VLAN filter with PF + 58fc7f65f2 net/txgbe: update packet type + 4d4ae30d07 net/txgbe: fix Rx missed packet counter + 991c44b257 net/txgbe: remove unused functions + 96cc541d11 net/bonding: fix LACP system address check + 19922dfa9e net/hns3: remove unused parameter markers + 3997b85241 net/hns3: fix HW buffer size on MTU update + e4e0a6505d net/hns3: fix device capabilities for copper media type + 4371d3b12b common/mlx5: add DevX commands for queue counters + 6e06d42907 common/mlx5: add DevX command to query WQ + fba807cae9 common/mlx5/linux: add glue function to query WQ + 44e87e7c10 net/pcap: fix file descriptor leak on close + 82f21305c4 net/mlx5: fix UAR allocation diagnostics messages + 1cac75b516 net/mlx5: fix hashed list size for tunnel flow groups + cf30b35eb3 net/pcap: fix format string + 40313397da net/af_xdp: fix error handling during Rx queue setup + 32ae43ccb7 net/sfc: fix buffer size for flow parse + 40072bc599 app/testpmd: remove unnecessary UDP tunnel check + 7a866f25e6 net/ionic: fix completion type in lif init + c725e11eb3 net/dpaa: fix getting link status + d287fda7da net/dpaa2: fix getting link status + e518710835 bus/dpaa: fix statistics reading + 1965f4ee95 bus/fslmc: fix random portal hangs with qbman 5.0 + 58200ed66a net/mlx5: fix metadata item validation for ingress flows + 8d2066d2ec common/mlx5: fix DevX read output buffer size + 23b584d6cc net/mlx5: fix external buffer pool registration for Rx queue + 7fe1e5cdb9 net/failsafe: report minimum and maximum MTU + 9da42b8f93 net/failsafe: fix RSS hash offload reporting + 4f140c14a2 fbarray: fix log message on truncation error + db950ecec2 vfio: fix API description + b8bde7fa4b power: remove duplicated symbols from map file + e41a908c66 test/mem: fix page size for external memory + 7f904ea0c6 vfio: fix DMA mapping granularity for IOVA as VA + 0e42f2b7ea vfio: do not merge contiguous areas
+
+* Wed Aug 31 2022 Open vSwitch CI - 2.16.0-97
+- Merging upstream branch-2.16 [RH git: e6de3f5eee]
+  Commit list: