diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh index 3e5136fd4e..1acf5012db 100755 --- a/.ci/linux-build.sh +++ b/.ci/linux-build.sh @@ -201,7 +201,7 @@ fi if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then if [ -z "$DPDK_VER" ]; then - DPDK_VER="20.11" + DPDK_VER="20.11.1" fi install_dpdk $DPDK_VER if [ "$CC" = "clang" ]; then @@ -235,7 +235,7 @@ if [ "$TESTSUITE" ]; then configure_ovs export DISTCHECK_CONFIGURE_FLAGS="$OPTS" - if ! make distcheck CFLAGS="${CFLAGS_FOR_OVS}" \ + if ! make distcheck -j4 CFLAGS="${CFLAGS_FOR_OVS}" \ TESTSUITEFLAGS=-j4 RECHECK=yes; then # testsuite.log is necessary for debugging. cat */_build/sub/tests/testsuite.log diff --git a/.cirrus.yml b/.cirrus.yml index 2caf36b85c..480fea2421 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -5,11 +5,11 @@ freebsd_build_task: image_family: freebsd-12-2-snap image_family: freebsd-11-4-snap cpu: 4 - memory: 8G + memory: 4G env: - DEPENDENCIES: automake libtool gmake gcc wget openssl - python3 py37-openssl py37-sphinx + DEPENDENCIES: automake libtool gmake gcc wget openssl python3 + PY_DEPS: sphinx|openssl matrix: COMPILER: gcc COMPILER: clang @@ -18,6 +18,7 @@ freebsd_build_task: - sysctl -w kern.coredump=0 - pkg update -f - pkg install -y ${DEPENDENCIES} + $(pkg search -xq "^py3[0-9]+-(${PY_DEPS})-[0-9]+" | xargs) configure_script: - ./boot.sh diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index c4487226be..c96666b305 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -113,6 +113,12 @@ jobs: - name: checkout uses: actions/checkout@v2 + - name: fix up /etc/hosts + # https://github.com/actions/virtual-environments/issues/3353 + run: | + cat /etc/hosts + sudo sed -i "/don't remove this line/d" /etc/hosts || true + - name: create ci signature file for the dpdk cache key if: matrix.dpdk != '' || matrix.dpdk_shared != '' # This will collect most of DPDK related lines, so hash will be different diff --git a/Documentation/automake.mk b/Documentation/automake.mk index ea3475f356..2a590b3a79 100644 --- a/Documentation/automake.mk +++ b/Documentation/automake.mk @@ -216,8 +216,13 @@ install-man-rst: docs-check $(extract_stem_and_section); \ echo " $(MKDIR_P) '$(DESTDIR)'\"$$mandir\""; \ $(MKDIR_P) '$(DESTDIR)'"$$mandir"; \ - echo " $(INSTALL_DATA) $(SPHINXBUILDDIR)/man/$$stem.$$section '$(DESTDIR)'\"$$mandir/$$stem.$$section\""; \ - $(INSTALL_DATA) $(SPHINXBUILDDIR)/man/$$stem.$$section '$(DESTDIR)'"$$mandir/$$stem.$$section"; \ + if test -f $(SPHINXBUILDDIR)/man/$$stem.$$section; then \ + filepath=$(SPHINXBUILDDIR)/man/$$stem.$$section; \ + else \ + filepath=$(SPHINXBUILDDIR)/man/$$section/$$stem.$$section; \ + fi; \ + echo " $(INSTALL_DATA) $$filepath '$(DESTDIR)'\"$$mandir/$$stem.$$section\""; \ + $(INSTALL_DATA) $$filepath '$(DESTDIR)'"$$mandir/$$stem.$$section"; \ done else install-man-rst: diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index 6a5e4141f1..3bc34c892f 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -204,9 +204,9 @@ Q: What DPDK version does each Open vSwitch release work with? 2.10.x 17.11.10 2.11.x 18.11.9 2.12.x 18.11.9 - 2.13.x 19.11.2 - 2.14.x 19.11.2 - 2.15.x 20.11.0 + 2.13.x 19.11.8 + 2.14.x 19.11.8 + 2.15.x 20.11.1 ============ ======== Q: Are all the DPDK releases that OVS versions work with maintained? 
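The new ``PY_DEPS`` line in ``.cirrus.yml`` above deserves a note: instead of hard-coding ``py37-openssl`` and ``py37-sphinx``, the install script now asks ``pkg`` for whichever Python 3.x flavor of those packages the snapshot image currently ships. A sketch of how the install line expands, assuming a ports tree that ships ``py38`` packages (package names and versions below are illustrative, not actual CI output)::

    $ PY_DEPS="sphinx|openssl"
    $ pkg search -xq "^py3[0-9]+-(${PY_DEPS})-[0-9]+"
    py38-openssl-20.0.1
    py38-sphinx-3.5.4,1
    $ pkg install -y ${DEPENDENCIES} \
          $(pkg search -xq "^py3[0-9]+-(${PY_DEPS})-[0-9]+" | xargs)

Because the regex matches any ``py3[0-9]+`` prefix, the job keeps working when the FreeBSD image moves to a newer default Python.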
diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst index 3a24e54f97..612f2fdbc3 100644 --- a/Documentation/intro/install/dpdk.rst +++ b/Documentation/intro/install/dpdk.rst @@ -42,7 +42,7 @@ Build requirements In addition to the requirements described in :doc:`general`, building Open vSwitch with DPDK will require the following: -- DPDK 20.11 +- DPDK 20.11.1 - A `DPDK supported NIC`_ @@ -73,9 +73,9 @@ Install DPDK #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``:: $ cd /usr/src/ - $ wget https://fast.dpdk.org/rel/dpdk-20.11.tar.xz - $ tar xf dpdk-20.11.tar.xz - $ export DPDK_DIR=/usr/src/dpdk-20.11 + $ wget https://fast.dpdk.org/rel/dpdk-20.11.1.tar.xz + $ tar xf dpdk-20.11.1.tar.xz + $ export DPDK_DIR=/usr/src/dpdk-stable-20.11.1 $ cd $DPDK_DIR #. Configure and install DPDK using Meson diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst index caa7d97bef..e481e79414 100644 --- a/Documentation/topics/dpdk/pmd.rst +++ b/Documentation/topics/dpdk/pmd.rst @@ -239,7 +239,9 @@ If not set, the default variance improvement threshold is 25%. PMD Auto Load Balancing doesn't currently work if queues are assigned cross NUMA as actual processing load could get worse after assignment - as compared to what dry run predicts. + as compared to what dry run predicts. The only exception is when all + PMD threads are running on cores from a single NUMA node. In this case + Auto Load Balancing is still possible. The minimum time between 2 consecutive PMD auto load balancing iterations can also be configured by:: diff --git a/Documentation/topics/dpdk/qos.rst b/Documentation/topics/dpdk/qos.rst index 103495415a..a98ec672fc 100644 --- a/Documentation/topics/dpdk/qos.rst +++ b/Documentation/topics/dpdk/qos.rst @@ -69,22 +69,24 @@ to prioritize certain traffic over others at a port level. For example, the following configuration will limit the traffic rate at a port level to a maximum of 2000 packets a second (64 bytes IPv4 packets). -100pps as CIR (Committed Information Rate) and 1000pps as EIR (Excess -Information Rate). High priority traffic is routed to queue 10, which marks +1000pps as CIR (Committed Information Rate) and 1000pps as EIR (Excess +Information Rate). CIR and EIR are measured in bytes without Ethernet header. +As a result, 1000pps means (64-byte - 14-byte) * 1000 = 50,000 in the +configuration below. High priority traffic is routed to queue 10, which marks all traffic as CIR, i.e. Green. All low priority traffic, queue 20, is marked as EIR, i.e. 
Yellow:: $ ovs-vsctl --timeout=5 set port dpdk1 qos=@myqos -- \ --id=@myqos create qos type=trtcm-policer \ - other-config:cir=52000 other-config:cbs=2048 \ - other-config:eir=52000 other-config:ebs=2048 \ + other-config:cir=50000 other-config:cbs=2048 \ + other-config:eir=50000 other-config:ebs=2048 \ queues:10=@dpdk1Q10 queues:20=@dpdk1Q20 -- \ --id=@dpdk1Q10 create queue \ - other-config:cir=41600000 other-config:cbs=2048 \ + other-config:cir=100000 other-config:cbs=2048 \ other-config:eir=0 other-config:ebs=0 -- \ --id=@dpdk1Q20 create queue \ other-config:cir=0 other-config:cbs=0 \ - other-config:eir=41600000 other-config:ebs=2048 \ + other-config:eir=50000 other-config:ebs=2048 This configuration accomplishes that the high priority traffic has a guaranteed bandwidth egressing the ports at CIR (1000pps), but it can also diff --git a/NEWS b/NEWS index bc901efdb1..154a299d93 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,21 @@ +v2.15.2 - xx xxx xxxx +--------------------- + - OVS now reports the datapath capability 'ct_zero_snat', which reflects + whether the SNAT with all-zero IP address is supported. + See ovs-vswitchd.conf.db(5) for details. + +v2.15.1 - 01 Jul 2021 +--------------------- + - Bug fixes + - ovs-ctl: + * New option '--no-record-hostname' to disable hostname configuration + in ovsdb on startup. + * New command 'record-hostname-if-not-set' to update hostname in ovsdb. + - DPDK: + * OVS validated with DPDK 20.11.1. It is recommended to use this version + until further releases. + + v2.15.0 - 15 Feb 2021 --------------------- - OVSDB: diff --git a/acinclude.m4 b/acinclude.m4 index 435685c93d..15a54d636f 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -209,10 +209,10 @@ dnl Configure Linux tc compat. AC_DEFUN([OVS_CHECK_LINUX_TC], [ AC_COMPILE_IFELSE([ AC_LANG_PROGRAM([#include ], [ - int x = TCA_ACT_FLAGS; + int x = TCA_FLOWER_KEY_CT_FLAGS_REPLY; ])], - [AC_DEFINE([HAVE_TCA_ACT_FLAGS], [1], - [Define to 1 if TCA_ACT_FLAGS is available.])]) + [AC_DEFINE([HAVE_TCA_FLOWER_KEY_CT_FLAGS_REPLY], [1], + [Define to 1 if TCA_FLOWER_KEY_CT_FLAGS_REPLY is available.])]) AC_CHECK_MEMBERS([struct tcf_t.firstuse], [], [], [#include ]) diff --git a/configure.ac b/configure.ac index fd82d7d270..bcee218005 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. AC_PREREQ(2.63) -AC_INIT(openvswitch, 2.15.0, bugs@openvswitch.org) +AC_INIT(openvswitch, 2.15.2, bugs@openvswitch.org) AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 4f43369844..90ecb59f06 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -1112,9 +1112,9 @@ OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx, * should split the function and refactor. 
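 * Note on the hunk below: a frame whose EtherType is not 802.1Q was
 * previously treated as a silent no-op here; with this change it is
 * logged and rejected with NDIS_STATUS_INVALID_PACKET instead.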
*/ if (!bufferData) { EthHdr *ethHdr = (EthHdr *)bufferStart; - /* If the frame is not VLAN make it a no op */ if (ethHdr->Type != ETH_TYPE_802_1PQ_NBO) { - return NDIS_STATUS_SUCCESS; + OVS_LOG_ERROR("Invalid ethHdr type %u, nbl %p", ethHdr->Type, ovsFwdCtx->curNbl); + return NDIS_STATUS_INVALID_PACKET; } } RtlMoveMemory(bufferStart + shiftLength, bufferStart, shiftOffset); @@ -1137,6 +1137,9 @@ OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx, static __inline NDIS_STATUS OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) { + NDIS_STATUS status; + OVS_PACKET_HDR_INFO* layers = &ovsFwdCtx->layers; + /* * Declare a dummy vlanTag structure since we need to compute the size * of shiftLength. The NDIS one is a unionized structure. @@ -1145,7 +1148,15 @@ OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) UINT32 shiftLength = sizeof(vlanTag.TagHeader); UINT32 shiftOffset = sizeof(DL_EUI48) + sizeof(DL_EUI48); - return OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength, NULL); + status = OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength, + NULL); + + if (status == NDIS_STATUS_SUCCESS) { + layers->l3Offset -= (UINT16) shiftLength; + layers->l4Offset -= (UINT16) shiftLength; + } + + return status; } @@ -1550,9 +1561,21 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, if (tcpHdr) { portField = &tcpHdr->dest; checkField = &tcpHdr->check; + l4Offload = isTx ? (BOOLEAN)csumInfo.Transmit.TcpChecksum : + ((BOOLEAN)csumInfo.Receive.TcpChecksumSucceeded || + (BOOLEAN)csumInfo.Receive.TcpChecksumFailed); } else if (udpHdr) { portField = &udpHdr->dest; checkField = &udpHdr->check; + l4Offload = isTx ? (BOOLEAN)csumInfo.Transmit.UdpChecksum : + ((BOOLEAN)csumInfo.Receive.UdpChecksumSucceeded || + (BOOLEAN)csumInfo.Receive.UdpChecksumFailed); + } + + if (l4Offload) { + *checkField = IPPseudoChecksum(&ipHdr->saddr, &newAddr, + tcpHdr ? 
IPPROTO_TCP : IPPROTO_UDP, + ntohs(ipHdr->tot_len) - ipHdr->ihl * 4); } } @@ -2088,6 +2111,7 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext, */ status = OvsPopVlanInPktBuf(&ovsFwdCtx); if (status != NDIS_STATUS_SUCCESS) { + OVS_LOG_ERROR("OVS-pop vlan action failed status = %lu", status); dropReason = L"OVS-pop vlan action failed"; goto dropit; } diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj index d50a126b43..18f884f41b 100644 --- a/datapath-windows/ovsext/ovsext.vcxproj +++ b/datapath-windows/ovsext/ovsext.vcxproj @@ -192,22 +192,39 @@ true + $(CRT_IncludePath);$(KM_IncludePath); true + $(CRT_IncludePath);$(KM_IncludePath); true ..\misc\DriverRecommendedRules.ruleset true + $(CRT_IncludePath);$(KM_IncludePath); true ..\misc\DriverRecommendedRules.ruleset + $(CRT_IncludePath);$(KM_IncludePath); true ..\misc\DriverRecommendedRules.ruleset + $(CRT_IncludePath);$(KM_IncludePath); + + + $(CRT_IncludePath);$(KM_IncludePath); + + + $(CRT_IncludePath);$(KM_IncludePath); + + + $(CRT_IncludePath);$(KM_IncludePath); + + + $(CRT_IncludePath);$(KM_IncludePath); diff --git a/debian/changelog b/debian/changelog index 1f2b7a3668..ed5b127e59 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,15 @@ +openvswitch (2.15.2-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team Thu, 01 Jul 2021 20:19:24 +0200 + +openvswitch (2.15.1-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team Thu, 01 Jul 2021 20:19:24 +0200 + openvswitch (2.15.0-1) unstable; urgency=low * New upstream version diff --git a/dpdk/VERSION b/dpdk/VERSION index 8b0beab16a..2dbbe00e67 100644 --- a/dpdk/VERSION +++ b/dpdk/VERSION @@ -1 +1 @@ -20.11.0 +20.11.1 diff --git a/dpdk/app/meson.build b/dpdk/app/meson.build index eb74f215a3..87fc195dbf 100644 --- a/dpdk/app/meson.build +++ b/dpdk/app/meson.build @@ -25,6 +25,10 @@ apps = [ lib_execinfo = cc.find_library('execinfo', required: false) default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API'] +default_ldflags = [] +if get_option('default_library') == 'static' and not is_windows + default_ldflags += ['-Wl,--export-dynamic'] +endif foreach app:apps build = true @@ -32,6 +36,7 @@ foreach app:apps sources = [] includes = [] cflags = default_cflags + ldflags = default_ldflags objs = [] # other object files to link against, used e.g. 
for # instruction-set optimized versions of code @@ -58,8 +63,10 @@ foreach app:apps executable('dpdk-' + name, sources, c_args: cflags, + link_args: ldflags, link_whole: link_libs, dependencies: dep_objs, + include_directories: includes, install_rpath: join_paths(get_option('prefix'), driver_install_path), install: true) diff --git a/dpdk/app/proc-info/main.c b/dpdk/app/proc-info/main.c index d743209f0d..b9587f7ded 100644 --- a/dpdk/app/proc-info/main.c +++ b/dpdk/app/proc-info/main.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -301,14 +302,13 @@ proc_info_parse_args(int argc, char **argv) } else if (!strncmp(long_option[option_index].name, "xstats-ids", MAX_LONG_OPT_SZ)) { - nb_xstats_ids = parse_xstats_ids(optarg, + int ret = parse_xstats_ids(optarg, xstats_ids, MAX_NB_XSTATS_IDS); - - if (nb_xstats_ids <= 0) { + if (ret <= 0) { printf("xstats-id list parse error.\n"); return -1; } - + nb_xstats_ids = ret; } break; default: @@ -420,11 +420,9 @@ static void collectd_resolve_cnt_type(char *cnt_type, size_t cnt_type_len, } else if ((type_end != NULL) && (strncmp(cnt_name, "flow_", strlen("flow_"))) == 0) { if (strncmp(type_end, "_filters", strlen("_filters")) == 0) - strlcpy(cnt_type, "operations", cnt_type_len); + strlcpy(cnt_type, "filter_result", cnt_type_len); else if (strncmp(type_end, "_errors", strlen("_errors")) == 0) strlcpy(cnt_type, "errors", cnt_type_len); - else if (strncmp(type_end, "_filters", strlen("_filters")) == 0) - strlcpy(cnt_type, "filter_result", cnt_type_len); } else if ((type_end != NULL) && (strncmp(cnt_name, "mac_", strlen("mac_"))) == 0) { if (strncmp(type_end, "_errors", strlen("_errors")) == 0) @@ -648,11 +646,16 @@ metrics_display(int port_id) } static void -show_security_context(uint16_t portid) +show_security_context(uint16_t portid, bool inline_offload) { - void *p_ctx = rte_eth_dev_get_sec_ctx(portid); + void *p_ctx; const struct rte_security_capability *s_cap; + if (inline_offload) + p_ctx = rte_eth_dev_get_sec_ctx(portid); + else + p_ctx = rte_cryptodev_get_sec_ctx(portid); + if (p_ctx == NULL) return; @@ -859,7 +862,7 @@ show_port(void) } #ifdef RTE_LIB_SECURITY - show_security_context(i); + show_security_context(i, true); #endif } } @@ -1210,7 +1213,6 @@ show_crypto(void) display_crypto_feature_info(dev_info.feature_flags); - memset(&stats, 0, sizeof(0)); if (rte_cryptodev_stats_get(i, &stats) == 0) { printf("\t -- stats\n"); printf("\t\t + enqueue count (%"PRIu64")" @@ -1224,7 +1226,7 @@ show_crypto(void) } #ifdef RTE_LIB_SECURITY - show_security_context(i); + show_security_context(i, false); #endif } } @@ -1268,8 +1270,6 @@ show_ring(char *name) static void show_mempool(char *name) { - uint64_t flags = 0; - snprintf(bdr_str, MAX_STRING_LEN, " show - MEMPOOL "); STATS_BDR_STR(10, bdr_str); @@ -1277,8 +1277,8 @@ show_mempool(char *name) struct rte_mempool *ptr = rte_mempool_lookup(name); if (ptr != NULL) { struct rte_mempool_ops *ops; + uint64_t flags = ptr->flags; - flags = ptr->flags; ops = rte_mempool_get_ops(ptr->ops_index); printf(" - Name: %s on socket %d\n" " - flags:\n" diff --git a/dpdk/app/test-crypto-perf/cperf_options_parsing.c b/dpdk/app/test-crypto-perf/cperf_options_parsing.c index 03ed6f5942..0466f7baf8 100644 --- a/dpdk/app/test-crypto-perf/cperf_options_parsing.c +++ b/dpdk/app/test-crypto-perf/cperf_options_parsing.c @@ -24,7 +24,7 @@ usage(char *progname) { printf("%s [EAL options] --\n" " --silent: disable options dump\n" - " --ptest throughput / latency / verify / pmd-cycleount :" + " 
--ptest throughput / latency / verify / pmd-cyclecount :" " set test type\n" " --pool_sz N: set the number of crypto ops/mbufs allocated\n" " --total-ops N: set the number of total operations performed\n" diff --git a/dpdk/app/test-crypto-perf/cperf_test_latency.c b/dpdk/app/test-crypto-perf/cperf_test_latency.c index 0e4d0e1538..159fe8492b 100644 --- a/dpdk/app/test-crypto-perf/cperf_test_latency.c +++ b/dpdk/app/test-crypto-perf/cperf_test_latency.c @@ -310,11 +310,11 @@ cperf_latency_test_runner(void *arg) if (ctx->options->csv) { if (rte_atomic16_test_and_set(&display_once)) printf("\n# lcore, Buffer Size, Burst Size, Pakt Seq #, " - "Packet Size, cycles, time (us)"); + "cycles, time (us)"); for (i = 0; i < ctx->options->total_ops; i++) { - printf("\n%u;%u;%u;%"PRIu64";%"PRIu64";%.3f", + printf("\n%u,%u,%u,%"PRIu64",%"PRIu64",%.3f", ctx->lcore_id, ctx->options->test_buffer_size, test_burst_size, i + 1, ctx->res[i].tsc_end - ctx->res[i].tsc_start, diff --git a/dpdk/app/test-crypto-perf/cperf_test_pmd_cyclecount.c b/dpdk/app/test-crypto-perf/cperf_test_pmd_cyclecount.c index 4e67d3aebd..844659aeca 100644 --- a/dpdk/app/test-crypto-perf/cperf_test_pmd_cyclecount.c +++ b/dpdk/app/test-crypto-perf/cperf_test_pmd_cyclecount.c @@ -16,7 +16,7 @@ #define PRETTY_HDR_FMT "%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s\n\n" #define PRETTY_LINE_FMT "%12u%12u%12u%12u%12u%12u%12u%12.0f%12.0f%12.0f\n" #define CSV_HDR_FMT "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" -#define CSV_LINE_FMT "%10u;%10u;%u;%u;%u;%u;%u;%.3f;%.3f;%.3f\n" +#define CSV_LINE_FMT "%10u,%10u,%u,%u,%u,%u,%u,%.3f,%.3f,%.3f\n" struct cperf_pmd_cyclecount_ctx { uint8_t dev_id; diff --git a/dpdk/app/test-crypto-perf/cperf_test_throughput.c b/dpdk/app/test-crypto-perf/cperf_test_throughput.c index f30f7d5c2c..f6eb8cf259 100644 --- a/dpdk/app/test-crypto-perf/cperf_test_throughput.c +++ b/dpdk/app/test-crypto-perf/cperf_test_throughput.c @@ -299,8 +299,8 @@ cperf_throughput_test_runner(void *test_ctx) "Failed Deq,Ops(Millions),Throughput(Gbps)," "Cycles/Buf\n\n"); - printf("%u;%u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";" - "%.3f;%.3f;%.3f\n", + printf("%u,%u,%u,%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64"," + "%.3f,%.3f,%.3f\n", ctx->lcore_id, ctx->options->test_buffer_size, test_burst_size, diff --git a/dpdk/app/test-crypto-perf/cperf_test_verify.c b/dpdk/app/test-crypto-perf/cperf_test_verify.c index 833bc9a552..2939aeaa93 100644 --- a/dpdk/app/test-crypto-perf/cperf_test_verify.c +++ b/dpdk/app/test-crypto-perf/cperf_test_verify.c @@ -406,7 +406,7 @@ cperf_verify_test_runner(void *test_ctx) "Burst Size,Enqueued,Dequeued,Failed Enq," "Failed Deq,Failed Ops\n"); - printf("%10u;%10u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";" + printf("%10u,%10u,%u,%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64"," "%"PRIu64"\n", ctx->lcore_id, ctx->options->max_buffer_size, diff --git a/dpdk/app/test-crypto-perf/main.c b/dpdk/app/test-crypto-perf/main.c index 5f035519c3..49af812d8b 100644 --- a/dpdk/app/test-crypto-perf/main.c +++ b/dpdk/app/test-crypto-perf/main.c @@ -390,7 +390,7 @@ cperf_check_test_vector(struct cperf_options *opts, if (opts->cipher_algo == RTE_CRYPTO_CIPHER_NULL) { if (test_vec->plaintext.data == NULL) return -1; - } else if (opts->cipher_algo != RTE_CRYPTO_CIPHER_NULL) { + } else { if (test_vec->plaintext.data == NULL) return -1; if (test_vec->plaintext.length < opts->max_buffer_size) @@ -440,7 +440,7 @@ cperf_check_test_vector(struct cperf_options *opts, return -1; if (test_vec->plaintext.length < opts->max_buffer_size) return -1; - } else if 
(opts->cipher_algo != RTE_CRYPTO_CIPHER_NULL) { + } else { if (test_vec->plaintext.data == NULL) return -1; if (test_vec->plaintext.length < opts->max_buffer_size) @@ -530,14 +530,14 @@ main(int argc, char **argv) ret = cperf_options_parse(&opts, argc, argv); if (ret) { - RTE_LOG(ERR, USER1, "Parsing on or more user options failed\n"); + RTE_LOG(ERR, USER1, "Parsing one or more user options failed\n"); goto err; } ret = cperf_options_check(&opts); if (ret) { RTE_LOG(ERR, USER1, - "Checking on or more user options failed\n"); + "Checking one or more user options failed\n"); goto err; } diff --git a/dpdk/app/test-eventdev/test_perf_common.h b/dpdk/app/test-eventdev/test_perf_common.h index ff9705df88..e7233e5a5b 100644 --- a/dpdk/app/test-eventdev/test_perf_common.h +++ b/dpdk/app/test-eventdev/test_perf_common.h @@ -97,8 +97,13 @@ perf_process_last_stage(struct rte_mempool *const pool, void *bufs[], int const buf_sz, uint8_t count) { bufs[count++] = ev->event_ptr; - w->processed_pkts++; + + /* wmb here ensures event_prt is stored before + * updating the number of processed packets + * for worker lcores + */ rte_smp_wmb(); + w->processed_pkts++; if (unlikely(count == buf_sz)) { count = 0; @@ -116,6 +121,12 @@ perf_process_last_stage_latency(struct rte_mempool *const pool, struct perf_elt *const m = ev->event_ptr; bufs[count++] = ev->event_ptr; + + /* wmb here ensures event_prt is stored before + * updating the number of processed packets + * for worker lcores + */ + rte_smp_wmb(); w->processed_pkts++; if (unlikely(count == buf_sz)) { @@ -127,7 +138,6 @@ perf_process_last_stage_latency(struct rte_mempool *const pool, } w->latency += latency; - rte_smp_wmb(); return count; } diff --git a/dpdk/app/test-eventdev/test_pipeline_queue.c b/dpdk/app/test-eventdev/test_pipeline_queue.c index 7bebac34fc..9a9febb199 100644 --- a/dpdk/app/test-eventdev/test_pipeline_queue.c +++ b/dpdk/app/test-eventdev/test_pipeline_queue.c @@ -83,16 +83,15 @@ pipeline_queue_worker_single_stage_burst_tx(void *arg) rte_prefetch0(ev[i + 1].mbuf); if (ev[i].sched_type == RTE_SCHED_TYPE_ATOMIC) { pipeline_event_tx(dev, port, &ev[i]); - ev[i].op = RTE_EVENT_OP_RELEASE; w->processed_pkts++; } else { ev[i].queue_id++; pipeline_fwd_event(&ev[i], RTE_SCHED_TYPE_ATOMIC); + pipeline_event_enqueue_burst(dev, port, ev, + nb_rx); } } - - pipeline_event_enqueue_burst(dev, port, ev, nb_rx); } return 0; @@ -180,13 +179,13 @@ pipeline_queue_worker_multi_stage_fwd(void *arg) ev.queue_id = tx_queue[ev.mbuf->port]; rte_event_eth_tx_adapter_txq_set(ev.mbuf, 0); pipeline_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC); + pipeline_event_enqueue(dev, port, &ev); w->processed_pkts++; } else { ev.queue_id++; pipeline_fwd_event(&ev, sched_type_list[cq_id]); + pipeline_event_enqueue(dev, port, &ev); } - - pipeline_event_enqueue(dev, port, &ev); } return 0; @@ -213,7 +212,6 @@ pipeline_queue_worker_multi_stage_burst_tx(void *arg) if (ev[i].queue_id == tx_queue[ev[i].mbuf->port]) { pipeline_event_tx(dev, port, &ev[i]); - ev[i].op = RTE_EVENT_OP_RELEASE; w->processed_pkts++; continue; } @@ -222,9 +220,8 @@ pipeline_queue_worker_multi_stage_burst_tx(void *arg) pipeline_fwd_event(&ev[i], cq_id != last_queue ? 
sched_type_list[cq_id] : RTE_SCHED_TYPE_ATOMIC); + pipeline_event_enqueue_burst(dev, port, ev, nb_rx); } - - pipeline_event_enqueue_burst(dev, port, ev, nb_rx); } return 0; @@ -237,6 +234,7 @@ pipeline_queue_worker_multi_stage_burst_fwd(void *arg) const uint8_t *tx_queue = t->tx_evqueue_id; while (t->done == false) { + uint16_t processed_pkts = 0; uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev, BURST_SIZE, 0); @@ -254,7 +252,7 @@ pipeline_queue_worker_multi_stage_burst_fwd(void *arg) rte_event_eth_tx_adapter_txq_set(ev[i].mbuf, 0); pipeline_fwd_event(&ev[i], RTE_SCHED_TYPE_ATOMIC); - w->processed_pkts++; + processed_pkts++; } else { ev[i].queue_id++; pipeline_fwd_event(&ev[i], @@ -263,6 +261,7 @@ pipeline_queue_worker_multi_stage_burst_fwd(void *arg) } pipeline_event_enqueue_burst(dev, port, ev, nb_rx); + w->processed_pkts += processed_pkts; } return 0; diff --git a/dpdk/app/test-flow-perf/actions_gen.c b/dpdk/app/test-flow-perf/actions_gen.c index ac525f6fdb..f265894247 100644 --- a/dpdk/app/test-flow-perf/actions_gen.c +++ b/dpdk/app/test-flow-perf/actions_gen.c @@ -145,12 +145,10 @@ add_set_meta(struct rte_flow_action *actions, uint8_t actions_counter, __rte_unused struct additional_para para) { - static struct rte_flow_action_set_meta meta_action; - - do { - meta_action.data = RTE_BE32(META_DATA); - meta_action.mask = RTE_BE32(0xffffffff); - } while (0); + static struct rte_flow_action_set_meta meta_action = { + .data = RTE_BE32(META_DATA), + .mask = RTE_BE32(0xffffffff), + }; actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_META; actions[actions_counter].conf = &meta_action; @@ -161,13 +159,11 @@ add_set_tag(struct rte_flow_action *actions, uint8_t actions_counter, __rte_unused struct additional_para para) { - static struct rte_flow_action_set_tag tag_action; - - do { - tag_action.data = RTE_BE32(META_DATA); - tag_action.mask = RTE_BE32(0xffffffff); - tag_action.index = TAG_INDEX; - } while (0); + static struct rte_flow_action_set_tag tag_action = { + .data = RTE_BE32(META_DATA), + .mask = RTE_BE32(0xffffffff), + .index = TAG_INDEX, + }; actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_TAG; actions[actions_counter].conf = &tag_action; @@ -178,11 +174,9 @@ add_port_id(struct rte_flow_action *actions, uint8_t actions_counter, __rte_unused struct additional_para para) { - static struct rte_flow_action_port_id port_id; - - do { - port_id.id = PORT_ID_DST; - } while (0); + static struct rte_flow_action_port_id port_id = { + .id = PORT_ID_DST, + }; actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_PORT_ID; actions[actions_counter].conf = &port_id; diff --git a/dpdk/app/test-flow-perf/items_gen.c b/dpdk/app/test-flow-perf/items_gen.c index 2b1ab41467..aaa243a7c4 100644 --- a/dpdk/app/test-flow-perf/items_gen.c +++ b/dpdk/app/test-flow-perf/items_gen.c @@ -25,9 +25,6 @@ add_ether(struct rte_flow_item *items, static struct rte_flow_item_eth eth_spec; static struct rte_flow_item_eth eth_mask; - memset(ð_spec, 0, sizeof(struct rte_flow_item_eth)); - memset(ð_mask, 0, sizeof(struct rte_flow_item_eth)); - items[items_counter].type = RTE_FLOW_ITEM_TYPE_ETH; items[items_counter].spec = ð_spec; items[items_counter].mask = ð_mask; @@ -38,16 +35,12 @@ add_vlan(struct rte_flow_item *items, uint8_t items_counter, __rte_unused struct additional_para para) { - static struct rte_flow_item_vlan vlan_spec; - static struct rte_flow_item_vlan vlan_mask; - - uint16_t vlan_value = VLAN_VALUE; - - memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan)); - memset(&vlan_mask, 0, 
sizeof(struct rte_flow_item_vlan)); - - vlan_spec.tci = RTE_BE16(vlan_value); - vlan_mask.tci = RTE_BE16(0xffff); + static struct rte_flow_item_vlan vlan_spec = { + .tci = RTE_BE16(VLAN_VALUE), + }; + static struct rte_flow_item_vlan vlan_mask = { + .tci = RTE_BE16(0xffff), + }; items[items_counter].type = RTE_FLOW_ITEM_TYPE_VLAN; items[items_counter].spec = &vlan_spec; @@ -61,9 +54,6 @@ add_ipv4(struct rte_flow_item *items, static struct rte_flow_item_ipv4 ipv4_spec; static struct rte_flow_item_ipv4 ipv4_mask; - memset(&ipv4_spec, 0, sizeof(struct rte_flow_item_ipv4)); - memset(&ipv4_mask, 0, sizeof(struct rte_flow_item_ipv4)); - ipv4_spec.hdr.src_addr = RTE_BE32(para.src_ip); ipv4_mask.hdr.src_addr = RTE_BE32(0xffffffff); @@ -80,9 +70,6 @@ add_ipv6(struct rte_flow_item *items, static struct rte_flow_item_ipv6 ipv6_spec; static struct rte_flow_item_ipv6 ipv6_mask; - memset(&ipv6_spec, 0, sizeof(struct rte_flow_item_ipv6)); - memset(&ipv6_mask, 0, sizeof(struct rte_flow_item_ipv6)); - /** Set ipv6 src **/ memset(&ipv6_spec.hdr.src_addr, para.src_ip, sizeof(ipv6_spec.hdr.src_addr) / 2); @@ -104,9 +91,6 @@ add_tcp(struct rte_flow_item *items, static struct rte_flow_item_tcp tcp_spec; static struct rte_flow_item_tcp tcp_mask; - memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp)); - memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp)); - items[items_counter].type = RTE_FLOW_ITEM_TYPE_TCP; items[items_counter].spec = &tcp_spec; items[items_counter].mask = &tcp_mask; @@ -120,9 +104,6 @@ add_udp(struct rte_flow_item *items, static struct rte_flow_item_udp udp_spec; static struct rte_flow_item_udp udp_mask; - memset(&udp_spec, 0, sizeof(struct rte_flow_item_udp)); - memset(&udp_mask, 0, sizeof(struct rte_flow_item_udp)); - items[items_counter].type = RTE_FLOW_ITEM_TYPE_UDP; items[items_counter].spec = &udp_spec; items[items_counter].mask = &udp_mask; @@ -141,9 +122,6 @@ add_vxlan(struct rte_flow_item *items, vni_value = VNI_VALUE; - memset(&vxlan_spec, 0, sizeof(struct rte_flow_item_vxlan)); - memset(&vxlan_mask, 0, sizeof(struct rte_flow_item_vxlan)); - /* Set standard vxlan vni */ for (i = 0; i < 3; i++) { vxlan_spec.vni[2 - i] = vni_value >> (i * 8); @@ -171,9 +149,6 @@ add_vxlan_gpe(struct rte_flow_item *items, vni_value = VNI_VALUE; - memset(&vxlan_gpe_spec, 0, sizeof(struct rte_flow_item_vxlan_gpe)); - memset(&vxlan_gpe_mask, 0, sizeof(struct rte_flow_item_vxlan_gpe)); - /* Set vxlan-gpe vni */ for (i = 0; i < 3; i++) { vxlan_gpe_spec.vni[2 - i] = vni_value >> (i * 8); @@ -193,18 +168,12 @@ add_gre(struct rte_flow_item *items, uint8_t items_counter, __rte_unused struct additional_para para) { - static struct rte_flow_item_gre gre_spec; - static struct rte_flow_item_gre gre_mask; - - uint16_t proto; - - proto = RTE_ETHER_TYPE_TEB; - - memset(&gre_spec, 0, sizeof(struct rte_flow_item_gre)); - memset(&gre_mask, 0, sizeof(struct rte_flow_item_gre)); - - gre_spec.protocol = RTE_BE16(proto); - gre_mask.protocol = RTE_BE16(0xffff); + static struct rte_flow_item_gre gre_spec = { + .protocol = RTE_BE16(RTE_ETHER_TYPE_TEB), + }; + static struct rte_flow_item_gre gre_mask = { + .protocol = RTE_BE16(0xffff), + }; items[items_counter].type = RTE_FLOW_ITEM_TYPE_GRE; items[items_counter].spec = &gre_spec; @@ -224,9 +193,6 @@ add_geneve(struct rte_flow_item *items, vni_value = VNI_VALUE; - memset(&geneve_spec, 0, sizeof(struct rte_flow_item_geneve)); - memset(&geneve_mask, 0, sizeof(struct rte_flow_item_geneve)); - for (i = 0; i < 3; i++) { geneve_spec.vni[2 - i] = vni_value >> (i * 8); 
geneve_mask.vni[2 - i] = 0xff; @@ -242,18 +208,12 @@ add_gtp(struct rte_flow_item *items, uint8_t items_counter, __rte_unused struct additional_para para) { - static struct rte_flow_item_gtp gtp_spec; - static struct rte_flow_item_gtp gtp_mask; - - uint32_t teid_value; - - teid_value = TEID_VALUE; - - memset(>p_spec, 0, sizeof(struct rte_flow_item_gtp)); - memset(>p_mask, 0, sizeof(struct rte_flow_item_gtp)); - - gtp_spec.teid = RTE_BE32(teid_value); - gtp_mask.teid = RTE_BE32(0xffffffff); + static struct rte_flow_item_gtp gtp_spec = { + .teid = RTE_BE32(TEID_VALUE), + }; + static struct rte_flow_item_gtp gtp_mask = { + .teid = RTE_BE32(0xffffffff), + }; items[items_counter].type = RTE_FLOW_ITEM_TYPE_GTP; items[items_counter].spec = >p_spec; @@ -265,18 +225,12 @@ add_meta_data(struct rte_flow_item *items, uint8_t items_counter, __rte_unused struct additional_para para) { - static struct rte_flow_item_meta meta_spec; - static struct rte_flow_item_meta meta_mask; - - uint32_t data; - - data = META_DATA; - - memset(&meta_spec, 0, sizeof(struct rte_flow_item_meta)); - memset(&meta_mask, 0, sizeof(struct rte_flow_item_meta)); - - meta_spec.data = RTE_BE32(data); - meta_mask.data = RTE_BE32(0xffffffff); + static struct rte_flow_item_meta meta_spec = { + .data = RTE_BE32(META_DATA), + }; + static struct rte_flow_item_meta meta_mask = { + .data = RTE_BE32(0xffffffff), + }; items[items_counter].type = RTE_FLOW_ITEM_TYPE_META; items[items_counter].spec = &meta_spec; @@ -289,21 +243,14 @@ add_meta_tag(struct rte_flow_item *items, uint8_t items_counter, __rte_unused struct additional_para para) { - static struct rte_flow_item_tag tag_spec; - static struct rte_flow_item_tag tag_mask; - uint32_t data; - uint8_t index; - - data = META_DATA; - index = TAG_INDEX; - - memset(&tag_spec, 0, sizeof(struct rte_flow_item_tag)); - memset(&tag_mask, 0, sizeof(struct rte_flow_item_tag)); - - tag_spec.data = RTE_BE32(data); - tag_mask.data = RTE_BE32(0xffffffff); - tag_spec.index = index; - tag_mask.index = 0xff; + static struct rte_flow_item_tag tag_spec = { + .data = RTE_BE32(META_DATA), + .index = TAG_INDEX, + }; + static struct rte_flow_item_tag tag_mask = { + .data = RTE_BE32(0xffffffff), + .index = 0xff, + }; items[items_counter].type = RTE_FLOW_ITEM_TYPE_TAG; items[items_counter].spec = &tag_spec; @@ -318,9 +265,6 @@ add_icmpv4(struct rte_flow_item *items, static struct rte_flow_item_icmp icmpv4_spec; static struct rte_flow_item_icmp icmpv4_mask; - memset(&icmpv4_spec, 0, sizeof(struct rte_flow_item_icmp)); - memset(&icmpv4_mask, 0, sizeof(struct rte_flow_item_icmp)); - items[items_counter].type = RTE_FLOW_ITEM_TYPE_ICMP; items[items_counter].spec = &icmpv4_spec; items[items_counter].mask = &icmpv4_mask; @@ -334,9 +278,6 @@ add_icmpv6(struct rte_flow_item *items, static struct rte_flow_item_icmp6 icmpv6_spec; static struct rte_flow_item_icmp6 icmpv6_mask; - memset(&icmpv6_spec, 0, sizeof(struct rte_flow_item_icmp6)); - memset(&icmpv6_mask, 0, sizeof(struct rte_flow_item_icmp6)); - items[items_counter].type = RTE_FLOW_ITEM_TYPE_ICMP6; items[items_counter].spec = &icmpv6_spec; items[items_counter].mask = &icmpv6_mask; diff --git a/dpdk/app/test-pmd/cmdline.c b/dpdk/app/test-pmd/cmdline.c index 0d2d6aad05..2b9dd3e1f4 100644 --- a/dpdk/app/test-pmd/cmdline.c +++ b/dpdk/app/test-pmd/cmdline.c @@ -163,7 +163,7 @@ static void cmd_help_long_parsed(void *parsed_result, "Display:\n" "--------\n\n" - "show port (info|stats|summary|xstats|fdir|stat_qmap|dcb_tc|cap) (port_id|all)\n" + "show port 
(info|stats|summary|xstats|fdir|dcb_tc|cap) (port_id|all)\n" " Display information for port_id, or all.\n\n" "show port port_id (module_eeprom|eeprom)\n" @@ -177,7 +177,7 @@ static void cmd_help_long_parsed(void *parsed_result, "show port (port_id) rss-hash [key]\n" " Display the RSS hash functions and RSS hash key of port\n\n" - "clear port (info|stats|xstats|fdir|stat_qmap) (port_id|all)\n" + "clear port (info|stats|xstats|fdir) (port_id|all)\n" " Clear information for port_id, or all.\n\n" "show (rxq|txq) info (port_id) (queue_id)\n" @@ -1877,7 +1877,9 @@ cmd_config_max_pkt_len_parsed(void *parsed_result, __rte_unused void *data) { struct cmd_config_max_pkt_len_result *res = parsed_result; + uint32_t max_rx_pkt_len_backup = 0; portid_t pid; + int ret; if (!all_ports_stopped()) { printf("Please stop all ports first\n"); @@ -1886,7 +1888,6 @@ cmd_config_max_pkt_len_parsed(void *parsed_result, RTE_ETH_FOREACH_DEV(pid) { struct rte_port *port = &ports[pid]; - uint64_t rx_offloads = port->dev_conf.rxmode.offloads; if (!strcmp(res->name, "max-pkt-len")) { if (res->value < RTE_ETHER_MIN_LEN) { @@ -1897,12 +1898,18 @@ cmd_config_max_pkt_len_parsed(void *parsed_result, if (res->value == port->dev_conf.rxmode.max_rx_pkt_len) return; + ret = eth_dev_info_get_print_err(pid, &port->dev_info); + if (ret != 0) { + printf("rte_eth_dev_info_get() failed for port %u\n", + pid); + return; + } + + max_rx_pkt_len_backup = port->dev_conf.rxmode.max_rx_pkt_len; + port->dev_conf.rxmode.max_rx_pkt_len = res->value; - if (res->value > RTE_ETHER_MAX_LEN) - rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; - else - rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME; - port->dev_conf.rxmode.offloads = rx_offloads; + if (update_jumbo_frame_offload(pid) != 0) + port->dev_conf.rxmode.max_rx_pkt_len = max_rx_pkt_len_backup; } else { printf("Unknown parameter\n"); return; @@ -3782,6 +3789,7 @@ cmd_set_rxoffs_parsed(void *parsed_result, MAX_SEGS_BUFFER_SPLIT, seg_offsets, 0); if (nb_segs > 0) set_rx_pkt_offsets(seg_offsets, nb_segs); + cmd_reconfig_device_queue(RTE_PORT_ALL, 0, 1); } cmdline_parse_token_string_t cmd_set_rxoffs_keyword = @@ -3828,6 +3836,7 @@ cmd_set_rxpkts_parsed(void *parsed_result, MAX_SEGS_BUFFER_SPLIT, seg_lengths, 0); if (nb_segs > 0) set_rx_pkt_segments(seg_lengths, nb_segs); + cmd_reconfig_device_queue(RTE_PORT_ALL, 0, 1); } cmdline_parse_token_string_t cmd_set_rxpkts_keyword = @@ -7555,9 +7564,6 @@ static void cmd_showportall_parsed(void *parsed_result, RTE_ETH_FOREACH_DEV(i) fdir_get_infos(i); #endif - else if (!strcmp(res->what, "stat_qmap")) - RTE_ETH_FOREACH_DEV(i) - nic_stats_mapping_display(i); else if (!strcmp(res->what, "dcb_tc")) RTE_ETH_FOREACH_DEV(i) port_dcb_info_display(i); @@ -7573,14 +7579,14 @@ cmdline_parse_token_string_t cmd_showportall_port = TOKEN_STRING_INITIALIZER(struct cmd_showportall_result, port, "port"); cmdline_parse_token_string_t cmd_showportall_what = TOKEN_STRING_INITIALIZER(struct cmd_showportall_result, what, - "info#summary#stats#xstats#fdir#stat_qmap#dcb_tc#cap"); + "info#summary#stats#xstats#fdir#dcb_tc#cap"); cmdline_parse_token_string_t cmd_showportall_all = TOKEN_STRING_INITIALIZER(struct cmd_showportall_result, all, "all"); cmdline_parse_inst_t cmd_showportall = { .f = cmd_showportall_parsed, .data = NULL, .help_str = "show|clear port " - "info|summary|stats|xstats|fdir|stat_qmap|dcb_tc|cap all", + "info|summary|stats|xstats|fdir|dcb_tc|cap all", .tokens = { (void *)&cmd_showportall_show, (void *)&cmd_showportall_port, @@ -7622,8 +7628,6 @@ static void 
cmd_showport_parsed(void *parsed_result, else if (!strcmp(res->what, "fdir")) fdir_get_infos(res->portnum); #endif - else if (!strcmp(res->what, "stat_qmap")) - nic_stats_mapping_display(res->portnum); else if (!strcmp(res->what, "dcb_tc")) port_dcb_info_display(res->portnum); else if (!strcmp(res->what, "cap")) @@ -7637,7 +7641,7 @@ cmdline_parse_token_string_t cmd_showport_port = TOKEN_STRING_INITIALIZER(struct cmd_showport_result, port, "port"); cmdline_parse_token_string_t cmd_showport_what = TOKEN_STRING_INITIALIZER(struct cmd_showport_result, what, - "info#summary#stats#xstats#fdir#stat_qmap#dcb_tc#cap"); + "info#summary#stats#xstats#fdir#dcb_tc#cap"); cmdline_parse_token_num_t cmd_showport_portnum = TOKEN_NUM_INITIALIZER(struct cmd_showport_result, portnum, RTE_UINT16); @@ -7645,7 +7649,7 @@ cmdline_parse_inst_t cmd_showport = { .f = cmd_showport_parsed, .data = NULL, .help_str = "show|clear port " - "info|summary|stats|xstats|fdir|stat_qmap|dcb_tc|cap " + "info|summary|stats|xstats|fdir|dcb_tc|cap " "", .tokens = { (void *)&cmd_showport_show, @@ -17112,6 +17116,7 @@ cmdline_read_from_file(const char *filename) void prompt(void) { + int ret; /* initialize non-constant commands */ cmd_set_fwd_mode_init(); cmd_set_fwd_retry_mode_init(); @@ -17119,15 +17124,23 @@ prompt(void) testpmd_cl = cmdline_stdin_new(main_ctx, "testpmd> "); if (testpmd_cl == NULL) return; + + ret = atexit(prompt_exit); + if (ret != 0) + printf("Cannot set exit function for cmdline\n"); + cmdline_interact(testpmd_cl); - cmdline_stdin_exit(testpmd_cl); + if (ret != 0) + cmdline_stdin_exit(testpmd_cl); } void prompt_exit(void) { - if (testpmd_cl != NULL) + if (testpmd_cl != NULL) { cmdline_quit(testpmd_cl); + cmdline_stdin_exit(testpmd_cl); + } } static void diff --git a/dpdk/app/test-pmd/cmdline_flow.c b/dpdk/app/test-pmd/cmdline_flow.c index 585cab98b4..de80924e7c 100644 --- a/dpdk/app/test-pmd/cmdline_flow.c +++ b/dpdk/app/test-pmd/cmdline_flow.c @@ -3403,7 +3403,10 @@ static const struct token token_list[] = { .name = "key", .help = "RSS hash key", .next = NEXT(action_rss, NEXT_ENTRY(HEX)), - .args = ARGS(ARGS_ENTRY_ARB(0, 0), + .args = ARGS(ARGS_ENTRY_ARB + (offsetof(struct action_rss_data, conf) + + offsetof(struct rte_flow_action_rss, key), + sizeof(((struct rte_flow_action_rss *)0)->key)), ARGS_ENTRY_ARB (offsetof(struct action_rss_data, conf) + offsetof(struct rte_flow_action_rss, key_len), diff --git a/dpdk/app/test-pmd/cmdline_mtr.c b/dpdk/app/test-pmd/cmdline_mtr.c index 399ee56e07..3982787d20 100644 --- a/dpdk/app/test-pmd/cmdline_mtr.c +++ b/dpdk/app/test-pmd/cmdline_mtr.c @@ -312,7 +312,7 @@ static void cmd_show_port_meter_cap_parsed(void *parsed_result, cmdline_parse_inst_t cmd_show_port_meter_cap = { .f = cmd_show_port_meter_cap_parsed, .data = NULL, - .help_str = "Show port meter cap", + .help_str = "show port meter cap ", .tokens = { (void *)&cmd_show_port_meter_cap_show, (void *)&cmd_show_port_meter_cap_port, @@ -408,7 +408,7 @@ static void cmd_add_port_meter_profile_srtcm_parsed(void *parsed_result, cmdline_parse_inst_t cmd_add_port_meter_profile_srtcm = { .f = cmd_add_port_meter_profile_srtcm_parsed, .data = NULL, - .help_str = "Add port meter profile srtcm (rfc2697)", + .help_str = "add port meter profile srtcm_rfc2697 ", .tokens = { (void *)&cmd_add_port_meter_profile_srtcm_add, (void *)&cmd_add_port_meter_profile_srtcm_port, @@ -515,7 +515,7 @@ static void cmd_add_port_meter_profile_trtcm_parsed(void *parsed_result, cmdline_parse_inst_t cmd_add_port_meter_profile_trtcm = { .f = 
cmd_add_port_meter_profile_trtcm_parsed, .data = NULL, - .help_str = "Add port meter profile trtcm (rfc2698)", + .help_str = "add port meter profile trtcm_rfc2698 ", .tokens = { (void *)&cmd_add_port_meter_profile_trtcm_add, (void *)&cmd_add_port_meter_profile_trtcm_port, @@ -627,7 +627,7 @@ static void cmd_add_port_meter_profile_trtcm_rfc4115_parsed( cmdline_parse_inst_t cmd_add_port_meter_profile_trtcm_rfc4115 = { .f = cmd_add_port_meter_profile_trtcm_rfc4115_parsed, .data = NULL, - .help_str = "Add port meter profile trtcm (rfc4115)", + .help_str = "add port meter profile trtcm_rfc4115 ", .tokens = { (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_add, (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_port, @@ -702,7 +702,7 @@ static void cmd_del_port_meter_profile_parsed(void *parsed_result, cmdline_parse_inst_t cmd_del_port_meter_profile = { .f = cmd_del_port_meter_profile_parsed, .data = NULL, - .help_str = "Delete port meter profile", + .help_str = "del port meter profile ", .tokens = { (void *)&cmd_del_port_meter_profile_del, (void *)&cmd_del_port_meter_profile_port, @@ -827,7 +827,10 @@ static void cmd_create_port_meter_parsed(void *parsed_result, cmdline_parse_inst_t cmd_create_port_meter = { .f = cmd_create_port_meter_parsed, .data = NULL, - .help_str = "Create port meter", + .help_str = "create port meter (yes|no) " + "(R|Y|G|D) (R|Y|G|D) (R|Y|G|D) " + " " + "[ ...]", .tokens = { (void *)&cmd_create_port_meter_create, (void *)&cmd_create_port_meter_port, @@ -896,7 +899,7 @@ static void cmd_enable_port_meter_parsed(void *parsed_result, cmdline_parse_inst_t cmd_enable_port_meter = { .f = cmd_enable_port_meter_parsed, .data = NULL, - .help_str = "Enable port meter", + .help_str = "enable port meter ", .tokens = { (void *)&cmd_enable_port_meter_enable, (void *)&cmd_enable_port_meter_port, @@ -957,7 +960,7 @@ static void cmd_disable_port_meter_parsed(void *parsed_result, cmdline_parse_inst_t cmd_disable_port_meter = { .f = cmd_disable_port_meter_parsed, .data = NULL, - .help_str = "Disable port meter", + .help_str = "disable port meter ", .tokens = { (void *)&cmd_disable_port_meter_disable, (void *)&cmd_disable_port_meter_port, @@ -1018,7 +1021,7 @@ static void cmd_del_port_meter_parsed(void *parsed_result, cmdline_parse_inst_t cmd_del_port_meter = { .f = cmd_del_port_meter_parsed, .data = NULL, - .help_str = "Delete port meter", + .help_str = "del port meter ", .tokens = { (void *)&cmd_del_port_meter_del, (void *)&cmd_del_port_meter_port, @@ -1092,7 +1095,7 @@ static void cmd_set_port_meter_profile_parsed(void *parsed_result, cmdline_parse_inst_t cmd_set_port_meter_profile = { .f = cmd_set_port_meter_profile_parsed, .data = NULL, - .help_str = "Set port meter profile", + .help_str = "set port meter profile ", .tokens = { (void *)&cmd_set_port_meter_profile_set, (void *)&cmd_set_port_meter_profile_port, @@ -1166,7 +1169,8 @@ static void cmd_set_port_meter_dscp_table_parsed(void *parsed_result, cmdline_parse_inst_t cmd_set_port_meter_dscp_table = { .f = cmd_set_port_meter_dscp_table_parsed, .data = NULL, - .help_str = "Update port meter dscp table", + .help_str = "set port meter dscp table " + "[ ... 
]", .tokens = { (void *)&cmd_set_port_meter_dscp_table_set, (void *)&cmd_set_port_meter_dscp_table_port, @@ -1276,7 +1280,8 @@ static void cmd_set_port_meter_policer_action_parsed(void *parsed_result, cmdline_parse_inst_t cmd_set_port_meter_policer_action = { .f = cmd_set_port_meter_policer_action_parsed, .data = NULL, - .help_str = "Set port meter policer action", + .help_str = "set port meter policer action " + " [ ]", .tokens = { (void *)&cmd_set_port_meter_policer_action_set, (void *)&cmd_set_port_meter_policer_action_port, @@ -1355,7 +1360,7 @@ static void cmd_set_port_meter_stats_mask_parsed(void *parsed_result, cmdline_parse_inst_t cmd_set_port_meter_stats_mask = { .f = cmd_set_port_meter_stats_mask_parsed, .data = NULL, - .help_str = "Set port meter stats mask", + .help_str = "set port meter stats mask ", .tokens = { (void *)&cmd_set_port_meter_stats_mask_set, (void *)&cmd_set_port_meter_stats_mask_port, @@ -1459,7 +1464,7 @@ static void cmd_show_port_meter_stats_parsed(void *parsed_result, cmdline_parse_inst_t cmd_show_port_meter_stats = { .f = cmd_show_port_meter_stats_parsed, .data = NULL, - .help_str = "Show port meter stats", + .help_str = "show port meter stats (yes|no)", .tokens = { (void *)&cmd_show_port_meter_stats_show, (void *)&cmd_show_port_meter_stats_port, diff --git a/dpdk/app/test-pmd/config.c b/dpdk/app/test-pmd/config.c index b51de59e1e..dab8afe5dd 100644 --- a/dpdk/app/test-pmd/config.c +++ b/dpdk/app/test-pmd/config.c @@ -183,8 +183,6 @@ nic_stats_display(portid_t port_id) diff_ns; uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx; struct rte_eth_stats stats; - struct rte_port *port = &ports[port_id]; - uint8_t i; static const char *nic_stats_border = "########################"; @@ -196,46 +194,12 @@ nic_stats_display(portid_t port_id) printf("\n %s NIC statistics for port %-2d %s\n", nic_stats_border, port_id, nic_stats_border); - if ((!port->rx_queue_stats_mapping_enabled) && (!port->tx_queue_stats_mapping_enabled)) { - printf(" RX-packets: %-10"PRIu64" RX-missed: %-10"PRIu64" RX-bytes: " - "%-"PRIu64"\n", - stats.ipackets, stats.imissed, stats.ibytes); - printf(" RX-errors: %-"PRIu64"\n", stats.ierrors); - printf(" RX-nombuf: %-10"PRIu64"\n", - stats.rx_nombuf); - printf(" TX-packets: %-10"PRIu64" TX-errors: %-10"PRIu64" TX-bytes: " - "%-"PRIu64"\n", - stats.opackets, stats.oerrors, stats.obytes); - } - else { - printf(" RX-packets: %10"PRIu64" RX-errors: %10"PRIu64 - " RX-bytes: %10"PRIu64"\n", - stats.ipackets, stats.ierrors, stats.ibytes); - printf(" RX-errors: %10"PRIu64"\n", stats.ierrors); - printf(" RX-nombuf: %10"PRIu64"\n", - stats.rx_nombuf); - printf(" TX-packets: %10"PRIu64" TX-errors: %10"PRIu64 - " TX-bytes: %10"PRIu64"\n", - stats.opackets, stats.oerrors, stats.obytes); - } - - if (port->rx_queue_stats_mapping_enabled) { - printf("\n"); - for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) { - printf(" Stats reg %2d RX-packets: %10"PRIu64 - " RX-errors: %10"PRIu64 - " RX-bytes: %10"PRIu64"\n", - i, stats.q_ipackets[i], stats.q_errors[i], stats.q_ibytes[i]); - } - } - if (port->tx_queue_stats_mapping_enabled) { - printf("\n"); - for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) { - printf(" Stats reg %2d TX-packets: %10"PRIu64 - " TX-bytes: %10"PRIu64"\n", - i, stats.q_opackets[i], stats.q_obytes[i]); - } - } + printf(" RX-packets: %-10"PRIu64" RX-missed: %-10"PRIu64" RX-bytes: " + "%-"PRIu64"\n", stats.ipackets, stats.imissed, stats.ibytes); + printf(" RX-errors: %-"PRIu64"\n", stats.ierrors); + printf(" RX-nombuf: %-10"PRIu64"\n", stats.rx_nombuf); + 
printf(" TX-packets: %-10"PRIu64" TX-errors: %-10"PRIu64" TX-bytes: " + "%-"PRIu64"\n", stats.opackets, stats.oerrors, stats.obytes); diff_ns = 0; if (clock_gettime(CLOCK_TYPE_ID, &cur_time) == 0) { @@ -398,54 +362,6 @@ nic_xstats_clear(portid_t port_id) } } -void -nic_stats_mapping_display(portid_t port_id) -{ - struct rte_port *port = &ports[port_id]; - uint16_t i; - - static const char *nic_stats_mapping_border = "########################"; - - if (port_id_is_invalid(port_id, ENABLED_WARN)) { - print_valid_ports(); - return; - } - - if ((!port->rx_queue_stats_mapping_enabled) && (!port->tx_queue_stats_mapping_enabled)) { - printf("Port id %d - either does not support queue statistic mapping or" - " no queue statistic mapping set\n", port_id); - return; - } - - printf("\n %s NIC statistics mapping for port %-2d %s\n", - nic_stats_mapping_border, port_id, nic_stats_mapping_border); - - if (port->rx_queue_stats_mapping_enabled) { - for (i = 0; i < nb_rx_queue_stats_mappings; i++) { - if (rx_queue_stats_mappings[i].port_id == port_id) { - printf(" RX-queue %2d mapped to Stats Reg %2d\n", - rx_queue_stats_mappings[i].queue_id, - rx_queue_stats_mappings[i].stats_counter_id); - } - } - printf("\n"); - } - - - if (port->tx_queue_stats_mapping_enabled) { - for (i = 0; i < nb_tx_queue_stats_mappings; i++) { - if (tx_queue_stats_mappings[i].port_id == port_id) { - printf(" TX-queue %2d mapped to Stats Reg %2d\n", - tx_queue_stats_mappings[i].queue_id, - tx_queue_stats_mappings[i].stats_counter_id); - } - } - } - - printf(" %s####################################%s\n", - nic_stats_mapping_border, nic_stats_mapping_border); -} - void rx_queue_infos_display(portid_t port_id, uint16_t queue_id) { @@ -1518,7 +1434,7 @@ port_mtu_set(portid_t port_id, uint16_t mtu) * device supports jumbo frame. 
*/ eth_overhead = dev_info.max_rx_pktlen - dev_info.max_mtu; - if (mtu > RTE_ETHER_MAX_LEN - eth_overhead) { + if (mtu > RTE_ETHER_MTU) { rte_port->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; rte_port->dev_conf.rxmode.max_rx_pkt_len = @@ -1963,6 +1879,7 @@ port_shared_action_query(portid_t port_id, uint32_t id) return -EINVAL; switch (psa->type) { case RTE_FLOW_ACTION_TYPE_RSS: + case RTE_FLOW_ACTION_TYPE_AGE: data = &default_data; break; default: @@ -1979,6 +1896,20 @@ port_shared_action_query(portid_t port_id, uint32_t id) *((uint32_t *)data)); data = NULL; break; + case RTE_FLOW_ACTION_TYPE_AGE: + if (!ret) { + struct rte_flow_query_age *resp = data; + + printf("AGE:\n" + " aged: %u\n" + " sec_since_last_hit_valid: %u\n" + " sec_since_last_hit: %" PRIu32 "\n", + resp->aged, + resp->sec_since_last_hit_valid, + resp->sec_since_last_hit); + } + data = NULL; + break; default: printf("Shared action %u (type: %d) on port %u doesn't support" " query\n", id, psa->type, port_id); @@ -1986,6 +1917,7 @@ port_shared_action_query(portid_t port_id, uint32_t id) } return ret; } + static struct port_flow_tunnel * port_flow_tunnel_offload_cmd_prep(portid_t port_id, const struct rte_flow_item *pattern, @@ -2573,7 +2505,7 @@ tx_queue_id_is_invalid(queueid_t txq_id) { if (txq_id < nb_txq) return 0; - printf("Invalid TX queue %d (must be < nb_rxq=%d)\n", txq_id, nb_txq); + printf("Invalid TX queue %d (must be < nb_txq=%d)\n", txq_id, nb_txq); return 1; } @@ -3785,7 +3717,7 @@ show_fec_capability(unsigned int num, struct rte_eth_fec_capa *speed_fec_capa) printf("%s : ", rte_eth_link_speed_to_str(speed_fec_capa[i].speed)); - for (j = RTE_ETH_FEC_AUTO; j < RTE_DIM(fec_mode_name); j++) { + for (j = 0; j < RTE_DIM(fec_mode_name); j++) { if (RTE_ETH_FEC_MODE_TO_CAPA(j) & speed_fec_capa[i].capa) printf("%s ", fec_mode_name[j].name); @@ -4528,8 +4460,7 @@ tx_vlan_pvid_set(portid_t port_id, uint16_t vlan_id, int on) void set_qmap(portid_t port_id, uint8_t is_rx, uint16_t queue_id, uint8_t map_value) { - uint16_t i; - uint8_t existing_mapping_found = 0; + int ret; if (port_id_is_invalid(port_id, ENABLED_WARN)) return; @@ -4539,40 +4470,23 @@ set_qmap(portid_t port_id, uint8_t is_rx, uint16_t queue_id, uint8_t map_value) if (map_value >= RTE_ETHDEV_QUEUE_STAT_CNTRS) { printf("map_value not in required range 0..%d\n", - RTE_ETHDEV_QUEUE_STAT_CNTRS - 1); + RTE_ETHDEV_QUEUE_STAT_CNTRS - 1); return; } - if (!is_rx) { /*then tx*/ - for (i = 0; i < nb_tx_queue_stats_mappings; i++) { - if ((tx_queue_stats_mappings[i].port_id == port_id) && - (tx_queue_stats_mappings[i].queue_id == queue_id)) { - tx_queue_stats_mappings[i].stats_counter_id = map_value; - existing_mapping_found = 1; - break; - } - } - if (!existing_mapping_found) { /* A new additional mapping... 
*/ - tx_queue_stats_mappings[nb_tx_queue_stats_mappings].port_id = port_id; - tx_queue_stats_mappings[nb_tx_queue_stats_mappings].queue_id = queue_id; - tx_queue_stats_mappings[nb_tx_queue_stats_mappings].stats_counter_id = map_value; - nb_tx_queue_stats_mappings++; - } - } - else { /*rx*/ - for (i = 0; i < nb_rx_queue_stats_mappings; i++) { - if ((rx_queue_stats_mappings[i].port_id == port_id) && - (rx_queue_stats_mappings[i].queue_id == queue_id)) { - rx_queue_stats_mappings[i].stats_counter_id = map_value; - existing_mapping_found = 1; - break; - } + if (!is_rx) { /* tx */ + ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, queue_id, + map_value); + if (ret) { + printf("failed to set tx queue stats mapping.\n"); + return; } - if (!existing_mapping_found) { /* A new additional mapping... */ - rx_queue_stats_mappings[nb_rx_queue_stats_mappings].port_id = port_id; - rx_queue_stats_mappings[nb_rx_queue_stats_mappings].queue_id = queue_id; - rx_queue_stats_mappings[nb_rx_queue_stats_mappings].stats_counter_id = map_value; - nb_rx_queue_stats_mappings++; + } else { /* rx */ + ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, queue_id, + map_value); + if (ret) { + printf("failed to set rx queue stats mapping.\n"); + return; } } } diff --git a/dpdk/app/test-pmd/flowgen.c b/dpdk/app/test-pmd/flowgen.c index acf3e24605..cabfc688ff 100644 --- a/dpdk/app/test-pmd/flowgen.c +++ b/dpdk/app/test-pmd/flowgen.c @@ -53,8 +53,11 @@ static struct rte_ether_addr cfg_ether_dst = #define IP_DEFTTL 64 /* from RFC 1340. */ +/* Use this type to inform GCC that ip_sum violates aliasing rules. */ +typedef unaligned_uint16_t alias_int16_t __attribute__((__may_alias__)); + static inline uint16_t -ip_sum(const unaligned_uint16_t *hdr, int hdr_len) +ip_sum(const alias_int16_t *hdr, int hdr_len) { uint32_t sum = 0; @@ -150,7 +153,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs) next_flow); ip_hdr->total_length = RTE_CPU_TO_BE_16(pkt_size - sizeof(*eth_hdr)); - ip_hdr->hdr_checksum = ip_sum((unaligned_uint16_t *)ip_hdr, + ip_hdr->hdr_checksum = ip_sum((const alias_int16_t *)ip_hdr, sizeof(*ip_hdr)); /* Initialize UDP header. */ diff --git a/dpdk/app/test-pmd/parameters.c b/dpdk/app/test-pmd/parameters.c index bbb68a55ff..df5eb10d84 100644 --- a/dpdk/app/test-pmd/parameters.c +++ b/dpdk/app/test-pmd/parameters.c @@ -176,12 +176,6 @@ usage(char* progname) "(0 <= N <= value of txd).\n"); printf(" --txrst=N: set the transmit RS bit threshold of TX rings to N " "(0 <= N <= value of txd).\n"); - printf(" --tx-queue-stats-mapping=(port,queue,mapping)[,(port,queue,mapping]: " - "tx queues statistics counters mapping " - "(0 <= mapping <= %d).\n", RTE_ETHDEV_QUEUE_STAT_CNTRS - 1); - printf(" --rx-queue-stats-mapping=(port,queue,mapping)[,(port,queue,mapping]: " - "rx queues statistics counters mapping " - "(0 <= mapping <= %d).\n", RTE_ETHDEV_QUEUE_STAT_CNTRS - 1); printf(" --no-flush-rx: Don't flush RX streams before forwarding." " Used mainly with PCAP drivers.\n"); printf(" --rxoffs=X[,Y]*: set RX segment offsets for split.\n"); @@ -300,93 +294,6 @@ parse_fwd_portmask(const char *portmask) set_fwd_ports_mask((uint64_t) pm); } - -static int -parse_queue_stats_mapping_config(const char *q_arg, int is_rx) -{ - char s[256]; - const char *p, *p0 = q_arg; - char *end; - enum fieldnames { - FLD_PORT = 0, - FLD_QUEUE, - FLD_STATS_COUNTER, - _NUM_FLD - }; - unsigned long int_fld[_NUM_FLD]; - char *str_fld[_NUM_FLD]; - int i; - unsigned size; - - /* reset from value set at definition */ - is_rx ? 
(nb_rx_queue_stats_mappings = 0) : (nb_tx_queue_stats_mappings = 0); - - while ((p = strchr(p0,'(')) != NULL) { - ++p; - if((p0 = strchr(p,')')) == NULL) - return -1; - - size = p0 - p; - if(size >= sizeof(s)) - return -1; - - snprintf(s, sizeof(s), "%.*s", size, p); - if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) - return -1; - for (i = 0; i < _NUM_FLD; i++){ - errno = 0; - int_fld[i] = strtoul(str_fld[i], &end, 0); - if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) - return -1; - } - /* Check mapping field is in correct range (0..RTE_ETHDEV_QUEUE_STAT_CNTRS-1) */ - if (int_fld[FLD_STATS_COUNTER] >= RTE_ETHDEV_QUEUE_STAT_CNTRS) { - printf("Stats counter not in the correct range 0..%d\n", - RTE_ETHDEV_QUEUE_STAT_CNTRS - 1); - return -1; - } - - if (!is_rx) { - if ((nb_tx_queue_stats_mappings >= - MAX_TX_QUEUE_STATS_MAPPINGS)) { - printf("exceeded max number of TX queue " - "statistics mappings: %hu\n", - nb_tx_queue_stats_mappings); - return -1; - } - tx_queue_stats_mappings_array[nb_tx_queue_stats_mappings].port_id = - (uint8_t)int_fld[FLD_PORT]; - tx_queue_stats_mappings_array[nb_tx_queue_stats_mappings].queue_id = - (uint8_t)int_fld[FLD_QUEUE]; - tx_queue_stats_mappings_array[nb_tx_queue_stats_mappings].stats_counter_id = - (uint8_t)int_fld[FLD_STATS_COUNTER]; - ++nb_tx_queue_stats_mappings; - } - else { - if ((nb_rx_queue_stats_mappings >= - MAX_RX_QUEUE_STATS_MAPPINGS)) { - printf("exceeded max number of RX queue " - "statistics mappings: %hu\n", - nb_rx_queue_stats_mappings); - return -1; - } - rx_queue_stats_mappings_array[nb_rx_queue_stats_mappings].port_id = - (uint8_t)int_fld[FLD_PORT]; - rx_queue_stats_mappings_array[nb_rx_queue_stats_mappings].queue_id = - (uint8_t)int_fld[FLD_QUEUE]; - rx_queue_stats_mappings_array[nb_rx_queue_stats_mappings].stats_counter_id = - (uint8_t)int_fld[FLD_STATS_COUNTER]; - ++nb_rx_queue_stats_mappings; - } - - } -/* Reassign the rx/tx_queue_stats_mappings pointer to point to this newly populated array rather */ -/* than to the default array (that was set at its definition) */ - is_rx ? 
(rx_queue_stats_mappings = rx_queue_stats_mappings_array) : - (tx_queue_stats_mappings = tx_queue_stats_mappings_array); - return 0; -} - static void print_invalid_socket_id_error(void) { @@ -664,8 +571,6 @@ launch_args_parse(int argc, char** argv) { "rxht", 1, 0, 0 }, { "rxwt", 1, 0, 0 }, { "rxfreet", 1, 0, 0 }, - { "tx-queue-stats-mapping", 1, 0, 0 }, - { "rx-queue-stats-mapping", 1, 0, 0 }, { "no-flush-rx", 0, 0, 0 }, { "flow-isolate-all", 0, 0, 0 }, { "rxoffs", 1, 0, 0 }, @@ -929,12 +834,9 @@ launch_args_parse(int argc, char** argv) } if (!strcmp(lgopts[opt_idx].name, "max-pkt-len")) { n = atoi(optarg); - if (n >= RTE_ETHER_MIN_LEN) { + if (n >= RTE_ETHER_MIN_LEN) rx_mode.max_rx_pkt_len = (uint32_t) n; - if (n > RTE_ETHER_MAX_LEN) - rx_offloads |= - DEV_RX_OFFLOAD_JUMBO_FRAME; - } else + else rte_exit(EXIT_FAILURE, "Invalid max-pkt-len=%d - should be > %d\n", n, RTE_ETHER_MIN_LEN); @@ -1279,18 +1181,6 @@ launch_args_parse(int argc, char** argv) else rte_exit(EXIT_FAILURE, "rxfreet must be >= 0\n"); } - if (!strcmp(lgopts[opt_idx].name, "tx-queue-stats-mapping")) { - if (parse_queue_stats_mapping_config(optarg, TX)) { - rte_exit(EXIT_FAILURE, - "invalid TX queue statistics mapping config entered\n"); - } - } - if (!strcmp(lgopts[opt_idx].name, "rx-queue-stats-mapping")) { - if (parse_queue_stats_mapping_config(optarg, RX)) { - rte_exit(EXIT_FAILURE, - "invalid RX queue statistics mapping config entered\n"); - } - } if (!strcmp(lgopts[opt_idx].name, "rxoffs")) { unsigned int seg_off[MAX_SEGS_BUFFER_SPLIT]; unsigned int nb_offs; diff --git a/dpdk/app/test-pmd/testpmd.c b/dpdk/app/test-pmd/testpmd.c index 33fc0fddf5..555852ae5e 100644 --- a/dpdk/app/test-pmd/testpmd.c +++ b/dpdk/app/test-pmd/testpmd.c @@ -443,8 +443,11 @@ lcoreid_t latencystats_lcore_id = -1; * Ethernet device configuration. */ struct rte_eth_rxmode rx_mode = { - .max_rx_pkt_len = RTE_ETHER_MAX_LEN, - /**< Default maximum frame length. */ + /* Default maximum frame length. + * Zero is converted to "RTE_ETHER_MTU + PMD Ethernet overhead" + * in init_config(). + */ + .max_rx_pkt_len = 0, }; struct rte_eth_txmode tx_mode = { @@ -476,15 +479,6 @@ struct rte_fdir_conf fdir_conf = { volatile int test_done = 1; /* stop packet forwarding when set to 1. 
*/ -struct queue_stats_mappings tx_queue_stats_mappings_array[MAX_TX_QUEUE_STATS_MAPPINGS]; -struct queue_stats_mappings rx_queue_stats_mappings_array[MAX_RX_QUEUE_STATS_MAPPINGS]; - -struct queue_stats_mappings *tx_queue_stats_mappings = tx_queue_stats_mappings_array; -struct queue_stats_mappings *rx_queue_stats_mappings = rx_queue_stats_mappings_array; - -uint16_t nb_tx_queue_stats_mappings = 0; -uint16_t nb_rx_queue_stats_mappings = 0; - /* * Display zero values by default for xstats */ @@ -520,8 +514,6 @@ enum rte_eth_rx_mq_mode rx_mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS; /* Forward function declarations */ static void setup_attached_port(portid_t pi); -static void map_port_queue_stats_mapping_registers(portid_t pi, - struct rte_port *port); static void check_all_ports_link_status(uint32_t port_mask); static int eth_event_callback(portid_t port_id, enum rte_eth_event_type type, @@ -1457,6 +1449,11 @@ init_config(void) rte_exit(EXIT_FAILURE, "rte_eth_dev_info_get() failed\n"); + ret = update_jumbo_frame_offload(pid); + if (ret != 0) + printf("Updating jumbo frame offload failed for port %u\n", + pid); + if (!(port->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)) port->dev_conf.txmode.offloads &= @@ -1857,8 +1854,6 @@ fwd_stats_display(void) fwd_cycles += fs->core_cycles; } for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) { - uint8_t j; - pt_id = fwd_ports_ids[i]; port = &ports[pt_id]; @@ -1881,88 +1876,34 @@ fwd_stats_display(void) printf("\n %s Forward statistics for port %-2d %s\n", fwd_stats_border, pt_id, fwd_stats_border); - if (!port->rx_queue_stats_mapping_enabled && - !port->tx_queue_stats_mapping_enabled) { - printf(" RX-packets: %-14"PRIu64 - " RX-dropped: %-14"PRIu64 - "RX-total: %-"PRIu64"\n", - stats.ipackets, stats.imissed, - stats.ipackets + stats.imissed); - - if (cur_fwd_eng == &csum_fwd_engine) - printf(" Bad-ipcsum: %-14"PRIu64 - " Bad-l4csum: %-14"PRIu64 - "Bad-outer-l4csum: %-14"PRIu64"\n", - ports_stats[pt_id].rx_bad_ip_csum, - ports_stats[pt_id].rx_bad_l4_csum, - ports_stats[pt_id].rx_bad_outer_l4_csum); - if (stats.ierrors + stats.rx_nombuf > 0) { - printf(" RX-error: %-"PRIu64"\n", - stats.ierrors); - printf(" RX-nombufs: %-14"PRIu64"\n", - stats.rx_nombuf); - } - - printf(" TX-packets: %-14"PRIu64 - " TX-dropped: %-14"PRIu64 - "TX-total: %-"PRIu64"\n", - stats.opackets, ports_stats[pt_id].tx_dropped, - stats.opackets + ports_stats[pt_id].tx_dropped); - } else { - printf(" RX-packets: %14"PRIu64 - " RX-dropped:%14"PRIu64 - " RX-total:%14"PRIu64"\n", - stats.ipackets, stats.imissed, - stats.ipackets + stats.imissed); - - if (cur_fwd_eng == &csum_fwd_engine) - printf(" Bad-ipcsum:%14"PRIu64 - " Bad-l4csum:%14"PRIu64 - " Bad-outer-l4csum: %-14"PRIu64"\n", - ports_stats[pt_id].rx_bad_ip_csum, - ports_stats[pt_id].rx_bad_l4_csum, - ports_stats[pt_id].rx_bad_outer_l4_csum); - if ((stats.ierrors + stats.rx_nombuf) > 0) { - printf(" RX-error:%"PRIu64"\n", stats.ierrors); - printf(" RX-nombufs: %14"PRIu64"\n", - stats.rx_nombuf); - } + printf(" RX-packets: %-14"PRIu64" RX-dropped: %-14"PRIu64 + "RX-total: %-"PRIu64"\n", stats.ipackets, stats.imissed, + stats.ipackets + stats.imissed); - printf(" TX-packets: %14"PRIu64 - " TX-dropped:%14"PRIu64 - " TX-total:%14"PRIu64"\n", - stats.opackets, ports_stats[pt_id].tx_dropped, - stats.opackets + ports_stats[pt_id].tx_dropped); + if (cur_fwd_eng == &csum_fwd_engine) + printf(" Bad-ipcsum: %-14"PRIu64 + " Bad-l4csum: %-14"PRIu64 + "Bad-outer-l4csum: %-14"PRIu64"\n", + ports_stats[pt_id].rx_bad_ip_csum, + 
ports_stats[pt_id].rx_bad_l4_csum, + ports_stats[pt_id].rx_bad_outer_l4_csum); + if (stats.ierrors + stats.rx_nombuf > 0) { + printf(" RX-error: %-"PRIu64"\n", stats.ierrors); + printf(" RX-nombufs: %-14"PRIu64"\n", stats.rx_nombuf); } + printf(" TX-packets: %-14"PRIu64" TX-dropped: %-14"PRIu64 + "TX-total: %-"PRIu64"\n", + stats.opackets, ports_stats[pt_id].tx_dropped, + stats.opackets + ports_stats[pt_id].tx_dropped); + if (record_burst_stats) { if (ports_stats[pt_id].rx_stream) pkt_burst_stats_display("RX", &ports_stats[pt_id].rx_stream->rx_burst_stats); if (ports_stats[pt_id].tx_stream) pkt_burst_stats_display("TX", - &ports_stats[pt_id].tx_stream->tx_burst_stats); - } - - if (port->rx_queue_stats_mapping_enabled) { - printf("\n"); - for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) { - printf(" Stats reg %2d RX-packets:%14"PRIu64 - " RX-errors:%14"PRIu64 - " RX-bytes:%14"PRIu64"\n", - j, stats.q_ipackets[j], - stats.q_errors[j], stats.q_ibytes[j]); - } - printf("\n"); - } - if (port->tx_queue_stats_mapping_enabled) { - for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) { - printf(" Stats reg %2d TX-packets:%14"PRIu64 - " TX-bytes:%14" - PRIu64"\n", - j, stats.q_opackets[j], - stats.q_obytes[j]); - } + &ports_stats[pt_id].tx_stream->tx_burst_stats); } printf(" %s--------------------------------%s\n", @@ -2236,11 +2177,6 @@ start_packet_forwarding(int with_tx_first) rxtx_config_display(); fwd_stats_reset(); - for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) { - pt_id = fwd_ports_ids[i]; - port = &ports[pt_id]; - map_port_queue_stats_mapping_registers(pt_id, port); - } if (with_tx_first) { port_fwd_begin = tx_only_engine.port_fwd_begin; if (port_fwd_begin != NULL) { @@ -2806,6 +2742,9 @@ stop_port(portid_t pid) } } + if (port->flow_list) + port_flow_flush(pi); + if (rte_eth_dev_stop(pi) != 0) RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n", pi); @@ -3352,84 +3291,6 @@ dev_event_callback(const char *device_name, enum rte_dev_event_type type, } } -static int -set_tx_queue_stats_mapping_registers(portid_t port_id, struct rte_port *port) -{ - uint16_t i; - int diag; - uint8_t mapping_found = 0; - - for (i = 0; i < nb_tx_queue_stats_mappings; i++) { - if ((tx_queue_stats_mappings[i].port_id == port_id) && - (tx_queue_stats_mappings[i].queue_id < nb_txq )) { - diag = rte_eth_dev_set_tx_queue_stats_mapping(port_id, - tx_queue_stats_mappings[i].queue_id, - tx_queue_stats_mappings[i].stats_counter_id); - if (diag != 0) - return diag; - mapping_found = 1; - } - } - if (mapping_found) - port->tx_queue_stats_mapping_enabled = 1; - return 0; -} - -static int -set_rx_queue_stats_mapping_registers(portid_t port_id, struct rte_port *port) -{ - uint16_t i; - int diag; - uint8_t mapping_found = 0; - - for (i = 0; i < nb_rx_queue_stats_mappings; i++) { - if ((rx_queue_stats_mappings[i].port_id == port_id) && - (rx_queue_stats_mappings[i].queue_id < nb_rxq )) { - diag = rte_eth_dev_set_rx_queue_stats_mapping(port_id, - rx_queue_stats_mappings[i].queue_id, - rx_queue_stats_mappings[i].stats_counter_id); - if (diag != 0) - return diag; - mapping_found = 1; - } - } - if (mapping_found) - port->rx_queue_stats_mapping_enabled = 1; - return 0; -} - -static void -map_port_queue_stats_mapping_registers(portid_t pi, struct rte_port *port) -{ - int diag = 0; - - diag = set_tx_queue_stats_mapping_registers(pi, port); - if (diag != 0) { - if (diag == -ENOTSUP) { - port->tx_queue_stats_mapping_enabled = 0; - printf("TX queue stats mapping not supported port id=%d\n", pi); - } - else - rte_exit(EXIT_FAILURE, - 
"set_tx_queue_stats_mapping_registers " - "failed for port id=%d diag=%d\n", - pi, diag); - } - - diag = set_rx_queue_stats_mapping_registers(pi, port); - if (diag != 0) { - if (diag == -ENOTSUP) { - port->rx_queue_stats_mapping_enabled = 0; - printf("RX queue stats mapping not supported port id=%d\n", pi); - } - else - rte_exit(EXIT_FAILURE, - "set_rx_queue_stats_mapping_registers " - "failed for port id=%d diag=%d\n", - pi, diag); - } -} - static void rxtx_port_config(struct rte_port *port) { @@ -3487,6 +3348,80 @@ rxtx_port_config(struct rte_port *port) } } +/* + * Helper function to arrange max_rx_pktlen value and JUMBO_FRAME offload, + * MTU is also aligned if JUMBO_FRAME offload is not set. + * + * port->dev_info should be set before calling this function. + * + * return 0 on success, negative on error + */ +int +update_jumbo_frame_offload(portid_t portid) +{ + struct rte_port *port = &ports[portid]; + uint32_t eth_overhead; + uint64_t rx_offloads; + int ret; + bool on; + + /* Update the max_rx_pkt_len to have MTU as RTE_ETHER_MTU */ + if (port->dev_info.max_mtu != UINT16_MAX && + port->dev_info.max_rx_pktlen > port->dev_info.max_mtu) + eth_overhead = port->dev_info.max_rx_pktlen - + port->dev_info.max_mtu; + else + eth_overhead = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN; + + rx_offloads = port->dev_conf.rxmode.offloads; + + /* Default config value is 0 to use PMD specific overhead */ + if (port->dev_conf.rxmode.max_rx_pkt_len == 0) + port->dev_conf.rxmode.max_rx_pkt_len = RTE_ETHER_MTU + eth_overhead; + + if (port->dev_conf.rxmode.max_rx_pkt_len <= RTE_ETHER_MTU + eth_overhead) { + rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME; + on = false; + } else { + if ((port->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) { + printf("Frame size (%u) is not supported by port %u\n", + port->dev_conf.rxmode.max_rx_pkt_len, + portid); + return -1; + } + rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; + on = true; + } + + if (rx_offloads != port->dev_conf.rxmode.offloads) { + uint16_t qid; + + port->dev_conf.rxmode.offloads = rx_offloads; + + /* Apply JUMBO_FRAME offload configuration to Rx queue(s) */ + for (qid = 0; qid < port->dev_info.nb_rx_queues; qid++) { + if (on) + port->rx_conf[qid].offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; + else + port->rx_conf[qid].offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME; + } + } + + /* If JUMBO_FRAME is set MTU conversion done by ethdev layer, + * if unset do it here + */ + if ((rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) { + ret = rte_eth_dev_set_mtu(portid, + port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead); + if (ret) + printf("Failed to set MTU to %u for port %u\n", + port->dev_conf.rxmode.max_rx_pkt_len - eth_overhead, + portid); + } + + return 0; +} + void init_port_config(void) { @@ -3526,7 +3461,6 @@ init_port_config(void) if (ret != 0) return; - map_port_queue_stats_mapping_registers(pid, port); #if defined RTE_NET_IXGBE && defined RTE_LIBRTE_IXGBE_BYPASS rte_pmd_ixgbe_bypass_init(pid); #endif @@ -3737,8 +3671,6 @@ init_port_dcb_config(portid_t pid, if (retval != 0) return retval; - map_port_queue_stats_mapping_registers(pid, rte_port); - rte_port->dcb_flag = 1; return 0; diff --git a/dpdk/app/test-pmd/testpmd.h b/dpdk/app/test-pmd/testpmd.h index 6b901a894f..2f8f5a92e4 100644 --- a/dpdk/app/test-pmd/testpmd.h +++ b/dpdk/app/test-pmd/testpmd.h @@ -206,8 +206,6 @@ struct rte_port { uint16_t tunnel_tso_segsz; /**< Segmentation offload MSS for tunneled pkts. 
diff --git a/dpdk/app/test-pmd/testpmd.h b/dpdk/app/test-pmd/testpmd.h
index 6b901a894f..2f8f5a92e4 100644
--- a/dpdk/app/test-pmd/testpmd.h
+++ b/dpdk/app/test-pmd/testpmd.h
@@ -206,8 +206,6 @@ struct rte_port {
 uint16_t tunnel_tso_segsz; /**< Segmentation offload MSS for tunneled pkts. */
 uint16_t tx_vlan_id;/**< The tag ID */
 uint16_t tx_vlan_id_outer;/**< The outer tag ID */
- uint8_t tx_queue_stats_mapping_enabled;
- uint8_t rx_queue_stats_mapping_enabled;
 volatile uint16_t port_status; /**< port started or not */
 uint8_t need_setup; /**< port just attached */
 uint8_t need_reconfig; /**< need reconfiguring port or not */
@@ -326,25 +324,6 @@ enum dcb_mode_enable
 DCB_ENABLED
 };
-#define MAX_TX_QUEUE_STATS_MAPPINGS 1024 /* MAX_PORT of 32 @ 32 tx_queues/port */
-#define MAX_RX_QUEUE_STATS_MAPPINGS 4096 /* MAX_PORT of 32 @ 128 rx_queues/port */
-
-struct queue_stats_mappings {
- portid_t port_id;
- uint16_t queue_id;
- uint8_t stats_counter_id;
-} __rte_cache_aligned;
-
-extern struct queue_stats_mappings tx_queue_stats_mappings_array[];
-extern struct queue_stats_mappings rx_queue_stats_mappings_array[];
-
-/* Assign both tx and rx queue stats mappings to the same default values */
-extern struct queue_stats_mappings *tx_queue_stats_mappings;
-extern struct queue_stats_mappings *rx_queue_stats_mappings;
-
-extern uint16_t nb_tx_queue_stats_mappings;
-extern uint16_t nb_rx_queue_stats_mappings;
-
 extern uint8_t xstats_hide_zero; /**< Hide zero values for xstats display */
 /* globals used for configuration */
@@ -790,7 +769,6 @@ void nic_stats_display(portid_t port_id);
 void nic_stats_clear(portid_t port_id);
 void nic_xstats_display(portid_t port_id);
 void nic_xstats_clear(portid_t port_id);
-void nic_stats_mapping_display(portid_t port_id);
 void device_infos_display(const char *identifier);
 void port_infos_display(portid_t port_id);
 void port_summary_display(portid_t port_id);
@@ -1027,6 +1005,7 @@ uint16_t tx_pkt_set_dynf(uint16_t port_id, __rte_unused uint16_t queue,
 __rte_unused void *user_param);
 void add_tx_dynf_callback(portid_t portid);
 void remove_tx_dynf_callback(portid_t portid);
+int update_jumbo_frame_offload(portid_t portid);
 /*
 * Work-around of a compilation error with ICC on invocations of the
diff --git a/dpdk/app/test-pmd/util.c b/dpdk/app/test-pmd/util.c
index 649bf8f53a..a9e431a8b2 100644
--- a/dpdk/app/test-pmd/util.c
+++ b/dpdk/app/test-pmd/util.c
@@ -15,12 +15,23 @@
 #include "testpmd.h"
+#define MAX_STRING_LEN 8192
+
+#define MKDUMPSTR(buf, buf_size, cur_len, ...) \
+do { \
+ if (cur_len >= buf_size) \
+ break; \
+ cur_len += snprintf(buf + cur_len, buf_size - cur_len, __VA_ARGS__); \
+} while (0)
+
 static inline void
-print_ether_addr(const char *what, const struct rte_ether_addr *eth_addr)
+print_ether_addr(const char *what, const struct rte_ether_addr *eth_addr,
+ char print_buf[], size_t buf_size, size_t *cur_len)
 {
 char buf[RTE_ETHER_ADDR_FMT_SIZE];
+
 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr);
- printf("%s%s", what, buf);
+ MKDUMPSTR(print_buf, buf_size, *cur_len, "%s%s", what, buf);
 }
 static inline bool
@@ -74,13 +85,15 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
 uint32_t vx_vni;
 const char *reason;
 int dynf_index;
+ char print_buf[MAX_STRING_LEN];
+ size_t buf_size = MAX_STRING_LEN;
+ size_t cur_len = 0;
 if (!nb_pkts)
 return;
- printf("port %u/queue %u: %s %u packets\n",
- port_id, queue,
- is_rx ? "received" : "sent",
- (unsigned int) nb_pkts);
+ MKDUMPSTR(print_buf, buf_size, cur_len,
+ "port %u/queue %u: %s %u packets\n", port_id, queue,
+ is_rx ?
"received" : "sent", (unsigned int) nb_pkts); for (i = 0; i < nb_pkts; i++) { int ret; struct rte_flow_error error; @@ -93,95 +106,128 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[], is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type); ret = rte_flow_get_restore_info(port_id, mb, &info, &error); if (!ret) { - printf("restore info:"); + MKDUMPSTR(print_buf, buf_size, cur_len, + "restore info:"); if (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL) { struct port_flow_tunnel *port_tunnel; port_tunnel = port_flow_locate_tunnel (port_id, &info.tunnel); - printf(" - tunnel"); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - tunnel"); if (port_tunnel) - printf(" #%u", port_tunnel->id); + MKDUMPSTR(print_buf, buf_size, cur_len, + " #%u", port_tunnel->id); else - printf(" %s", "-none-"); - printf(" type %s", - port_flow_tunnel_type(&info.tunnel)); + MKDUMPSTR(print_buf, buf_size, cur_len, + " %s", "-none-"); + MKDUMPSTR(print_buf, buf_size, cur_len, + " type %s", port_flow_tunnel_type + (&info.tunnel)); } else { - printf(" - no tunnel info"); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - no tunnel info"); } if (info.flags & RTE_FLOW_RESTORE_INFO_ENCAPSULATED) - printf(" - outer header present"); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - outer header present"); else - printf(" - no outer header"); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - no outer header"); if (info.flags & RTE_FLOW_RESTORE_INFO_GROUP_ID) - printf(" - miss group %u", info.group_id); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - miss group %u", info.group_id); else - printf(" - no miss group"); - printf("\n"); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - no miss group"); + MKDUMPSTR(print_buf, buf_size, cur_len, "\n"); } - print_ether_addr(" src=", ð_hdr->s_addr); - print_ether_addr(" - dst=", ð_hdr->d_addr); - printf(" - type=0x%04x - length=%u - nb_segs=%d", - eth_type, (unsigned int) mb->pkt_len, - (int)mb->nb_segs); + print_ether_addr(" src=", ð_hdr->s_addr, + print_buf, buf_size, &cur_len); + print_ether_addr(" - dst=", ð_hdr->d_addr, + print_buf, buf_size, &cur_len); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - type=0x%04x - length=%u - nb_segs=%d", + eth_type, (unsigned int) mb->pkt_len, + (int)mb->nb_segs); ol_flags = mb->ol_flags; if (ol_flags & PKT_RX_RSS_HASH) { - printf(" - RSS hash=0x%x", (unsigned int) mb->hash.rss); - printf(" - RSS queue=0x%x", (unsigned int) queue); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - RSS hash=0x%x", + (unsigned int) mb->hash.rss); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - RSS queue=0x%x", (unsigned int) queue); } if (ol_flags & PKT_RX_FDIR) { - printf(" - FDIR matched "); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - FDIR matched "); if (ol_flags & PKT_RX_FDIR_ID) - printf("ID=0x%x", - mb->hash.fdir.hi); + MKDUMPSTR(print_buf, buf_size, cur_len, + "ID=0x%x", mb->hash.fdir.hi); else if (ol_flags & PKT_RX_FDIR_FLX) - printf("flex bytes=0x%08x %08x", - mb->hash.fdir.hi, mb->hash.fdir.lo); + MKDUMPSTR(print_buf, buf_size, cur_len, + "flex bytes=0x%08x %08x", + mb->hash.fdir.hi, mb->hash.fdir.lo); else - printf("hash=0x%x ID=0x%x ", - mb->hash.fdir.hash, mb->hash.fdir.id); + MKDUMPSTR(print_buf, buf_size, cur_len, + "hash=0x%x ID=0x%x ", + mb->hash.fdir.hash, mb->hash.fdir.id); } if (is_timestamp_enabled(mb)) - printf(" - timestamp %"PRIu64" ", get_timestamp(mb)); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - timestamp %"PRIu64" ", get_timestamp(mb)); if (ol_flags & PKT_RX_QINQ) - printf(" - QinQ VLAN tci=0x%x, VLAN tci outer=0x%x", - 
mb->vlan_tci, mb->vlan_tci_outer); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - QinQ VLAN tci=0x%x, VLAN tci outer=0x%x", + mb->vlan_tci, mb->vlan_tci_outer); else if (ol_flags & PKT_RX_VLAN) - printf(" - VLAN tci=0x%x", mb->vlan_tci); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - VLAN tci=0x%x", mb->vlan_tci); if (!is_rx && (ol_flags & PKT_TX_DYNF_METADATA)) - printf(" - Tx metadata: 0x%x", - *RTE_FLOW_DYNF_METADATA(mb)); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - Tx metadata: 0x%x", + *RTE_FLOW_DYNF_METADATA(mb)); if (is_rx && (ol_flags & PKT_RX_DYNF_METADATA)) - printf(" - Rx metadata: 0x%x", - *RTE_FLOW_DYNF_METADATA(mb)); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - Rx metadata: 0x%x", + *RTE_FLOW_DYNF_METADATA(mb)); for (dynf_index = 0; dynf_index < 64; dynf_index++) { if (dynf_names[dynf_index][0] != '\0') - printf(" - dynf %s: %d", - dynf_names[dynf_index], - !!(ol_flags & (1UL << dynf_index))); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - dynf %s: %d", + dynf_names[dynf_index], + !!(ol_flags & (1UL << dynf_index))); } if (mb->packet_type) { rte_get_ptype_name(mb->packet_type, buf, sizeof(buf)); - printf(" - hw ptype: %s", buf); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - hw ptype: %s", buf); } sw_packet_type = rte_net_get_ptype(mb, &hdr_lens, RTE_PTYPE_ALL_MASK); rte_get_ptype_name(sw_packet_type, buf, sizeof(buf)); - printf(" - sw ptype: %s", buf); + MKDUMPSTR(print_buf, buf_size, cur_len, " - sw ptype: %s", buf); if (sw_packet_type & RTE_PTYPE_L2_MASK) - printf(" - l2_len=%d", hdr_lens.l2_len); + MKDUMPSTR(print_buf, buf_size, cur_len, " - l2_len=%d", + hdr_lens.l2_len); if (sw_packet_type & RTE_PTYPE_L3_MASK) - printf(" - l3_len=%d", hdr_lens.l3_len); + MKDUMPSTR(print_buf, buf_size, cur_len, " - l3_len=%d", + hdr_lens.l3_len); if (sw_packet_type & RTE_PTYPE_L4_MASK) - printf(" - l4_len=%d", hdr_lens.l4_len); + MKDUMPSTR(print_buf, buf_size, cur_len, " - l4_len=%d", + hdr_lens.l4_len); if (sw_packet_type & RTE_PTYPE_TUNNEL_MASK) - printf(" - tunnel_len=%d", hdr_lens.tunnel_len); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - tunnel_len=%d", hdr_lens.tunnel_len); if (sw_packet_type & RTE_PTYPE_INNER_L2_MASK) - printf(" - inner_l2_len=%d", hdr_lens.inner_l2_len); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - inner_l2_len=%d", hdr_lens.inner_l2_len); if (sw_packet_type & RTE_PTYPE_INNER_L3_MASK) - printf(" - inner_l3_len=%d", hdr_lens.inner_l3_len); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - inner_l3_len=%d", hdr_lens.inner_l3_len); if (sw_packet_type & RTE_PTYPE_INNER_L4_MASK) - printf(" - inner_l4_len=%d", hdr_lens.inner_l4_len); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - inner_l4_len=%d", hdr_lens.inner_l4_len); if (is_encapsulation) { struct rte_ipv4_hdr *ipv4_hdr; struct rte_ipv6_hdr *ipv6_hdr; @@ -218,18 +264,27 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[], l2_len + l3_len + l4_len); udp_port = RTE_BE_TO_CPU_16(udp_hdr->dst_port); vx_vni = rte_be_to_cpu_32(vxlan_hdr->vx_vni); - printf(" - VXLAN packet: packet type =%d, " - "Destination UDP port =%d, VNI = %d", - packet_type, udp_port, vx_vni >> 8); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - VXLAN packet: packet type =%d, " + "Destination UDP port =%d, VNI = %d", + packet_type, udp_port, vx_vni >> 8); } } - printf(" - %s queue=0x%x", is_rx ? "Receive" : "Send", - (unsigned int) queue); - printf("\n"); + MKDUMPSTR(print_buf, buf_size, cur_len, + " - %s queue=0x%x", is_rx ? 
"Receive" : "Send", + (unsigned int) queue); + MKDUMPSTR(print_buf, buf_size, cur_len, "\n"); rte_get_rx_ol_flag_list(mb->ol_flags, buf, sizeof(buf)); - printf(" ol_flags: %s\n", buf); + MKDUMPSTR(print_buf, buf_size, cur_len, + " ol_flags: %s\n", buf); if (rte_mbuf_check(mb, 1, &reason) < 0) - printf("INVALID mbuf: %s\n", reason); + MKDUMPSTR(print_buf, buf_size, cur_len, + "INVALID mbuf: %s\n", reason); + if (cur_len >= buf_size) + printf("%s ...\n", print_buf); + else + printf("%s", print_buf); + cur_len = 0; } } diff --git a/dpdk/app/test/meson.build b/dpdk/app/test/meson.build index 94fd39fecb..bdbc619476 100644 --- a/dpdk/app/test/meson.build +++ b/dpdk/app/test/meson.build @@ -406,7 +406,7 @@ cflags += ['-DALLOW_INTERNAL_API'] test_dep_objs = [] if dpdk_conf.has('RTE_LIB_COMPRESSDEV') - compress_test_dep = dependency('zlib', required: false) + compress_test_dep = dependency('zlib', required: false, method: 'pkg-config') if compress_test_dep.found() test_dep_objs += compress_test_dep test_sources += 'test_compressdev.c' diff --git a/dpdk/app/test/test.c b/dpdk/app/test/test.c index ba0b0309b5..624dd48042 100644 --- a/dpdk/app/test/test.c +++ b/dpdk/app/test/test.c @@ -164,29 +164,38 @@ main(int argc, char **argv) #ifdef RTE_LIB_CMDLINE - cl = cmdline_stdin_new(main_ctx, "RTE>>"); - if (cl == NULL) { - ret = -1; - goto out; - } - char *dpdk_test = getenv("DPDK_TEST"); if (dpdk_test && strlen(dpdk_test)) { char buf[1024]; + + cl = cmdline_new(main_ctx, "RTE>>", 0, 1); + if (cl == NULL) { + ret = -1; + goto out; + } + snprintf(buf, sizeof(buf), "%s\n", dpdk_test); if (cmdline_in(cl, buf, strlen(buf)) < 0) { printf("error on cmdline input\n"); + + ret = -1; + } else { + ret = last_test_result; + } + cmdline_free(cl); + goto out; + } else { + /* if no DPDK_TEST env variable, go interactive */ + cl = cmdline_stdin_new(main_ctx, "RTE>>"); + if (cl == NULL) { ret = -1; goto out; } + cmdline_interact(cl); cmdline_stdin_exit(cl); - ret = last_test_result; - goto out; + cmdline_free(cl); } - /* if no DPDK_TEST env variable, go interactive */ - cmdline_interact(cl); - cmdline_stdin_exit(cl); #endif ret = 0; diff --git a/dpdk/app/test/test_distributor.c b/dpdk/app/test/test_distributor.c index f4c6229f16..961f326cd5 100644 --- a/dpdk/app/test/test_distributor.c +++ b/dpdk/app/test/test_distributor.c @@ -217,6 +217,8 @@ sanity_test(struct worker_params *wp, struct rte_mempool *p) clear_packet_count(); struct rte_mbuf *many_bufs[BIG_BATCH], *return_bufs[BIG_BATCH]; unsigned num_returned = 0; + unsigned int num_being_processed = 0; + unsigned int return_buffer_capacity = 127;/* RTE_DISTRIB_RETURNS_MASK */ /* flush out any remaining packets */ rte_distributor_flush(db); @@ -233,16 +235,16 @@ sanity_test(struct worker_params *wp, struct rte_mempool *p) for (i = 0; i < BIG_BATCH/BURST; i++) { rte_distributor_process(db, &many_bufs[i*BURST], BURST); - count = rte_distributor_returned_pkts(db, - &return_bufs[num_returned], - BIG_BATCH - num_returned); - num_returned += count; + num_being_processed += BURST; + do { + count = rte_distributor_returned_pkts(db, + &return_bufs[num_returned], + BIG_BATCH - num_returned); + num_being_processed -= count; + num_returned += count; + rte_distributor_flush(db); + } while (num_being_processed + BURST > return_buffer_capacity); } - rte_distributor_flush(db); - count = rte_distributor_returned_pkts(db, - &return_bufs[num_returned], - BIG_BATCH - num_returned); - num_returned += count; retries = 0; do { rte_distributor_flush(db); diff --git 
a/dpdk/app/test/test_event_crypto_adapter.c b/dpdk/app/test/test_event_crypto_adapter.c index a0169aa6cf..335211cd8c 100644 --- a/dpdk/app/test/test_event_crypto_adapter.c +++ b/dpdk/app/test/test_event_crypto_adapter.c @@ -183,6 +183,7 @@ test_op_forward_mode(uint8_t session_less) cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; cipher_xform.next = NULL; cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_NULL; + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; op = rte_crypto_op_alloc(params.op_mpool, RTE_CRYPTO_OP_TYPE_SYMMETRIC); @@ -382,6 +383,7 @@ test_op_new_mode(uint8_t session_less) cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; cipher_xform.next = NULL; cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_NULL; + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; op = rte_crypto_op_alloc(params.op_mpool, RTE_CRYPTO_OP_TYPE_SYMMETRIC); diff --git a/dpdk/app/test/test_ipsec.c b/dpdk/app/test/test_ipsec.c index 9ad07a1790..d18220a885 100644 --- a/dpdk/app/test/test_ipsec.c +++ b/dpdk/app/test/test_ipsec.c @@ -744,7 +744,7 @@ create_sa(enum rte_security_session_action_type action_type, ut->ss[j].type = action_type; rc = create_session(ut, &ts->qp_conf, ts->valid_dev, j); if (rc != 0) - return TEST_FAILED; + return rc; rc = rte_ipsec_sa_init(ut->ss[j].sa, &ut->sa_prm, sz); rc = (rc > 0 && (uint32_t)rc <= sz) ? 0 : -EINVAL; @@ -1247,7 +1247,7 @@ test_ipsec_crypto_inb_burst_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* Generate test mbuf data */ @@ -1349,7 +1349,7 @@ test_ipsec_crypto_outb_burst_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* Generate input mbuf data */ @@ -1458,7 +1458,7 @@ test_ipsec_inline_crypto_inb_burst_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* Generate inbound mbuf data */ @@ -1536,7 +1536,7 @@ test_ipsec_inline_proto_inb_burst_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* Generate inbound mbuf data */ @@ -1644,7 +1644,7 @@ test_ipsec_inline_crypto_outb_burst_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* Generate test mbuf data */ @@ -1722,7 +1722,7 @@ test_ipsec_inline_proto_outb_burst_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* Generate test mbuf data */ @@ -1798,7 +1798,7 @@ test_ipsec_lksd_proto_inb_burst_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* Generate test mbuf data */ @@ -1911,7 +1911,7 @@ test_ipsec_replay_inb_inside_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* Generate inbound mbuf data */ @@ -2004,7 +2004,7 @@ test_ipsec_replay_inb_outside_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { 
RTE_LOG(ERR, USER1, "create_sa failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* Generate test mbuf data */ @@ -2104,7 +2104,7 @@ test_ipsec_replay_inb_repeat_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* Generate test mbuf data */ @@ -2205,7 +2205,7 @@ test_ipsec_replay_inb_inside_burst_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* Generate inbound mbuf data */ @@ -2338,7 +2338,7 @@ test_ipsec_crypto_inb_burst_2sa_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa 0 failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* create second rte_ipsec_sa */ @@ -2348,7 +2348,7 @@ test_ipsec_crypto_inb_burst_2sa_null_null(int i) if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa 1 failed, cfg %d\n", i); destroy_sa(0); - return TEST_FAILED; + return rc; } /* Generate test mbuf data */ @@ -2424,7 +2424,7 @@ test_ipsec_crypto_inb_burst_2sa_4grp_null_null(int i) test_cfg[i].replay_win_sz, test_cfg[i].flags, 0); if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa 0 failed, cfg %d\n", i); - return TEST_FAILED; + return rc; } /* create second rte_ipsec_sa */ @@ -2434,7 +2434,7 @@ test_ipsec_crypto_inb_burst_2sa_4grp_null_null(int i) if (rc != 0) { RTE_LOG(ERR, USER1, "create_sa 1 failed, cfg %d\n", i); destroy_sa(0); - return TEST_FAILED; + return rc; } /* Generate test mbuf data */ diff --git a/dpdk/app/test/test_mcslock.c b/dpdk/app/test/test_mcslock.c index fbca78707d..80eaecc90a 100644 --- a/dpdk/app/test/test_mcslock.c +++ b/dpdk/app/test/test_mcslock.c @@ -37,10 +37,6 @@ * lock multiple times. */ -RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_me); -RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_try_me); -RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me); - rte_mcslock_t *p_ml; rte_mcslock_t *p_ml_try; rte_mcslock_t *p_ml_perf; @@ -53,7 +49,7 @@ static int test_mcslock_per_core(__rte_unused void *arg) { /* Per core me node. */ - rte_mcslock_t ml_me = RTE_PER_LCORE(_ml_me); + rte_mcslock_t ml_me; rte_mcslock_lock(&p_ml, &ml_me); printf("MCS lock taken on core %u\n", rte_lcore_id()); @@ -77,7 +73,7 @@ load_loop_fn(void *func_param) const unsigned int lcore = rte_lcore_id(); /**< Per core me node. */ - rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me); + rte_mcslock_t ml_perf_me; /* wait synchro */ while (rte_atomic32_read(&synchro) == 0) @@ -151,8 +147,8 @@ static int test_mcslock_try(__rte_unused void *arg) { /**< Per core me node. */ - rte_mcslock_t ml_me = RTE_PER_LCORE(_ml_me); - rte_mcslock_t ml_try_me = RTE_PER_LCORE(_ml_try_me); + rte_mcslock_t ml_me; + rte_mcslock_t ml_try_me; /* Locked ml_try in the main lcore, so it should fail * when trying to lock it in the worker lcore. @@ -178,8 +174,8 @@ test_mcslock(void) int i; /* Define per core me node. 
 */
- rte_mcslock_t ml_me = RTE_PER_LCORE(_ml_me);
- rte_mcslock_t ml_try_me = RTE_PER_LCORE(_ml_try_me);
+ rte_mcslock_t ml_me;
+ rte_mcslock_t ml_try_me;
 /*
 * Test mcs lock & unlock on each core
diff --git a/dpdk/app/test/test_pmd_perf.c b/dpdk/app/test/test_pmd_perf.c
index 4db816a360..3a248d512c 100644
--- a/dpdk/app/test/test_pmd_perf.c
+++ b/dpdk/app/test/test_pmd_perf.c
@@ -606,10 +606,10 @@ poll_burst(void *args)
 static int
 exec_burst(uint32_t flags, int lcore)
 {
- unsigned i, portid, nb_tx = 0;
+ unsigned int portid, nb_tx = 0;
 struct lcore_conf *conf;
 uint32_t pkt_per_port;
- int num, idx = 0;
+ int num, i, idx = 0;
 int diff_tsc;
 conf = &lcore_conf[lcore];
@@ -628,16 +628,14 @@ exec_burst(uint32_t flags, int lcore)
 rte_atomic64_set(&start, 1);
 /* start xmit */
+ i = 0;
 while (num) {
 nb_tx = RTE_MIN(MAX_PKT_BURST, num);
- for (i = 0; i < conf->nb_ports; i++) {
- portid = conf->portlist[i];
- nb_tx = rte_eth_tx_burst(portid, 0,
- &tx_burst[idx], nb_tx);
- idx += nb_tx;
- num -= nb_tx;
- }
-
+ portid = conf->portlist[i];
+ nb_tx = rte_eth_tx_burst(portid, 0, &tx_burst[idx], nb_tx);
+ idx += nb_tx;
+ num -= nb_tx;
+ i = (i >= conf->nb_ports - 1) ? 0 : (i + 1);
 }
 sleep(5);
diff --git a/dpdk/app/test/test_ring_perf.c b/dpdk/app/test/test_ring_perf.c
index e63e25a867..fd82e20412 100644
--- a/dpdk/app/test/test_ring_perf.c
+++ b/dpdk/app/test/test_ring_perf.c
@@ -178,7 +178,7 @@ enqueue_dequeue_bulk_helper(const unsigned int flag, const int esize,
 struct thread_params *p)
 {
 int ret;
- const unsigned int iter_shift = 23;
+ const unsigned int iter_shift = 15;
 const unsigned int iterations = 1 << iter_shift;
 struct rte_ring *r = p->r;
 unsigned int bsize = p->size;
diff --git a/dpdk/app/test/test_rwlock.c b/dpdk/app/test/test_rwlock.c
index 701187f398..b47150a86a 100644
--- a/dpdk/app/test/test_rwlock.c
+++ b/dpdk/app/test/test_rwlock.c
@@ -46,6 +46,7 @@ enum {
 static struct {
 rte_rwlock_t lock;
 uint64_t tick;
+
 volatile union {
 uint8_t u8[RTE_CACHE_LINE_SIZE];
 uint64_t u64[RTE_CACHE_LINE_SIZE / sizeof(uint64_t)];
@@ -182,7 +183,7 @@ rwlock_test1(void)
 int i;
 rte_rwlock_init(&sl);
- for (i=0; i<RTE_MAX_LCORE; i++)
+ for (i = 0; i < RTE_MAX_LCORE; i++)
 rte_rwlock_init(&sl_tab[i]);
diff --git a/dpdk/doc/guides/nics/hns3.rst b/dpdk/doc/guides/nics/hns3.rst
--- a/dpdk/doc/guides/nics/hns3.rst
+++ b/dpdk/doc/guides/nics/hns3.rst
@@ Prerequisites
-  ``_.
+  ``_.
 - Follow the DPDK :ref:`Getting Started Guide for Linux <linux_gsg>` to
   setup the basic DPDK environment.
diff --git a/dpdk/doc/guides/nics/i40e.rst b/dpdk/doc/guides/nics/i40e.rst
index 4e5c4679b8..64f20e7dab 100644
--- a/dpdk/doc/guides/nics/i40e.rst
+++ b/dpdk/doc/guides/nics/i40e.rst
@@ -562,9 +562,9 @@ Generic flow API
 - ``RSS Flow``
   RSS Flow supports to set hash input set, hash function, enable hash
-  and configure queue region.
+  and configure queues.
   For example:
-  Configure queue region as queue 0, 1, 2, 3.
+  Configure queues as queue 0, 1, 2, 3.
 .. code-block:: console
diff --git a/dpdk/doc/guides/nics/ice.rst b/dpdk/doc/guides/nics/ice.rst
index a0887f129f..ccda26f82f 100644
--- a/dpdk/doc/guides/nics/ice.rst
+++ b/dpdk/doc/guides/nics/ice.rst
@@ -211,9 +211,12 @@ are chosen based on 2 conditions.
 - ``CPU``
   On the X86 platform, the driver checks if the CPU supports AVX2.
   If it's supported, AVX2 paths will be chosen. If not, SSE is chosen.
+  If the CPU supports AVX512 and EAL argument ``--force-max-simd-bitwidth``
+  is set to 512, AVX512 paths will be chosen.
 - ``Offload features``
-  The supported HW offload features are described in the document ice_vec.ini.
+  The supported HW offload features are described in the document ice.ini,
+  A value "P" means the offload feature is not supported by vector path.
   If any not supported features are used, ICE vector PMD is disabled
   and the normal paths are chosen.
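
The ice.rst text above combines a CPU capability with an explicit EAL opt-in. A hedged sketch of that kind of gate using the 20.11 EAL helpers; it is illustrative only (x86-only flag, not the ice driver's actual selection code):

    #include <stdbool.h>
    #include <rte_cpuflags.h>
    #include <rte_vect.h>

    /* True only when the CPU has AVX512F and the EAL max SIMD bitwidth
     * (settable with --force-max-simd-bitwidth=512) allows 512-bit paths.
     */
    static bool
    avx512_path_allowed(void)
    {
        return rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
               rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512;
    }
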
diff --git a/dpdk/doc/guides/nics/ixgbe.rst b/dpdk/doc/guides/nics/ixgbe.rst
index c801dbae81..4f4d3b1c2c 100644
--- a/dpdk/doc/guides/nics/ixgbe.rst
+++ b/dpdk/doc/guides/nics/ixgbe.rst
@@ -252,6 +252,16 @@
 Before binding ``vfio`` with legacy mode in X550 NICs, use ``modprobe vfio ``
 ``nointxmask=1`` to load ``vfio`` module if the intx is not shared with other
 devices.
+UDP with zero checksum is reported as error
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Intel 82599 10 Gigabit Ethernet Controller Specification Update (Revision 2.87)
+Errata: 44 Integrity Error Reported for IPv4/UDP Packets With Zero Checksum
+
+To support UDP zero checksum, the zero and bad UDP checksum packet is marked as
+PKT_RX_L4_CKSUM_UNKNOWN, so the application needs to recompute the checksum to
+validate it.
+
 Inline crypto processing support
 --------------------------------
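
Per the ixgbe note above, a packet flagged PKT_RX_L4_CKSUM_UNKNOWN has to be re-validated in software. A minimal sketch for plain IPv4/UDP, assuming a contiguous mbuf, a known l2_len, no IP options and no tunneling; the 0xffff comparison relies on the 20.11 behaviour of the helper:

    #include <stdbool.h>
    #include <rte_ip.h>
    #include <rte_mbuf.h>
    #include <rte_udp.h>

    static bool
    ipv4_udp_cksum_ok(struct rte_mbuf *m, uint32_t l2_len)
    {
        const struct rte_ipv4_hdr *ip;
        const struct rte_udp_hdr *udp;

        ip = rte_pktmbuf_mtod_offset(m, const struct rte_ipv4_hdr *, l2_len);
        udp = (const struct rte_udp_hdr *)(ip + 1); /* no IP options */

        if (udp->dgram_cksum == 0)
            return true; /* checksum not in use; legal for IPv4 UDP */

        /* Over a packet whose checksum field is already filled in, the
         * 20.11 helper folds to 0xffff when the checksum is correct.
         */
        return rte_ipv4_udptcp_cksum(ip, udp) == 0xffff;
    }
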
diff --git a/dpdk/doc/guides/nics/mlx5.rst b/dpdk/doc/guides/nics/mlx5.rst
index 3bda0f8417..24d5a69227 100644
--- a/dpdk/doc/guides/nics/mlx5.rst
+++ b/dpdk/doc/guides/nics/mlx5.rst
@@ -433,13 +433,17 @@ Driver options
   A nonzero value enables the compression of CQE on RX side. This feature
   allows to save PCI bandwidth and improve performance. Enabled by default.
   Different compression formats are supported in order to achieve the best
-  performance for different traffic patterns. Hash RSS format is the default.
+  performance for different traffic patterns. Default format depends on
+  Multi-Packet Rx queue configuration: Hash RSS format is used in case
+  MPRQ is disabled, Checksum format is used in case MPRQ is enabled.
   Specifying 2 as a ``rxq_cqe_comp_en`` value selects Flow Tag format for
   better compression rate in case of RTE Flow Mark traffic.
   Specifying 3 as a ``rxq_cqe_comp_en`` value selects Checksum format.
   Specifying 4 as a ``rxq_cqe_comp_en`` value selects L3/L4 Header format for
   better compression rate in case of mixed TCP/UDP and IPv4/IPv6 traffic.
+  CQE compression format selection requires DevX to be enabled. If there is
+  no DevX enabled/supported the value is reset to 1 by default.
   Supported on:
@@ -448,24 +452,6 @@ Driver options
   - POWER9 and ARMv8 with ConnectX-4 Lx, ConnectX-5, ConnectX-6, ConnectX-6 Dx,
     ConnectX-6 Lx, BlueField and BlueField-2.
-- ``rxq_cqe_pad_en`` parameter [int]
-
-  A nonzero value enables 128B padding of CQE on RX side. The size of CQE
-  is aligned with the size of a cacheline of the core. If cacheline size is
-  128B, the CQE size is configured to be 128B even though the device writes
-  only 64B data on the cacheline. This is to avoid unnecessary cache
-  invalidation by device's two consecutive writes on to one cacheline.
-  However in some architecture, it is more beneficial to update entire
-  cacheline with padding the rest 64B rather than striding because
-  read-modify-write could drop performance a lot. On the other hand,
-  writing extra data will consume more PCIe bandwidth and could also drop
-  the maximum throughput. It is recommended to empirically set this
-  parameter. Disabled by default.
-
-  Supported on:
-
-  - CPU having 128B cacheline with ConnectX-5 and BlueField.
-
 - ``rxq_pkt_pad_en`` parameter [int]
   A nonzero value enables padding Rx packet to the size of cacheline on PCI
@@ -825,7 +811,7 @@ Driver options
 +------+-----------+-----------+-------------+-------------+
 | 1    | 24 bits   | vary 0-32 | 32 bits     | yes         |
 +------+-----------+-----------+-------------+-------------+
-| 2    | vary 0-32 | 32 bits   | 32 bits     | yes         |
+| 2    | vary 0-24 | 32 bits   | 32 bits     | yes         |
 +------+-----------+-----------+-------------+-------------+
 If there is no E-Switch configuration the ``dv_xmeta_en`` parameter is
@@ -837,6 +823,17 @@ Driver options
 of the extensive metadata features. The legacy Verbs supports FLAG and
 MARK metadata actions over NIC Rx steering domain only.
+ Setting META value to zero in flow action means there is no item provided
+ and receiving datapath will not report in mbufs the metadata are present.
+ Setting MARK value to zero in flow action means the zero FDIR ID value
+ will be reported on packet receiving.
+
+ For the MARK action the last 16 values in the full range are reserved for
+ internal PMD purposes (to emulate FLAG action). The valid range for the
+ MARK action values is 0-0xFFEF for the 16-bit mode and 0-0xFFFFEF
+ for the 24-bit mode, the flows with the MARK action value outside
+ the specified range will be rejected.
+
 - ``dv_flow_en`` parameter [int]
   A nonzero value enables the DV flow steering assuming it is supported
@@ -1390,7 +1387,8 @@ Supported hardware offloads
 Rx timestamp   17.11  4.14   16     4.2-1   12.21.1000  ConnectX-4
 TSO            17.11  4.14   16     4.2-1   12.21.1000  ConnectX-4
 LRO            19.08  N/A    N/A    4.6-4   16.25.6406  ConnectX-5
-Buffer Split   20.11  N/A    N/A    5.1-2   22.28.2006  ConnectX-6 Dx
+Tx scheduling  20.08  N/A    N/A    5.1-2   22.28.2006  ConnectX-6 Dx
+Buffer Split   20.11  N/A    N/A    5.1-2   16.28.2006  ConnectX-5
 ============== ===== ===== ========= ===== ========== =============
 .. table:: Minimal SW/HW versions for rte_flow offloads
diff --git a/dpdk/doc/guides/prog_guide/graph_lib.rst b/dpdk/doc/guides/prog_guide/graph_lib.rst
index 5d183f86d7..fcff9c4286 100644
--- a/dpdk/doc/guides/prog_guide/graph_lib.rst
+++ b/dpdk/doc/guides/prog_guide/graph_lib.rst
@@ -61,7 +61,7 @@ Anatomy of Node:
 .. figure:: img/anatomy_of_a_node.*
-The :numref:`figure_anatomy_of_a_node` diagram depicts the anatomy of a node.
+   Anatomy of a node
 The node is the basic building block of the graph framework.
@@ -138,8 +138,7 @@ Link the Nodes to create the graph topology
 .. figure:: img/link_the_nodes.*
-The :numref:`figure_link_the_nodes` diagram shows a graph topology after
-linking the N nodes.
+   Topology after linking the nodes
 Once nodes are available to the program, Application or node public API
 functions can links them together to create a complex packet processing graph.
@@ -322,8 +321,9 @@ Graph object memory layout
 .. figure:: img/graph_mem_layout.*
-The :numref:`figure_graph_mem_layout` diagram shows ``rte_graph`` object memory
-layout. Understanding the memory layout helps to debug the graph library and
+   Memory layout
+
+Understanding the memory layout helps to debug the graph library and
 improve the performance if needed.
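
Tying the mlx5 ``dv_xmeta_en`` note above to code: MARK ids must stay inside the documented window or flow validation fails. A sketch of an action list that respects the 16-bit-mode limit; the id and queue index are arbitrary examples, not values from the patch:

    #include <rte_flow.h>

    /* 0xFFEF is the last valid MARK id in 16-bit mode; ids above it are
     * reserved by the PMD to emulate the FLAG action.
     */
    static const struct rte_flow_action_mark mark = { .id = 0xFFEF };
    static const struct rte_flow_action_queue queue = { .index = 0 };

    static const struct rte_flow_action mark_actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
        { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
        { .type = RTE_FLOW_ACTION_TYPE_END },
    };
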
Graph object consists of a header, circular buffer to store the pending diff --git a/dpdk/doc/guides/prog_guide/img/anatomy_of_a_node.svg b/dpdk/doc/guides/prog_guide/img/anatomy_of_a_node.svg index fa4b5b2d5a..d3bc742e77 100644 --- a/dpdk/doc/guides/prog_guide/img/anatomy_of_a_node.svg +++ b/dpdk/doc/guides/prog_guide/img/anatomy_of_a_node.svg @@ -309,11 +309,6 @@ id="path109" inkscape:connector-curvature="0" style="fill:#000000;fill-opacity:0;fill-rule:evenodd" /> - - diff --git a/dpdk/doc/guides/prog_guide/profile_app.rst b/dpdk/doc/guides/prog_guide/profile_app.rst index 7093681983..52f85bb9e0 100644 --- a/dpdk/doc/guides/prog_guide/profile_app.rst +++ b/dpdk/doc/guides/prog_guide/profile_app.rst @@ -33,6 +33,20 @@ Refer to the for details about application profiling. +Profiling with VTune +~~~~~~~~~~~~~~~~~~~~ + +To allow VTune attaching to the DPDK application, reconfigure a DPDK build +folder by passing ``-Dc_args=-DRTE_ETHDEV_PROFILE_WITH_VTUNE`` meson option +and recompile the DPDK: + +.. code-block:: console + + meson build + meson configure build -Dc_args=-DRTE_ETHDEV_PROFILE_WITH_VTUNE + ninja -C build + + Profiling on ARM64 ------------------ diff --git a/dpdk/doc/guides/rel_notes/release_20_11.rst b/dpdk/doc/guides/rel_notes/release_20_11.rst index 7405a9864f..e6a7f121c8 100644 --- a/dpdk/doc/guides/rel_notes/release_20_11.rst +++ b/dpdk/doc/guides/rel_notes/release_20_11.rst @@ -1053,3 +1053,503 @@ Tested Platforms * Broadcom Yocto Linux * Kernel version: 4.14.174 * DPDK application running on 8 Arm Cortex-A72 cores + +20.11.1 Release Notes +--------------------- + +20.11.1 Fixes +~~~~~~~~~~~~~ + +* app/crypto-perf: fix CSV output format +* app/crypto-perf: fix latency CSV output +* app/crypto-perf: fix spelling in output +* app/crypto-perf: remove always true condition +* app/eventdev: adjust event count order for pipeline test +* app/eventdev: fix SMP barrier in performance test +* app/eventdev: remove redundant enqueue in burst Tx +* app: fix build with extra include paths +* app/flow-perf: simplify objects initialization +* app/procinfo: fix check on xstats-ids +* app/procinfo: fix _filters stats reporting +* app/procinfo: fix security context info +* app/procinfo: remove useless assignment +* app/procinfo: remove useless memset +* app/testpmd: avoid exit without terminal restore +* app/testpmd: fix help of metering commands +* app/testpmd: fix IP checksum calculation +* app/testpmd: fix key for RSS flow rule +* app/testpmd: fix max Rx packet length for VLAN packets +* app/testpmd: fix packets dump overlapping +* app/testpmd: fix queue reconfig request on Rx split update +* app/testpmd: fix queue stats mapping configuration +* app/testpmd: fix setting maximum packet length +* app/testpmd: fix start index for showing FEC array +* app/testpmd: release flows left before port stop +* app/testpmd: support shared age action query +* bitrate: fix missing header include +* build: fix linker flags on Windows +* build: fix plugin load on static build +* build: force pkg-config for dependency detection +* build: provide suitable error for "both" libraries option +* bus/pci: fix build with MinGW-w64 8 +* bus/pci: fix build with Windows SDK >= 10.0.20253 +* bus/pci: fix hardware ID limit on Windows +* bus/pci: ignore missing NUMA node on Windows +* common/mlx5: fix completion queue entry size configuration +* common/mlx5: fix pointer cast on Windows +* common/mlx5: fix storing synced MAC to internal table +* common/octeontx2: fix build with SVE +* common/sfc_efx/base: apply mask to 
value on match field set +* common/sfc_efx/base: check for MAE privilege +* common/sfc_efx/base: enhance field ID check in field set API +* common/sfc_efx/base: fix MAE match spec class comparison API +* common/sfc_efx/base: fix MAE match spec validation helper +* common/sfc_efx/base: fix MPORT related byte order handling +* common/sfc_efx/base: fix signed/unsigned mismatch warnings +* common/sfc_efx/base: remove warnings about inline specifiers +* common/sfc_efx/base: support alternative MAE match fields +* common/sfc_efx/base: update MCDI headers for MAE privilege +* crypto/dpaa2_sec: fix memory allocation check +* crypto/qat: fix access to uninitialized variable +* crypto/qat: fix digest in buffer +* doc: add FEC to NIC features +* doc: add vtune profiling config to prog guide +* doc: fix figure numbering in graph guide +* doc: fix mark action zero value in mlx5 guide +* doc: fix product link in hns3 guide +* doc: fix QinQ flow rules in testpmd guide +* doc: fix RSS flow description in i40e guide +* doc: fix some statements for ice vector PMD +* doc: fix supported feature table in mlx5 guide +* doc: update flow mark action in mlx5 guide +* eal/arm: fix debug build with gcc for 128-bit atomics +* eal: fix automatic loading of drivers as shared libs +* eal: fix internal ABI tag with clang +* eal: fix MCS lock header include +* eal: fix reciprocal header include +* eal/linux: fix handling of error events from epoll +* eal/windows: fix build with MinGW-w64 8 +* eal/windows: fix C++ compatibility +* eal/windows: fix debug build with MinGW +* eal/windows: fix vfprintf warning with clang +* ethdev: avoid blocking telemetry for link status +* ethdev: fix close failure handling +* ethdev: fix max Rx packet length check +* ethdev: fix missing header include +* eventdev: fix a return value comment +* event/dlb: fix accessing uninitialized variables +* examples/eventdev: add info output for main core +* examples/eventdev: check CPU core enabling +* examples/eventdev: move ethdev stop to the end +* examples/l3fwd: remove limitation on Tx queue count +* examples/pipeline: fix CLI parsing crash +* examples/pipeline: fix VXLAN script permission +* fbarray: fix overlap check +* fib: fix missing header includes +* ip_frag: remove padding length of fragment +* ipsec: fix missing header include +* lib: fix doxygen for parameters of function pointers +* license: add licenses for exception cases +* lpm: fix vector IPv4 lookup +* mbuf: add C++ include guard for dynamic fields header +* mbuf: fix missing header include +* mbuf: remove unneeded atomic generic header include +* mempool: fix panic on dump or audit +* metrics: fix variable declaration in header +* net/af_xdp: remove useless assignment +* net/avp: remove always true condition +* net/axgbe: fix jumbo frame flag condition for MTU set +* net/bnxt: disable end of packet padding for Rx +* net/bnxt: fix cleanup on mutex init failure +* net/bnxt: fix doorbell write ordering +* net/bnxt: fix error handling in device start +* net/bnxt: fix fallback mbuf allocation logic +* net/bnxt: fix format specifier for unsigned int +* net/bnxt: fix freeing mbuf +* net/bnxt: fix FW version log +* net/bnxt: fix lock init and destroy +* net/bnxt: fix max rings computation +* net/bnxt: fix memory leak when mapping fails +* net/bnxt: fix null termination of Rx mbuf chain +* net/bnxt: fix outer UDP checksum Rx offload capability +* net/bnxt: fix packet type index calculation +* net/bnxt: fix PF resource query +* net/bnxt: fix Rx completion ring size calculation +* net/bnxt: 
fix Rx rings in RSS redirection table +* net/bnxt: fix VNIC config on Rx queue stop +* net/bnxt: fix VNIC RSS configure function +* net/bnxt: limit Rx representor packets per poll +* net/bnxt: make offload flags mapping per-ring +* net/bnxt: propagate FW command failure to application +* net/bnxt: refactor init/uninit +* net/bnxt: release HWRM lock in error +* net/bnxt: remove redundant return +* net/bnxt: set correct checksum status in mbuf +* net/bonding: fix PCI address comparison on non-PCI ports +* net/bonding: fix port id validity check on parsing +* net/bonding: remove local variable shadowing outer one +* net/cxgbe: accept VLAN flow items without ethertype +* net/cxgbe: fix jumbo frame flag condition +* net/dpaa2: fix jumbo frame flag condition for MTU set +* net/dpaa: fix jumbo frame flag condition for MTU set +* net/e1000: fix flow control mode setting +* net/e1000: fix jumbo frame flag condition for MTU set +* net/ena: fix Tx doorbell statistics +* net/ena: fix Tx SQ free space assessment +* net/ena: flush Rx buffers memory pool cache +* net/ena: prevent double doorbell +* net/ena: validate Rx req ID upon acquiring descriptor +* net/enetc: fix jumbo frame flag condition for MTU set +* net/enic: fix filter log message +* net/enic: fix filter type used for flow API +* net: fix missing header include +* net/hinic: fix jumbo frame flag condition for MTU set +* net/hinic: restore vectorised code +* net/hns3: adjust format specifier for enum +* net/hns3: adjust some comments +* net/hns3: fix build with SVE +* net/hns3: fix crash with multi-process +* net/hns3: fix data overwriting during register dump +* net/hns3: fix dump register out of range +* net/hns3: fix error code in xstats +* net/hns3: fix FEC state query +* net/hns3: fix firmware exceptions by concurrent commands +* net/hns3: fix flow director rule residue on malloc failure +* net/hns3: fix interception with flow director +* net/hns3: fix interrupt resources in Rx interrupt mode +* net/hns3: fix jumbo frame flag condition for MTU set +* net/hns3: fix link status change from firmware +* net/hns3: fix memory leak on secondary process exit +* net/hns3: fix query order of link status and link info +* net/hns3: fix register length when dumping registers +* net/hns3: fix RSS indirection table size +* net/hns3: fix Rx/Tx errors stats +* net/hns3: fix stats flip overflow +* net/hns3: fix VF query link status in dev init +* net/hns3: fix VF reset on mailbox failure +* net/hns3: fix xstats with id and names +* net/hns3: remove MPLS from supported flow items +* net/hns3: use new opcode for clearing hardware resource +* net/hns3: validate requested maximum Rx frame length +* net/i40e: add null input checks +* net/i40e: fix flex payload rule conflict +* net/i40e: fix global register recovery +* net/i40e: fix jumbo frame flag condition +* net/i40e: fix L4 checksum flag +* net/i40e: fix returned code for RSS hardware failure +* net/i40e: fix Rx bytes statistics +* net/i40e: fix stats counters +* net/i40e: fix VLAN stripping in VF +* net/i40e: fix X722 for 802.1ad frames ability +* net/iavf: fix conflicting RSS combination rules +* net/iavf: fix GTPU UL and DL support for flow director +* net/iavf: fix jumbo frame flag condition +* net/iavf: fix memory leak in large VF +* net/iavf: fix queue pairs configuration +* net/iavf: fix symmetric flow rule creation +* net/iavf: fix vector mapping with queue +* net/ice/base: fix memory handling +* net/ice/base: fix null pointer dereference +* net/ice/base: fix tunnel destroy +* net/ice: check Rx 
queue number on RSS init +* net/ice: disable IPv4 checksum offload in vector Tx +* net/ice: drain out DCF AdminQ command queue +* net/ice: enlarge Rx queue rearm threshold to 64 +* net/ice: fix jumbo frame flag condition +* net/ice: fix outer checksum flags +* net/ice: fix outer UDP Tx checksum offload +* net/ice: fix RSS lookup table initialization +* net/ionic: allow separate L3 and L4 checksum offload +* net/ionic: do minor logging fixups +* net/ionic: fix address handling in Tx +* net/ionic: fix link speed and autonegotiation +* net/ionic: fix up function attribute tags +* net/ipn3ke: fix jumbo frame flag condition for MTU set +* net/ixgbe: detect failed VF MTU set +* net/ixgbe: disable NFS filtering +* net/ixgbe: fix configuration of max frame size +* net/ixgbe: fix flex bytes flow director rule +* net/ixgbe: fix jumbo frame flag condition +* net/ixgbe: fix UDP zero checksum on x86 +* net/liquidio: fix jumbo frame flag condition for MTU set +* net/mlx4: fix device detach +* net/mlx4: fix handling of probing failure +* net/mlx4: fix port attach in secondary process +* net/mlx5: check FW miniCQE format capabilities +* net/mlx5: fix buffer split offload advertising +* net/mlx5: fix comparison sign in flow engine +* net/mlx5: fix constant array size +* net/mlx5: fix count actions query in sample flow +* net/mlx5: fix counter and age flow action validation +* net/mlx5: fix crash on secondary process port close +* net/mlx5: fix device name size on Windows +* net/mlx5: fix Direct Verbs flow descriptor allocation +* net/mlx5: fix drop action in tunnel offload mode +* net/mlx5: fix flow action destroy wrapper +* net/mlx5: fix flow operation wrapper per OS +* net/mlx5: fix flow split combined with age action +* net/mlx5: fix flow split combined with counter +* net/mlx5: fix flow tag decompression +* net/mlx5: fix freeing packet pacing +* net/mlx5: fix hairpin flow split decision +* net/mlx5: fix leak on ASO SQ creation failure +* net/mlx5: fix leak on Rx queue creation failure +* net/mlx5: fix leak on Tx queue creation failure +* net/mlx5: fix mark action in active tunnel offload +* net/mlx5: fix mbuf freeing in vectorized MPRQ +* net/mlx5: fix miniCQE configuration for Verbs +* net/mlx5: fix multi-process port ID +* net/mlx5: fix port attach in secondary process +* net/mlx5: fix shared age action validation +* net/mlx5: fix shared RSS and mark actions combination +* net/mlx5: fix shared RSS capability check +* net/mlx5: fix shared RSS translation and cleanup +* net/mlx5: fix tunnel rules validation on VF representor +* net/mlx5: fix Tx queue size created with DevX +* net/mlx5: fix unnecessary checking for RSS action +* net/mlx5: fix Verbs memory allocation callback +* net/mlx5: fix VXLAN decap on non-VXLAN flow +* net/mlx5: fix wire vport hint +* net/mlx5: refuse empty VLAN in flow pattern +* net/mlx5: remove CQE padding device argument +* net/mlx5: unify operations for all OS +* net/mlx5: validate hash Rx queue pointer +* net/mvneta: check allocation in Rx queue flush +* net/mvpp2: fix frame size checking +* net/mvpp2: fix stack corruption +* net/mvpp2: remove CRC length from MRU validation +* net/mvpp2: remove debug log on fast-path +* net/mvpp2: remove VLAN flush +* net/netvsc: ignore unsupported packet on sync command +* net/nfp: fix jumbo frame flag condition for MTU set +* net/nfp: read chip model from PluDevice register +* net/octeontx2: fix corruption in segments list +* net/octeontx2: fix jumbo frame flag condition for MTU +* net/octeontx2: fix PF flow action for Tx +* net/octeontx: 
fix build with SVE +* net/octeontx: fix jumbo frame flag condition for MTU set +* net/octeontx: fix max Rx packet length +* net/pcap: fix byte stats for drop Tx +* net/pcap: fix infinite Rx with large files +* net/pcap: remove local variable shadowing outer one +* net/qede: fix jumbo frame flag condition for MTU set +* net/qede: fix promiscuous enable +* net/sfc: fix generic byte statistics to exclude FCS bytes +* net/sfc: fix jumbo frame flag condition for MTU set +* net/sfc: fix TSO and checksum offloads for EF10 +* net/thunderx: fix jumbo frame flag condition for MTU set +* net/virtio: add missing backend features negotiation +* net/virtio: fix getting old status on reconnect +* net/virtio: fix memory init with vDPA backend +* net/virtio-user: fix protocol features advertising +* net/virtio-user: fix run closing stdin and close callfd +* node: fix missing header include +* pipeline: fix missing header includes +* power: clean up includes +* power: create guest channel public header file +* power: export guest channel header file +* power: fix missing header includes +* power: make channel message functions public +* power: rename constants +* power: rename public structs +* regex/mlx5: fix memory rule alignment +* regex/mlx5: fix number of supported queues +* regex/mlx5: fix support for group id +* regex/octeontx2: fix PCI table overflow +* rib: fix insertion in some cases +* rib: fix missing header include +* rib: fix missing header includes +* service: propagate init error in EAL +* table: fix missing header include +* telemetry: fix missing header include +* test/distributor: fix return buffer queue overload +* test/event_crypto: set cipher operation in transform +* test: fix buffer overflow in Tx burst +* test: fix terminal settings on exit +* test/ipsec: fix result code for not supported +* test/mcslock: remove unneeded per lcore copy +* test/ring: reduce duration of performance tests +* test/rwlock: fix spelling and missing whitespace +* usertools: fix binding built-in kernel driver +* vdpa/mlx5: fix configuration mutex cleanup +* version: 20.11.1-rc1 +* vhost: fix missing header includes +* vhost: fix packed ring dequeue offloading +* vhost: fix vid allocation race + +20.11.1 Validation +~~~~~~~~~~~~~~~~~~ + +* Canonical(R) Testing + + * Build tests on Ubuntu 21.04 + * OVS-DPDK tests on x86_64 + * 1.0.0 (07:05:12): phys (BM) tests + * 1.1.0 (07:05:12): initialize environment + * 1.1.1 (07:09:32): testpmd => Pass + * 1.1.2 (07:11:12): check testpmd output => Pass + * 2.0.0 (07:11:12): prep virtual test environment + * 1.0.0 (07:14:14): virt tests + * 1.1.0 (07:14:14): initialize environment + * 3.0.0 (07:15:30): performance tests + * 3.1.0 (07:15:30): prep benchmarks + * 3.2.0 (07:15:51): performance tests + * 3.2.1 (07:16:01): test guest-openvswitch for OVS-5CPU => Pass + * 3.2.2 (07:35:44): test guest-dpdk-vhost-user-client-multiq for + * OVSDPDK-VUC => Pass + * 4.0.0 (07:57:11): VUC endurance checks + * 4.1.0 (07:57:11): prep VUC endurance tests + * 4.1.1 (08:12:38): start stop guests (client) => Pass + * 4.1.2 (09:25:59): add/remove ports (client) => Pass + * 4.2.0 (09:35:04): Final cleanup + + +* Red Hat(R) Testing + + * Platform + + * RHEL 8 + * Kernel 4.18 + * Qemu 5.2 + * X540-AT2 NIC(ixgbe, 10G) + + * Functionality + + * Guest with device assignment(PF) throughput testing(1G hugepage size) + * Guest with device assignment(PF) throughput testing(2M hugepage size) + * Guest with device assignment(VF) throughput testing + * PVP (host dpdk testpmd as vswitch) 1Q: throughput 
testing + * PVP vhost-user 2Q throughput testing + * PVP vhost-user 1Q cross numa node throughput testing + * Guest with vhost-user 2 queues throughput testing + * vhost-user reconnect with dpdk-client, qemu-server: qemu reconnect + * vhost-user reconnect with dpdk-client, qemu-server: ovs reconnect + * PVP 1Q live migration testing + * PVP 1Q cross numa node live migration testing + * Guest with ovs+dpdk+vhost-user 1Q live migration testing + * Guest with ovs+dpdk+vhost-user 1Q live migration testing (2M) + * Guest with ovs+dpdk+vhost-user 2Q live migration testing + * Guest with ovs+dpdk+vhost-user 4Q live migration testing + * Host PF + DPDK testing + * Host VF + DPDK testing + + +* Broadcom(R) Testing + + * Functionality + + * Tx/Rx + * Link status + * RSS + * TSO + * VLAN filtering + * MAC filtering + * Statistics + * Checksum offload + * MTU + * Promiscuous mode + + * Platform + + * BCM57414 NetXtreme-E 10Gb/25Gb Ethernet Controller, Firmware: 218.1.186.0 + * BCM57508 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb/200Gb Ethernet, Firmware: 219.0.0.74 + + +* Intel(R) Testing + + * Basic Intel(R) NIC (ixgbe, i40e and ice) testing + * PF (i40e) + * PF (ixgbe) + * PF (ice) + * VF (i40e) + * VF (ixgbe) + * VF (ice) + * Compile Testing + * Intel NIC single core/NIC performance + * Power and IPsec + + * Basic cryptodev and virtio testing + + * vhost/virtio basic loopback, PVP and performance test + * cryptodev Function/Performance + + +* Intel(R) Testing with Open vSwitch + + * OVS testing with OVS 2.15.0 + + * ICE Device + + * Jumbo frames, RSS, Kernel forwarding + + * i40e Device + + * Basic performance (RFC2544 P2P, PVP_CONT, RFC2544 PVP_TPUT, RFC2544 PVVP_TPUT, PVPV), Jumbo frames, RSS + + * Niantic Device + + * Basic performance tests (RFC2544 P2P, PVP_CONT, RFC2544 PVP_TPUT, RFC2544 PVVP_TPUT, PVPV), Jumbo frames, RSS + + * vhost + + * Port addition/deletion, Jumbo frames, RSS + + +* Nvidia(R) Testing + + * Basic functionality with testpmd + + * Tx/Rx + * xstats + * Timestamps + * Link status + * RTE flow and flow_director + * RSS + * VLAN stripping and insertion + * Checksum/TSO + * ptype + * link_status_interrupt example application + * l3fwd-power example application + * Multi-process example applications + + * Build tests + + * Ubuntu 20.04.1 with MLNX_OFED_LINUX-5.2-2.2.0.0. + * Ubuntu 20.04.1 with rdma-core master (7f2d460). + * Ubuntu 20.04.1 with rdma-core v28.0. + * Ubuntu 18.04.5 with rdma-core v17.1. + * Ubuntu 18.04.5 with rdma-core master (7f2d460) (i386). + * Ubuntu 16.04.7 with rdma-core v22.7. + * Fedora 32 with rdma-core v33.0. + * CentOS 7 7.9.2009 with rdma-core master (7f2d460). + * CentOS 7 7.9.2009 with MLNX_OFED_LINUX-5.2-2.2.0.0. + * CentOS 8 8.3.2011 with rdma-core master (7f2d460). + * openSUSE Leap 15.2 with rdma-core v27.1. + + * ConnectX-5 + + * RHEL 7.4 + * Driver MLNX_OFED_LINUX-5.2-2.2.0.0 + * Kernel: 5.12.0-rc1 / Driver: rdma-core 34.0 + * fw 14.29.2002 + + * ConnectX-4 Lx + + * RHEL 7.4 + * Driver MLNX_OFED_LINUX-5.2-2.2.0.0 + * Kernel: 5.12.0-rc1 / Driver: rdma-core 34.0 + * fw 16.29.2002 + +20.11.1 Known Issues +~~~~~~~~~~~~~~~~~~~~ + +* ICE + + * Creating 512 ACL rules after creating a full-mask switch rule fails. + +* vhost/virtio + + * udp-fragmentation-offload cannot be set up on Ubuntu 19.10 VMs. + https://bugzilla.kernel.org/show_bug.cgi?id=207075 + * vm2vm virtio-net connectivity between two VMs randomly fails due + to lost connection after vhost reconnect. 
diff --git a/dpdk/doc/guides/sample_app_ug/eventdev_pipeline.rst b/dpdk/doc/guides/sample_app_ug/eventdev_pipeline.rst index 4508c3dcc8..19ff53803e 100644 --- a/dpdk/doc/guides/sample_app_ug/eventdev_pipeline.rst +++ b/dpdk/doc/guides/sample_app_ug/eventdev_pipeline.rst @@ -34,6 +34,7 @@ options. An example eventdev pipeline running with the software eventdev PMD using these settings is shown below: + * ``-l 0,2,8-15``: lcore to use * ``-r1``: core mask 0x1 for RX * ``-t1``: core mask 0x1 for TX * ``-e4``: core mask 0x4 for the software scheduler @@ -46,8 +47,8 @@ these settings is shown below: .. code-block:: console - .//examples/dpdk-eventdev_pipeline --vdev event_sw0 -- -r1 -t1 \ - -e4 -w FF00 -s4 -n0 -c32 -W1000 -D + .//examples/dpdk-eventdev_pipeline -l 0,2,8-15 --vdev event_sw0 \ + -- -r1 -t1 -e4 -w FF00 -s4 -n0 -c32 -W1000 -D The application has some sanity checking built-in, so if there is a function (e.g.; the RX core) which doesn't have a cpu core mask assigned, the application diff --git a/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst index 9be450066e..6a00245fc8 100644 --- a/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst +++ b/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst @@ -4426,14 +4426,14 @@ Sample QinQ flow rules Before creating QinQ rule(s) the following commands should be issued to enable QinQ:: testpmd> port stop 0 - testpmd> vlan set qinq_strip on 0 + testpmd> vlan set extend on 0 The above command sets the inner and outer TPID's to 0x8100. To change the TPID's the following commands should be used:: - testpmd> vlan set outer tpid 0xa100 0 - testpmd> vlan set inner tpid 0x9100 0 + testpmd> vlan set outer tpid 0x88A8 0 + testpmd> vlan set inner tpid 0x8100 0 testpmd> port start 0 Validate and create a QinQ rule on port 0 to steer traffic to a VF queue in a VM. diff --git a/dpdk/drivers/bus/pci/windows/pci.c b/dpdk/drivers/bus/pci/windows/pci.c index b450346bdc..f662584528 100644 --- a/dpdk/drivers/bus/pci/windows/pci.c +++ b/dpdk/drivers/bus/pci/windows/pci.c @@ -10,8 +10,9 @@ #include "pci_netuio.h" #include +#include -#ifdef RTE_TOOLCHAIN_GCC +#if defined RTE_TOOLCHAIN_GCC && (__MINGW64_VERSION_MAJOR < 8) #include DEFINE_DEVPROPKEY(DEVPKEY_Device_Numa_Node, 0x540b947e, 0x8b40, 0x45bc, 0xa8, 0xa2, 0x6a, 0x0b, 0x89, 0x4c, 0xbd, 0xa2, 3); @@ -234,6 +235,12 @@ get_device_resource_info(HDEVINFO dev_info, &DEVPKEY_Device_Numa_Node, &property_type, (BYTE *)&numa_node, sizeof(numa_node), NULL, 0); if (!res) { + DWORD error = GetLastError(); + if (error == ERROR_NOT_FOUND) { + /* On older CPUs, NUMA is not bound to PCIe locality. 
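+ * Default to node 0 here so that device + * enumeration can still succeed.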
*/ + dev->device.numa_node = 0; + return ERROR_SUCCESS; + } RTE_LOG_WIN32_ERR("SetupDiGetDevicePropertyW" "(DEVPKEY_Device_Numa_Node)"); return -1; @@ -303,7 +310,7 @@ pci_scan_one(HDEVINFO dev_info, PSP_DEVINFO_DATA device_info_data) { struct rte_pci_device *dev; int ret = -1; - char pci_device_info[PATH_MAX]; + char pci_device_info[REGSTR_VAL_MAX_HCID_LEN]; struct rte_pci_addr addr; struct rte_pci_id pci_id; @@ -314,7 +321,7 @@ pci_scan_one(HDEVINFO dev_info, PSP_DEVINFO_DATA device_info_data) memset(dev, 0, sizeof(*dev)); ret = get_pci_hardware_id(dev_info, device_info_data, - pci_device_info, PATH_MAX); + pci_device_info, sizeof(pci_device_info)); if (ret != 0) goto end; diff --git a/dpdk/drivers/bus/pci/windows/pci_netuio.c b/dpdk/drivers/bus/pci/windows/pci_netuio.c index 6701948392..1bf9133f71 100644 --- a/dpdk/drivers/bus/pci/windows/pci_netuio.c +++ b/dpdk/drivers/bus/pci/windows/pci_netuio.c @@ -7,6 +7,12 @@ #include #include +#ifdef __MINGW32__ +#include +#else +#include +#endif + #include "private.h" #include "pci_netuio.h" diff --git a/dpdk/drivers/bus/pci/windows/pci_netuio.h b/dpdk/drivers/bus/pci/windows/pci_netuio.h index 9a77806b57..2f6c97ea73 100644 --- a/dpdk/drivers/bus/pci/windows/pci_netuio.h +++ b/dpdk/drivers/bus/pci/windows/pci_netuio.h @@ -5,6 +5,7 @@ #ifndef _PCI_NETUIO_H_ #define _PCI_NETUIO_H_ +#if !defined(NTDDI_WIN10_FE) || NTDDI_VERSION < NTDDI_WIN10_FE /* GUID definition for device class netUIO */ DEFINE_GUID(GUID_DEVCLASS_NETUIO, 0x78912bc1, 0xcb8e, 0x4b28, 0xa3, 0x29, 0xf3, 0x22, 0xeb, 0xad, 0xbe, 0x0f); @@ -12,6 +13,7 @@ DEFINE_GUID(GUID_DEVCLASS_NETUIO, 0x78912bc1, 0xcb8e, 0x4b28, /* GUID definition for the netuio device interface */ DEFINE_GUID(GUID_DEVINTERFACE_NETUIO, 0x08336f60, 0x0679, 0x4c6c, 0x85, 0xd2, 0xae, 0x7c, 0xed, 0x65, 0xff, 0xf7); +#endif /* IOCTL code definitions */ #define IOCTL_NETUIO_MAP_HW_INTO_USERSPACE \ diff --git a/dpdk/drivers/common/mlx5/linux/meson.build b/dpdk/drivers/common/mlx5/linux/meson.build index 63b78e4bce..fa9686fdaf 100644 --- a/dpdk/drivers/common/mlx5/linux/meson.build +++ b/dpdk/drivers/common/mlx5/linux/meson.build @@ -19,7 +19,8 @@ endif libnames = [ 'mlx5', 'ibverbs' ] libs = [] foreach libname:libnames - lib = dependency('lib' + libname, static:static_ibverbs, required:false) + lib = dependency('lib' + libname, static:static_ibverbs, + required:false, method: 'pkg-config') if not lib.found() and not static_ibverbs lib = cc.find_library(libname, required:false) endif diff --git a/dpdk/drivers/common/mlx5/linux/mlx5_nl.c b/dpdk/drivers/common/mlx5/linux/mlx5_nl.c index 40d8620300..ef7a521379 100644 --- a/dpdk/drivers/common/mlx5/linux/mlx5_nl.c +++ b/dpdk/drivers/common/mlx5/linux/mlx5_nl.c @@ -758,11 +758,21 @@ mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, break; if (j != n) continue; - /* Find the first entry available. */ - for (j = 0; j != n; ++j) { - if (rte_is_zero_ether_addr(&mac_addrs[j])) { - mac_addrs[j] = macs[i]; - break; + if (rte_is_multicast_ether_addr(&macs[i])) { + /* Find the first entry available. */ + for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) { + if (rte_is_zero_ether_addr(&mac_addrs[j])) { + mac_addrs[j] = macs[i]; + break; + } + } + } else { + /* Find the first entry available. 
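+ * Unicast entries may only use the slots below + * MLX5_MAX_UC_MAC_ADDRESSES; multicast entries + * are kept in the slots above that boundary.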
*/ + for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) { + if (rte_is_zero_ether_addr(&mac_addrs[j])) { + mac_addrs[j] = macs[i]; + break; + } } } } diff --git a/dpdk/drivers/common/mlx5/mlx5_devx_cmds.c b/dpdk/drivers/common/mlx5/mlx5_devx_cmds.c index 9c1d1883ea..eafee65f22 100644 --- a/dpdk/drivers/common/mlx5/mlx5_devx_cmds.c +++ b/dpdk/drivers/common/mlx5/mlx5_devx_cmds.c @@ -720,6 +720,11 @@ mlx5_devx_cmd_query_hca_attr(void *ctx, attr->flow_hit_aso = !!(MLX5_GET64(cmd_hca_cap, hcattr, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_FLOW_HIT_ASO); + attr->cqe_compression = MLX5_GET(cmd_hca_cap, hcattr, cqe_compression); + attr->mini_cqe_resp_flow_tag = MLX5_GET(cmd_hca_cap, hcattr, + mini_cqe_resp_flow_tag); + attr->mini_cqe_resp_l3_l4_tag = MLX5_GET(cmd_hca_cap, hcattr, + mini_cqe_resp_l3_l4_tag); if (attr->qos.sup) { MLX5_SET(query_hca_cap_in, in, op_mod, MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP | @@ -1558,7 +1563,8 @@ mlx5_devx_cmd_create_cq(void *ctx, struct mlx5_devx_cq_attr *attr) } else { MLX5_SET64(cqc, cqctx, dbr_addr, attr->db_addr); } - MLX5_SET(cqc, cqctx, cqe_sz, attr->cqe_size); + MLX5_SET(cqc, cqctx, cqe_sz, (RTE_CACHE_LINE_SIZE == 128) ? + MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B); MLX5_SET(cqc, cqctx, cc, attr->use_first_only); MLX5_SET(cqc, cqctx, oi, attr->overrun_ignore); MLX5_SET(cqc, cqctx, log_cq_size, attr->log_cq_size); @@ -1571,7 +1577,6 @@ mlx5_devx_cmd_create_cq(void *ctx, struct mlx5_devx_cq_attr *attr) attr->mini_cqe_res_format); MLX5_SET(cqc, cqctx, mini_cqe_res_format_ext, attr->mini_cqe_res_format_ext); - MLX5_SET(cqc, cqctx, cqe_sz, attr->cqe_size); if (attr->q_umem_valid) { MLX5_SET(create_cq_in, in, cq_umem_valid, attr->q_umem_valid); MLX5_SET(create_cq_in, in, cq_umem_id, attr->q_umem_id); diff --git a/dpdk/drivers/common/mlx5/mlx5_devx_cmds.h b/dpdk/drivers/common/mlx5/mlx5_devx_cmds.h index 726e9f5192..78202eba9d 100644 --- a/dpdk/drivers/common/mlx5/mlx5_devx_cmds.h +++ b/dpdk/drivers/common/mlx5/mlx5_devx_cmds.h @@ -115,6 +115,9 @@ struct mlx5_hca_attr { uint32_t regex:1; uint32_t regexp_num_of_engines; uint32_t log_max_ft_sampler_num:8; + uint32_t cqe_compression:1; + uint32_t mini_cqe_resp_flow_tag:1; + uint32_t mini_cqe_resp_l3_l4_tag:1; struct mlx5_hca_qos_attr qos; struct mlx5_hca_vdpa_attr vdpa; }; @@ -267,7 +270,6 @@ struct mlx5_devx_cq_attr { uint32_t cqe_comp_en:1; uint32_t mini_cqe_res_format:2; uint32_t mini_cqe_res_format_ext:2; - uint32_t cqe_size:3; uint32_t log_cq_size:5; uint32_t log_page_size:5; uint32_t uar_page_id; diff --git a/dpdk/drivers/common/mlx5/mlx5_prm.h b/dpdk/drivers/common/mlx5/mlx5_prm.h index 58d180486e..00b425ac85 100644 --- a/dpdk/drivers/common/mlx5/mlx5_prm.h +++ b/dpdk/drivers/common/mlx5/mlx5_prm.h @@ -600,7 +600,7 @@ typedef uint8_t u8; #define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0) #define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld) -#define __mlx5_bit_off(typ, fld) ((unsigned int)(unsigned long) \ +#define __mlx5_bit_off(typ, fld) ((unsigned int)(uintptr_t) \ (&(__mlx5_nullp(typ)->fld))) #define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - \ (__mlx5_bit_off(typ, fld) & 0x1f)) @@ -1364,7 +1364,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 num_of_uars_per_page[0x20]; u8 flex_parser_protocols[0x20]; u8 reserved_at_560[0x20]; - u8 reserved_at_580[0x3c]; + u8 reserved_at_580[0x39]; + u8 mini_cqe_resp_l3_l4_tag[0x1]; + u8 mini_cqe_resp_flow_tag[0x1]; + u8 enhanced_cqe_compression[0x1]; u8 mini_cqe_resp_stride_index[0x1]; u8 cqe_128_always[0x1]; u8 cqe_compression_128[0x1]; diff 
--git a/dpdk/drivers/common/octeontx2/otx2_io_arm64.h b/dpdk/drivers/common/octeontx2/otx2_io_arm64.h index b5c85d9a6e..34268e3af3 100644 --- a/dpdk/drivers/common/octeontx2/otx2_io_arm64.h +++ b/dpdk/drivers/common/octeontx2/otx2_io_arm64.h @@ -21,6 +21,12 @@ #define otx2_prefetch_store_keep(ptr) ({\ asm volatile("prfm pstl1keep, [%x0]\n" : : "r" (ptr)); }) +#if defined(__ARM_FEATURE_SVE) +#define __LSE_PREAMBLE " .cpu generic+lse+sve\n" +#else +#define __LSE_PREAMBLE " .cpu generic+lse\n" +#endif + static __rte_always_inline uint64_t otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr) { @@ -28,7 +34,7 @@ otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr) /* Atomic add with no ordering */ asm volatile ( - ".cpu generic+lse\n" + __LSE_PREAMBLE "ldadd %x[i], %x[r], [%[b]]" : [r] "=r" (result), "+m" (*ptr) : [i] "r" (incr), [b] "r" (ptr) @@ -43,7 +49,7 @@ otx2_atomic64_add_sync(int64_t incr, int64_t *ptr) /* Atomic add with ordering */ asm volatile ( - ".cpu generic+lse\n" + __LSE_PREAMBLE "ldadda %x[i], %x[r], [%[b]]" : [r] "=r" (result), "+m" (*ptr) : [i] "r" (incr), [b] "r" (ptr) @@ -57,7 +63,7 @@ otx2_lmt_submit(rte_iova_t io_address) uint64_t result; asm volatile ( - ".cpu generic+lse\n" + __LSE_PREAMBLE "ldeor xzr,%x[rf],[%[rs]]" : [rf] "=r"(result): [rs] "r"(io_address)); return result; @@ -69,7 +75,7 @@ otx2_lmt_submit_release(rte_iova_t io_address) uint64_t result; asm volatile ( - ".cpu generic+lse\n" + __LSE_PREAMBLE "ldeorl xzr,%x[rf],[%[rs]]" : [rf] "=r"(result) : [rs] "r"(io_address)); return result; @@ -104,4 +110,5 @@ otx2_lmt_mov_seg(void *out, const void *in, const uint16_t segdw) dst128[i] = src128[i]; } +#undef __LSE_PREAMBLE #endif /* _OTX2_IO_ARM64_H_ */ diff --git a/dpdk/drivers/common/qat/meson.build b/dpdk/drivers/common/qat/meson.build index 29e1299f20..b2915c91fe 100644 --- a/dpdk/drivers/common/qat/meson.build +++ b/dpdk/drivers/common/qat/meson.build @@ -23,7 +23,7 @@ if disabled_drivers.contains(qat_compress_path) 'Explicitly disabled via build config') endif -libcrypto = dependency('libcrypto', required: false) +libcrypto = dependency('libcrypto', required: false, method: 'pkg-config') if qat_crypto and not libcrypto.found() qat_crypto = false dpdk_drvs_disabled += qat_crypto_path diff --git a/dpdk/drivers/common/sfc_efx/base/ef10_nic.c b/dpdk/drivers/common/sfc_efx/base/ef10_nic.c index 68414d9fa9..9dccde9576 100644 --- a/dpdk/drivers/common/sfc_efx/base/ef10_nic.c +++ b/dpdk/drivers/common/sfc_efx/base/ef10_nic.c @@ -1423,11 +1423,19 @@ ef10_get_datapath_caps( #if EFSYS_OPT_MAE /* - * Indicate support for MAE. - * MAE is supported by Riverhead boards starting with R2, - * and it is required that FW is built with MAE support, too. + * Check support for EF100 Match Action Engine (MAE). + * MAE hardware is present on Riverhead boards (from R2), + * and on Keystone, and requires support in firmware. + * + * MAE control operations require MAE control privilege, + * which is not available for VFs. + * + * Privileges can change dynamically at runtime: we assume + * MAE support requires the privilege is granted initially, + * and ignore later dynamic changes. */ - if (CAP_FLAGS3(req, MAE_SUPPORTED)) + if (CAP_FLAGS3(req, MAE_SUPPORTED) && + EFX_MCDI_HAVE_PRIVILEGE(encp->enc_privilege_mask, MAE)) encp->enc_mae_supported = B_TRUE; else encp->enc_mae_supported = B_FALSE; @@ -1896,6 +1904,18 @@ efx_mcdi_nic_board_cfg( EFX_MAC_ADDR_COPY(encp->enc_mac_addr, mac_addr); + /* + * Get the current privilege mask. 
Note that this may be modified + * dynamically, so for most cases the value is informational only. + * If the privilege being discovered can't be granted dynamically, + * it's fine to rely on the value. In all other cases, DO NOT use + * the privilege mask to check for sufficient privileges, as that + * can result in time-of-check/time-of-use bugs. + */ + if ((rc = ef10_get_privilege_mask(enp, &mask)) != 0) + goto fail6; + encp->enc_privilege_mask = mask; + /* Board configuration (legacy) */ rc = efx_mcdi_get_board_cfg(enp, &board_type, NULL, NULL); if (rc != 0) { @@ -1903,14 +1923,14 @@ efx_mcdi_nic_board_cfg( if (rc == EACCES) board_type = 0; else - goto fail6; + goto fail7; } encp->enc_board_type = board_type; /* Fill out fields in enp->en_port and enp->en_nic_cfg from MCDI */ if ((rc = efx_mcdi_get_phy_cfg(enp)) != 0) - goto fail7; + goto fail8; /* * Firmware with support for *_FEC capability bits does not @@ -1929,18 +1949,18 @@ efx_mcdi_nic_board_cfg( /* Obtain the default PHY advertised capabilities */ if ((rc = ef10_phy_get_link(enp, &els)) != 0) - goto fail8; + goto fail9; epp->ep_default_adv_cap_mask = els.epls.epls_adv_cap_mask; epp->ep_adv_cap_mask = els.epls.epls_adv_cap_mask; /* Check capabilities of running datapath firmware */ if ((rc = ef10_get_datapath_caps(enp)) != 0) - goto fail9; + goto fail10; /* Get interrupt vector limits */ if ((rc = efx_mcdi_get_vector_cfg(enp, &base, &nvec, NULL)) != 0) { if (EFX_PCI_FUNCTION_IS_PF(encp)) - goto fail10; + goto fail11; /* Ignore error (cannot query vector limits from a VF). */ base = 0; @@ -1949,16 +1969,6 @@ efx_mcdi_nic_board_cfg( encp->enc_intr_vec_base = base; encp->enc_intr_limit = nvec; - /* - * Get the current privilege mask. Note that this may be modified - * dynamically, so this value is informational only. DO NOT use - * the privilege mask to check for sufficient privileges, as that - * can result in time-of-check/time-of-use bugs. - */ - if ((rc = ef10_get_privilege_mask(enp, &mask)) != 0) - goto fail11; - encp->enc_privilege_mask = mask; - return (0); fail11: diff --git a/dpdk/drivers/common/sfc_efx/base/efx.h b/dpdk/drivers/common/sfc_efx/base/efx.h index 3b40e28b4e..ccf9c7ab8a 100644 --- a/dpdk/drivers/common/sfc_efx/base/efx.h +++ b/dpdk/drivers/common/sfc_efx/base/efx.h @@ -4283,6 +4283,11 @@ efx_mae_action_set_specs_equal( * Conduct a comparison to check whether two match specifications * of equal rule type (action / outer) and priority would map to * the very same rule class from the firmware's standpoint. + * + * For match specification fields that are not supported by firmware, + * the rule class only matches if the mask/value pairs for that field + * are equal. Clients should use efx_mae_match_spec_is_valid() before + * calling this API to detect usage of unsupported fields. */ LIBEFX_API extern __checkReturn efx_rc_t diff --git a/dpdk/drivers/common/sfc_efx/base/efx_mae.c b/dpdk/drivers/common/sfc_efx/base/efx_mae.c index ee0a3d3196..338a0013f9 100644 --- a/dpdk/drivers/common/sfc_efx/base/efx_mae.c +++ b/dpdk/drivers/common/sfc_efx/base/efx_mae.c @@ -463,6 +463,10 @@ typedef enum efx_mae_field_endianness_e { * The information in it is meant to be used internally by * APIs for addressing a given field in a mask-value pairs * structure and for validation purposes. + * + * A field may have an alternative one. This structure + * has additional members to reference the alternative + * field's mask. See efx_mae_match_spec_is_valid(). 
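+ * Currently only the outer-rule IPv4 and IPv6 + * address fields are paired with each other + * as alternatives.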
*/ typedef struct efx_mae_mv_desc_s { efx_mae_field_cap_id_t emmd_field_cap_id; @@ -472,6 +476,14 @@ typedef struct efx_mae_mv_desc_s { size_t emmd_mask_size; size_t emmd_mask_offset; + /* + * Having the alternative field's mask size set to 0 + * means that there's no alternative field specified. + */ + size_t emmd_alt_mask_size; + size_t emmd_alt_mask_offset; + + /* Primary field and the alternative one are of the same endianness. */ efx_mae_field_endianness_t emmd_endianness; } efx_mae_mv_desc_t; @@ -485,6 +497,7 @@ static const efx_mae_mv_desc_t __efx_mae_action_rule_mv_desc_set[] = { MAE_FIELD_MASK_VALUE_PAIRS_##_name##_OFST, \ MAE_FIELD_MASK_VALUE_PAIRS_##_name##_MASK_LEN, \ MAE_FIELD_MASK_VALUE_PAIRS_##_name##_MASK_OFST, \ + 0, 0 /* no alternative field */, \ _endianness \ } @@ -522,6 +535,21 @@ static const efx_mae_mv_desc_t __efx_mae_outer_rule_mv_desc_set[] = { MAE_ENC_FIELD_PAIRS_##_name##_OFST, \ MAE_ENC_FIELD_PAIRS_##_name##_MASK_LEN, \ MAE_ENC_FIELD_PAIRS_##_name##_MASK_OFST, \ + 0, 0 /* no alternative field */, \ + _endianness \ + } + +/* Same as EFX_MAE_MV_DESC(), but also indicates an alternative field. */ +#define EFX_MAE_MV_DESC_ALT(_name, _alt_name, _endianness) \ + [EFX_MAE_FIELD_##_name] = \ + { \ + EFX_MAE_FIELD_ID_##_name, \ + MAE_ENC_FIELD_PAIRS_##_name##_LEN, \ + MAE_ENC_FIELD_PAIRS_##_name##_OFST, \ + MAE_ENC_FIELD_PAIRS_##_name##_MASK_LEN, \ + MAE_ENC_FIELD_PAIRS_##_name##_MASK_OFST, \ + MAE_ENC_FIELD_PAIRS_##_alt_name##_MASK_LEN, \ + MAE_ENC_FIELD_PAIRS_##_alt_name##_MASK_OFST, \ _endianness \ } @@ -533,16 +561,17 @@ static const efx_mae_mv_desc_t __efx_mae_outer_rule_mv_desc_set[] = { EFX_MAE_MV_DESC(ENC_VLAN0_PROTO_BE, EFX_MAE_FIELD_BE), EFX_MAE_MV_DESC(ENC_VLAN1_TCI_BE, EFX_MAE_FIELD_BE), EFX_MAE_MV_DESC(ENC_VLAN1_PROTO_BE, EFX_MAE_FIELD_BE), - EFX_MAE_MV_DESC(ENC_SRC_IP4_BE, EFX_MAE_FIELD_BE), - EFX_MAE_MV_DESC(ENC_DST_IP4_BE, EFX_MAE_FIELD_BE), + EFX_MAE_MV_DESC_ALT(ENC_SRC_IP4_BE, ENC_SRC_IP6_BE, EFX_MAE_FIELD_BE), + EFX_MAE_MV_DESC_ALT(ENC_DST_IP4_BE, ENC_DST_IP6_BE, EFX_MAE_FIELD_BE), EFX_MAE_MV_DESC(ENC_IP_PROTO, EFX_MAE_FIELD_BE), EFX_MAE_MV_DESC(ENC_IP_TOS, EFX_MAE_FIELD_BE), EFX_MAE_MV_DESC(ENC_IP_TTL, EFX_MAE_FIELD_BE), - EFX_MAE_MV_DESC(ENC_SRC_IP6_BE, EFX_MAE_FIELD_BE), - EFX_MAE_MV_DESC(ENC_DST_IP6_BE, EFX_MAE_FIELD_BE), + EFX_MAE_MV_DESC_ALT(ENC_SRC_IP6_BE, ENC_SRC_IP4_BE, EFX_MAE_FIELD_BE), + EFX_MAE_MV_DESC_ALT(ENC_DST_IP6_BE, ENC_DST_IP4_BE, EFX_MAE_FIELD_BE), EFX_MAE_MV_DESC(ENC_L4_SPORT_BE, EFX_MAE_FIELD_BE), EFX_MAE_MV_DESC(ENC_L4_DPORT_BE, EFX_MAE_FIELD_BE), +#undef EFX_MAE_MV_DESC_ALT #undef EFX_MAE_MV_DESC }; @@ -564,7 +593,13 @@ efx_mae_mport_by_phy_port( MAE_MPORT_SELECTOR_PPORT_ID, phy_port); memset(mportp, 0, sizeof (*mportp)); - mportp->sel = dword.ed_u32[0]; + /* + * The constructed DWORD is little-endian, + * but the resulting value is meant to be + * passed to MCDIs, where it will undergo + * host-order to little endian conversion. + */ + mportp->sel = EFX_DWORD_FIELD(dword, EFX_DWORD_0); return (0); @@ -601,7 +636,13 @@ efx_mae_mport_by_pcie_function( MAE_MPORT_SELECTOR_FUNC_VF_ID, vf); memset(mportp, 0, sizeof (*mportp)); - mportp->sel = dword.ed_u32[0]; + /* + * The constructed DWORD is little-endian, + * but the resulting value is meant to be + * passed to MCDIs, where it will undergo + * host-order to little endian conversion. 
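+ * EFX_DWORD_FIELD extracts the value in host + * order, which keeps this assignment correct + * on big-endian hosts as well.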
+ */ + mportp->sel = EFX_DWORD_FIELD(dword, EFX_DWORD_0); return (0); @@ -644,28 +685,54 @@ efx_mae_match_spec_field_set( goto fail1; } - if (field_id >= desc_set_nentries) { + if ((unsigned int)field_id >= desc_set_nentries) { rc = EINVAL; goto fail2; } - if (value_size != descp->emmd_value_size) { + if (descp->emmd_mask_size == 0) { + /* The ID points to a gap in the array of field descriptors. */ rc = EINVAL; goto fail3; } - if (mask_size != descp->emmd_mask_size) { + if (value_size != descp->emmd_value_size) { rc = EINVAL; goto fail4; } + if (mask_size != descp->emmd_mask_size) { + rc = EINVAL; + goto fail5; + } + if (descp->emmd_endianness == EFX_MAE_FIELD_BE) { + unsigned int i; + /* * The mask/value are in network (big endian) order. * The MCDI request field is also big endian. */ - memcpy(mvp + descp->emmd_value_offset, value, value_size); - memcpy(mvp + descp->emmd_mask_offset, mask, mask_size); + + EFSYS_ASSERT3U(value_size, ==, mask_size); + + for (i = 0; i < value_size; ++i) { + uint8_t *v_bytep = mvp + descp->emmd_value_offset + i; + uint8_t *m_bytep = mvp + descp->emmd_mask_offset + i; + + /* + * Apply the mask (which may be all-zeros) to the value. + * + * If this API is given one value for a field in one + * specification and a different value for the same + * field in another specification, and both masks are + * all-zeros, the stored (masked) values end up equal, + * so the field is not counted as a mismatch when the + * specifications are compared with the + * efx_mae_match_specs_equal() API. + */ + *v_bytep = value[i] & mask[i]; + *m_bytep = mask[i]; + } } else { efx_dword_t dword; @@ -700,6 +767,8 @@ efx_mae_match_spec_field_set( return (0); +fail5: + EFSYS_PROBE(fail5); fail4: EFSYS_PROBE(fail4); fail3: @@ -760,7 +829,7 @@ efx_mae_match_specs_equal( ((_mask)[(_bit) / (_mask_page_nbits)] & \ (1ULL << ((_bit) & ((_mask_page_nbits) - 1)))) -static inline boolean_t +static boolean_t efx_mask_is_prefix( __in size_t mask_nbytes, __in_bcount(mask_nbytes) const uint8_t *maskp) @@ -780,7 +849,7 @@ efx_mask_is_prefix( return B_TRUE; } -static inline boolean_t +static boolean_t efx_mask_is_all_ones( __in size_t mask_nbytes, __in_bcount(mask_nbytes) const uint8_t *maskp) @@ -794,7 +863,7 @@ efx_mask_is_all_ones( return (t == (uint8_t)(~0)); } -static inline boolean_t +static boolean_t efx_mask_is_all_zeros( __in size_t mask_nbytes, __in_bcount(mask_nbytes) const uint8_t *maskp) @@ -844,17 +913,29 @@ efx_mae_match_spec_is_valid( if (field_caps == NULL) return (B_FALSE); - for (field_id = 0; field_id < desc_set_nentries; ++field_id) { + for (field_id = 0; (unsigned int)field_id < desc_set_nentries; + ++field_id) { const efx_mae_mv_desc_t *descp = &desc_setp[field_id]; efx_mae_field_cap_id_t field_cap_id = descp->emmd_field_cap_id; + const uint8_t *alt_m_buf = mvp + descp->emmd_alt_mask_offset; const uint8_t *m_buf = mvp + descp->emmd_mask_offset; + size_t alt_m_size = descp->emmd_alt_mask_size; size_t m_size = descp->emmd_mask_size; if (m_size == 0) continue; /* Skip array gap */ - if (field_cap_id >= field_ncaps) - break; + if ((unsigned int)field_cap_id >= field_ncaps) { + /* + * The FW has not reported capability status for + * this field. Make sure that its mask is zeroed. 
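+ * A non-zero mask would request a match on a + * field the FW cannot check, so such a + * specification has to be rejected.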
+ */ + is_valid = efx_mask_is_all_zeros(m_size, m_buf); + if (is_valid != B_FALSE) + continue; + else + break; + } switch (field_caps[field_cap_id].emfc_support) { case MAE_FIELD_SUPPORTED_MATCH_MASK: @@ -869,6 +950,19 @@ efx_mae_match_spec_is_valid( break; case MAE_FIELD_SUPPORTED_MATCH_ALWAYS: is_valid = efx_mask_is_all_ones(m_size, m_buf); + + if ((is_valid == B_FALSE) && (alt_m_size != 0)) { + /* + * This field has an alternative one. The FW + * reports ALWAYS for both, implying that one + * of them is required to have an all-ones mask. + * + * The primary field's mask is incorrect; go + * on to check that of the alternative field. + */ + is_valid = efx_mask_is_all_ones(alt_m_size, + alt_m_buf); + } break; case MAE_FIELD_SUPPORTED_MATCH_NEVER: case MAE_FIELD_UNSUPPORTED: @@ -1274,7 +1368,13 @@ efx_mae_action_set_populate_drop( EFX_POPULATE_DWORD_1(dword, MAE_MPORT_SELECTOR_FLAT, MAE_MPORT_SELECTOR_NULL); - mport.sel = dword.ed_u32[0]; + /* + * The constructed DWORD is little-endian, + * but the resulting value is meant to be + * passed to MCDIs, where it will undergo + * host-order to little endian conversion. + */ + mport.sel = EFX_DWORD_FIELD(dword, EFX_DWORD_0); arg = (const uint8_t *)&mport.sel; @@ -1350,21 +1450,36 @@ efx_mae_match_specs_class_cmp( return (0); } - for (field_id = 0; field_id < desc_set_nentries; ++field_id) { + for (field_id = 0; (unsigned int)field_id < desc_set_nentries; + ++field_id) { const efx_mae_mv_desc_t *descp = &desc_setp[field_id]; efx_mae_field_cap_id_t field_cap_id = descp->emmd_field_cap_id; - - if (descp->emmd_mask_size == 0) + const uint8_t *lmaskp = mvpl + descp->emmd_mask_offset; + const uint8_t *rmaskp = mvpr + descp->emmd_mask_offset; + size_t mask_size = descp->emmd_mask_size; + const uint8_t *lvalp = mvpl + descp->emmd_value_offset; + const uint8_t *rvalp = mvpr + descp->emmd_value_offset; + size_t value_size = descp->emmd_value_size; + + if (mask_size == 0) continue; /* Skip array gap */ - if (field_cap_id >= field_ncaps) - break; + if ((unsigned int)field_cap_id >= field_ncaps) { + /* + * The FW has not reported capability status for this + * field. It's unknown whether any difference between + * the two masks / values affects the class. The only + * case when the class must be the same is when these + * mask-value pairs match. Otherwise, report mismatch. + */ + if ((memcmp(lmaskp, rmaskp, mask_size) == 0) && + (memcmp(lvalp, rvalp, value_size) == 0)) + continue; + else + break; + } if (field_caps[field_cap_id].emfc_mask_affects_class) { - const uint8_t *lmaskp = mvpl + descp->emmd_mask_offset; - const uint8_t *rmaskp = mvpr + descp->emmd_mask_offset; - size_t mask_size = descp->emmd_mask_size; - if (memcmp(lmaskp, rmaskp, mask_size) != 0) { have_same_class = B_FALSE; break; @@ -1372,10 +1487,6 @@ efx_mae_match_specs_class_cmp( } if (field_caps[field_cap_id].emfc_match_affects_class) { - const uint8_t *lvalp = mvpl + descp->emmd_value_offset; - const uint8_t *rvalp = mvpr + descp->emmd_value_offset; - size_t value_size = descp->emmd_value_size; - if (memcmp(lvalp, rvalp, value_size) != 0) { have_same_class = B_FALSE; break; diff --git a/dpdk/drivers/common/sfc_efx/base/efx_regs_mcdi.h b/dpdk/drivers/common/sfc_efx/base/efx_regs_mcdi.h index 0388acf723..689a491d05 100644 --- a/dpdk/drivers/common/sfc_efx/base/efx_regs_mcdi.h +++ b/dpdk/drivers/common/sfc_efx/base/efx_regs_mcdi.h @@ -20349,6 +20349,8 @@ * SF-117064-DG for background). 
*/ #define MC_CMD_PRIVILEGE_MASK_IN_GRP_ADMIN_TSA_UNBOUND 0x8000 +/* enum: Control the Match-Action Engine if present. See mcdi_mae.yml. */ +#define MC_CMD_PRIVILEGE_MASK_IN_GRP_MAE 0x10000 /* enum: Set this bit to indicate that a new privilege mask is to be set, * otherwise the command will only read the existing mask. */ @@ -26823,7 +26825,7 @@ #define MC_CMD_MAE_GET_AR_CAPS 0x141 #undef MC_CMD_0x141_PRIVILEGE_CTG -#define MC_CMD_0x141_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x141_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_GET_AR_CAPS_IN msgrequest */ #define MC_CMD_MAE_GET_AR_CAPS_IN_LEN 0 @@ -26855,7 +26857,7 @@ #define MC_CMD_MAE_GET_OR_CAPS 0x142 #undef MC_CMD_0x142_PRIVILEGE_CTG -#define MC_CMD_0x142_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x142_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_GET_OR_CAPS_IN msgrequest */ #define MC_CMD_MAE_GET_OR_CAPS_IN_LEN 0 @@ -26885,7 +26887,7 @@ #define MC_CMD_MAE_COUNTER_ALLOC 0x143 #undef MC_CMD_0x143_PRIVILEGE_CTG -#define MC_CMD_0x143_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x143_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_COUNTER_ALLOC_IN msgrequest */ #define MC_CMD_MAE_COUNTER_ALLOC_IN_LEN 4 @@ -26928,7 +26930,7 @@ #define MC_CMD_MAE_COUNTER_FREE 0x144 #undef MC_CMD_0x144_PRIVILEGE_CTG -#define MC_CMD_0x144_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x144_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_COUNTER_FREE_IN msgrequest */ #define MC_CMD_MAE_COUNTER_FREE_IN_LENMIN 8 @@ -26993,6 +26995,9 @@ * delivering packets to the current queue first. */ #define MC_CMD_MAE_COUNTERS_STREAM_START 0x151 +#undef MC_CMD_0x151_PRIVILEGE_CTG + +#define MC_CMD_0x151_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_COUNTERS_STREAM_START_IN msgrequest */ #define MC_CMD_MAE_COUNTERS_STREAM_START_IN_LEN 8 @@ -27026,6 +27031,9 @@ * Stop streaming counter values to the specified RxQ. */ #define MC_CMD_MAE_COUNTERS_STREAM_STOP 0x152 +#undef MC_CMD_0x152_PRIVILEGE_CTG + +#define MC_CMD_0x152_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_COUNTERS_STREAM_STOP_IN msgrequest */ #define MC_CMD_MAE_COUNTERS_STREAM_STOP_IN_LEN 2 @@ -27052,6 +27060,9 @@ * MAE_COUNTERS_PACKETISER_STREAM_START/PACKET_SIZE and rung the doorbell. 
*/ #define MC_CMD_MAE_COUNTERS_STREAM_GIVE_CREDITS 0x153 +#undef MC_CMD_0x153_PRIVILEGE_CTG + +#define MC_CMD_0x153_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_COUNTERS_STREAM_GIVE_CREDITS_IN msgrequest */ #define MC_CMD_MAE_COUNTERS_STREAM_GIVE_CREDITS_IN_LEN 4 @@ -27070,7 +27081,7 @@ #define MC_CMD_MAE_ENCAP_HEADER_ALLOC 0x148 #undef MC_CMD_0x148_PRIVILEGE_CTG -#define MC_CMD_0x148_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x148_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_ENCAP_HEADER_ALLOC_IN msgrequest */ #define MC_CMD_MAE_ENCAP_HEADER_ALLOC_IN_LENMIN 4 @@ -27103,7 +27114,7 @@ #define MC_CMD_MAE_ENCAP_HEADER_UPDATE 0x149 #undef MC_CMD_0x149_PRIVILEGE_CTG -#define MC_CMD_0x149_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x149_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_ENCAP_HEADER_UPDATE_IN msgrequest */ #define MC_CMD_MAE_ENCAP_HEADER_UPDATE_IN_LENMIN 8 @@ -27132,7 +27143,7 @@ #define MC_CMD_MAE_ENCAP_HEADER_FREE 0x14a #undef MC_CMD_0x14a_PRIVILEGE_CTG -#define MC_CMD_0x14a_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x14a_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_ENCAP_HEADER_FREE_IN msgrequest */ #define MC_CMD_MAE_ENCAP_HEADER_FREE_IN_LENMIN 4 @@ -27170,7 +27181,7 @@ #define MC_CMD_MAE_MAC_ADDR_ALLOC 0x15e #undef MC_CMD_0x15e_PRIVILEGE_CTG -#define MC_CMD_0x15e_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x15e_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_MAC_ADDR_ALLOC_IN msgrequest */ #define MC_CMD_MAE_MAC_ADDR_ALLOC_IN_LEN 6 @@ -27195,7 +27206,7 @@ #define MC_CMD_MAE_MAC_ADDR_FREE 0x15f #undef MC_CMD_0x15f_PRIVILEGE_CTG -#define MC_CMD_0x15f_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x15f_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_MAC_ADDR_FREE_IN msgrequest */ #define MC_CMD_MAE_MAC_ADDR_FREE_IN_LENMIN 4 @@ -27232,7 +27243,7 @@ #define MC_CMD_MAE_ACTION_SET_ALLOC 0x14d #undef MC_CMD_0x14d_PRIVILEGE_CTG -#define MC_CMD_0x14d_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x14d_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_ACTION_SET_ALLOC_IN msgrequest */ #define MC_CMD_MAE_ACTION_SET_ALLOC_IN_LEN 44 @@ -27317,7 +27328,7 @@ #define MC_CMD_MAE_ACTION_SET_FREE 0x14e #undef MC_CMD_0x14e_PRIVILEGE_CTG -#define MC_CMD_0x14e_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x14e_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_ACTION_SET_FREE_IN msgrequest */ #define MC_CMD_MAE_ACTION_SET_FREE_IN_LENMIN 4 @@ -27355,7 +27366,7 @@ #define MC_CMD_MAE_ACTION_SET_LIST_ALLOC 0x14f #undef MC_CMD_0x14f_PRIVILEGE_CTG -#define MC_CMD_0x14f_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x14f_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_ACTION_SET_LIST_ALLOC_IN msgrequest */ #define MC_CMD_MAE_ACTION_SET_LIST_ALLOC_IN_LENMIN 8 @@ -27398,7 +27409,7 @@ #define MC_CMD_MAE_ACTION_SET_LIST_FREE 0x150 #undef MC_CMD_0x150_PRIVILEGE_CTG -#define MC_CMD_0x150_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x150_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_ACTION_SET_LIST_FREE_IN msgrequest */ #define MC_CMD_MAE_ACTION_SET_LIST_FREE_IN_LENMIN 4 @@ -27435,7 +27446,7 @@ #define MC_CMD_MAE_OUTER_RULE_INSERT 0x15a #undef MC_CMD_0x15a_PRIVILEGE_CTG -#define MC_CMD_0x15a_PRIVILEGE_CTG SRIOV_CTG_ADMIN +#define MC_CMD_0x15a_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_OUTER_RULE_INSERT_IN msgrequest */ #define MC_CMD_MAE_OUTER_RULE_INSERT_IN_LENMIN 16 @@ -27495,7 +27506,7 @@ #define MC_CMD_MAE_OUTER_RULE_REMOVE 0x15b #undef MC_CMD_0x15b_PRIVILEGE_CTG -#define MC_CMD_0x15b_PRIVILEGE_CTG SRIOV_CTG_ADMIN +#define MC_CMD_0x15b_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_OUTER_RULE_REMOVE_IN msgrequest */ #define 
MC_CMD_MAE_OUTER_RULE_REMOVE_IN_LENMIN 4 @@ -27577,7 +27588,7 @@ #define MC_CMD_MAE_ACTION_RULE_INSERT 0x15c #undef MC_CMD_0x15c_PRIVILEGE_CTG -#define MC_CMD_0x15c_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x15c_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_ACTION_RULE_INSERT_IN msgrequest */ #define MC_CMD_MAE_ACTION_RULE_INSERT_IN_LENMIN 28 @@ -27618,7 +27629,7 @@ #define MC_CMD_MAE_ACTION_RULE_UPDATE 0x15d #undef MC_CMD_0x15d_PRIVILEGE_CTG -#define MC_CMD_0x15d_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x15d_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_ACTION_RULE_UPDATE_IN msgrequest */ #define MC_CMD_MAE_ACTION_RULE_UPDATE_IN_LEN 24 @@ -27639,7 +27650,7 @@ #define MC_CMD_MAE_ACTION_RULE_DELETE 0x155 #undef MC_CMD_0x155_PRIVILEGE_CTG -#define MC_CMD_0x155_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x155_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_ACTION_RULE_DELETE_IN msgrequest */ #define MC_CMD_MAE_ACTION_RULE_DELETE_IN_LENMIN 4 @@ -27696,7 +27707,7 @@ #define MC_CMD_MAE_MPORT_ALLOC 0x163 #undef MC_CMD_0x163_PRIVILEGE_CTG -#define MC_CMD_0x163_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x163_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_MPORT_ALLOC_IN msgrequest */ #define MC_CMD_MAE_MPORT_ALLOC_IN_LEN 20 @@ -27803,7 +27814,7 @@ #define MC_CMD_MAE_MPORT_FREE 0x164 #undef MC_CMD_0x164_PRIVILEGE_CTG -#define MC_CMD_0x164_PRIVILEGE_CTG SRIOV_CTG_GENERAL +#define MC_CMD_0x164_PRIVILEGE_CTG SRIOV_CTG_MAE /* MC_CMD_MAE_MPORT_FREE_IN msgrequest */ #define MC_CMD_MAE_MPORT_FREE_IN_LEN 4 @@ -27907,6 +27918,9 @@ /* MC_CMD_MAE_MPORT_ENUMERATE */ #define MC_CMD_MAE_MPORT_ENUMERATE 0x17c +#undef MC_CMD_0x17c_PRIVILEGE_CTG + +#define MC_CMD_0x17c_PRIVILEGE_CTG SRIOV_CTG_GENERAL /* MC_CMD_MAE_MPORT_ENUMERATE_IN msgrequest */ #define MC_CMD_MAE_MPORT_ENUMERATE_IN_LEN 0 diff --git a/dpdk/drivers/compress/isal/meson.build b/dpdk/drivers/compress/isal/meson.build index 5ee17e28f5..d847c2ea6f 100644 --- a/dpdk/drivers/compress/isal/meson.build +++ b/dpdk/drivers/compress/isal/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright 2018 Intel Corporation -dep = dependency('libisal', required: false) +dep = dependency('libisal', required: false, method: 'pkg-config') if not dep.found() build = false reason = 'missing dependency, "libisal"' diff --git a/dpdk/drivers/compress/zlib/meson.build b/dpdk/drivers/compress/zlib/meson.build index b19a6d2b16..82cf0dddd6 100644 --- a/dpdk/drivers/compress/zlib/meson.build +++ b/dpdk/drivers/compress/zlib/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2018 Cavium Networks -dep = dependency('zlib', required: false) +dep = dependency('zlib', required: false, method: 'pkg-config') if not dep.found() build = false reason = 'missing dependency, "zlib"' diff --git a/dpdk/drivers/crypto/armv8/meson.build b/dpdk/drivers/crypto/armv8/meson.build index 3289a2adca..027173bc1e 100644 --- a/dpdk/drivers/crypto/armv8/meson.build +++ b/dpdk/drivers/crypto/armv8/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2019 Arm Limited -dep = dependency('libAArch64crypto', required: false) +dep = dependency('libAArch64crypto', required: false, method: 'pkg-config') if not dep.found() build = false reason = 'missing dependency, "libAArch64crypto"' diff --git a/dpdk/drivers/crypto/ccp/meson.build b/dpdk/drivers/crypto/ccp/meson.build index a0e0b379eb..ff66427ae8 100644 --- a/dpdk/drivers/crypto/ccp/meson.build +++ b/dpdk/drivers/crypto/ccp/meson.build @@ -5,7 +5,7 @@ if not is_linux build = 
false reason = 'only supported on Linux' endif -dep = dependency('libcrypto', required: false) +dep = dependency('libcrypto', required: false, method: 'pkg-config') if not dep.found() build = false reason = 'missing dependency, "libcrypto"' diff --git a/dpdk/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c b/dpdk/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c index 6ff0d833e9..5d91bf910e 100644 --- a/dpdk/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c +++ b/dpdk/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c @@ -1842,7 +1842,7 @@ dpaa2_sec_cipher_init(struct rte_cryptodev *dev, session->ctxt_type = DPAA2_SEC_CIPHER; session->cipher_key.data = rte_zmalloc(NULL, xform->cipher.key.length, RTE_CACHE_LINE_SIZE); - if (session->cipher_key.data == NULL) { + if (session->cipher_key.data == NULL && xform->cipher.key.length > 0) { DPAA2_SEC_ERR("No Memory for cipher key"); rte_free(priv); return -ENOMEM; diff --git a/dpdk/drivers/crypto/openssl/meson.build b/dpdk/drivers/crypto/openssl/meson.build index d9ac698971..47fb2bb751 100644 --- a/dpdk/drivers/crypto/openssl/meson.build +++ b/dpdk/drivers/crypto/openssl/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -dep = dependency('libcrypto', required: false) +dep = dependency('libcrypto', required: false, method: 'pkg-config') if not dep.found() build = false reason = 'missing dependency, "libcrypto"' diff --git a/dpdk/drivers/crypto/qat/meson.build b/dpdk/drivers/crypto/qat/meson.build index bc90ec44cc..92e0ed6565 100644 --- a/dpdk/drivers/crypto/qat/meson.build +++ b/dpdk/drivers/crypto/qat/meson.build @@ -5,7 +5,7 @@ # driver which comes later. Here we just add our sources files to the list build = false reason = '' # sentinal value to suppress printout -dep = dependency('libcrypto', required: false) +dep = dependency('libcrypto', required: false, method: 'pkg-config') qat_includes += include_directories('.') qat_deps += 'cryptodev' qat_deps += 'net' diff --git a/dpdk/drivers/crypto/qat/qat_asym_pmd.c b/dpdk/drivers/crypto/qat/qat_asym_pmd.c index ed8a2a50b4..a2c8aca2c1 100644 --- a/dpdk/drivers/crypto/qat/qat_asym_pmd.c +++ b/dpdk/drivers/crypto/qat/qat_asym_pmd.c @@ -251,6 +251,10 @@ qat_asym_dev_create(struct qat_pci_device *qat_pci_dev, struct rte_cryptodev *cryptodev; struct qat_asym_dev_private *internals; + snprintf(name, RTE_CRYPTODEV_NAME_MAX_LEN, "%s_%s", + qat_pci_dev->name, "asym"); + QAT_LOG(DEBUG, "Creating QAT ASYM device %s\n", name); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { qat_pci_dev->qat_asym_driver_id = qat_asym_driver_id; @@ -264,10 +268,6 @@ qat_asym_dev_create(struct qat_pci_device *qat_pci_dev, } } - snprintf(name, RTE_CRYPTODEV_NAME_MAX_LEN, "%s_%s", - qat_pci_dev->name, "asym"); - QAT_LOG(DEBUG, "Creating QAT ASYM device %s\n", name); - /* Populate subset device to use in cryptodev device creation */ qat_dev_instance->asym_rte_dev.driver = &cryptodev_qat_asym_driver; qat_dev_instance->asym_rte_dev.numa_node = diff --git a/dpdk/drivers/crypto/qat/qat_sym_hw_dp.c b/dpdk/drivers/crypto/qat/qat_sym_hw_dp.c index dfbbad59b6..01afb883e3 100644 --- a/dpdk/drivers/crypto/qat/qat_sym_hw_dp.c +++ b/dpdk/drivers/crypto/qat/qat_sym_hw_dp.c @@ -558,55 +558,6 @@ enqueue_one_chain_job(struct qat_sym_session *ctx, case ICP_QAT_HW_AUTH_ALGO_KASUMI_F9: case ICP_QAT_HW_AUTH_ALGO_ZUC_3G_128_EIA3: auth_param->u1.aad_adr = auth_iv->iova; - - if (unlikely(n_data_vecs > 1)) { - int auth_end_get = 0, i = n_data_vecs - 1; - struct rte_crypto_vec *cvec = &data[0]; - uint32_t len; - - len = 
data_len - ofs.ofs.auth.tail; - - while (i >= 0 && len > 0) { - if (cvec->len >= len) { - auth_iova_end = cvec->iova + - (cvec->len - len); - len = 0; - auth_end_get = 1; - break; - } - len -= cvec->len; - i--; - cvec++; - } - - if (unlikely(auth_end_get == 0)) - return -1; - } else - auth_iova_end = data[0].iova + auth_param->auth_off + - auth_param->auth_len; - - /* Then check if digest-encrypted conditions are met */ - if ((auth_param->auth_off + auth_param->auth_len < - cipher_param->cipher_offset + - cipher_param->cipher_length) && - (digest->iova == auth_iova_end)) { - /* Handle partial digest encryption */ - if (cipher_param->cipher_offset + - cipher_param->cipher_length < - auth_param->auth_off + - auth_param->auth_len + - ctx->digest_length) - req->comn_mid.dst_length = - req->comn_mid.src_length = - auth_param->auth_off + - auth_param->auth_len + - ctx->digest_length; - struct icp_qat_fw_comn_req_hdr *header = - &req->comn_hdr; - ICP_QAT_FW_LA_DIGEST_IN_BUFFER_SET( - header->serv_specif_flags, - ICP_QAT_FW_LA_DIGEST_IN_BUFFER); - } break; case ICP_QAT_HW_AUTH_ALGO_GALOIS_128: case ICP_QAT_HW_AUTH_ALGO_GALOIS_64: @@ -615,6 +566,54 @@ enqueue_one_chain_job(struct qat_sym_session *ctx, break; } + if (unlikely(n_data_vecs > 1)) { + int auth_end_get = 0, i = n_data_vecs - 1; + struct rte_crypto_vec *cvec = &data[0]; + uint32_t len; + + len = data_len - ofs.ofs.auth.tail; + + while (i >= 0 && len > 0) { + if (cvec->len >= len) { + auth_iova_end = cvec->iova + len; + len = 0; + auth_end_get = 1; + break; + } + len -= cvec->len; + i--; + cvec++; + } + + if (unlikely(auth_end_get == 0)) + return -1; + } else + auth_iova_end = data[0].iova + auth_param->auth_off + + auth_param->auth_len; + + /* Then check if digest-encrypted conditions are met */ + if ((auth_param->auth_off + auth_param->auth_len < + cipher_param->cipher_offset + + cipher_param->cipher_length) && + (digest->iova == auth_iova_end)) { + /* Handle partial digest encryption */ + if (cipher_param->cipher_offset + + cipher_param->cipher_length < + auth_param->auth_off + + auth_param->auth_len + + ctx->digest_length) + req->comn_mid.dst_length = + req->comn_mid.src_length = + auth_param->auth_off + + auth_param->auth_len + + ctx->digest_length; + struct icp_qat_fw_comn_req_hdr *header = + &req->comn_hdr; + ICP_QAT_FW_LA_DIGEST_IN_BUFFER_SET( + header->serv_specif_flags, + ICP_QAT_FW_LA_DIGEST_IN_BUFFER); + } + return 0; } diff --git a/dpdk/drivers/crypto/qat/qat_sym_pmd.c b/dpdk/drivers/crypto/qat/qat_sym_pmd.c index 6da9512fe4..93666fdade 100644 --- a/dpdk/drivers/crypto/qat/qat_sym_pmd.c +++ b/dpdk/drivers/crypto/qat/qat_sym_pmd.c @@ -330,6 +330,10 @@ qat_sym_dev_create(struct qat_pci_device *qat_pci_dev, const struct rte_cryptodev_capabilities *capabilities; uint64_t capa_size; + snprintf(name, RTE_CRYPTODEV_NAME_MAX_LEN, "%s_%s", + qat_pci_dev->name, "sym"); + QAT_LOG(DEBUG, "Creating QAT SYM device %s", name); + /* * All processes must use same driver id so they can share sessions. 
* Store driver_id so we can validate that all processes have the same @@ -349,10 +353,6 @@ qat_sym_dev_create(struct qat_pci_device *qat_pci_dev, } } - snprintf(name, RTE_CRYPTODEV_NAME_MAX_LEN, "%s_%s", - qat_pci_dev->name, "sym"); - QAT_LOG(DEBUG, "Creating QAT SYM device %s", name); - /* Populate subset device to use in cryptodev device creation */ qat_dev_instance->sym_rte_dev.driver = &cryptodev_qat_sym_driver; qat_dev_instance->sym_rte_dev.numa_node = diff --git a/dpdk/drivers/event/dlb/dlb.c b/dpdk/drivers/event/dlb/dlb.c index 0c95c4793d..e2d5d43da7 100644 --- a/dpdk/drivers/event/dlb/dlb.c +++ b/dpdk/drivers/event/dlb/dlb.c @@ -1847,7 +1847,7 @@ dlb_hw_create_dir_queue(struct dlb_eventdev *dlb, int32_t qm_port_id) { struct dlb_hw_dev *handle = &dlb->qm_instance; struct dlb_create_dir_queue_args cfg; - struct dlb_cmd_response response; + struct dlb_cmd_response response = {0}; int32_t ret; cfg.response = (uintptr_t)&response; @@ -3569,7 +3569,7 @@ dlb_get_ldb_queue_depth(struct dlb_eventdev *dlb, { struct dlb_hw_dev *handle = &dlb->qm_instance; struct dlb_get_ldb_queue_depth_args cfg; - struct dlb_cmd_response response; + struct dlb_cmd_response response = {0}; int ret; cfg.queue_id = queue->qm_queue.id; @@ -3591,7 +3591,7 @@ dlb_get_dir_queue_depth(struct dlb_eventdev *dlb, { struct dlb_hw_dev *handle = &dlb->qm_instance; struct dlb_get_dir_queue_depth_args cfg; - struct dlb_cmd_response response; + struct dlb_cmd_response response = {0}; int ret; cfg.queue_id = queue->qm_queue.id; diff --git a/dpdk/drivers/net/af_xdp/meson.build b/dpdk/drivers/net/af_xdp/meson.build index fead8dd99f..ae7355c8c9 100644 --- a/dpdk/drivers/net/af_xdp/meson.build +++ b/dpdk/drivers/net/af_xdp/meson.build @@ -3,14 +3,15 @@ sources = files('rte_eth_af_xdp.c') -bpf_dep = dependency('libbpf', required: false) +bpf_dep = dependency('libbpf', required: false, method: 'pkg-config') if not bpf_dep.found() bpf_dep = cc.find_library('bpf', required: false) endif if bpf_dep.found() and cc.has_header('bpf/xsk.h') and cc.has_header('linux/if_xdp.h') ext_deps += bpf_dep - bpf_ver_dep = dependency('libbpf', version : '>=0.2.0', required: false) + bpf_ver_dep = dependency('libbpf', version : '>=0.2.0', + required: false, method: 'pkg-config') if bpf_ver_dep.found() dpdk_conf.set('RTE_LIBRTE_AF_XDP_PMD_SHARED_UMEM', 1) endif diff --git a/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c b/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c index 2c7892bd7e..7fc70df713 100644 --- a/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c +++ b/dpdk/drivers/net/af_xdp/rte_eth_af_xdp.c @@ -840,7 +840,6 @@ xdp_umem_destroy(struct xsk_umem_info *umem) #endif rte_free(umem); - umem = NULL; } static int diff --git a/dpdk/drivers/net/avp/avp_ethdev.c b/dpdk/drivers/net/avp/avp_ethdev.c index 5f8187b905..f531e03c02 100644 --- a/dpdk/drivers/net/avp/avp_ethdev.c +++ b/dpdk/drivers/net/avp/avp_ethdev.c @@ -267,7 +267,7 @@ avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request) break; } - if ((count < 1) && (retry == 0)) { + if (retry == 0) { PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n", request->req_id); ret = -ETIME; diff --git a/dpdk/drivers/net/axgbe/axgbe_ethdev.c b/dpdk/drivers/net/axgbe/axgbe_ethdev.c index cfe6aba73a..9cd056d04a 100644 --- a/dpdk/drivers/net/axgbe/axgbe_ethdev.c +++ b/dpdk/drivers/net/axgbe/axgbe_ethdev.c @@ -1439,7 +1439,7 @@ static int axgb_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) dev->data->port_id); return -EBUSY; } - if (frame_size > RTE_ETHER_MAX_LEN) { + if (frame_size > 
AXGBE_ETH_MAX_LEN) { dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; val = 1; diff --git a/dpdk/drivers/net/axgbe/axgbe_ethdev.h b/dpdk/drivers/net/axgbe/axgbe_ethdev.h index 35a8476466..ac9210f2c8 100644 --- a/dpdk/drivers/net/axgbe/axgbe_ethdev.h +++ b/dpdk/drivers/net/axgbe/axgbe_ethdev.h @@ -125,6 +125,12 @@ /* MDIO port types */ #define AXGMAC_MAX_C22_PORT 3 +/* The max frame size with default MTU */ +#define AXGBE_ETH_MAX_LEN ( \ + RTE_ETHER_MTU + \ + RTE_ETHER_HDR_LEN + \ + RTE_ETHER_CRC_LEN) + /* Helper macro for descriptor handling * Always use AXGBE_GET_DESC_DATA to access the descriptor data * since the index is free-running and needs to be and-ed diff --git a/dpdk/drivers/net/bnx2x/meson.build b/dpdk/drivers/net/bnx2x/meson.build index 4892bb234c..9801697949 100644 --- a/dpdk/drivers/net/bnx2x/meson.build +++ b/dpdk/drivers/net/bnx2x/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2018 Intel Corporation -dep = dependency('zlib', required: false) +dep = dependency('zlib', required: false, method: 'pkg-config') build = dep.found() reason = 'missing dependency, "zlib"' ext_deps += dep diff --git a/dpdk/drivers/net/bnxt/bnxt.h b/dpdk/drivers/net/bnxt/bnxt.h index 90ced972c0..b912fd8564 100644 --- a/dpdk/drivers/net/bnxt/bnxt.h +++ b/dpdk/drivers/net/bnxt/bnxt.h @@ -389,7 +389,7 @@ struct bnxt_coal { #define DBR_TYPE_NQ (0xaULL << 60) #define DBR_TYPE_NQ_ARM (0xbULL << 60) -#define BNXT_RSS_TBL_SIZE_THOR 512 +#define BNXT_RSS_TBL_SIZE_THOR 512U #define BNXT_RSS_ENTRIES_PER_CTX_THOR 64 #define BNXT_MAX_RSS_CTXTS_THOR \ (BNXT_RSS_TBL_SIZE_THOR / BNXT_RSS_ENTRIES_PER_CTX_THOR) @@ -583,6 +583,7 @@ struct bnxt_rep_info { DEV_RX_OFFLOAD_UDP_CKSUM | \ DEV_RX_OFFLOAD_TCP_CKSUM | \ DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | \ + DEV_RX_OFFLOAD_OUTER_UDP_CKSUM | \ DEV_RX_OFFLOAD_JUMBO_FRAME | \ DEV_RX_OFFLOAD_KEEP_CRC | \ DEV_RX_OFFLOAD_VLAN_EXTEND | \ @@ -752,19 +753,6 @@ struct bnxt { uint16_t max_tx_rings; uint16_t max_rx_rings; #define MAX_STINGRAY_RINGS 128U -/* For sake of symmetry, max Tx rings == max Rx rings, one stat ctx for each */ -#define BNXT_MAX_RX_RINGS(bp) \ - (BNXT_STINGRAY(bp) ? RTE_MIN(RTE_MIN(bp->max_rx_rings / 2U, \ - MAX_STINGRAY_RINGS), \ - bp->max_stat_ctx / 2U) : \ - RTE_MIN(bp->max_rx_rings / 2U, \ - bp->max_stat_ctx / 2U)) -#define BNXT_MAX_TX_RINGS(bp) \ - (RTE_MIN((bp)->max_tx_rings, BNXT_MAX_RX_RINGS(bp))) - -#define BNXT_MAX_RINGS(bp) \ - (RTE_MIN((((bp)->max_cp_rings - BNXT_NUM_ASYNC_CPR(bp)) / 2U), \ - BNXT_MAX_TX_RINGS(bp))) #define BNXT_MAX_VF_REP_RINGS 8 @@ -823,6 +811,34 @@ struct bnxt { uint16_t tx_cfa_action; }; +static +inline uint16_t bnxt_max_rings(struct bnxt *bp) +{ + uint16_t max_tx_rings = bp->max_tx_rings; + uint16_t max_rx_rings = bp->max_rx_rings; + uint16_t max_cp_rings = bp->max_cp_rings; + uint16_t max_rings; + + /* For the sake of symmetry: + * max Tx rings == max Rx rings, one stat ctx for each. 
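+ * Each Rx/Tx ring pair also consumes a pair of + * completion rings, so the result is further + * capped by half of the completion rings that + * remain once the async CPRs are reserved.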
+ */ + if (BNXT_STINGRAY(bp)) { + max_rx_rings = RTE_MIN(RTE_MIN(max_rx_rings / 2U, + MAX_STINGRAY_RINGS), + bp->max_stat_ctx / 2U); + } else { + max_rx_rings = RTE_MIN(max_rx_rings / 2U, + bp->max_stat_ctx / 2U); + } + + max_tx_rings = RTE_MIN(max_tx_rings, max_rx_rings); + if (max_cp_rings > BNXT_NUM_ASYNC_CPR(bp)) + max_cp_rings -= BNXT_NUM_ASYNC_CPR(bp); + max_rings = RTE_MIN(max_cp_rings / 2U, max_tx_rings); + + return max_rings; +} + #define BNXT_FC_TIMER 1 /* Timer freq in Sec Flow Counters */ /** diff --git a/dpdk/drivers/net/bnxt/bnxt_ethdev.c b/dpdk/drivers/net/bnxt/bnxt_ethdev.c index 81c8f8d79d..3aa346d45c 100644 --- a/dpdk/drivers/net/bnxt/bnxt_ethdev.c +++ b/dpdk/drivers/net/bnxt/bnxt_ethdev.c @@ -207,12 +207,15 @@ int is_bnxt_in_error(struct bnxt *bp) static uint16_t bnxt_rss_ctxts(const struct bnxt *bp) { + unsigned int num_rss_rings = RTE_MIN(bp->rx_nr_rings, + BNXT_RSS_TBL_SIZE_THOR); + if (!BNXT_CHIP_THOR(bp)) return 1; - return RTE_ALIGN_MUL_CEIL(bp->rx_nr_rings, + return RTE_ALIGN_MUL_CEIL(num_rss_rings, BNXT_RSS_ENTRIES_PER_CTX_THOR) / - BNXT_RSS_ENTRIES_PER_CTX_THOR; + BNXT_RSS_ENTRIES_PER_CTX_THOR; } uint16_t bnxt_rss_hash_tbl_size(const struct bnxt *bp) @@ -424,6 +427,14 @@ static int bnxt_setup_one_vnic(struct bnxt *bp, uint16_t vnic_id) if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_RSS) { int j, nr_ctxs = bnxt_rss_ctxts(bp); + if (bp->rx_nr_rings > BNXT_RSS_TBL_SIZE_THOR) { + PMD_DRV_LOG(ERR, "RxQ cnt %d > reta_size %d\n", + bp->rx_nr_rings, BNXT_RSS_TBL_SIZE_THOR); + PMD_DRV_LOG(ERR, + "Only queues 0-%d will be in RSS table\n", + BNXT_RSS_TBL_SIZE_THOR - 1); + } + rc = 0; for (j = 0; j < nr_ctxs; j++) { rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, j); @@ -678,7 +689,7 @@ static int bnxt_update_phy_setting(struct bnxt *bp) return rc; } -static int bnxt_init_chip(struct bnxt *bp) +static int bnxt_start_nic(struct bnxt *bp) { struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(bp->eth_dev); struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; @@ -909,7 +920,7 @@ static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev, if (BNXT_PF(bp)) dev_info->max_vfs = pdev->max_vfs; - max_rx_rings = BNXT_MAX_RINGS(bp); + max_rx_rings = bnxt_max_rings(bp); /* For the sake of symmetry, max_rx_queues = max_tx_queues */ dev_info->max_rx_queues = max_rx_rings; dev_info->max_tx_queues = max_rx_rings; @@ -1060,13 +1071,6 @@ static int bnxt_dev_configure_op(struct rte_eth_dev *eth_dev) } pthread_mutex_unlock(&bp->def_cp_lock); - } else { - /* legacy driver needs to get updated values */ - rc = bnxt_hwrm_func_qcaps(bp); - if (rc) { - PMD_DRV_LOG(ERR, "hwrm func qcaps fail:%d\n", rc); - return rc; - } } /* Inherit new configurations */ @@ -1143,6 +1147,9 @@ static int bnxt_scattered_rx(struct rte_eth_dev *eth_dev) if (eth_dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) return 1; + if (eth_dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_TCP_LRO) + return 1; + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { struct bnxt_rx_queue *rxq = eth_dev->data->rx_queues[i]; @@ -1175,6 +1182,7 @@ bnxt_receive_function(struct rte_eth_dev *eth_dev) DEV_RX_OFFLOAD_UDP_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_RX_OFFLOAD_OUTER_UDP_CKSUM | DEV_RX_OFFLOAD_RSS_HASH | DEV_RX_OFFLOAD_VLAN_FILTER)) && !BNXT_TRUFLOW_EN(bp) && BNXT_NUM_ASYNC_CPR(bp) && @@ -1248,81 +1256,6 @@ static int bnxt_handle_if_change_status(struct bnxt *bp) return rc; } -static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) -{ - struct bnxt *bp = eth_dev->data->dev_private; - 
uint64_t rx_offloads = eth_dev->data->dev_conf.rxmode.offloads; - int vlan_mask = 0; - int rc, retry_cnt = BNXT_IF_CHANGE_RETRY_COUNT; - - if (!eth_dev->data->nb_tx_queues || !eth_dev->data->nb_rx_queues) { - PMD_DRV_LOG(ERR, "Queues are not configured yet!\n"); - return -EINVAL; - } - - if (bp->rx_cp_nr_rings > RTE_ETHDEV_QUEUE_STAT_CNTRS) { - PMD_DRV_LOG(ERR, - "RxQ cnt %d > RTE_ETHDEV_QUEUE_STAT_CNTRS %d\n", - bp->rx_cp_nr_rings, RTE_ETHDEV_QUEUE_STAT_CNTRS); - } - - do { - rc = bnxt_hwrm_if_change(bp, true); - if (rc == 0 || rc != -EAGAIN) - break; - - rte_delay_ms(BNXT_IF_CHANGE_RETRY_INTERVAL); - } while (retry_cnt--); - - if (rc) - return rc; - - if (bp->flags & BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE) { - rc = bnxt_handle_if_change_status(bp); - if (rc) - return rc; - } - - bnxt_enable_int(bp); - - rc = bnxt_init_chip(bp); - if (rc) - goto error; - - eth_dev->data->scattered_rx = bnxt_scattered_rx(eth_dev); - eth_dev->data->dev_started = 1; - - bnxt_link_update_op(eth_dev, 1); - - if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) - vlan_mask |= ETH_VLAN_FILTER_MASK; - if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP) - vlan_mask |= ETH_VLAN_STRIP_MASK; - rc = bnxt_vlan_offload_set_op(eth_dev, vlan_mask); - if (rc) - goto error; - - /* Initialize bnxt ULP port details */ - rc = bnxt_ulp_port_init(bp); - if (rc) - goto error; - - eth_dev->rx_pkt_burst = bnxt_receive_function(eth_dev); - eth_dev->tx_pkt_burst = bnxt_transmit_function(eth_dev); - - bnxt_schedule_fw_health_check(bp); - - return 0; - -error: - bnxt_shutdown_nic(bp); - bnxt_free_tx_mbufs(bp); - bnxt_free_rx_mbufs(bp); - bnxt_hwrm_if_change(bp, false); - eth_dev->data->dev_started = 0; - return rc; -} - static int bnxt_dev_set_link_up_op(struct rte_eth_dev *eth_dev) { struct bnxt *bp = eth_dev->data->dev_private; @@ -1429,31 +1362,98 @@ static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) return 0; } -static int bnxt_dev_close_op(struct rte_eth_dev *eth_dev) +static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) { struct bnxt *bp = eth_dev->data->dev_private; - int ret = 0; + uint64_t rx_offloads = eth_dev->data->dev_conf.rxmode.offloads; + int vlan_mask = 0; + int rc, retry_cnt = BNXT_IF_CHANGE_RETRY_COUNT; - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return 0; + if (!eth_dev->data->nb_tx_queues || !eth_dev->data->nb_rx_queues) { + PMD_DRV_LOG(ERR, "Queues are not configured yet!\n"); + return -EINVAL; + } - /* cancel the recovery handler before remove dev */ - rte_eal_alarm_cancel(bnxt_dev_reset_and_resume, (void *)bp); - rte_eal_alarm_cancel(bnxt_dev_recover, (void *)bp); - bnxt_cancel_fc_thread(bp); + if (bp->rx_cp_nr_rings > RTE_ETHDEV_QUEUE_STAT_CNTRS) + PMD_DRV_LOG(ERR, + "RxQ cnt %d > RTE_ETHDEV_QUEUE_STAT_CNTRS %d\n", + bp->rx_cp_nr_rings, RTE_ETHDEV_QUEUE_STAT_CNTRS); - if (eth_dev->data->dev_started) - ret = bnxt_dev_stop_op(eth_dev); + do { + rc = bnxt_hwrm_if_change(bp, true); + if (rc == 0 || rc != -EAGAIN) + break; - bnxt_free_switch_domain(bp); + rte_delay_ms(BNXT_IF_CHANGE_RETRY_INTERVAL); + } while (retry_cnt--); - bnxt_uninit_resources(bp, false); + if (rc) + return rc; + + if (bp->flags & BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE) { + rc = bnxt_handle_if_change_status(bp); + if (rc) + return rc; + } + + bnxt_enable_int(bp); + + eth_dev->data->scattered_rx = bnxt_scattered_rx(eth_dev); + + rc = bnxt_start_nic(bp); + if (rc) + goto error; + + eth_dev->data->dev_started = 1; + + bnxt_link_update_op(eth_dev, 1); + + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) + vlan_mask |= ETH_VLAN_FILTER_MASK; + if 
(rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP) + vlan_mask |= ETH_VLAN_STRIP_MASK; + rc = bnxt_vlan_offload_set_op(eth_dev, vlan_mask); + if (rc) + goto error; + + /* Initialize bnxt ULP port details */ + rc = bnxt_ulp_port_init(bp); + if (rc) + goto error; + + eth_dev->rx_pkt_burst = bnxt_receive_function(eth_dev); + eth_dev->tx_pkt_burst = bnxt_transmit_function(eth_dev); + + bnxt_schedule_fw_health_check(bp); + + return 0; + +error: + bnxt_dev_stop_op(eth_dev); + return rc; +} +static void +bnxt_uninit_locks(struct bnxt *bp) +{ + pthread_mutex_destroy(&bp->flow_lock); + pthread_mutex_destroy(&bp->def_cp_lock); + pthread_mutex_destroy(&bp->health_check_lock); + if (bp->rep_info) { + pthread_mutex_destroy(&bp->rep_info->vfr_lock); + pthread_mutex_destroy(&bp->rep_info->vfr_start_lock); + } +} + +static void bnxt_drv_uninit(struct bnxt *bp) +{ + bnxt_free_switch_domain(bp); bnxt_free_leds_info(bp); bnxt_free_cos_queues(bp); bnxt_free_link_info(bp); bnxt_free_pf_info(bp); bnxt_free_parent_info(bp); + bnxt_uninit_locks(bp); rte_memzone_free((const struct rte_memzone *)bp->tx_mem_zone); bp->tx_mem_zone = NULL; @@ -1464,6 +1464,27 @@ static int bnxt_dev_close_op(struct rte_eth_dev *eth_dev) rte_free(bp->grp_info); bp->grp_info = NULL; +} + +static int bnxt_dev_close_op(struct rte_eth_dev *eth_dev) +{ + struct bnxt *bp = eth_dev->data->dev_private; + int ret = 0; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + /* cancel the recovery handler before remove dev */ + rte_eal_alarm_cancel(bnxt_dev_reset_and_resume, (void *)bp); + rte_eal_alarm_cancel(bnxt_dev_recover, (void *)bp); + bnxt_cancel_fc_thread(bp); + + if (eth_dev->data->dev_started) + ret = bnxt_dev_stop_op(eth_dev); + + bnxt_uninit_resources(bp, false); + + bnxt_drv_uninit(bp); return ret; } @@ -1832,8 +1853,8 @@ static int bnxt_reta_update_op(struct rte_eth_dev *eth_dev, } } - bnxt_hwrm_vnic_rss_cfg(bp, vnic); - return 0; + rc = bnxt_hwrm_vnic_rss_cfg(bp, vnic); + return rc; } static int bnxt_reta_query_op(struct rte_eth_dev *eth_dev, @@ -1938,8 +1959,8 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev, memcpy(vnic->rss_hash_key, rss_conf->rss_key, rss_conf->rss_key_len); rss_config: - bnxt_hwrm_vnic_rss_cfg(bp, vnic); - return 0; + rc = bnxt_hwrm_vnic_rss_cfg(bp, vnic); + return rc; } static int bnxt_rss_hash_conf_get_op(struct rte_eth_dev *eth_dev, @@ -4032,7 +4053,7 @@ bool bnxt_stratus_device(struct bnxt *bp) } } -static int bnxt_init_board(struct rte_eth_dev *eth_dev) +static int bnxt_map_pci_bars(struct rte_eth_dev *eth_dev) { struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); struct bnxt *bp = eth_dev->data->dev_private; @@ -4657,7 +4678,11 @@ static int bnxt_map_hcomm_fw_status_reg(struct bnxt *bp) return 0; } -static int bnxt_init_fw(struct bnxt *bp) +/* This function gets the FW version along with the + * capabilities(MAX and current) of the function, vnic, + * error recovery, phy and other chip related info + */ +static int bnxt_get_config(struct bnxt *bp) { uint16_t mtu; int rc = 0; @@ -4727,8 +4752,10 @@ bnxt_init_locks(struct bnxt *bp) } err = pthread_mutex_init(&bp->def_cp_lock, NULL); - if (err) + if (err) { PMD_DRV_LOG(ERR, "Unable to initialize def_cp_lock\n"); + return err; + } err = pthread_mutex_init(&bp->health_check_lock, NULL); if (err) @@ -4740,7 +4767,7 @@ static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev) { int rc = 0; - rc = bnxt_init_fw(bp); + rc = bnxt_get_config(bp); if (rc) return rc; @@ -4797,10 +4824,6 @@ static int bnxt_init_resources(struct 
bnxt *bp, bool reconfig_dev) return rc; } - rc = bnxt_init_locks(bp); - if (rc) - return rc; - return 0; } @@ -5191,38 +5214,14 @@ static int bnxt_alloc_switch_domain(struct bnxt *bp) return rc; } -static int -bnxt_dev_init(struct rte_eth_dev *eth_dev, void *params __rte_unused) +/* Allocate and initialize various fields in bnxt struct that + * need to be allocated/destroyed only once in the lifetime of the driver + */ +static int bnxt_drv_init(struct rte_eth_dev *eth_dev) { struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); - static int version_printed; - struct bnxt *bp; - int rc; - - if (version_printed++ == 0) - PMD_DRV_LOG(INFO, "%s\n", bnxt_version); - - eth_dev->dev_ops = &bnxt_dev_ops; - eth_dev->rx_queue_count = bnxt_rx_queue_count_op; - eth_dev->rx_descriptor_status = bnxt_rx_descriptor_status_op; - eth_dev->tx_descriptor_status = bnxt_tx_descriptor_status_op; - eth_dev->rx_pkt_burst = &bnxt_recv_pkts; - eth_dev->tx_pkt_burst = &bnxt_xmit_pkts; - - /* - * For secondary processes, we don't initialise any further - * as primary has already done this work. - */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return 0; - - rte_eth_copy_pci_info(eth_dev, pci_dev); - eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; - - bp = eth_dev->data->dev_private; - - /* Parse dev arguments passed on when starting the DPDK application. */ - bnxt_parse_dev_args(bp, pci_dev->device.devargs); + struct bnxt *bp = eth_dev->data->dev_private; + int rc = 0; bp->flags &= ~BNXT_FLAG_RX_VECTOR_PKT_MODE; @@ -5254,7 +5253,7 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev, void *params __rte_unused) } } - rc = bnxt_init_board(eth_dev); + rc = bnxt_map_pci_bars(eth_dev); if (rc) { PMD_DRV_LOG(ERR, "Failed to initialize board rc: %x\n", rc); @@ -5263,27 +5262,75 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev, void *params __rte_unused) rc = bnxt_alloc_pf_info(bp); if (rc) - goto error_free; + return rc; rc = bnxt_alloc_link_info(bp); if (rc) - goto error_free; + return rc; rc = bnxt_alloc_parent_info(bp); if (rc) - goto error_free; + return rc; rc = bnxt_alloc_hwrm_resources(bp); if (rc) { PMD_DRV_LOG(ERR, "Failed to allocate hwrm resource rc: %x\n", rc); - goto error_free; + return rc; } rc = bnxt_alloc_leds_info(bp); if (rc) - goto error_free; + return rc; rc = bnxt_alloc_cos_queues(bp); + if (rc) + return rc; + + rc = bnxt_init_locks(bp); + if (rc) + return rc; + + rc = bnxt_alloc_switch_domain(bp); + if (rc) + return rc; + + return rc; +} + +static int +bnxt_dev_init(struct rte_eth_dev *eth_dev, void *params __rte_unused) +{ + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); + static int version_printed; + struct bnxt *bp; + int rc; + + if (version_printed++ == 0) + PMD_DRV_LOG(INFO, "%s\n", bnxt_version); + + eth_dev->dev_ops = &bnxt_dev_ops; + eth_dev->rx_queue_count = bnxt_rx_queue_count_op; + eth_dev->rx_descriptor_status = bnxt_rx_descriptor_status_op; + eth_dev->tx_descriptor_status = bnxt_tx_descriptor_status_op; + eth_dev->rx_pkt_burst = &bnxt_recv_pkts; + eth_dev->tx_pkt_burst = &bnxt_xmit_pkts; + + /* + * For secondary processes, we don't initialise any further + * as primary has already done this work. + */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; + + bp = eth_dev->data->dev_private; + + /* Parse dev arguments passed on when starting the DPDK application. 
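+ * (Hypothetical example: an EAL option such as + * -a <pci-bdf>,flow_xstat=1 would arrive here as the devargs + * string "flow_xstat=1".)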
*/ + bnxt_parse_dev_args(bp, pci_dev->device.devargs); + + rc = bnxt_drv_init(eth_dev); if (rc) goto error_free; @@ -5295,8 +5342,6 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev, void *params __rte_unused) if (rc) goto error_free; - bnxt_alloc_switch_domain(bp); - PMD_DRV_LOG(INFO, DRV_MODULE_NAME "found at mem %" PRIX64 ", node addr %pM\n", pci_dev->mem_resource[0].phys_addr, @@ -5378,18 +5423,6 @@ bnxt_free_error_recovery_info(struct bnxt *bp) bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY; } -static void -bnxt_uninit_locks(struct bnxt *bp) -{ - pthread_mutex_destroy(&bp->flow_lock); - pthread_mutex_destroy(&bp->def_cp_lock); - pthread_mutex_destroy(&bp->health_check_lock); - if (bp->rep_info) { - pthread_mutex_destroy(&bp->rep_info->vfr_lock); - pthread_mutex_destroy(&bp->rep_info->vfr_start_lock); - } -} - static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev) { @@ -5411,7 +5444,6 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev) bnxt_uninit_ctx_mem(bp); - bnxt_uninit_locks(bp); bnxt_free_flow_stats_info(bp); bnxt_free_rep_info(bp); rte_free(bp->ptp_cfg); diff --git a/dpdk/drivers/net/bnxt/bnxt_hwrm.c b/dpdk/drivers/net/bnxt/bnxt_hwrm.c index 24c33185b4..344895843b 100644 --- a/dpdk/drivers/net/bnxt/bnxt_hwrm.c +++ b/dpdk/drivers/net/bnxt/bnxt_hwrm.c @@ -718,6 +718,7 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) sizeof(bp->pf->vf_info[0]) * new_max_vfs, 0); if (bp->pf->vf_info == NULL) { PMD_DRV_LOG(ERR, "Alloc vf info fail\n"); + HWRM_UNLOCK(); return -ENOMEM; } bp->pf->max_vfs = new_max_vfs; @@ -1095,10 +1096,11 @@ int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout) else HWRM_CHECK_RESULT(); - PMD_DRV_LOG(INFO, "%d.%d.%d:%d.%d.%d\n", + PMD_DRV_LOG(INFO, "%d.%d.%d:%d.%d.%d.%d\n", resp->hwrm_intf_maj_8b, resp->hwrm_intf_min_8b, resp->hwrm_intf_upd_8b, resp->hwrm_fw_maj_8b, - resp->hwrm_fw_min_8b, resp->hwrm_fw_bld_8b); + resp->hwrm_fw_min_8b, resp->hwrm_fw_bld_8b, + resp->hwrm_fw_rsvd_8b); bp->fw_ver = (resp->hwrm_fw_maj_8b << 24) | (resp->hwrm_fw_min_8b << 16) | (resp->hwrm_fw_bld_8b << 8) | @@ -3455,6 +3457,35 @@ static int bnxt_update_max_resources(struct bnxt *bp, return 0; } +/* Update the PF resource values based on how many resources + * got allocated to it. 
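+ * That is: query HWRM_FUNC_QCFG once the PF resources have been + * carved out, then overwrite the cached bp->max_* capability values + * with the alloc_* counts the firmware actually granted.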
+ */ +static int bnxt_update_max_resources_pf_only(struct bnxt *bp) +{ + struct hwrm_func_qcfg_input req = {0}; + struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr; + int rc; + + /* Get the actual allocated values now */ + HWRM_PREP(&req, HWRM_FUNC_QCFG, BNXT_USE_CHIMP_MB); + req.fid = rte_cpu_to_le_16(0xffff); + rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB); + HWRM_CHECK_RESULT(); + + bp->max_rsscos_ctx = rte_le_to_cpu_16(resp->alloc_rsscos_ctx); + bp->max_stat_ctx = rte_le_to_cpu_16(resp->alloc_stat_ctx); + bp->max_cp_rings = rte_le_to_cpu_16(resp->alloc_cmpl_rings); + bp->max_tx_rings = rte_le_to_cpu_16(resp->alloc_tx_rings); + bp->max_rx_rings = rte_le_to_cpu_16(resp->alloc_rx_rings); + bp->max_l2_ctx = rte_le_to_cpu_16(resp->alloc_l2_ctx); + bp->max_ring_grps = rte_le_to_cpu_16(resp->alloc_hw_ring_grps); + bp->max_vnics = rte_le_to_cpu_16(resp->alloc_vnics); + + HWRM_UNLOCK(); + + return 0; +} + int bnxt_hwrm_func_qcfg_current_vf_vlan(struct bnxt *bp, int vf) { struct hwrm_func_qcfg_input req = {0}; @@ -3554,8 +3585,13 @@ int bnxt_hwrm_allocate_pf_only(struct bnxt *bp) HWRM_FUNC_CFG_INPUT_FLAGS_STD_TX_RING_MODE_DISABLE); bp->pf->func_cfg_flags |= HWRM_FUNC_CFG_INPUT_FLAGS_STD_TX_RING_MODE_DISABLE; + rc = bnxt_hwrm_pf_func_cfg(bp, &pf_resc); - rc = __bnxt_hwrm_func_qcaps(bp); + if (rc) + return rc; + + rc = bnxt_update_max_resources_pf_only(bp); + return rc; } @@ -4320,6 +4356,7 @@ int bnxt_get_nvram_directory(struct bnxt *bp, uint32_t len, uint8_t *data) return -ENOMEM; dma_handle = rte_malloc_virt2iova(buf); if (dma_handle == RTE_BAD_IOVA) { + rte_free(buf); PMD_DRV_LOG(ERR, "unable to map response address to physical memory\n"); return -ENOMEM; @@ -4354,6 +4391,7 @@ int bnxt_hwrm_get_nvram_item(struct bnxt *bp, uint32_t index, dma_handle = rte_malloc_virt2iova(buf); if (dma_handle == RTE_BAD_IOVA) { + rte_free(buf); PMD_DRV_LOG(ERR, "unable to map response address to physical memory\n"); return -ENOMEM; @@ -4407,6 +4445,7 @@ int bnxt_hwrm_flash_nvram(struct bnxt *bp, uint16_t dir_type, dma_handle = rte_malloc_virt2iova(buf); if (dma_handle == RTE_BAD_IOVA) { + rte_free(buf); PMD_DRV_LOG(ERR, "unable to map response address to physical memory\n"); return -ENOMEM; @@ -4892,37 +4931,35 @@ int bnxt_vnic_rss_configure(struct bnxt *bp, struct bnxt_vnic_info *vnic) { unsigned int rss_idx, fw_idx, i; + if (vnic->fw_vnic_id == INVALID_HW_RING_ID) + return 0; + if (!(vnic->rss_table && vnic->hash_type)) return 0; if (BNXT_CHIP_THOR(bp)) return bnxt_vnic_rss_configure_thor(bp, vnic); - if (vnic->fw_vnic_id == INVALID_HW_RING_ID) - return 0; - - if (vnic->rss_table && vnic->hash_type) { - /* - * Fill the RSS hash & redirection table with - * ring group ids for all VNICs - */ - for (rss_idx = 0, fw_idx = 0; rss_idx < HW_HASH_INDEX_SIZE; - rss_idx++, fw_idx++) { - for (i = 0; i < bp->rx_cp_nr_rings; i++) { - fw_idx %= bp->rx_cp_nr_rings; - if (vnic->fw_grp_ids[fw_idx] != - INVALID_HW_RING_ID) - break; - fw_idx++; - } - if (i == bp->rx_cp_nr_rings) - return 0; - vnic->rss_table[rss_idx] = vnic->fw_grp_ids[fw_idx]; + /* + * Fill the RSS hash & redirection table with + * ring group ids for all VNICs + */ + for (rss_idx = 0, fw_idx = 0; rss_idx < HW_HASH_INDEX_SIZE; + rss_idx++, fw_idx++) { + for (i = 0; i < bp->rx_cp_nr_rings; i++) { + fw_idx %= bp->rx_cp_nr_rings; + if (vnic->fw_grp_ids[fw_idx] != INVALID_HW_RING_ID) + break; + fw_idx++; } - return bnxt_hwrm_vnic_rss_cfg(bp, vnic); + + if (i == bp->rx_cp_nr_rings) + return 0; + + vnic->rss_table[rss_idx] = 
vnic->fw_grp_ids[fw_idx]; } - return 0; + return bnxt_hwrm_vnic_rss_cfg(bp, vnic); } static void bnxt_hwrm_set_coal_params(struct bnxt_coal *hw_coal, diff --git a/dpdk/drivers/net/bnxt/bnxt_reps.c b/dpdk/drivers/net/bnxt/bnxt_reps.c index e5ba0909b9..167c46ad41 100644 --- a/dpdk/drivers/net/bnxt/bnxt_reps.c +++ b/dpdk/drivers/net/bnxt/bnxt_reps.c @@ -65,7 +65,7 @@ bnxt_vfr_recv(uint16_t port_id, uint16_t queue_id, struct rte_mbuf *mbuf) /* Representor Rx ring full, drop pkt */ vfr_bp->rx_drop_bytes[que] += mbuf->pkt_len; vfr_bp->rx_drop_pkts[que]++; - rte_pktmbuf_free(mbuf); + rte_mbuf_raw_free(mbuf); } return 0; diff --git a/dpdk/drivers/net/bnxt/bnxt_ring.c b/dpdk/drivers/net/bnxt/bnxt_ring.c index aeb6cb6150..94cf7d3de2 100644 --- a/dpdk/drivers/net/bnxt/bnxt_ring.c +++ b/dpdk/drivers/net/bnxt/bnxt_ring.c @@ -568,6 +568,17 @@ int bnxt_alloc_hwrm_rx_ring(struct bnxt *bp, int queue_index) struct bnxt_rx_ring_info *rxr = rxq->rx_ring; int rc; + /* + * Storage for the cp ring is allocated based on worst-case + * usage, the actual size to be used by hw is computed here. + */ + cp_ring->ring_size = rxr->rx_ring_struct->ring_size * 2; + + if (bp->eth_dev->data->scattered_rx) + cp_ring->ring_size *= AGG_RING_SIZE_FACTOR; + + cp_ring->ring_mask = cp_ring->ring_size - 1; + rc = bnxt_alloc_cmpl_ring(bp, queue_index, cpr); if (rc) goto err_out; @@ -679,6 +690,17 @@ int bnxt_alloc_hwrm_rings(struct bnxt *bp) struct bnxt_ring *cp_ring = cpr->cp_ring_struct; struct bnxt_rx_ring_info *rxr = rxq->rx_ring; + /* + * Storage for the cp ring is allocated based on worst-case + * usage, the actual size to be used by hw is computed here. + */ + cp_ring->ring_size = rxr->rx_ring_struct->ring_size * 2; + + if (bp->eth_dev->data->scattered_rx) + cp_ring->ring_size *= AGG_RING_SIZE_FACTOR; + + cp_ring->ring_mask = cp_ring->ring_size - 1; + if (bnxt_alloc_cmpl_ring(bp, i, cpr)) goto err_out; diff --git a/dpdk/drivers/net/bnxt/bnxt_rxq.c b/dpdk/drivers/net/bnxt/bnxt_rxq.c index e0ec342162..8637559370 100644 --- a/dpdk/drivers/net/bnxt/bnxt_rxq.c +++ b/dpdk/drivers/net/bnxt/bnxt_rxq.c @@ -311,7 +311,7 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev, if (rc) return rc; - if (queue_idx >= BNXT_MAX_RINGS(bp)) { + if (queue_idx >= bnxt_max_rings(bp)) { PMD_DRV_LOG(ERR, "Cannot create Rx ring %d. Only %d rings available\n", queue_idx, bp->max_rx_rings); @@ -364,8 +364,9 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev, eth_dev->data->rx_queues[queue_idx] = rxq; /* Allocate RX ring hardware descriptors */ - if (bnxt_alloc_rings(bp, queue_idx, NULL, rxq, rxq->cp_ring, NULL, - "rxr")) { + rc = bnxt_alloc_rings(bp, queue_idx, NULL, rxq, rxq->cp_ring, NULL, + "rxr"); + if (rc) { PMD_DRV_LOG(ERR, "ring_dma_zone_reserve for rx_ring failed!\n"); goto err; @@ -557,12 +558,12 @@ int bnxt_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id) rc = bnxt_vnic_rss_configure(bp, vnic); } - if (BNXT_CHIP_THOR(bp)) { - /* Compute current number of active receive queues. */ - for (i = vnic->start_grp_id; i < vnic->end_grp_id; i++) - if (bp->rx_queues[i]->rx_started) - active_queue_cnt++; + /* Compute current number of active receive queues. */ + for (i = vnic->start_grp_id; i < vnic->end_grp_id; i++) + if (bp->rx_queues[i]->rx_started) + active_queue_cnt++; + if (BNXT_CHIP_THOR(bp)) { /* * For Thor, we need to ensure that the VNIC default receive * ring corresponds to an active receive queue. 
When no queue @@ -582,6 +583,22 @@ int bnxt_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id) /* Reconfigure default receive ring. */ bnxt_hwrm_vnic_cfg(bp, vnic); } + } else if (active_queue_cnt) { + /* + * If the queue being stopped is the current default queue and + * there are other active queues, pick one of them as the + * default and reconfigure the vnic. + */ + if (vnic->dflt_ring_grp == bp->grp_info[rx_queue_id].fw_grp_id) { + for (i = vnic->start_grp_id; i < vnic->end_grp_id; i++) { + if (bp->rx_queues[i]->rx_started) { + vnic->dflt_ring_grp = + bp->grp_info[i].fw_grp_id; + bnxt_hwrm_vnic_cfg(bp, vnic); + break; + } + } + } } if (rc == 0) diff --git a/dpdk/drivers/net/bnxt/bnxt_rxr.c b/dpdk/drivers/net/bnxt/bnxt_rxr.c index fdbe6f71ea..b28b7fb561 100644 --- a/dpdk/drivers/net/bnxt/bnxt_rxr.c +++ b/dpdk/drivers/net/bnxt/bnxt_rxr.c @@ -267,6 +267,7 @@ static int bnxt_rx_pages(struct bnxt_rx_queue *rxq, */ rte_bitmap_set(rxr->ag_bitmap, ag_cons); } + last->next = NULL; bnxt_prod_ag_mbuf(rxq); return 0; } @@ -344,7 +345,7 @@ bnxt_init_ptype_table(void) ip6 = i & (RX_PKT_CMPL_FLAGS2_IP_TYPE >> 7); tun = i & (RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC >> 2); - type = (i & 0x38) << 9; + type = (i & 0x78) << 9; if (!tun && !ip6) l3 = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; @@ -406,62 +407,98 @@ bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1) return bnxt_ptype_table[index]; } -uint32_t -bnxt_ol_flags_table[BNXT_OL_FLAGS_TBL_DIM] __rte_cache_aligned; - -uint32_t -bnxt_ol_flags_err_table[BNXT_OL_FLAGS_ERR_TBL_DIM] __rte_cache_aligned; - static void __rte_cold -bnxt_init_ol_flags_tables(void) +bnxt_init_ol_flags_tables(struct bnxt_rx_queue *rxq) { - static bool initialized; + struct bnxt_rx_ring_info *rxr = rxq->rx_ring; + struct rte_eth_conf *dev_conf; + bool outer_cksum_enabled; + uint64_t offloads; uint32_t *pt; int i; - if (initialized) - return; + dev_conf = &rxq->bp->eth_dev->data->dev_conf; + offloads = dev_conf->rxmode.offloads; + + outer_cksum_enabled = !!(offloads & (DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_RX_OFFLOAD_OUTER_UDP_CKSUM)); /* Initialize ol_flags table. */ - pt = bnxt_ol_flags_table; + pt = rxr->ol_flags_table; for (i = 0; i < BNXT_OL_FLAGS_TBL_DIM; i++) { pt[i] = 0; + if (i & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) pt[i] |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; - if (i & RX_PKT_CMPL_FLAGS2_IP_CS_CALC) - pt[i] |= PKT_RX_IP_CKSUM_GOOD; + if (i & (RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC << 3)) { + /* Tunnel case. */ + if (outer_cksum_enabled) { + if (i & RX_PKT_CMPL_FLAGS2_IP_CS_CALC) + pt[i] |= PKT_RX_IP_CKSUM_GOOD; - if (i & RX_PKT_CMPL_FLAGS2_L4_CS_CALC) - pt[i] |= PKT_RX_L4_CKSUM_GOOD; + if (i & RX_PKT_CMPL_FLAGS2_L4_CS_CALC) + pt[i] |= PKT_RX_L4_CKSUM_GOOD; + + if (i & RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC) + pt[i] |= PKT_RX_OUTER_L4_CKSUM_GOOD; + } else { + if (i & RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC) + pt[i] |= PKT_RX_IP_CKSUM_GOOD; - if (i & RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC) - pt[i] |= PKT_RX_OUTER_L4_CKSUM_GOOD; + if (i & RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC) + pt[i] |= PKT_RX_L4_CKSUM_GOOD; + } + } else { + /* Non-tunnel case. */ + if (i & RX_PKT_CMPL_FLAGS2_IP_CS_CALC) + pt[i] |= PKT_RX_IP_CKSUM_GOOD; + + if (i & RX_PKT_CMPL_FLAGS2_L4_CS_CALC) + pt[i] |= PKT_RX_L4_CKSUM_GOOD; + } } /* Initialize checksum error table. 
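+ * (The table index is built from the RX_PKT_CMPL_ERRORS_*_CS_ERROR + * bits shifted right by 4, with the tunnel indication injected at + * bit 4; see the matching computation in bnxt_set_ol_flags().)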
*/ - pt = bnxt_ol_flags_err_table; + pt = rxr->ol_flags_err_table; for (i = 0; i < BNXT_OL_FLAGS_ERR_TBL_DIM; i++) { pt[i] = 0; - if (i & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR >> 4)) - pt[i] |= PKT_RX_IP_CKSUM_BAD; - if (i & (RX_PKT_CMPL_ERRORS_L4_CS_ERROR >> 4)) - pt[i] |= PKT_RX_L4_CKSUM_BAD; + if (i & (RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC << 2)) { + /* Tunnel case. */ + if (outer_cksum_enabled) { + if (i & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_IP_CKSUM_BAD; - if (i & (RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR >> 4)) - pt[i] |= PKT_RX_EIP_CKSUM_BAD; + if (i & (RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_EIP_CKSUM_BAD; - if (i & (RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR >> 4)) - pt[i] |= PKT_RX_OUTER_L4_CKSUM_BAD; - } + if (i & (RX_PKT_CMPL_ERRORS_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_L4_CKSUM_BAD; - initialized = true; + if (i & (RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_OUTER_L4_CKSUM_BAD; + } else { + if (i & (RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_IP_CKSUM_BAD; + + if (i & (RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_L4_CKSUM_BAD; + } + } else { + /* Non-tunnel case. */ + if (i & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_IP_CKSUM_BAD; + + if (i & (RX_PKT_CMPL_ERRORS_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_L4_CKSUM_BAD; + } + } } static void -bnxt_set_ol_flags(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1, - struct rte_mbuf *mbuf) +bnxt_set_ol_flags(struct bnxt_rx_ring_info *rxr, struct rx_pkt_cmpl *rxcmp, + struct rx_pkt_cmpl_hi *rxcmp1, struct rte_mbuf *mbuf) { uint16_t flags_type, errors, flags; uint64_t ol_flags; @@ -475,6 +512,7 @@ bnxt_set_ol_flags(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1, RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC | RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN); + flags |= (flags & RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC) << 3; errors = rte_le_to_cpu_16(rxcmp1->errors_v2) & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR | RX_PKT_CMPL_ERRORS_L4_CS_ERROR | @@ -482,10 +520,12 @@ bnxt_set_ol_flags(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1, RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR); errors = (errors >> 4) & flags; - ol_flags = bnxt_ol_flags_table[flags & ~errors]; + ol_flags = rxr->ol_flags_table[flags & ~errors]; - if (errors) - ol_flags |= bnxt_ol_flags_err_table[errors]; + if (unlikely(errors)) { + errors |= (flags & RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC) << 2; + ol_flags |= rxr->ol_flags_err_table[errors]; + } if (flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) { mbuf->hash.rss = rte_le_to_cpu_32(rxcmp->rss_hash); @@ -740,7 +780,7 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt, mbuf->data_len = mbuf->pkt_len; mbuf->port = rxq->port_id; - bnxt_set_ol_flags(rxcmp, rxcmp1, mbuf); + bnxt_set_ol_flags(rxr, rxcmp, rxcmp1, mbuf); #ifdef RTE_LIBRTE_IEEE1588 if (unlikely((rte_le_to_cpu_16(rxcmp->flags_type) & @@ -827,6 +867,7 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, struct bnxt_cp_ring_info *cpr = rxq->cp_ring; struct bnxt_rx_ring_info *rxr = rxq->rx_ring; uint32_t raw_cons = cpr->cp_raw_cons; + bool alloc_failed = false; uint32_t cons; int nb_rx_pkts = 0; int nb_rep_rx_pkts = 0; @@ -875,12 +916,16 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, /* TODO: Avoid magic numbers... 
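+ * (Descriptive note: the 0x30/0x10 test below selects completion + * types 0x10-0x1f, i.e. Rx packet completions such as + * CMPL_BASE_TYPE_RX_L2.)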
*/ if ((CMP_TYPE(rxcmp) & 0x30) == 0x10) { rc = bnxt_rx_pkt(&rx_pkts[nb_rx_pkts], rxq, &raw_cons); - if (likely(!rc) || rc == -ENOMEM) + if (!rc) nb_rx_pkts++; - if (rc == -EBUSY) /* partial completion */ + else if (rc == -EBUSY) /* partial completion */ break; - if (rc == -ENODEV) /* completion for representor */ + else if (rc == -ENODEV) /* completion for representor */ nb_rep_rx_pkts++; + else if (rc == -ENOMEM) { + nb_rx_pkts++; + alloc_failed = true; + } } else if (!BNXT_NUM_ASYNC_CPR(rxq->bp)) { evt = bnxt_event_hwrm_resp_handler(rxq->bp, @@ -891,7 +936,7 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, } raw_cons = NEXT_RAW_CMP(raw_cons); - if (nb_rx_pkts == nb_pkts || evt) + if (nb_rx_pkts == nb_pkts || nb_rep_rx_pkts == nb_pkts || evt) break; /* Post some Rx buf early in case of larger burst processing */ if (nb_rx_pkts == BNXT_RX_POST_THRESH) @@ -907,6 +952,10 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, goto done; } + /* Ring the completion queue doorbell. */ + bnxt_db_cq(cpr); + + /* Ring the receive descriptor doorbell. */ if (prod != rxr->rx_prod) bnxt_db_write(&rxr->rx_db, rxr->rx_prod); @@ -914,24 +963,23 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, if (ag_prod != rxr->ag_prod) bnxt_db_write(&rxr->ag_db, rxr->ag_prod); - bnxt_db_cq(cpr); - /* Attempt to alloc Rx buf in case of a previous allocation failure. */ - if (rc == -ENOMEM) { - int i = RING_NEXT(rxr->rx_ring_struct, prod); - int cnt = nb_rx_pkts; + if (alloc_failed) { + uint16_t cnt; + + for (cnt = 0; cnt < nb_rx_pkts + nb_rep_rx_pkts; cnt++) { + struct rte_mbuf **rx_buf; - for (; cnt; - i = RING_NEXT(rxr->rx_ring_struct, i), cnt--) { - struct rte_mbuf **rx_buf = &rxr->rx_buf_ring[i]; + prod = RING_NEXT(rxr->rx_ring_struct, prod); + rx_buf = &rxr->rx_buf_ring[prod]; /* Buffer already allocated for this index. */ if (*rx_buf != NULL && *rx_buf != &rxq->fake_mbuf) continue; /* This slot is empty. Alloc buffer for Rx */ - if (!bnxt_alloc_rx_data(rxq, rxr, i)) { - rxr->rx_prod = i; + if (!bnxt_alloc_rx_data(rxq, rxr, prod)) { + rxr->rx_prod = prod; bnxt_db_write(&rxr->rx_db, rxr->rx_prod); } else { PMD_DRV_LOG(ERR, "Alloc mbuf failed\n"); @@ -992,12 +1040,9 @@ void bnxt_free_rx_rings(struct bnxt *bp) int bnxt_init_rx_ring_struct(struct bnxt_rx_queue *rxq, unsigned int socket_id) { - struct rte_eth_dev *eth_dev = rxq->bp->eth_dev; - struct rte_eth_rxmode *rxmode; struct bnxt_cp_ring_info *cpr; struct bnxt_rx_ring_info *rxr; struct bnxt_ring *ring; - bool use_agg_ring; rxq->rx_buf_size = BNXT_MAX_PKT_LEN + sizeof(struct rte_mbuf); @@ -1040,19 +1085,9 @@ int bnxt_init_rx_ring_struct(struct bnxt_rx_queue *rxq, unsigned int socket_id) return -ENOMEM; cpr->cp_ring_struct = ring; - rxmode = &eth_dev->data->dev_conf.rxmode; - use_agg_ring = (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER) || - (rxmode->offloads & DEV_RX_OFFLOAD_TCP_LRO) || - (rxmode->max_rx_pkt_len > - (uint32_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) - - RTE_PKTMBUF_HEADROOM)); - /* Allocate two completion slots per entry in desc ring. */ ring->ring_size = rxr->rx_ring_struct->ring_size * 2; - - /* Allocate additional slots if aggregation ring is in use. */ - if (use_agg_ring) - ring->ring_size *= AGG_RING_SIZE_FACTOR; + ring->ring_size *= AGG_RING_SIZE_FACTOR; ring->ring_size = rte_align32pow2(ring->ring_size); ring->ring_mask = ring->ring_size - 1; @@ -1107,18 +1142,18 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq) /* Initialize packet type table.
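+ * (bnxt_ptype_table is a BNXT_PTYPE_TBL_DIM = 128 entry lookup that + * maps bits gathered from the two Rx completion words to RTE_PTYPE_* + * values; unlike the ol_flags tables above it does not depend on + * per-queue offload configuration, so it stays a process-wide table.)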
*/ bnxt_init_ptype_table(); - /* Initialize offload flags parsing table. */ - bnxt_init_ol_flags_tables(); - size = rte_pktmbuf_data_room_size(rxq->mb_pool) - RTE_PKTMBUF_HEADROOM; size = RTE_MIN(BNXT_MAX_PKT_LEN, size); - type = RX_PROD_PKT_BD_TYPE_RX_PROD_PKT | RX_PROD_PKT_BD_FLAGS_EOP_PAD; + type = RX_PROD_PKT_BD_TYPE_RX_PROD_PKT; rxr = rxq->rx_ring; ring = rxr->rx_ring_struct; bnxt_init_rxbds(ring, type, size); + /* Initialize offload flags parsing table. */ + bnxt_init_ol_flags_tables(rxq); + prod = rxr->rx_prod; for (i = 0; i < ring->ring_size; i++) { if (unlikely(!rxr->rx_buf_ring[i])) { diff --git a/dpdk/drivers/net/bnxt/bnxt_rxr.h b/dpdk/drivers/net/bnxt/bnxt_rxr.h index 3fc901fdf0..46c34e6e16 100644 --- a/dpdk/drivers/net/bnxt/bnxt_rxr.h +++ b/dpdk/drivers/net/bnxt/bnxt_rxr.h @@ -42,6 +42,9 @@ static inline uint16_t bnxt_tpa_start_agg_id(struct bnxt *bp, /* Number of descriptors to process per inner loop in vector mode. */ #define RTE_BNXT_DESCS_PER_LOOP 4U +#define BNXT_OL_FLAGS_TBL_DIM 64 +#define BNXT_OL_FLAGS_ERR_TBL_DIM 32 + struct bnxt_tpa_info { struct rte_mbuf *mbuf; uint16_t len; @@ -73,6 +76,9 @@ struct bnxt_rx_ring_info { struct rte_bitmap *ag_bitmap; struct bnxt_tpa_info *tpa_info; + + uint32_t ol_flags_table[BNXT_OL_FLAGS_TBL_DIM]; + uint32_t ol_flags_err_table[BNXT_OL_FLAGS_ERR_TBL_DIM]; }; uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, @@ -116,10 +122,4 @@ bnxt_cfa_code_dynfield(struct rte_mbuf *mbuf) #define BNXT_PTYPE_TBL_DIM 128 extern uint32_t bnxt_ptype_table[BNXT_PTYPE_TBL_DIM]; - -#define BNXT_OL_FLAGS_TBL_DIM 32 -extern uint32_t bnxt_ol_flags_table[BNXT_OL_FLAGS_TBL_DIM]; - -#define BNXT_OL_FLAGS_ERR_TBL_DIM 16 -extern uint32_t bnxt_ol_flags_err_table[BNXT_OL_FLAGS_ERR_TBL_DIM]; #endif diff --git a/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_neon.c index de1d96570c..54f47a3fe1 100644 --- a/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_neon.c +++ b/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_neon.c @@ -27,11 +27,11 @@ uint32_t tmp, of; \ \ of = vgetq_lane_u32((rss_flags), (pi)) | \ - bnxt_ol_flags_table[vgetq_lane_u32((ol_idx), (pi))]; \ + rxr->ol_flags_table[vgetq_lane_u32((ol_idx), (pi))]; \ \ tmp = vgetq_lane_u32((errors), (pi)); \ if (tmp) \ - of |= bnxt_ol_flags_err_table[tmp]; \ + of |= rxr->ol_flags_err_table[tmp]; \ (ol_flags) = of; \ } @@ -58,7 +58,8 @@ static void descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4], - uint64x2_t mb_init, struct rte_mbuf **mbuf) + uint64x2_t mb_init, struct rte_mbuf **mbuf, + struct bnxt_rx_ring_info *rxr) { const uint8x16_t shuf_msk = { 0xFF, 0xFF, 0xFF, 0xFF, /* pkt_type (zeroes) */ @@ -79,7 +80,7 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4], const uint32x4_t flags2_index_mask = vdupq_n_u32(0x1F); const uint32x4_t flags2_error_mask = vdupq_n_u32(0x0F); uint32x4_t flags_type, flags2, index, errors, rss_flags; - uint32x4_t tmp, ptype_idx; + uint32x4_t tmp, ptype_idx, is_tunnel; uint64x2_t t0, t1; uint32_t ol_flags; @@ -116,10 +117,14 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4], vget_low_u64(t1))); /* Compute ol_flags and checksum error indexes for four packets. */ + is_tunnel = vandq_u32(flags2, vdupq_n_u32(4)); + is_tunnel = vshlq_n_u32(is_tunnel, 3); errors = vandq_u32(vshrq_n_u32(errors, 4), flags2_error_mask); errors = vandq_u32(errors, flags2); index = vbicq_u32(flags2, errors); + errors = vorrq_u32(errors, vshrq_n_u32(is_tunnel, 1)); + index = vorrq_u32(index, is_tunnel); /* Update mbuf rearm_data for four packets. 
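+ * (The is_tunnel vector above mirrors the scalar bnxt_set_ol_flags() + * path: the T_IP_CS_CALC bit (0x4) is shifted up by 3 into bit 5 of + * the 64-entry ol_flags table index, then right by 1 into bit 4 of + * the 32-entry error table index.)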
*/ GET_OL_FLAGS(rss_flags, index, errors, 0, ol_flags); @@ -286,7 +291,8 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, goto out; } - descs_to_mbufs(rxcmp, rxcmp1, mb_init, &rx_pkts[nb_rx_pkts]); + descs_to_mbufs(rxcmp, rxcmp1, mb_init, &rx_pkts[nb_rx_pkts], + rxr); nb_rx_pkts += num_valid; if (num_valid < RTE_BNXT_DESCS_PER_LOOP) diff --git a/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c index e12bf8bb76..621f567890 100644 --- a/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c +++ b/dpdk/drivers/net/bnxt/bnxt_rxtx_vec_sse.c @@ -27,11 +27,11 @@ uint32_t tmp, of; \ \ of = _mm_extract_epi32((rss_flags), (pi)) | \ - bnxt_ol_flags_table[_mm_extract_epi32((ol_index), (pi))]; \ + rxr->ol_flags_table[_mm_extract_epi32((ol_index), (pi))]; \ \ tmp = _mm_extract_epi32((errors), (pi)); \ if (tmp) \ - of |= bnxt_ol_flags_err_table[tmp]; \ + of |= rxr->ol_flags_err_table[tmp]; \ (ol_flags) = of; \ } @@ -54,7 +54,8 @@ static inline void descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], - __m128i mbuf_init, struct rte_mbuf **mbuf) + __m128i mbuf_init, struct rte_mbuf **mbuf, + struct bnxt_rx_ring_info *rxr) { const __m128i shuf_msk = _mm_set_epi8(15, 14, 13, 12, /* rss */ @@ -72,7 +73,7 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], const __m128i rss_mask = _mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID); __m128i t0, t1, flags_type, flags2, index, errors, rss_flags; - __m128i ptype_idx; + __m128i ptype_idx, is_tunnel; uint32_t ol_flags; /* Compute packet type table indexes for four packets */ @@ -99,6 +100,8 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]); /* Compute ol_flags and checksum error indexes for four packets. */ + is_tunnel = _mm_and_si128(flags2, _mm_set1_epi32(4)); + is_tunnel = _mm_slli_epi32(is_tunnel, 3); flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F)); errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4); @@ -106,6 +109,8 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], errors = _mm_and_si128(errors, flags2); index = _mm_andnot_si128(errors, flags2); + errors = _mm_or_si128(errors, _mm_srli_epi32(is_tunnel, 1)); + index = _mm_or_si128(index, is_tunnel); /* Update mbuf rearm_data for four packets. */ GET_OL_FLAGS(rss_flags, index, errors, 0, ol_flags); @@ -268,7 +273,8 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, goto out; } - descs_to_mbufs(rxcmp, rxcmp1, mbuf_init, &rx_pkts[nb_rx_pkts]); + descs_to_mbufs(rxcmp, rxcmp1, mbuf_init, &rx_pkts[nb_rx_pkts], + rxr); nb_rx_pkts += num_valid; if (num_valid < RTE_BNXT_DESCS_PER_LOOP) diff --git a/dpdk/drivers/net/bnxt/bnxt_txq.c b/dpdk/drivers/net/bnxt/bnxt_txq.c index c9792a2af2..99a31cef28 100644 --- a/dpdk/drivers/net/bnxt/bnxt_txq.c +++ b/dpdk/drivers/net/bnxt/bnxt_txq.c @@ -98,7 +98,7 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev, if (rc) return rc; - if (queue_idx >= BNXT_MAX_RINGS(bp)) { + if (queue_idx >= bnxt_max_rings(bp)) { PMD_DRV_LOG(ERR, "Cannot create Tx ring %d. 
Only %d rings available\n", queue_idx, bp->max_tx_rings); diff --git a/dpdk/drivers/net/bnxt/tf_core/tf_core.c b/dpdk/drivers/net/bnxt/tf_core/tf_core.c index 24d49096a7..3409cbbcec 100644 --- a/dpdk/drivers/net/bnxt/tf_core/tf_core.c +++ b/dpdk/drivers/net/bnxt/tf_core/tf_core.c @@ -82,7 +82,7 @@ tf_open_session(struct tf *tfp, return rc; TFP_DRV_LOG(INFO, - "domain:%d, bus:%d, device:%d\n", + "domain:%d, bus:%d, device:%u\n", parms->session_id.internal.domain, parms->session_id.internal.bus, parms->session_id.internal.device); diff --git a/dpdk/drivers/net/bnxt/tf_core/tf_em_common.c b/dpdk/drivers/net/bnxt/tf_core/tf_em_common.c index ad92cbdc75..c96c21c2e9 100644 --- a/dpdk/drivers/net/bnxt/tf_core/tf_em_common.c +++ b/dpdk/drivers/net/bnxt/tf_core/tf_em_common.c @@ -307,7 +307,6 @@ tf_em_page_tbl_pgcnt(uint32_t num_pages, { return roundup(num_pages, MAX_PAGE_PTRS(page_size)) / MAX_PAGE_PTRS(page_size); - return 0; } /** diff --git a/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c b/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c index 67ca0730fa..5fe004e551 100644 --- a/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c +++ b/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c @@ -1334,8 +1334,7 @@ bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr); if (internals->mode4.dedicated_queues.enabled == 0) { - int retval = rte_ring_enqueue(port->tx_ring, pkt); - if (retval != 0) { + if (rte_ring_enqueue(port->tx_ring, pkt) != 0) { /* reset timer */ port->rx_marker_timer = 0; wrn = WRN_TX_QUEUE_FULL; @@ -1355,8 +1354,7 @@ bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, } } else if (likely(subtype == SLOW_SUBTYPE_LACP)) { if (internals->mode4.dedicated_queues.enabled == 0) { - int retval = rte_ring_enqueue(port->rx_ring, pkt); - if (retval != 0) { + if (rte_ring_enqueue(port->rx_ring, pkt) != 0) { /* If RX fing full free lacpdu message and drop packet */ wrn = WRN_RX_QUEUE_FULL; goto free_out; diff --git a/dpdk/drivers/net/bonding/rte_eth_bond_args.c b/dpdk/drivers/net/bonding/rte_eth_bond_args.c index abdf552610..8c5f90dc63 100644 --- a/dpdk/drivers/net/bonding/rte_eth_bond_args.c +++ b/dpdk/drivers/net/bonding/rte_eth_bond_args.c @@ -22,23 +22,37 @@ const char *pmd_bond_init_valid_arguments[] = { NULL }; +static inline int +bond_pci_addr_cmp(const struct rte_device *dev, const void *_pci_addr) +{ + const struct rte_pci_device *pdev = RTE_DEV_TO_PCI_CONST(dev); + const struct rte_pci_addr *paddr = _pci_addr; + + return rte_pci_addr_cmp(&pdev->addr, paddr); +} + static inline int find_port_id_by_pci_addr(const struct rte_pci_addr *pci_addr) { - struct rte_pci_device *pci_dev; - struct rte_pci_addr *eth_pci_addr; + struct rte_bus *pci_bus; + struct rte_device *dev; unsigned i; - RTE_ETH_FOREACH_DEV(i) { - pci_dev = RTE_ETH_DEV_TO_PCI(&rte_eth_devices[i]); - eth_pci_addr = &pci_dev->addr; + pci_bus = rte_bus_find_by_name("pci"); + if (pci_bus == NULL) { + RTE_BOND_LOG(ERR, "No PCI bus found"); + return -1; + } - if (pci_addr->bus == eth_pci_addr->bus && - pci_addr->devid == eth_pci_addr->devid && - pci_addr->domain == eth_pci_addr->domain && - pci_addr->function == eth_pci_addr->function) - return i; + dev = pci_bus->find_device(NULL, bond_pci_addr_cmp, pci_addr); + if (dev == NULL) { + RTE_BOND_LOG(ERR, "unable to find PCI device"); + return -1; } + + RTE_ETH_FOREACH_DEV(i) + if (rte_eth_devices[i].device == dev) + return i; return -1; } @@ -57,15 +71,6 @@ find_port_id_by_dev_name(const char *name) return -1; } 
-static inline int -bond_pci_addr_cmp(const struct rte_device *dev, const void *_pci_addr) -{ - const struct rte_pci_device *pdev = RTE_DEV_TO_PCI_CONST(dev); - const struct rte_pci_addr *paddr = _pci_addr; - - return rte_pci_addr_cmp(&pdev->addr, paddr); -} - /** * Parses a port identifier string to a port id by pci address, then by name, * and finally port id. @@ -74,23 +79,10 @@ static inline int parse_port_id(const char *port_str) { struct rte_pci_addr dev_addr; - struct rte_bus *pci_bus; - struct rte_device *dev; int port_id; - pci_bus = rte_bus_find_by_name("pci"); - if (pci_bus == NULL) { - RTE_BOND_LOG(ERR, "unable to find PCI bus\n"); - return -1; - } - /* try parsing as pci address, physical devices */ - if (pci_bus->parse(port_str, &dev_addr) == 0) { - dev = pci_bus->find_device(NULL, bond_pci_addr_cmp, &dev_addr); - if (dev == NULL) { - RTE_BOND_LOG(ERR, "unable to find PCI device"); - return -1; - } + if (rte_pci_addr_parse(port_str, &dev_addr) == 0) { port_id = find_port_id_by_pci_addr(&dev_addr); if (port_id < 0) return -1; @@ -108,9 +100,8 @@ parse_port_id(const char *port_str) } } - if (port_id < 0 || port_id > RTE_MAX_ETHPORTS) { - RTE_BOND_LOG(ERR, "Slave port specified (%s) outside expected range", - port_str); + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_BOND_LOG(ERR, "Specified port (%s) is invalid", port_str); return -1; } return port_id; diff --git a/dpdk/drivers/net/cxgbe/cxgbe.h b/dpdk/drivers/net/cxgbe/cxgbe.h index ef62af1c3f..7c89a028bf 100644 --- a/dpdk/drivers/net/cxgbe/cxgbe.h +++ b/dpdk/drivers/net/cxgbe/cxgbe.h @@ -19,6 +19,10 @@ #define CXGBE_MAX_RX_PKTLEN (9000 + RTE_ETHER_HDR_LEN + \ RTE_ETHER_CRC_LEN) /* max pkt */ +/* The max frame size with default MTU */ +#define CXGBE_ETH_MAX_LEN (RTE_ETHER_MTU + \ + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN) + /* Max poll time is 100 * 100msec = 10 sec */ #define CXGBE_LINK_STATUS_POLL_MS 100 /* 100ms */ #define CXGBE_LINK_STATUS_POLL_CNT 100 /* Max number of times to poll */ diff --git a/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c b/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c index 98d0362fa3..480d6f58a8 100644 --- a/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c +++ b/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c @@ -300,7 +300,7 @@ int cxgbe_dev_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu) return -EINVAL; /* set to jumbo mode if needed */ - if (new_mtu > RTE_ETHER_MAX_LEN) + if (new_mtu > CXGBE_ETH_MAX_LEN) eth_dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else @@ -669,7 +669,7 @@ int cxgbe_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, rxq->fl.size = temp_nb_desc; /* Set to jumbo mode if necessary */ - if (pkt_len > RTE_ETHER_MAX_LEN) + if (pkt_len > CXGBE_ETH_MAX_LEN) eth_dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else diff --git a/dpdk/drivers/net/cxgbe/cxgbe_flow.c b/dpdk/drivers/net/cxgbe/cxgbe_flow.c index f7c4f36962..520a5a5c9a 100644 --- a/dpdk/drivers/net/cxgbe/cxgbe_flow.c +++ b/dpdk/drivers/net/cxgbe/cxgbe_flow.c @@ -245,11 +245,6 @@ ch_rte_parsetype_vlan(const void *dmask, const struct rte_flow_item *item, /* If user has not given any mask, then use chelsio supported mask. */ mask = umask ? umask : (const struct rte_flow_item_vlan *)dmask; - if (!fs->mask.ethtype) - return rte_flow_error_set(e, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, - item, - "Can't parse VLAN item without knowing ethertype"); - /* If ethertype is already set and is not VLAN (0x8100) or * QINQ(0x88A8), then don't proceed further. 
Otherwise, * reset the outer ethertype, so that it can be replaced by @@ -275,7 +270,7 @@ ch_rte_parsetype_vlan(const void *dmask, const struct rte_flow_item *item, fs->mask.ethtype = 0; fs->val.ethtype = 0; } - } else if (fs->val.ethtype == RTE_ETHER_TYPE_VLAN) { + } else { CXGBE_FILL_FS(1, 1, ivlan_vld); if (spec) { if (spec->tci || (umask && umask->tci)) diff --git a/dpdk/drivers/net/dpaa/dpaa_ethdev.c b/dpdk/drivers/net/dpaa/dpaa_ethdev.c index f00279e004..0c87c136d7 100644 --- a/dpdk/drivers/net/dpaa/dpaa_ethdev.c +++ b/dpdk/drivers/net/dpaa/dpaa_ethdev.c @@ -184,7 +184,7 @@ dpaa_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) return -EINVAL; } - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > DPAA_ETH_MAX_LEN) dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else diff --git a/dpdk/drivers/net/dpaa/dpaa_ethdev.h b/dpdk/drivers/net/dpaa/dpaa_ethdev.h index 659bceb467..a858b1372c 100644 --- a/dpdk/drivers/net/dpaa/dpaa_ethdev.h +++ b/dpdk/drivers/net/dpaa/dpaa_ethdev.h @@ -51,6 +51,10 @@ #define VLAN_TAG_SIZE 4 /** < Vlan Header Length */ #endif +#define DPAA_ETH_MAX_LEN (RTE_ETHER_MTU + \ + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + \ + VLAN_TAG_SIZE) + /* PCD frame queues */ #define DPAA_DEFAULT_NUM_PCD_QUEUES 1 #define DPAA_VSP_PROFILE_MAX_NUM 8 diff --git a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c index ab6863300e..6f38da3cce 100644 --- a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c +++ b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c @@ -1420,7 +1420,7 @@ dpaa2_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) if (mtu < RTE_ETHER_MIN_MTU || frame_size > DPAA2_MAX_RX_PKT_LEN) return -EINVAL; - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > DPAA2_ETH_MAX_LEN) dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else diff --git a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.h b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.h index 8d82f74684..cacb11bd3e 100644 --- a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.h +++ b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.h @@ -26,6 +26,10 @@ #define DPAA2_RX_DEFAULT_NBDESC 512 +#define DPAA2_ETH_MAX_LEN (RTE_ETHER_MTU + \ + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + \ + VLAN_TAG_SIZE) + /*default tc to be used for ,congestion, distribution etc configuration. */ #define DPAA2_DEF_TC 0 diff --git a/dpdk/drivers/net/e1000/e1000_ethdev.h b/dpdk/drivers/net/e1000/e1000_ethdev.h index 4755a5f333..3b4d9c3ee6 100644 --- a/dpdk/drivers/net/e1000/e1000_ethdev.h +++ b/dpdk/drivers/net/e1000/e1000_ethdev.h @@ -97,7 +97,7 @@ */ #define E1000_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + \ VLAN_TAG_SIZE) - +#define E1000_ETH_MAX_LEN (RTE_ETHER_MTU + E1000_ETH_OVERHEAD) /* * Maximum number of Ring Descriptors. 
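+ * (With the default 1500-byte MTU, E1000_ETH_MAX_LEN above works + * out to 1500 + 14 + 4 + 4 = 1522 bytes, the largest single-VLAN + * frame; larger frames now switch the port into jumbo mode.)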
* diff --git a/dpdk/drivers/net/e1000/em_ethdev.c b/dpdk/drivers/net/e1000/em_ethdev.c index 8ee9422bf4..2036c6e917 100644 --- a/dpdk/drivers/net/e1000/em_ethdev.c +++ b/dpdk/drivers/net/e1000/em_ethdev.c @@ -1799,8 +1799,7 @@ eth_em_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) if (ret != 0) return ret; - frame_size = mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + - VLAN_TAG_SIZE; + frame_size = mtu + E1000_ETH_OVERHEAD; /* check that mtu is within the allowed range */ if (mtu < RTE_ETHER_MIN_MTU || frame_size > dev_info.max_rx_pktlen) @@ -1816,7 +1815,7 @@ eth_em_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) rctl = E1000_READ_REG(hw, E1000_RCTL); /* switch to jumbo mode if needed */ - if (frame_size > RTE_ETHER_MAX_LEN) { + if (frame_size > E1000_ETH_MAX_LEN) { dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; rctl |= E1000_RCTL_LPE; diff --git a/dpdk/drivers/net/e1000/igb_ethdev.c b/dpdk/drivers/net/e1000/igb_ethdev.c index 647aa8d995..5bcc67d75f 100644 --- a/dpdk/drivers/net/e1000/igb_ethdev.c +++ b/dpdk/drivers/net/e1000/igb_ethdev.c @@ -3064,6 +3064,7 @@ eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) uint32_t rx_buf_size; uint32_t max_high_water; uint32_t rctl; + uint32_t ctrl; hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); if (fc_conf->autoneg != hw->mac.autoneg) @@ -3101,6 +3102,39 @@ eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) rctl &= ~E1000_RCTL_PMCF; E1000_WRITE_REG(hw, E1000_RCTL, rctl); + + /* + * check if we want to change flow control mode - driver doesn't have native + * capability to do that, so we'll write the registers ourselves + */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + + /* + * set or clear E1000_CTRL_RFCE and E1000_CTRL_TFCE bits depending + * on configuration + */ + switch (fc_conf->mode) { + case RTE_FC_NONE: + ctrl &= ~E1000_CTRL_RFCE & ~E1000_CTRL_TFCE; + break; + case RTE_FC_RX_PAUSE: + ctrl |= E1000_CTRL_RFCE; + ctrl &= ~E1000_CTRL_TFCE; + break; + case RTE_FC_TX_PAUSE: + ctrl |= E1000_CTRL_TFCE; + ctrl &= ~E1000_CTRL_RFCE; + break; + case RTE_FC_FULL: + ctrl |= E1000_CTRL_RFCE | E1000_CTRL_TFCE; + break; + default: + PMD_INIT_LOG(ERR, "invalid flow control mode"); + return -EINVAL; + } + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + E1000_WRITE_FLUSH(hw); return 0; @@ -4369,7 +4403,7 @@ eth_igb_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) rctl = E1000_READ_REG(hw, E1000_RCTL); /* switch to jumbo mode if needed */ - if (frame_size > RTE_ETHER_MAX_LEN) { + if (frame_size > E1000_ETH_MAX_LEN) { dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; rctl |= E1000_RCTL_LPE; diff --git a/dpdk/drivers/net/ena/base/ena_eth_com.c b/dpdk/drivers/net/ena/base/ena_eth_com.c index a35d92fbd3..5583a310a1 100644 --- a/dpdk/drivers/net/ena/base/ena_eth_com.c +++ b/dpdk/drivers/net/ena/base/ena_eth_com.c @@ -531,6 +531,7 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, { struct ena_com_rx_buf_info *ena_buf = &ena_rx_ctx->ena_bufs[0]; struct ena_eth_io_rx_cdesc_base *cdesc = NULL; + u16 q_depth = io_cq->q_depth; u16 cdesc_idx = 0; u16 nb_hw_desc; u16 i = 0; @@ -559,6 +560,8 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, do { ena_buf[i].len = cdesc->length; ena_buf[i].req_id = cdesc->req_id; + if (unlikely(ena_buf[i].req_id >= q_depth)) + return ENA_COM_EIO; if (++i >= nb_hw_desc) break; diff --git a/dpdk/drivers/net/ena/base/ena_plat_dpdk.h b/dpdk/drivers/net/ena/base/ena_plat_dpdk.h index 48c77f0c19..a1d749f83f 100644 --- 
a/dpdk/drivers/net/ena/base/ena_plat_dpdk.h +++ b/dpdk/drivers/net/ena/base/ena_plat_dpdk.h @@ -51,6 +51,7 @@ typedef uint64_t dma_addr_t; #define ENA_COM_FAULT -EFAULT #define ENA_COM_TRY_AGAIN -EAGAIN #define ENA_COM_UNSUPPORTED -EOPNOTSUPP +#define ENA_COM_EIO -EIO #define ____cacheline_aligned __rte_cache_aligned diff --git a/dpdk/drivers/net/ena/ena_ethdev.c b/dpdk/drivers/net/ena/ena_ethdev.c index 20ff3653c6..8baec80040 100644 --- a/dpdk/drivers/net/ena/ena_ethdev.c +++ b/dpdk/drivers/net/ena/ena_ethdev.c @@ -28,7 +28,7 @@ #define DRV_MODULE_VER_MAJOR 2 #define DRV_MODULE_VER_MINOR 2 -#define DRV_MODULE_VER_SUBMINOR 0 +#define DRV_MODULE_VER_SUBMINOR 1 #define ENA_IO_TXQ_IDX(q) (2 * (q)) #define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) @@ -380,20 +380,6 @@ static inline void ena_tx_mbuf_prepare(struct rte_mbuf *mbuf, } } -static inline int validate_rx_req_id(struct ena_ring *rx_ring, uint16_t req_id) -{ - if (likely(req_id < rx_ring->ring_size)) - return 0; - - PMD_DRV_LOG(ERR, "Invalid rx req_id: %hu\n", req_id); - - rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; - rx_ring->adapter->trigger_reset = true; - ++rx_ring->rx_stats.bad_req_id; - - return -EFAULT; -} - static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) { struct ena_tx_buffer *tx_info = NULL; @@ -1246,6 +1232,10 @@ static int ena_queue_start(struct ena_ring *ring) PMD_INIT_LOG(ERR, "Failed to populate rx ring !"); return ENA_COM_FAULT; } + /* Flush per-core RX buffers pools cache as they can be used on other + * cores as well. + */ + rte_mempool_cache_flush(NULL, ring->mb_pool); return 0; } @@ -1292,6 +1282,7 @@ static int ena_tx_queue_setup(struct rte_eth_dev *dev, txq->ring_size = nb_desc; txq->size_mask = nb_desc - 1; txq->numa_socket_id = socket_id; + txq->pkts_without_db = false; txq->tx_buffer_info = rte_zmalloc("txq->tx_buffer_info", sizeof(struct ena_tx_buffer) * @@ -1482,10 +1473,6 @@ static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count) rte_prefetch0(mbufs[i + 4]); req_id = rxq->empty_rx_reqs[next_to_use]; - rc = validate_rx_req_id(rxq, req_id); - if (unlikely(rc)) - break; - rx_info = &rxq->rx_buffer_info[req_id]; rc = ena_add_single_rx_desc(rxq->ena_com_io_sq, mbuf, req_id); @@ -2110,8 +2097,6 @@ static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; - if (unlikely(validate_rx_req_id(rx_ring, req_id))) - return NULL; rx_info = &rx_ring->rx_buffer_info[req_id]; @@ -2135,10 +2120,6 @@ static struct rte_mbuf *ena_rx_mbuf(struct ena_ring *rx_ring, ++buf; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; - if (unlikely(validate_rx_req_id(rx_ring, req_id))) { - rte_mbuf_raw_free(mbuf_head); - return NULL; - } rx_info = &rx_ring->rx_buffer_info[req_id]; RTE_ASSERT(rx_info->mbuf != NULL); @@ -2226,10 +2207,16 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, &ena_rx_ctx); if (unlikely(rc)) { PMD_DRV_LOG(ERR, "ena_com_rx_pkt error %d\n", rc); - rx_ring->adapter->reset_reason = - ENA_REGS_RESET_TOO_MANY_RX_DESCS; + if (rc == ENA_COM_NO_SPACE) { + ++rx_ring->rx_stats.bad_desc_num; + rx_ring->adapter->reset_reason = + ENA_REGS_RESET_TOO_MANY_RX_DESCS; + } else { + ++rx_ring->rx_stats.bad_req_id; + rx_ring->adapter->reset_reason = + ENA_REGS_RESET_INV_RX_REQ_ID; + } rx_ring->adapter->trigger_reset = true; - ++rx_ring->rx_stats.bad_desc_num; return 0; } @@ -2373,8 +2360,8 @@ static void ena_update_hints(struct ena_adapter *adapter, } } -static int ena_check_and_linearize_mbuf(struct 
ena_ring *tx_ring, - struct rte_mbuf *mbuf) +static int ena_check_space_and_linearize_mbuf(struct ena_ring *tx_ring, + struct rte_mbuf *mbuf) { struct ena_com_dev *ena_dev; int num_segments, header_len, rc; @@ -2384,13 +2371,21 @@ static int ena_check_and_linearize_mbuf(struct ena_ring *tx_ring, header_len = mbuf->data_len; if (likely(num_segments < tx_ring->sgl_size)) - return 0; + goto checkspace; if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && (num_segments == tx_ring->sgl_size) && (header_len < tx_ring->tx_max_header_size)) - return 0; + goto checkspace; + /* Checking for space for 2 additional metadata descriptors due to + * possible header split and metadata descriptor. Linearization will + * be needed so we reduce the segments number from num_segments to 1 + */ + if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3)) { + PMD_DRV_LOG(DEBUG, "Not enough space in the tx queue\n"); + return ENA_COM_NO_MEM; + } ++tx_ring->tx_stats.linearize; rc = rte_pktmbuf_linearize(mbuf); if (unlikely(rc)) { @@ -2400,7 +2395,19 @@ static int ena_check_and_linearize_mbuf(struct ena_ring *tx_ring, return rc; } - return rc; + return 0; + +checkspace: + /* Checking for space for 2 additional metadata descriptors due to + * possible header split and metadata descriptor + */ + if (!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, + num_segments + 2)) { + PMD_DRV_LOG(DEBUG, "Not enough space in the tx queue\n"); + return ENA_COM_NO_MEM; + } + + return 0; } static void ena_tx_map_mbuf(struct ena_ring *tx_ring, @@ -2487,7 +2494,7 @@ static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) int nb_hw_desc; int rc; - rc = ena_check_and_linearize_mbuf(tx_ring, mbuf); + rc = ena_check_space_and_linearize_mbuf(tx_ring, mbuf); if (unlikely(rc)) return rc; @@ -2515,6 +2522,8 @@ static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct rte_mbuf *mbuf) "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", tx_ring->id); ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + tx_ring->tx_stats.doorbells++; + tx_ring->pkts_without_db = false; } /* prepare the packet's descriptors to dma engine */ @@ -2593,13 +2602,10 @@ static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return 0; } - nb_pkts = RTE_MIN(ena_com_free_q_entries(tx_ring->ena_com_io_sq), - nb_pkts); - for (sent_idx = 0; sent_idx < nb_pkts; sent_idx++) { if (ena_xmit_mbuf(tx_ring, tx_pkts[sent_idx])) break; - + tx_ring->pkts_without_db = true; rte_prefetch0(tx_pkts[ENA_IDX_ADD_MASKED(sent_idx, 4, tx_ring->size_mask)]); } @@ -2608,10 +2614,11 @@ static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, ena_com_free_q_entries(tx_ring->ena_com_io_sq); /* If there are ready packets to be xmitted... 
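+ * (pkts_without_db is set above each time ena_xmit_mbuf() queues a + * descriptor and cleared whenever a doorbell is written, so the + * doorbell below fires only while un-notified descriptors exist.)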
*/ - if (sent_idx > 0) { + if (likely(tx_ring->pkts_without_db)) { /* ...let HW do its best :-) */ ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); tx_ring->tx_stats.doorbells++; + tx_ring->pkts_without_db = false; } ena_tx_cleanup(tx_ring); diff --git a/dpdk/drivers/net/ena/ena_ethdev.h b/dpdk/drivers/net/ena/ena_ethdev.h index 7bb74a1d06..ae235897ee 100644 --- a/dpdk/drivers/net/ena/ena_ethdev.h +++ b/dpdk/drivers/net/ena/ena_ethdev.h @@ -100,6 +100,10 @@ struct ena_ring { enum ena_ring_type type; enum ena_admin_placement_policy_type tx_mem_queue_type; + + /* Indicate there are Tx packets pushed to the device and wait for db */ + bool pkts_without_db; + /* Holds the empty requests for TX/RX OOO completions */ union { uint16_t *empty_tx_reqs; diff --git a/dpdk/drivers/net/enetc/enetc.h b/dpdk/drivers/net/enetc/enetc.h index 14ef3bc18b..7163633bce 100644 --- a/dpdk/drivers/net/enetc/enetc.h +++ b/dpdk/drivers/net/enetc/enetc.h @@ -29,6 +29,10 @@ /* maximum frame size supported */ #define ENETC_MAC_MAXFRM_SIZE 9600 +/* The max frame size with default MTU */ +#define ENETC_ETH_MAX_LEN (RTE_ETHER_MTU + \ + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN) + /* * upper_32_bits - return bits 32-63 of a number * @n: the number we're accessing diff --git a/dpdk/drivers/net/enetc/enetc_ethdev.c b/dpdk/drivers/net/enetc/enetc_ethdev.c index 6ff3022874..4d2c9c0474 100644 --- a/dpdk/drivers/net/enetc/enetc_ethdev.c +++ b/dpdk/drivers/net/enetc/enetc_ethdev.c @@ -677,7 +677,7 @@ enetc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) return -EINVAL; } - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > ENETC_ETH_MAX_LEN) dev->data->dev_conf.rxmode.offloads &= DEV_RX_OFFLOAD_JUMBO_FRAME; else diff --git a/dpdk/drivers/net/enic/enic_flow.c b/dpdk/drivers/net/enic/enic_flow.c index cebca7d55a..92b1c9eda6 100644 --- a/dpdk/drivers/net/enic/enic_flow.c +++ b/dpdk/drivers/net/enic/enic_flow.c @@ -1389,7 +1389,7 @@ enic_dump_filter(const struct filter_v2 *filt) if (gp->mask_flags & FILTER_GENERIC_1_IPV6) sprintf(ip6, "%s ", - (gp->val_flags & FILTER_GENERIC_1_IPV4) + (gp->val_flags & FILTER_GENERIC_1_IPV6) ? 
"ip6(y)" : "ip6(n)"); else sprintf(ip6, "%s ", "ip6(x)"); @@ -1595,6 +1595,8 @@ enic_flow_parse(struct rte_eth_dev *dev, return -rte_errno; } enic_filter->type = enic->flow_filter_mode; + if (enic->adv_filters) + enic_filter->type = FILTER_DPDK_1; ret = enic_copy_filter(pattern, enic_filter_cap, enic, enic_filter, error); return ret; diff --git a/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c b/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c index 62642354cf..5a2c171099 100644 --- a/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c +++ b/dpdk/drivers/net/hinic/hinic_pmd_ethdev.c @@ -75,6 +75,9 @@ #define HINIC_PKTLEN_TO_MTU(pktlen) \ ((pktlen) - (ETH_HLEN + ETH_CRC_LEN)) +/* The max frame size with default MTU */ +#define HINIC_ETH_MAX_LEN (RTE_ETHER_MTU + ETH_HLEN + ETH_CRC_LEN) + /* lro numer limit for one packet */ #define HINIC_LRO_WQE_NUM_DEFAULT 8 @@ -1556,7 +1559,7 @@ static int hinic_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) /* update max frame size */ frame_size = HINIC_MTU_TO_PKTLEN(mtu); - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > HINIC_ETH_MAX_LEN) dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else diff --git a/dpdk/drivers/net/hinic/hinic_pmd_rx.c b/dpdk/drivers/net/hinic/hinic_pmd_rx.c index a49769a863..842399cc4c 100644 --- a/dpdk/drivers/net/hinic/hinic_pmd_rx.c +++ b/dpdk/drivers/net/hinic/hinic_pmd_rx.c @@ -4,7 +4,7 @@ #include #include -#ifdef __ARM64_NEON__ +#ifdef RTE_ARCH_ARM64 #include #endif @@ -762,7 +762,7 @@ void hinic_free_all_rx_mbufs(struct hinic_rxq *rxq) static inline void hinic_rq_cqe_be_to_cpu32(void *dst_le32, volatile void *src_be32) { -#if defined(__X86_64_SSE__) +#if defined(RTE_ARCH_X86_64) volatile __m128i *wqe_be = (volatile __m128i *)src_be32; __m128i *wqe_le = (__m128i *)dst_le32; __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, @@ -770,7 +770,7 @@ static inline void hinic_rq_cqe_be_to_cpu32(void *dst_le32, /* l2nic just use first 128 bits */ wqe_le[0] = _mm_shuffle_epi8(wqe_be[0], shuf_mask); -#elif defined(__ARM64_NEON__) +#elif defined(RTE_ARCH_ARM64) volatile uint8x16_t *wqe_be = (volatile uint8x16_t *)src_be32; uint8x16_t *wqe_le = (uint8x16_t *)dst_le32; const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, diff --git a/dpdk/drivers/net/hinic/hinic_pmd_tx.c b/dpdk/drivers/net/hinic/hinic_pmd_tx.c index 9d0264e67a..669f82389c 100644 --- a/dpdk/drivers/net/hinic/hinic_pmd_tx.c +++ b/dpdk/drivers/net/hinic/hinic_pmd_tx.c @@ -7,7 +7,7 @@ #include #include #include -#ifdef __ARM64_NEON__ +#ifdef RTE_ARCH_ARM64 #include #endif @@ -203,7 +203,7 @@ static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb) { -#if defined(__X86_64_SSE__) +#if defined(RTE_ARCH_X86_64) int i; __m128i *wqe_line = (__m128i *)data; __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, @@ -217,7 +217,7 @@ static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb) wqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask); wqe_line += 4; } -#elif defined(__ARM64_NEON__) +#elif defined(RTE_ARCH_ARM64) int i; uint8x16_t *wqe_line = (uint8x16_t *)data; const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, @@ -237,7 +237,7 @@ static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb) static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge) { -#if defined(__X86_64_SSE__) +#if defined(RTE_ARCH_X86_64) int i; __m128i *sge_line = (__m128i *)data; __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, @@ -248,7 +248,7 @@ static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge) 
*sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask); sge_line++; } -#elif defined(__ARM64_NEON__) +#elif defined(RTE_ARCH_ARM64) int i; uint8x16_t *sge_line = (uint8x16_t *)data; const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, diff --git a/dpdk/drivers/net/hns3/hns3_cmd.c b/dpdk/drivers/net/hns3/hns3_cmd.c index f58f4f7adc..76d16a5a92 100644 --- a/dpdk/drivers/net/hns3/hns3_cmd.c +++ b/dpdk/drivers/net/hns3/hns3_cmd.c @@ -432,6 +432,16 @@ static void hns3_parse_capability(struct hns3_hw *hw, hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_STASH_B, 1); } +static uint32_t +hns3_build_api_caps(void) +{ + uint32_t api_caps = 0; + + hns3_set_bit(api_caps, HNS3_API_CAP_FLEX_RSS_TBL_B, 1); + + return rte_cpu_to_le_32(api_caps); +} + static enum hns3_cmd_status hns3_cmd_query_firmware_version_and_capability(struct hns3_hw *hw) { @@ -441,6 +451,7 @@ hns3_cmd_query_firmware_version_and_capability(struct hns3_hw *hw) hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_QUERY_FW_VER, 1); resp = (struct hns3_query_version_cmd *)desc.data; + resp->api_caps = hns3_build_api_caps(); /* Initialize the cmd function */ ret = hns3_cmd_send(hw, &desc, 1); @@ -572,9 +583,21 @@ hns3_cmd_destroy_queue(struct hns3_hw *hw) void hns3_cmd_uninit(struct hns3_hw *hw) { + rte_atomic16_set(&hw->reset.disable_cmd, 1); + + /* + * A delay is added to ensure that the register cleanup operations + * will not be performed concurrently with the firmware command and + * ensure that all the reserved commands are executed. + * Concurrency may occur in two scenarios: asynchronous command and + * timeout command. If the command fails to be executed due to busy + * scheduling, the command will be processed in the next scheduling + * of the firmware. + */ + rte_delay_ms(HNS3_CMDQ_CLEAR_WAIT_TIME); + rte_spinlock_lock(&hw->cmq.csq.lock); rte_spinlock_lock(&hw->cmq.crq.lock); - rte_atomic16_set(&hw->reset.disable_cmd, 1); hns3_cmd_clear_regs(hw); rte_spinlock_unlock(&hw->cmq.crq.lock); rte_spinlock_unlock(&hw->cmq.csq.lock); diff --git a/dpdk/drivers/net/hns3/hns3_cmd.h b/dpdk/drivers/net/hns3/hns3_cmd.h index 194c3a731b..20c373590f 100644 --- a/dpdk/drivers/net/hns3/hns3_cmd.h +++ b/dpdk/drivers/net/hns3/hns3_cmd.h @@ -8,6 +8,7 @@ #include #define HNS3_CMDQ_TX_TIMEOUT 30000 +#define HNS3_CMDQ_CLEAR_WAIT_TIME 200 #define HNS3_CMDQ_RX_INVLD_B 0 #define HNS3_CMDQ_RX_OUTVLD_B 1 #define HNS3_CMD_DESC_ALIGNMENT 4096 @@ -203,7 +204,10 @@ enum hns3_opcode_type { HNS3_OPC_FD_COUNTER_OP = 0x1205, /* Clear hardware state command */ - HNS3_OPC_CLEAR_HW_STATE = 0x700A, + HNS3_OPC_CLEAR_HW_STATE = 0x700B, + + /* Firmware stats command */ + HNS3_OPC_FIRMWARE_COMPAT_CFG = 0x701A, /* SFP command */ HNS3_OPC_SFP_GET_SPEED = 0x7104, @@ -291,11 +295,16 @@ enum HNS3_CAPS_BITS { HNS3_CAPS_HW_PAD_B, HNS3_CAPS_STASH_B, }; + +enum HNS3_API_CAP_BITS { + HNS3_API_CAP_FLEX_RSS_TBL_B, +}; + #define HNS3_QUERY_CAP_LENGTH 3 struct hns3_query_version_cmd { uint32_t firmware; uint32_t hardware; - uint32_t rsv; + uint32_t api_caps; uint32_t caps[HNS3_QUERY_CAP_LENGTH]; /* capabilities of device */ }; @@ -632,6 +641,13 @@ enum hns3_promisc_type { HNS3_BROADCAST = 3, }; +#define HNS3_LINK_EVENT_REPORT_EN_B 0 +#define HNS3_NCSI_ERROR_REPORT_EN_B 1 +struct hns3_firmware_compat_cmd { + uint32_t compat; + uint8_t rsv[20]; +}; + #define HNS3_MAC_TX_EN_B 6 #define HNS3_MAC_RX_EN_B 7 #define HNS3_MAC_PAD_TX_B 11 @@ -775,12 +791,16 @@ enum hns3_int_gl_idx { #define HNS3_TQP_ID_M GENMASK(12, 2) #define HNS3_INT_GL_IDX_S 13 #define HNS3_INT_GL_IDX_M GENMASK(14, 13) +#define 
HNS3_TQP_INT_ID_L_S 0 +#define HNS3_TQP_INT_ID_L_M GENMASK(7, 0) +#define HNS3_TQP_INT_ID_H_S 8 +#define HNS3_TQP_INT_ID_H_M GENMASK(15, 8) struct hns3_ctrl_vector_chain_cmd { - uint8_t int_vector_id; + uint8_t int_vector_id; /* the low order of the interrupt id */ uint8_t int_cause_num; uint16_t tqp_type_and_id[HNS3_VECTOR_ELEMENTS_PER_CMD]; uint8_t vfid; - uint8_t rsv; + uint8_t int_vector_id_h; /* the high order of the interrupt id */ }; struct hns3_config_max_frm_size_cmd { diff --git a/dpdk/drivers/net/hns3/hns3_dcb.c b/dpdk/drivers/net/hns3/hns3_dcb.c index fb501795f0..ab77acd948 100644 --- a/dpdk/drivers/net/hns3/hns3_dcb.c +++ b/dpdk/drivers/net/hns3/hns3_dcb.c @@ -634,7 +634,7 @@ hns3_set_rss_size(struct hns3_hw *hw, uint16_t nb_rx_q) * stage of the reset process. */ if (rte_atomic16_read(&hw->reset.resetting) == 0) { - for (i = 0; i < HNS3_RSS_IND_TBL_SIZE; i++) + for (i = 0; i < hw->rss_ind_tbl_size; i++) rss_cfg->rss_indirection_tbl[i] = i % hw->alloc_rss_size; } diff --git a/dpdk/drivers/net/hns3/hns3_ethdev.c b/dpdk/drivers/net/hns3/hns3_ethdev.c index 2011378879..ba7d6e38a2 100644 --- a/dpdk/drivers/net/hns3/hns3_ethdev.c +++ b/dpdk/drivers/net/hns3/hns3_ethdev.c @@ -93,14 +93,14 @@ static enum hns3_reset_level hns3_get_reset_level(struct hns3_adapter *hns, static int hns3_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); static int hns3_vlan_pvid_configure(struct hns3_adapter *hns, uint16_t pvid, int on); -static int hns3_update_speed_duplex(struct rte_eth_dev *eth_dev); +static int hns3_update_link_info(struct rte_eth_dev *eth_dev); static int hns3_add_mc_addr(struct hns3_hw *hw, struct rte_ether_addr *mac_addr); static int hns3_remove_mc_addr(struct hns3_hw *hw, struct rte_ether_addr *mac_addr); static int hns3_restore_fec(struct hns3_hw *hw); -static int hns3_query_dev_fec_info(struct rte_eth_dev *dev); +static int hns3_query_dev_fec_info(struct hns3_hw *hw); static void hns3_pf_disable_irq0(struct hns3_hw *hw) @@ -2203,7 +2203,7 @@ hns3_check_dcb_cfg(struct rte_eth_dev *dev) } static int -hns3_bind_ring_with_vector(struct hns3_hw *hw, uint8_t vector_id, bool mmap, +hns3_bind_ring_with_vector(struct hns3_hw *hw, uint16_t vector_id, bool en, enum hns3_ring_type queue_type, uint16_t queue_id) { struct hns3_cmd_desc desc; @@ -2212,13 +2212,15 @@ hns3_bind_ring_with_vector(struct hns3_hw *hw, uint8_t vector_id, bool mmap, enum hns3_cmd_status status; enum hns3_opcode_type op; uint16_t tqp_type_and_id = 0; - const char *op_str; uint16_t type; uint16_t gl; - op = mmap ? HNS3_OPC_ADD_RING_TO_VECTOR : HNS3_OPC_DEL_RING_TO_VECTOR; + op = en ? HNS3_OPC_ADD_RING_TO_VECTOR : HNS3_OPC_DEL_RING_TO_VECTOR; hns3_cmd_setup_basic_desc(&desc, op, false); - req->int_vector_id = vector_id; + req->int_vector_id = hns3_get_field(vector_id, HNS3_TQP_INT_ID_L_M, + HNS3_TQP_INT_ID_L_S); + req->int_vector_id_h = hns3_get_field(vector_id, HNS3_TQP_INT_ID_H_M, + HNS3_TQP_INT_ID_H_S); if (queue_type == HNS3_RING_TYPE_RX) gl = HNS3_RING_GL_RX; @@ -2234,11 +2236,10 @@ hns3_bind_ring_with_vector(struct hns3_hw *hw, uint8_t vector_id, bool mmap, gl); req->tqp_type_and_id[0] = rte_cpu_to_le_16(tqp_type_and_id); req->int_cause_num = 1; - op_str = mmap ? "Map" : "Unmap"; status = hns3_cmd_send(hw, &desc, 1); if (status) { hns3_err(hw, "%s TQP %u fail, vector_id is %u, status is %d.", - op_str, queue_id, req->int_vector_id, status); + en ? 
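With the new HNS3_TQP_INT_ID_{L,H} fields above, a 16-bit interrupt vector id is carried in two one-byte command fields instead of the old single uint8_t, which capped usable vectors at 256. A self-contained sketch of the split and of how the other end reassembles it:

#include <stdint.h>
#include <stdio.h>

/* GENMASK-style helpers, mirroring HNS3_TQP_INT_ID_{L,H}_{S,M} above. */
#define INT_ID_L_S 0
#define INT_ID_L_M 0x00ffu   /* GENMASK(7, 0)  */
#define INT_ID_H_S 8
#define INT_ID_H_M 0xff00u   /* GENMASK(15, 8) */

static uint8_t get_field(uint16_t v, uint16_t mask, unsigned int shift)
{
        return (uint8_t)((v & mask) >> shift);
}

int main(void)
{
        uint16_t vector_id = 300;   /* > 255: would not fit the old field */
        uint8_t lo = get_field(vector_id, INT_ID_L_M, INT_ID_L_S);
        uint8_t hi = get_field(vector_id, INT_ID_H_M, INT_ID_H_S);

        printf("lo=%u hi=%u rebuilt=%u\n", lo, hi,
               (unsigned)(((uint16_t)hi << 8) | lo));   /* rebuilt=300 */
        return 0;
}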
"Map" : "Unmap", queue_id, vector_id, status); return status; } @@ -2312,6 +2313,7 @@ hns3_dev_configure(struct rte_eth_dev *dev) uint16_t nb_rx_q = dev->data->nb_rx_queues; uint16_t nb_tx_q = dev->data->nb_tx_queues; struct rte_eth_rss_conf rss_conf; + uint32_t max_rx_pkt_len; uint16_t mtu; bool gro_en; int ret; @@ -2370,12 +2372,18 @@ hns3_dev_configure(struct rte_eth_dev *dev) * according to the maximum RX packet length. */ if (conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { - /* - * Security of max_rx_pkt_len is guaranteed in dpdk frame. - * Maximum value of max_rx_pkt_len is HNS3_MAX_FRAME_LEN, so it - * can safely assign to "uint16_t" type variable. - */ - mtu = (uint16_t)HNS3_PKTLEN_TO_MTU(conf->rxmode.max_rx_pkt_len); + max_rx_pkt_len = conf->rxmode.max_rx_pkt_len; + if (max_rx_pkt_len > HNS3_MAX_FRAME_LEN || + max_rx_pkt_len <= HNS3_DEFAULT_FRAME_LEN) { + hns3_err(hw, "maximum Rx packet length must be greater " + "than %u and less than %u when jumbo frame enabled.", + (uint16_t)HNS3_DEFAULT_FRAME_LEN, + (uint16_t)HNS3_MAX_FRAME_LEN); + ret = -EINVAL; + goto cfg_err; + } + + mtu = (uint16_t)HNS3_PKTLEN_TO_MTU(max_rx_pkt_len); ret = hns3_dev_mtu_set(dev, mtu); if (ret) goto cfg_err; @@ -2458,7 +2466,7 @@ hns3_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) } rte_spinlock_lock(&hw->lock); - is_jumbo_frame = frame_size > RTE_ETHER_MAX_LEN ? true : false; + is_jumbo_frame = frame_size > HNS3_DEFAULT_FRAME_LEN ? true : false; frame_size = RTE_MAX(frame_size, HNS3_DEFAULT_FRAME_LEN); /* @@ -2567,7 +2575,7 @@ hns3_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) info->vmdq_queue_num = 0; - info->reta_size = HNS3_RSS_IND_TBL_SIZE; + info->reta_size = hw->rss_ind_tbl_size; info->hash_key_size = HNS3_RSS_KEY_SIZE; info->flow_type_rss_offloads = HNS3_ETH_RSS_SUPPORT; @@ -2616,8 +2624,8 @@ hns3_dev_link_update(struct rte_eth_dev *eth_dev, struct rte_eth_link new_link; if (!hns3_is_reset_pending(hns)) { - hns3_update_speed_duplex(eth_dev); hns3_update_link_status(hw); + hns3_update_link_info(eth_dev); } memset(&new_link, 0, sizeof(new_link)); @@ -2957,6 +2965,20 @@ hns3_parse_dev_specifications(struct hns3_hw *hw, struct hns3_cmd_desc *desc) hw->intr.int_ql_max = rte_le_to_cpu_16(req0->intr_ql_max); } +static int +hns3_check_dev_specifications(struct hns3_hw *hw) +{ + if (hw->rss_ind_tbl_size == 0 || + hw->rss_ind_tbl_size > HNS3_RSS_IND_TBL_SIZE_MAX) { + hns3_err(hw, "the size of hash lookup table configured (%u)" + " exceeds the maximum(%u)", hw->rss_ind_tbl_size, + HNS3_RSS_IND_TBL_SIZE_MAX); + return -EINVAL; + } + + return 0; +} + static int hns3_query_dev_specifications(struct hns3_hw *hw) { @@ -2977,7 +2999,7 @@ hns3_query_dev_specifications(struct hns3_hw *hw) hns3_parse_dev_specifications(hw, desc); - return 0; + return hns3_check_dev_specifications(hw); } static int @@ -3001,13 +3023,6 @@ hns3_get_capability(struct hns3_hw *hw) device_id == HNS3_DEV_ID_200G_RDMA) hns3_set_bit(hw->capability, HNS3_DEV_SUPPORT_DCB_B, 1); - ret = hns3_query_dev_fec_info(eth_dev); - if (ret) { - PMD_INIT_LOG(ERR, - "failed to query FEC information, ret = %d", ret); - return ret; - } - /* Get PCI revision id */ ret = rte_pci_read_config(pci_dev, &revision, HNS3_PCI_REVISION_ID_LEN, HNS3_PCI_REVISION_ID); @@ -3139,8 +3154,15 @@ hns3_get_configuration(struct hns3_hw *hw) } ret = hns3_get_board_configuration(hw); - if (ret) + if (ret) { PMD_INIT_LOG(ERR, "failed to get board configuration: %d", ret); + return ret; + } + + ret = hns3_query_dev_fec_info(hw); + if (ret) + 
PMD_INIT_LOG(ERR, + "failed to query FEC information, ret = %d", ret); return ret; } @@ -3892,6 +3914,26 @@ hns3_buffer_alloc(struct hns3_hw *hw) return ret; } +static int +hns3_firmware_compat_config(struct hns3_hw *hw, bool is_init) +{ + struct hns3_firmware_compat_cmd *req; + struct hns3_cmd_desc desc; + uint32_t compat = 0; + + hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_FIRMWARE_COMPAT_CFG, false); + req = (struct hns3_firmware_compat_cmd *)desc.data; + + if (is_init) { + hns3_set_bit(compat, HNS3_LINK_EVENT_REPORT_EN_B, 1); + hns3_set_bit(compat, HNS3_NCSI_ERROR_REPORT_EN_B, 0); + } + + req->compat = rte_cpu_to_le_32(compat); + + return hns3_cmd_send(hw, &desc, 1); +} + static int hns3_mac_init(struct hns3_hw *hw) { @@ -4342,10 +4384,9 @@ hns3_cfg_mac_speed_dup(struct hns3_hw *hw, uint32_t speed, uint8_t duplex) } static int -hns3_update_speed_duplex(struct rte_eth_dev *eth_dev) +hns3_update_fiber_link_info(struct hns3_hw *hw) { - struct hns3_adapter *hns = eth_dev->data->dev_private; - struct hns3_hw *hw = &hns->hw; + struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); struct hns3_pf *pf = &hns->pf; uint32_t speed; int ret; @@ -4368,6 +4409,21 @@ hns3_update_speed_duplex(struct rte_eth_dev *eth_dev) return hns3_cfg_mac_speed_dup(hw, speed, ETH_LINK_FULL_DUPLEX); } +static int +hns3_update_link_info(struct rte_eth_dev *eth_dev) +{ + struct hns3_adapter *hns = eth_dev->data->dev_private; + struct hns3_hw *hw = &hns->hw; + int ret = 0; + + if (hw->mac.media_type == HNS3_MEDIA_TYPE_COPPER) + return 0; + else if (hw->mac.media_type == HNS3_MEDIA_TYPE_FIBER) + ret = hns3_update_fiber_link_info(hw); + + return ret; +} + static int hns3_cfg_mac_mode(struct hns3_hw *hw, bool enable) { @@ -4454,8 +4510,8 @@ hns3_service_handler(void *param) struct hns3_hw *hw = &hns->hw; if (!hns3_is_reset_pending(hns)) { - hns3_update_speed_duplex(eth_dev); hns3_update_link_status(hw); + hns3_update_link_info(eth_dev); } else hns3_warn(hw, "Cancel the query when reset is pending"); @@ -4541,6 +4597,15 @@ hns3_init_hardware(struct hns3_adapter *hns) goto err_mac_init; } + /* + * Requiring firmware to enable some features, driver can + * still work without it. 
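hns3_firmware_compat_config() above negotiates optional firmware behaviour through a single little-endian bit field: on init the driver opts in to firmware link-event reporting, on uninit it writes an all-zero word to turn the features back off. A sketch of the word construction, with set_bit standing in for hns3_set_bit:

#include <stdint.h>
#include <stdio.h>

#define LINK_EVENT_REPORT_EN_B 0
#define NCSI_ERROR_REPORT_EN_B 1

static void set_bit(uint32_t *word, unsigned int bit, int val)
{
        if (val)
                *word |= 1u << bit;
        else
                *word &= ~(1u << bit);
}

static uint32_t build_compat_word(int is_init)
{
        uint32_t compat = 0;

        if (is_init) {
                set_bit(&compat, LINK_EVENT_REPORT_EN_B, 1);
                set_bit(&compat, NCSI_ERROR_REPORT_EN_B, 0);
        }
        return compat;   /* the driver then converts this with cpu_to_le_32 */
}

int main(void)
{
        printf("init=%#x uninit=%#x\n",
               build_compat_word(1), build_compat_word(0));
        return 0;
}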
+ */ + ret = hns3_firmware_compat_config(hw, true); + if (ret) + PMD_INIT_LOG(WARNING, "firmware compatible features not " + "supported, ret = %d.", ret); + return 0; err_mac_init: @@ -4675,6 +4740,7 @@ hns3_init_pf(struct rte_eth_dev *eth_dev) err_enable_intr: hns3_fdir_filter_uninit(hns); err_fdir: + (void)hns3_firmware_compat_config(hw, false); hns3_uninit_umv_space(hw); err_init_hw: hns3_tqp_stats_uninit(hw); @@ -4708,6 +4774,7 @@ hns3_uninit_pf(struct rte_eth_dev *eth_dev) (void)hns3_config_gro(hw, false); hns3_promisc_uninit(hw); hns3_fdir_filter_uninit(hns); + (void)hns3_firmware_compat_config(hw, false); hns3_uninit_umv_space(hw); hns3_tqp_stats_uninit(hw); hns3_pf_disable_irq0(hw); @@ -4761,8 +4828,8 @@ hns3_map_rx_interrupt(struct rte_eth_dev *dev) struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); - uint8_t base = RTE_INTR_VEC_ZERO_OFFSET; - uint8_t vec = RTE_INTR_VEC_ZERO_OFFSET; + uint16_t base = RTE_INTR_VEC_ZERO_OFFSET; + uint16_t vec = RTE_INTR_VEC_ZERO_OFFSET; uint32_t intr_vector; uint16_t q_id; int ret; @@ -5788,6 +5855,16 @@ get_current_fec_auto_state(struct hns3_hw *hw, uint8_t *state) struct hns3_cmd_desc desc; int ret; + /* + * CMD(HNS3_OPC_CONFIG_FEC_MODE) read is not supported + * in device of link speed + * below 10 Gbps. + */ + if (hw->mac.link_speed < ETH_SPEED_NUM_10G) { + *state = 0; + return 0; + } + hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_CONFIG_FEC_MODE, true); req = (struct hns3_config_fec_cmd *)desc.data; ret = hns3_cmd_send(hw, &desc, 1); @@ -5802,10 +5879,9 @@ get_current_fec_auto_state(struct hns3_hw *hw, uint8_t *state) } static int -hns3_fec_get(struct rte_eth_dev *dev, uint32_t *fec_capa) +hns3_fec_get_internal(struct hns3_hw *hw, uint32_t *fec_capa) { #define QUERY_ACTIVE_SPEED 1 - struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct hns3_sfp_speed_cmd *resp; uint32_t tmp_fec_capa; uint8_t auto_state; @@ -5865,6 +5941,14 @@ hns3_fec_get(struct rte_eth_dev *dev, uint32_t *fec_capa) return 0; } +static int +hns3_fec_get(struct rte_eth_dev *dev, uint32_t *fec_capa) +{ + struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + return hns3_fec_get_internal(hw, fec_capa); +} + static int hns3_set_fec_hw(struct hns3_hw *hw, uint32_t mode) { @@ -5994,14 +6078,13 @@ hns3_restore_fec(struct hns3_hw *hw) } static int -hns3_query_dev_fec_info(struct rte_eth_dev *dev) +hns3_query_dev_fec_info(struct hns3_hw *hw) { - struct hns3_adapter *hns = dev->data->dev_private; - struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(hns); - struct hns3_pf *pf = &hns->pf; + struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + struct hns3_pf *pf = HNS3_DEV_PRIVATE_TO_PF(hns); int ret; - ret = hns3_fec_get(dev, &pf->fec_mode); + ret = hns3_fec_get_internal(hw, &pf->fec_mode); if (ret) hns3_err(hw, "query device FEC info failed, ret = %d", ret); @@ -6210,8 +6293,11 @@ hns3_dev_uninit(struct rte_eth_dev *eth_dev) PMD_INIT_FUNC_TRACE(); - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return -EPERM; + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + rte_free(eth_dev->process_private); + eth_dev->process_private = NULL; + return 0; + } if (hw->adapter_state < HNS3_NIC_CLOSING) hns3_dev_close(eth_dev); diff --git a/dpdk/drivers/net/hns3/hns3_ethdev_vf.c b/dpdk/drivers/net/hns3/hns3_ethdev_vf.c index 0366b9d4dc..9c84740d7b 100644 --- a/dpdk/drivers/net/hns3/hns3_ethdev_vf.c +++ 
b/dpdk/drivers/net/hns3/hns3_ethdev_vf.c @@ -779,6 +779,7 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) uint16_t nb_rx_q = dev->data->nb_rx_queues; uint16_t nb_tx_q = dev->data->nb_tx_queues; struct rte_eth_rss_conf rss_conf; + uint32_t max_rx_pkt_len; uint16_t mtu; bool gro_en; int ret; @@ -831,12 +832,18 @@ hns3vf_dev_configure(struct rte_eth_dev *dev) * according to the maximum RX packet length. */ if (conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { - /* - * Security of max_rx_pkt_len is guaranteed in dpdk frame. - * Maximum value of max_rx_pkt_len is HNS3_MAX_FRAME_LEN, so it - * can safely assign to "uint16_t" type variable. - */ - mtu = (uint16_t)HNS3_PKTLEN_TO_MTU(conf->rxmode.max_rx_pkt_len); + max_rx_pkt_len = conf->rxmode.max_rx_pkt_len; + if (max_rx_pkt_len > HNS3_MAX_FRAME_LEN || + max_rx_pkt_len <= HNS3_DEFAULT_FRAME_LEN) { + hns3_err(hw, "maximum Rx packet length must be greater " + "than %u and less than %u when jumbo frame enabled.", + (uint16_t)HNS3_DEFAULT_FRAME_LEN, + (uint16_t)HNS3_MAX_FRAME_LEN); + ret = -EINVAL; + goto cfg_err; + } + + mtu = (uint16_t)HNS3_PKTLEN_TO_MTU(max_rx_pkt_len); ret = hns3vf_dev_mtu_set(dev, mtu); if (ret) goto cfg_err; @@ -928,7 +935,7 @@ hns3vf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) rte_spinlock_unlock(&hw->lock); return ret; } - if (frame_size > RTE_ETHER_MAX_LEN) + if (mtu > RTE_ETHER_MTU) dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else @@ -1022,7 +1029,7 @@ hns3vf_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) info->vmdq_queue_num = 0; - info->reta_size = HNS3_RSS_IND_TBL_SIZE; + info->reta_size = hw->rss_ind_tbl_size; info->hash_key_size = HNS3_RSS_KEY_SIZE; info->flow_type_rss_offloads = HNS3_ETH_RSS_SUPPORT; info->default_rxportconf.ring_size = HNS3_DEFAULT_RING_DESC; @@ -1154,6 +1161,20 @@ hns3vf_parse_dev_specifications(struct hns3_hw *hw, struct hns3_cmd_desc *desc) hw->intr.int_ql_max = rte_le_to_cpu_16(req0->intr_ql_max); } +static int +hns3vf_check_dev_specifications(struct hns3_hw *hw) +{ + if (hw->rss_ind_tbl_size == 0 || + hw->rss_ind_tbl_size > HNS3_RSS_IND_TBL_SIZE_MAX) { + hns3_warn(hw, "the size of hash lookup table configured (%u)" + " exceeds the maximum(%u)", hw->rss_ind_tbl_size, + HNS3_RSS_IND_TBL_SIZE_MAX); + return -EINVAL; + } + + return 0; +} + static int hns3vf_query_dev_specifications(struct hns3_hw *hw) { @@ -1174,7 +1195,7 @@ hns3vf_query_dev_specifications(struct hns3_hw *hw) hns3vf_parse_dev_specifications(hw, desc); - return 0; + return hns3vf_check_dev_specifications(hw); } static int @@ -1749,7 +1770,6 @@ hns3vf_init_hardware(struct hns3_adapter *hns) goto err_init_hardware; } - hns3vf_request_link_info(hw); return 0; err_init_hardware: @@ -1978,8 +1998,11 @@ hns3vf_dev_close(struct rte_eth_dev *eth_dev) struct hns3_hw *hw = &hns->hw; int ret = 0; - if (rte_eal_process_type() != RTE_PROC_PRIMARY) + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + rte_free(eth_dev->process_private); + eth_dev->process_private = NULL; return 0; + } if (hw->adapter_state == HNS3_NIC_STARTED) ret = hns3vf_dev_stop(eth_dev); @@ -2238,7 +2261,7 @@ hns3vf_dev_start(struct rte_eth_dev *dev) hns3_rx_scattered_calc(dev); hns3_set_rxtx_function(dev); hns3_mp_req_start_rxtx(dev); - rte_eal_alarm_set(HNS3VF_SERVICE_INTERVAL, hns3vf_service_handler, dev); + hns3vf_service_handler(dev); hns3vf_restore_filter(dev); @@ -2360,15 +2383,17 @@ static int hns3vf_prepare_reset(struct hns3_adapter *hns) { struct hns3_hw *hw = &hns->hw; - int ret = 0; + int ret; if 
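The close/uninit hunks in both the PF and VF paths change the secondary-process branch from failing with -EPERM to freeing the per-process private data and returning success, since a secondary process owns nothing else that needs tearing down. The pattern, sketched with stub types:

#include <stdio.h>
#include <stdlib.h>

enum proc_type { PROC_PRIMARY, PROC_SECONDARY };
struct eth_dev_stub { void *process_private; };

static int dev_uninit(struct eth_dev_stub *dev, enum proc_type t)
{
        if (t != PROC_PRIMARY) {
                free(dev->process_private);   /* only per-process state */
                dev->process_private = NULL;
                return 0;                     /* was -EPERM before the fix */
        }
        /* primary process: full hardware teardown would go here */
        return 0;
}

int main(void)
{
        struct eth_dev_stub dev = { .process_private = malloc(16) };

        printf("secondary uninit: %d\n", dev_uninit(&dev, PROC_SECONDARY));
        return 0;
}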
(hw->reset.level == HNS3_VF_FUNC_RESET) { ret = hns3_send_mbx_msg(hw, HNS3_MBX_RESET, 0, NULL, 0, true, NULL, 0); + if (ret) + return ret; } rte_atomic16_set(&hw->reset.disable_cmd, 1); - return ret; + return 0; } static int @@ -2849,8 +2874,11 @@ hns3vf_dev_uninit(struct rte_eth_dev *eth_dev) PMD_INIT_FUNC_TRACE(); - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return -EPERM; + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + rte_free(eth_dev->process_private); + eth_dev->process_private = NULL; + return 0; + } if (hw->adapter_state < HNS3_NIC_CLOSING) hns3vf_dev_close(eth_dev); diff --git a/dpdk/drivers/net/hns3/hns3_flow.c b/dpdk/drivers/net/hns3/hns3_flow.c index ee6ec15498..8e4519a425 100644 --- a/dpdk/drivers/net/hns3/hns3_flow.c +++ b/dpdk/drivers/net/hns3/hns3_flow.c @@ -44,8 +44,7 @@ static enum rte_flow_item_type first_items[] = { RTE_FLOW_ITEM_TYPE_NVGRE, RTE_FLOW_ITEM_TYPE_VXLAN, RTE_FLOW_ITEM_TYPE_GENEVE, - RTE_FLOW_ITEM_TYPE_VXLAN_GPE, - RTE_FLOW_ITEM_TYPE_MPLS + RTE_FLOW_ITEM_TYPE_VXLAN_GPE }; static enum rte_flow_item_type L2_next_items[] = { @@ -65,8 +64,7 @@ static enum rte_flow_item_type L3_next_items[] = { static enum rte_flow_item_type L4_next_items[] = { RTE_FLOW_ITEM_TYPE_VXLAN, RTE_FLOW_ITEM_TYPE_GENEVE, - RTE_FLOW_ITEM_TYPE_VXLAN_GPE, - RTE_FLOW_ITEM_TYPE_MPLS + RTE_FLOW_ITEM_TYPE_VXLAN_GPE }; static enum rte_flow_item_type tunnel_next_items[] = { @@ -91,9 +89,9 @@ net_addr_to_host(uint32_t *dst, const rte_be32_t *src, size_t len) /* * This function is used to find rss general action. * 1. As we know RSS is used to spread packets among several queues, the flow - * API provide the struct rte_flow_action_rss, user could config it's field + * API provide the struct rte_flow_action_rss, user could config its field * sush as: func/level/types/key/queue to control RSS function. - * 2. The flow API also support queue region configuration for hns3. It was + * 2. The flow API also supports queue region configuration for hns3. It was * implemented by FDIR + RSS in hns3 hardware, user can create one FDIR rule * which action is RSS queues region. * 3. When action is RSS, we use the following rule to distinguish: @@ -128,11 +126,11 @@ hns3_find_rss_general_action(const struct rte_flow_item pattern[], rss = act->conf; if (have_eth && rss->conf.queue_num) { /* - * Patter have ETH and action's queue_num > 0, indicate this is + * Pattern have ETH and action's queue_num > 0, indicate this is * queue region configuration. * Because queue region is implemented by FDIR + RSS in hns3 - * hardware, it need enter FDIR process, so here return NULL to - * avoid enter RSS process. + * hardware, it needs to enter FDIR process, so here return NULL + * to avoid enter RSS process. */ return NULL; } @@ -405,7 +403,6 @@ hns3_handle_actions(struct rte_eth_dev *dev, return 0; } -/* Parse to get the attr and action info of flow director rule. */ static int hns3_check_attr(const struct rte_flow_attr *attr, struct rte_flow_error *error) { @@ -800,7 +797,7 @@ hns3_parse_sctp(const struct rte_flow_item *item, struct hns3_fdir_rule *rule, } /* - * Check items before tunnel, save inner configs to outer configs,and clear + * Check items before tunnel, save inner configs to outer configs, and clear * inner configs. * The key consists of two parts: meta_data and tuple keys. 
* Meta data uses 15 bits, including vlan_num(2bit), des_port(12bit) and tunnel @@ -1146,8 +1143,7 @@ is_tunnel_packet(enum rte_flow_item_type type) if (type == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || type == RTE_FLOW_ITEM_TYPE_VXLAN || type == RTE_FLOW_ITEM_TYPE_NVGRE || - type == RTE_FLOW_ITEM_TYPE_GENEVE || - type == RTE_FLOW_ITEM_TYPE_MPLS) + type == RTE_FLOW_ITEM_TYPE_GENEVE) return true; return false; } @@ -1208,11 +1204,6 @@ hns3_parse_fdir_filter(struct rte_eth_dev *dev, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, "Fdir not supported in VF"); - if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_PERFECT) - return rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_HANDLE, NULL, - "fdir_conf.mode isn't perfect"); - step_mngr.items = first_items; step_mngr.count = ARRAY_SIZE(first_items); for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { @@ -1469,7 +1460,7 @@ hns3_parse_rss_algorithm(struct hns3_hw *hw, enum rte_eth_hash_function *func, *hash_algo = HNS3_RSS_HASH_ALGO_SYMMETRIC_TOEP; break; default: - hns3_err(hw, "Invalid RSS algorithm configuration(%u)", + hns3_err(hw, "Invalid RSS algorithm configuration(%d)", algo_func); return -EINVAL; } @@ -1495,10 +1486,8 @@ hns3_hw_rss_hash_set(struct hns3_hw *hw, struct rte_flow_action_rss *rss_config) if (ret) return ret; - /* Update algorithm of hw */ hw->rss_info.conf.func = rss_config->func; - /* Set flow type supported */ tuple = &hw->rss_info.rss_tuple_sets; ret = hns3_set_rss_tuple_by_rss_hf(hw, tuple, rss_config->types); if (ret) @@ -1513,14 +1502,14 @@ hns3_update_indir_table(struct rte_eth_dev *dev, { struct hns3_adapter *hns = dev->data->dev_private; struct hns3_hw *hw = &hns->hw; - uint16_t indir_tbl[HNS3_RSS_IND_TBL_SIZE]; + uint16_t indir_tbl[HNS3_RSS_IND_TBL_SIZE_MAX]; uint16_t j; uint32_t i; /* Fill in redirection table */ memcpy(indir_tbl, hw->rss_info.rss_indirection_tbl, sizeof(hw->rss_info.rss_indirection_tbl)); - for (i = 0, j = 0; i < HNS3_RSS_IND_TBL_SIZE; i++, j++) { + for (i = 0, j = 0; i < hw->rss_ind_tbl_size; i++, j++) { j %= num; if (conf->queue[j] >= hw->alloc_rss_size) { hns3_err(hw, "queue id(%u) set to redirection table " @@ -1531,7 +1520,7 @@ hns3_update_indir_table(struct rte_eth_dev *dev, indir_tbl[i] = conf->queue[j]; } - return hns3_set_rss_indir_table(hw, indir_tbl, HNS3_RSS_IND_TBL_SIZE); + return hns3_set_rss_indir_table(hw, indir_tbl, hw->rss_ind_tbl_size); } static int @@ -1583,7 +1572,7 @@ hns3_config_rss_filter(struct rte_eth_dev *dev, if (rss_flow_conf.queue_num) { /* * Due the content of queue pointer have been reset to - * 0, the rss_info->conf.queue should be set NULL + * 0, the rss_info->conf.queue should be set to NULL */ rss_info->conf.queue = NULL; rss_info->conf.queue_num = 0; @@ -1749,7 +1738,7 @@ hns3_flow_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, /* * Create or destroy a flow rule. * Theorically one rule can match more than one filters. - * We will let it use the filter which it hitt first. + * We will let it use the filter which it hit first. * So, the sequence matters. 
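hns3_update_indir_table() above now fills the redirection table up to the runtime-reported hw->rss_ind_tbl_size rather than the fixed 512-entry constant, spreading the configured queues round-robin and rejecting out-of-range queue ids. The same loop in isolation:

#include <stdint.h>
#include <stdio.h>

#define RETA_SIZE_MAX 2048   /* HNS3_RSS_IND_TBL_SIZE_MAX */

static int fill_reta(uint16_t *tbl, uint16_t reta_size,
                     const uint16_t *queues, uint16_t num, uint16_t max_queue)
{
        for (uint16_t i = 0, j = 0; i < reta_size; i++, j++) {
                j %= num;                    /* cycle through the queues */
                if (queues[j] >= max_queue)
                        return -1;           /* queue id out of range */
                tbl[i] = queues[j];
        }
        return 0;
}

int main(void)
{
        uint16_t tbl[RETA_SIZE_MAX];
        uint16_t queues[] = { 0, 1, 2 };

        if (fill_reta(tbl, 512, queues, 3, 16) == 0)
                printf("tbl[0..4] = %u %u %u %u %u\n",
                       tbl[0], tbl[1], tbl[2], tbl[3], tbl[4]); /* 0 1 2 0 1 */
        return 0;
}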
*/ static struct rte_flow * @@ -1833,17 +1822,18 @@ hns3_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, flow->counter_id = fdir_rule.act_cnt.id; } + + fdir_rule_ptr = rte_zmalloc("hns3 fdir rule", + sizeof(struct hns3_fdir_rule_ele), + 0); + if (fdir_rule_ptr == NULL) { + hns3_err(hw, "failed to allocate fdir_rule memory."); + ret = -ENOMEM; + goto err_fdir; + } + ret = hns3_fdir_filter_program(hns, &fdir_rule, false); if (!ret) { - fdir_rule_ptr = rte_zmalloc("hns3 fdir rule", - sizeof(struct hns3_fdir_rule_ele), - 0); - if (fdir_rule_ptr == NULL) { - hns3_err(hw, "Failed to allocate fdir_rule memory"); - ret = -ENOMEM; - goto err_fdir; - } - memcpy(&fdir_rule_ptr->fdir_conf, &fdir_rule, sizeof(struct hns3_fdir_rule)); TAILQ_INSERT_TAIL(&process_list->fdir_list, @@ -1854,10 +1844,10 @@ hns3_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, return flow; } + rte_free(fdir_rule_ptr); err_fdir: if (fdir_rule.flags & HNS3_RULE_FLAG_COUNTER) hns3_counter_release(dev, fdir_rule.act_cnt.id); - err: rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, "Failed to create flow"); diff --git a/dpdk/drivers/net/hns3/hns3_regs.c b/dpdk/drivers/net/hns3/hns3_regs.c index b2cc599f12..8afe132585 100644 --- a/dpdk/drivers/net/hns3/hns3_regs.c +++ b/dpdk/drivers/net/hns3/hns3_regs.c @@ -104,6 +104,7 @@ hns3_get_regs_length(struct hns3_hw *hw, uint32_t *length) struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); uint32_t cmdq_lines, common_lines, ring_lines, tqp_intr_lines; uint32_t regs_num_32_bit, regs_num_64_bit; + uint32_t dfx_reg_lines; uint32_t len; int ret; @@ -117,7 +118,7 @@ hns3_get_regs_length(struct hns3_hw *hw, uint32_t *length) tqp_intr_lines = sizeof(tqp_intr_reg_addrs) / REG_LEN_PER_LINE + 1; len = (cmdq_lines + common_lines + ring_lines * hw->tqps_num + - tqp_intr_lines * hw->num_msi) * REG_LEN_PER_LINE; + tqp_intr_lines * hw->num_msi) * REG_NUM_PER_LINE; if (!hns->is_vf) { ret = hns3_get_regs_num(hw, ®s_num_32_bit, ®s_num_64_bit); @@ -126,8 +127,11 @@ hns3_get_regs_length(struct hns3_hw *hw, uint32_t *length) ret); return -ENOTSUP; } - len += regs_num_32_bit * sizeof(uint32_t) + - regs_num_64_bit * sizeof(uint64_t); + dfx_reg_lines = regs_num_32_bit * sizeof(uint32_t) / + REG_LEN_PER_LINE + 1; + dfx_reg_lines += regs_num_64_bit * sizeof(uint64_t) / + REG_LEN_PER_LINE + 1; + len += dfx_reg_lines * REG_NUM_PER_LINE; } *length = len; @@ -248,63 +252,68 @@ hns3_get_64_bit_regs(struct hns3_hw *hw, uint32_t regs_num, void *data) return 0; } -static void +static int +hns3_insert_reg_separator(int reg_num, uint32_t *data) +{ + int separator_num; + int i; + + separator_num = MAX_SEPARATE_NUM - reg_num % REG_NUM_PER_LINE; + for (i = 0; i < separator_num; i++) + *data++ = SEPARATOR_VALUE; + return separator_num; +} + +static int hns3_direct_access_regs(struct hns3_hw *hw, uint32_t *data) { struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + uint32_t *origin_data_ptr = data; uint32_t reg_offset; - int separator_num; - int reg_um; + int reg_num; int i, j; /* fetching per-PF registers values from PF PCIe register space */ - reg_um = sizeof(cmdq_reg_addrs) / sizeof(uint32_t); - separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; - for (i = 0; i < reg_um; i++) + reg_num = sizeof(cmdq_reg_addrs) / sizeof(uint32_t); + for (i = 0; i < reg_num; i++) *data++ = hns3_read_dev(hw, cmdq_reg_addrs[i]); - for (i = 0; i < separator_num; i++) - *data++ = SEPARATOR_VALUE; + data += hns3_insert_reg_separator(reg_num, data); if (hns->is_vf) - 
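The hns3_flow_create() hunk above is a leak/ordering fix: the rule node is now allocated before the filter is programmed, so an allocation failure can no longer leave a rule installed in hardware with no software bookkeeping to remove it. The shape of the fix:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct rule { int id; };
struct node { struct rule conf; struct node *next; };

static int program_hw(const struct rule *r) { (void)r; return 0; /* stub */ }

static int create_rule(struct node **list, const struct rule *r)
{
        struct node *n = calloc(1, sizeof(*n));

        if (n == NULL)
                return -1;            /* fail before touching hardware */
        if (program_hw(r) != 0) {
                free(n);              /* unwind in reverse order */
                return -1;
        }
        memcpy(&n->conf, r, sizeof(*r));
        n->next = *list;
        *list = n;
        return 0;
}

int main(void)
{
        struct node *list = NULL;
        struct rule r = { .id = 1 };

        printf("create: %d\n", create_rule(&list, &r));
        free(list);
        return 0;
}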
reg_um = sizeof(common_vf_reg_addrs) / sizeof(uint32_t); + reg_num = sizeof(common_vf_reg_addrs) / sizeof(uint32_t); else - reg_um = sizeof(common_reg_addrs) / sizeof(uint32_t); - separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; - for (i = 0; i < reg_um; i++) + reg_num = sizeof(common_reg_addrs) / sizeof(uint32_t); + for (i = 0; i < reg_num; i++) if (hns->is_vf) *data++ = hns3_read_dev(hw, common_vf_reg_addrs[i]); else *data++ = hns3_read_dev(hw, common_reg_addrs[i]); - for (i = 0; i < separator_num; i++) - *data++ = SEPARATOR_VALUE; + data += hns3_insert_reg_separator(reg_num, data); - reg_um = sizeof(ring_reg_addrs) / sizeof(uint32_t); - separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; + reg_num = sizeof(ring_reg_addrs) / sizeof(uint32_t); for (j = 0; j < hw->tqps_num; j++) { reg_offset = hns3_get_tqp_reg_offset(j); - for (i = 0; i < reg_um; i++) + for (i = 0; i < reg_num; i++) *data++ = hns3_read_dev(hw, ring_reg_addrs[i] + reg_offset); - for (i = 0; i < separator_num; i++) - *data++ = SEPARATOR_VALUE; + data += hns3_insert_reg_separator(reg_num, data); } - reg_um = sizeof(tqp_intr_reg_addrs) / sizeof(uint32_t); - separator_num = MAX_SEPARATE_NUM - reg_um % REG_NUM_PER_LINE; - for (j = 0; j < hw->num_msi; j++) { - reg_offset = HNS3_TQP_INTR_REG_SIZE * j; - for (i = 0; i < reg_um; i++) - *data++ = hns3_read_dev(hw, - tqp_intr_reg_addrs[i] + + reg_num = sizeof(tqp_intr_reg_addrs) / sizeof(uint32_t); + for (j = 0; j < hw->intr_tqps_num; j++) { + reg_offset = hns3_get_tqp_intr_reg_offset(j); + for (i = 0; i < reg_num; i++) + *data++ = hns3_read_dev(hw, tqp_intr_reg_addrs[i] + reg_offset); - for (i = 0; i < separator_num; i++) - *data++ = SEPARATOR_VALUE; + data += hns3_insert_reg_separator(reg_num, data); } + return data - origin_data_ptr; } int hns3_get_regs(struct rte_eth_dev *eth_dev, struct rte_dev_reg_info *regs) { +#define HNS3_64_BIT_REG_SIZE (sizeof(uint64_t) / sizeof(uint32_t)) struct hns3_adapter *hns = eth_dev->data->dev_private; struct hns3_hw *hw = &hns->hw; uint32_t regs_num_32_bit; @@ -334,7 +343,7 @@ hns3_get_regs(struct rte_eth_dev *eth_dev, struct rte_dev_reg_info *regs) return -ENOTSUP; /* fetching per-PF registers values from PF PCIe register space */ - hns3_direct_access_regs(hw, data); + data += hns3_direct_access_regs(hw, data); if (hns->is_vf) return 0; @@ -351,11 +360,16 @@ hns3_get_regs(struct rte_eth_dev *eth_dev, struct rte_dev_reg_info *regs) hns3_err(hw, "Get 32 bit register failed, ret = %d", ret); return ret; } - data += regs_num_32_bit; + data += hns3_insert_reg_separator(regs_num_32_bit, data); + ret = hns3_get_64_bit_regs(hw, regs_num_64_bit, data); - if (ret) + if (ret) { hns3_err(hw, "Get 64 bit register failed, ret = %d", ret); - + return ret; + } + data += regs_num_64_bit * HNS3_64_BIT_REG_SIZE; + data += hns3_insert_reg_separator(regs_num_64_bit * + HNS3_64_BIT_REG_SIZE, data); return ret; } diff --git a/dpdk/drivers/net/hns3/hns3_regs.h b/dpdk/drivers/net/hns3/hns3_regs.h index 81a0af59e4..39fc5d1b18 100644 --- a/dpdk/drivers/net/hns3/hns3_regs.h +++ b/dpdk/drivers/net/hns3/hns3_regs.h @@ -95,15 +95,21 @@ #define HNS3_MIN_EXTEND_QUEUE_ID 1024 /* bar registers for tqp interrupt */ -#define HNS3_TQP_INTR_CTRL_REG 0x20000 -#define HNS3_TQP_INTR_GL0_REG 0x20100 -#define HNS3_TQP_INTR_GL1_REG 0x20200 -#define HNS3_TQP_INTR_GL2_REG 0x20300 -#define HNS3_TQP_INTR_RL_REG 0x20900 -#define HNS3_TQP_INTR_TX_QL_REG 0x20e00 -#define HNS3_TQP_INTR_RX_QL_REG 0x20f00 - -#define HNS3_TQP_INTR_REG_SIZE 4 +#define HNS3_TQP_INTR_REG_BASE 
0x20000 +#define HNS3_TQP_INTR_EXT_REG_BASE 0x30000 +#define HNS3_TQP_INTR_CTRL_REG 0 +#define HNS3_TQP_INTR_GL0_REG 0x100 +#define HNS3_TQP_INTR_GL1_REG 0x200 +#define HNS3_TQP_INTR_GL2_REG 0x300 +#define HNS3_TQP_INTR_RL_REG 0x900 +#define HNS3_TQP_INTR_TX_QL_REG 0xe00 +#define HNS3_TQP_INTR_RX_QL_REG 0xf00 +#define HNS3_TQP_INTR_RL_EN_B 6 + +#define HNS3_MIN_EXT_TQP_INTR_ID 64 +#define HNS3_TQP_INTR_LOW_ORDER_OFFSET 0x4 +#define HNS3_TQP_INTR_HIGH_ORDER_OFFSET 0x1000 + #define HNS3_TQP_INTR_GL_MAX 0x1FE0 #define HNS3_TQP_INTR_GL_DEFAULT 20 #define HNS3_TQP_INTR_GL_UNIT_1US BIT(31) diff --git a/dpdk/drivers/net/hns3/hns3_rss.c b/dpdk/drivers/net/hns3/hns3_rss.c index e2f04687b2..7bd7745859 100644 --- a/dpdk/drivers/net/hns3/hns3_rss.c +++ b/dpdk/drivers/net/hns3/hns3_rss.c @@ -312,7 +312,7 @@ hns3_set_rss_indir_table(struct hns3_hw *hw, uint16_t *indir, uint16_t size) /* Update redirection table of hw */ memcpy(hw->rss_info.rss_indirection_tbl, indir, - sizeof(hw->rss_info.rss_indirection_tbl)); + sizeof(uint16_t) * size); return 0; } @@ -324,13 +324,13 @@ hns3_rss_reset_indir_table(struct hns3_hw *hw) int ret; lut = rte_zmalloc("hns3_rss_lut", - HNS3_RSS_IND_TBL_SIZE * sizeof(uint16_t), 0); + hw->rss_ind_tbl_size * sizeof(uint16_t), 0); if (lut == NULL) { hns3_err(hw, "No hns3_rss_lut memory can be allocated"); return -ENOMEM; } - ret = hns3_set_rss_indir_table(hw, lut, HNS3_RSS_IND_TBL_SIZE); + ret = hns3_set_rss_indir_table(hw, lut, hw->rss_ind_tbl_size); if (ret) hns3_err(hw, "RSS uninit indir table failed: %d", ret); rte_free(lut); @@ -428,7 +428,7 @@ hns3_dev_rss_hash_update(struct rte_eth_dev *dev, } else if (rss_hf && rss_cfg->conf.types == 0) { /* Enable RSS, restore indirection table by hw's config */ ret = hns3_set_rss_indir_table(hw, rss_cfg->rss_indirection_tbl, - HNS3_RSS_IND_TBL_SIZE); + hw->rss_ind_tbl_size); if (ret) goto conf_err; } @@ -505,15 +505,15 @@ hns3_dev_rss_reta_update(struct rte_eth_dev *dev, struct hns3_adapter *hns = dev->data->dev_private; struct hns3_hw *hw = &hns->hw; struct hns3_rss_conf *rss_cfg = &hw->rss_info; - uint16_t i, indir_size = HNS3_RSS_IND_TBL_SIZE; /* Table size is 512 */ - uint16_t indirection_tbl[HNS3_RSS_IND_TBL_SIZE]; + uint16_t indirection_tbl[HNS3_RSS_IND_TBL_SIZE_MAX]; uint16_t idx, shift; + uint16_t i; int ret; - if (reta_size != indir_size || reta_size > ETH_RSS_RETA_SIZE_512) { + if (reta_size != hw->rss_ind_tbl_size) { hns3_err(hw, "The size of hash lookup table configured (%u)" "doesn't match the number hardware can supported" - "(%u)", reta_size, indir_size); + "(%u)", reta_size, hw->rss_ind_tbl_size); return -EINVAL; } rte_spinlock_lock(&hw->lock); @@ -536,7 +536,7 @@ hns3_dev_rss_reta_update(struct rte_eth_dev *dev, } ret = hns3_set_rss_indir_table(hw, indirection_tbl, - HNS3_RSS_IND_TBL_SIZE); + hw->rss_ind_tbl_size); rte_spinlock_unlock(&hw->lock); return ret; @@ -561,13 +561,13 @@ hns3_dev_rss_reta_query(struct rte_eth_dev *dev, struct hns3_adapter *hns = dev->data->dev_private; struct hns3_hw *hw = &hns->hw; struct hns3_rss_conf *rss_cfg = &hw->rss_info; - uint16_t i, indir_size = HNS3_RSS_IND_TBL_SIZE; /* Table size is 512 */ uint16_t idx, shift; + uint16_t i; - if (reta_size != indir_size || reta_size > ETH_RSS_RETA_SIZE_512) { + if (reta_size != hw->rss_ind_tbl_size) { hns3_err(hw, "The size of hash lookup table configured (%u)" " doesn't match the number hardware can supported" - "(%u)", reta_size, indir_size); + "(%u)", reta_size, hw->rss_ind_tbl_size); return -EINVAL; } rte_spinlock_lock(&hw->lock); @@ -667,7 
+667,7 @@ hns3_set_default_rss_args(struct hns3_hw *hw) memcpy(rss_cfg->key, hns3_hash_key, HNS3_RSS_KEY_SIZE); /* Initialize RSS indirection table */ - for (i = 0; i < HNS3_RSS_IND_TBL_SIZE; i++) + for (i = 0; i < hw->rss_ind_tbl_size; i++) rss_cfg->rss_indirection_tbl[i] = i % queue_num; } @@ -716,7 +716,7 @@ hns3_config_rss(struct hns3_adapter *hns) */ if (((uint32_t)mq_mode & ETH_MQ_RX_RSS_FLAG)) { ret = hns3_set_rss_indir_table(hw, rss_cfg->rss_indirection_tbl, - HNS3_RSS_IND_TBL_SIZE); + hw->rss_ind_tbl_size); if (ret) goto rss_tuple_uninit; } diff --git a/dpdk/drivers/net/hns3/hns3_rss.h b/dpdk/drivers/net/hns3/hns3_rss.h index 6d1d25f227..798c5c62df 100644 --- a/dpdk/drivers/net/hns3/hns3_rss.h +++ b/dpdk/drivers/net/hns3/hns3_rss.h @@ -24,9 +24,8 @@ ETH_RSS_L4_DST_ONLY) #define HNS3_RSS_IND_TBL_SIZE 512 /* The size of hash lookup table */ +#define HNS3_RSS_IND_TBL_SIZE_MAX 2048 #define HNS3_RSS_KEY_SIZE 40 -#define HNS3_RSS_CFG_TBL_NUM \ - (HNS3_RSS_IND_TBL_SIZE / HNS3_RSS_CFG_TBL_SIZE) #define HNS3_RSS_SET_BITMAP_MSK 0xffff #define HNS3_RSS_HASH_ALGO_TOEPLITZ 0 @@ -45,7 +44,7 @@ struct hns3_rss_conf { uint8_t hash_algo; /* hash function type definited by hardware */ uint8_t key[HNS3_RSS_KEY_SIZE]; /* Hash key */ struct hns3_rss_tuple_cfg rss_tuple_sets; - uint16_t rss_indirection_tbl[HNS3_RSS_IND_TBL_SIZE]; /* Shadow table */ + uint16_t rss_indirection_tbl[HNS3_RSS_IND_TBL_SIZE_MAX]; uint16_t queue[HNS3_RSS_QUEUES_BUFFER_NUM]; /* Queues indices to use */ bool valid; /* check if RSS rule is valid */ /* diff --git a/dpdk/drivers/net/hns3/hns3_rxtx.c b/dpdk/drivers/net/hns3/hns3_rxtx.c index 88d3baba4a..896567c791 100644 --- a/dpdk/drivers/net/hns3/hns3_rxtx.c +++ b/dpdk/drivers/net/hns3/hns3_rxtx.c @@ -10,7 +10,7 @@ #include #include #include -#if defined(RTE_ARCH_ARM64) && defined(CC_SVE_SUPPORT) +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE) #include #endif @@ -834,6 +834,24 @@ hns3_reset_queue(struct hns3_hw *hw, uint16_t queue_id, return ret; } +uint32_t +hns3_get_tqp_intr_reg_offset(uint16_t tqp_intr_id) +{ + uint32_t reg_offset; + + /* Need an extend offset to config queues > 64 */ + if (tqp_intr_id < HNS3_MIN_EXT_TQP_INTR_ID) + reg_offset = HNS3_TQP_INTR_REG_BASE + + tqp_intr_id * HNS3_TQP_INTR_LOW_ORDER_OFFSET; + else + reg_offset = HNS3_TQP_INTR_EXT_REG_BASE + + tqp_intr_id / HNS3_MIN_EXT_TQP_INTR_ID * + HNS3_TQP_INTR_HIGH_ORDER_OFFSET + + tqp_intr_id % HNS3_MIN_EXT_TQP_INTR_ID * + HNS3_TQP_INTR_LOW_ORDER_OFFSET; + + return reg_offset; +} void hns3_set_queue_intr_gl(struct hns3_hw *hw, uint16_t queue_id, @@ -847,7 +865,7 @@ hns3_set_queue_intr_gl(struct hns3_hw *hw, uint16_t queue_id, if (gl_idx >= RTE_DIM(offset) || gl_value > HNS3_TQP_INTR_GL_MAX) return; - addr = offset[gl_idx] + queue_id * HNS3_TQP_INTR_REG_SIZE; + addr = offset[gl_idx] + hns3_get_tqp_intr_reg_offset(queue_id); if (hw->intr.gl_unit == HNS3_INTR_COALESCE_GL_UINT_1US) value = gl_value | HNS3_TQP_INTR_GL_UNIT_1US; else @@ -864,7 +882,7 @@ hns3_set_queue_intr_rl(struct hns3_hw *hw, uint16_t queue_id, uint16_t rl_value) if (rl_value > HNS3_TQP_INTR_RL_MAX) return; - addr = HNS3_TQP_INTR_RL_REG + queue_id * HNS3_TQP_INTR_REG_SIZE; + addr = HNS3_TQP_INTR_RL_REG + hns3_get_tqp_intr_reg_offset(queue_id); value = HNS3_RL_USEC_TO_REG(rl_value); if (value > 0) value |= HNS3_TQP_INTR_RL_ENABLE_MASK; @@ -885,10 +903,10 @@ hns3_set_queue_intr_ql(struct hns3_hw *hw, uint16_t queue_id, uint16_t ql_value) if (hw->intr.int_ql_max == HNS3_INTR_QL_NONE) return; - addr = HNS3_TQP_INTR_TX_QL_REG + queue_id * 
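hns3_get_tqp_intr_reg_offset() above replaces the flat queue_id * HNS3_TQP_INTR_REG_SIZE addressing: vector ids below 64 stay in the original BAR window, while higher ids move to an extended region laid out as 64 vectors per 0x1000 block. The arithmetic, runnable on its own:

#include <stdint.h>
#include <stdio.h>

#define INTR_REG_BASE     0x20000
#define INTR_EXT_REG_BASE 0x30000
#define MIN_EXT_INTR_ID   64
#define LOW_ORDER_OFFSET  0x4
#define HIGH_ORDER_OFFSET 0x1000

static uint32_t intr_reg_offset(uint16_t id)
{
        if (id < MIN_EXT_INTR_ID)
                return INTR_REG_BASE + id * LOW_ORDER_OFFSET;
        return INTR_EXT_REG_BASE +
               id / MIN_EXT_INTR_ID * HIGH_ORDER_OFFSET +
               id % MIN_EXT_INTR_ID * LOW_ORDER_OFFSET;
}

int main(void)
{
        printf("63 -> %#x, 64 -> %#x, 65 -> %#x\n",
               intr_reg_offset(63), intr_reg_offset(64), intr_reg_offset(65));
        /* 63 -> 0x200fc, 64 -> 0x31000, 65 -> 0x31004 */
        return 0;
}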
HNS3_TQP_INTR_REG_SIZE; + addr = HNS3_TQP_INTR_TX_QL_REG + hns3_get_tqp_intr_reg_offset(queue_id); hns3_write_dev(hw, addr, ql_value); - addr = HNS3_TQP_INTR_RX_QL_REG + queue_id * HNS3_TQP_INTR_REG_SIZE; + addr = HNS3_TQP_INTR_RX_QL_REG + hns3_get_tqp_intr_reg_offset(queue_id); hns3_write_dev(hw, addr, ql_value); } @@ -897,7 +915,7 @@ hns3_queue_intr_enable(struct hns3_hw *hw, uint16_t queue_id, bool en) { uint32_t addr, value; - addr = HNS3_TQP_INTR_CTRL_REG + queue_id * HNS3_TQP_INTR_REG_SIZE; + addr = HNS3_TQP_INTR_CTRL_REG + hns3_get_tqp_intr_reg_offset(queue_id); value = en ? 1 : 0; hns3_write_dev(hw, addr, value); @@ -2467,7 +2485,7 @@ hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id, static bool hns3_check_sve_support(void) { -#if defined(RTE_ARCH_ARM64) && defined(CC_SVE_SUPPORT) +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_SVE) if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SVE)) return true; #endif diff --git a/dpdk/drivers/net/hns3/hns3_rxtx.h b/dpdk/drivers/net/hns3/hns3_rxtx.h index 6538848fee..5650a97c3a 100644 --- a/dpdk/drivers/net/hns3/hns3_rxtx.h +++ b/dpdk/drivers/net/hns3/hns3_rxtx.h @@ -653,6 +653,7 @@ int hns3_tx_burst_mode_get(struct rte_eth_dev *dev, const uint32_t *hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev); void hns3_init_rx_ptype_tble(struct rte_eth_dev *dev); void hns3_set_rxtx_function(struct rte_eth_dev *eth_dev); +uint32_t hns3_get_tqp_intr_reg_offset(uint16_t tqp_intr_id); void hns3_set_queue_intr_gl(struct hns3_hw *hw, uint16_t queue_id, uint8_t gl_idx, uint16_t gl_value); void hns3_set_queue_intr_rl(struct hns3_hw *hw, uint16_t queue_id, diff --git a/dpdk/drivers/net/hns3/hns3_stats.c b/dpdk/drivers/net/hns3/hns3_stats.c index 91168ac95a..48ab6a38bb 100644 --- a/dpdk/drivers/net/hns3/hns3_stats.c +++ b/dpdk/drivers/net/hns3/hns3_stats.c @@ -521,8 +521,15 @@ hns3_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *rte_stats) if (rxq) { cnt = rxq->l2_errors + rxq->pkt_len_errors; rte_stats->q_errors[i] = cnt; + /* + * If HW statistics are reset by stats_reset, but + * a lot of residual packets exist in the hardware + * queue and these packets are error packets, flip + * overflow may occurred. So return 0 in this case. + */ rte_stats->q_ipackets[i] = - stats->rcb_rx_ring_pktnum[i] - cnt; + stats->rcb_rx_ring_pktnum[i] > cnt ? + stats->rcb_rx_ring_pktnum[i] - cnt : 0; rte_stats->ierrors += cnt; } } @@ -535,8 +542,9 @@ hns3_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *rte_stats) } rte_stats->oerrors = 0; - rte_stats->ipackets = stats->rcb_rx_ring_pktnum_rcd - - rte_stats->ierrors; + rte_stats->ipackets = + stats->rcb_rx_ring_pktnum_rcd > rte_stats->ierrors ? + stats->rcb_rx_ring_pktnum_rcd - rte_stats->ierrors : 0; rte_stats->opackets = stats->rcb_tx_ring_pktnum_rcd - rte_stats->oerrors; rte_stats->rx_nombuf = eth_dev->data->rx_mbuf_alloc_failed; @@ -551,7 +559,6 @@ hns3_stats_reset(struct rte_eth_dev *eth_dev) struct hns3_hw *hw = &hns->hw; struct hns3_cmd_desc desc_reset; struct hns3_rx_queue *rxq; - struct hns3_tx_queue *txq; uint16_t i; int ret; @@ -581,29 +588,15 @@ hns3_stats_reset(struct rte_eth_dev *eth_dev) } } - /* Clear the Rx BD errors stats */ - for (i = 0; i != eth_dev->data->nb_rx_queues; ++i) { + /* + * Clear soft stats of rx error packet which will be dropped + * in driver. 
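The stats hunk above guards the packet-counter subtraction: after stats_reset, residual error packets still draining from the hardware queue can make the error count exceed the freshly reset ring counter, and an unguarded subtraction would wrap the uint64_t. The guard reduces to a saturating subtract:

#include <stdint.h>
#include <stdio.h>

static uint64_t sat_sub(uint64_t pkts, uint64_t errs)
{
        /* clamp to 0 instead of wrapping when errs > pkts */
        return pkts > errs ? pkts - errs : 0;
}

int main(void)
{
        printf("%llu %llu\n",
               (unsigned long long)sat_sub(100, 7),
               (unsigned long long)sat_sub(3, 7));   /* 93 0 */
        return 0;
}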
+ */ + for (i = 0; i < eth_dev->data->nb_rx_queues; ++i) { rxq = eth_dev->data->rx_queues[i]; if (rxq) { rxq->pkt_len_errors = 0; rxq->l2_errors = 0; - rxq->l3_csum_errors = 0; - rxq->l4_csum_errors = 0; - rxq->ol3_csum_errors = 0; - rxq->ol4_csum_errors = 0; - } - } - - /* Clear the Tx errors stats */ - for (i = 0; i != eth_dev->data->nb_tx_queues; ++i) { - txq = eth_dev->data->tx_queues[i]; - if (txq) { - txq->over_length_pkt_cnt = 0; - txq->exceed_limit_bd_pkt_cnt = 0; - txq->exceed_limit_bd_reassem_fail = 0; - txq->unsupported_tunnel_pkt_cnt = 0; - txq->queue_full_cnt = 0; - txq->pkt_padding_fail_cnt = 0; } } @@ -739,9 +732,9 @@ hns3_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, if (!hns->is_vf) { /* Update Mac stats */ ret = hns3_query_update_mac_stats(dev); - if (ret) { + if (ret < 0) { hns3_err(hw, "Update Mac stats fail : %d", ret); - return 0; + return ret; } /* Get MAC stats from hw->hw_xstats.mac_stats struct */ @@ -933,9 +926,13 @@ hns3_dev_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids, uint32_t i; int ret; - if (ids == NULL || size < cnt_stats) + if (ids == NULL && values == NULL) return cnt_stats; + if (ids == NULL) + if (size < cnt_stats) + return cnt_stats; + /* Update tqp stats by read register */ ret = hns3_update_tqp_stats(hw); if (ret) { @@ -957,6 +954,15 @@ hns3_dev_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids, return -EINVAL; } + if (ids == NULL && values != NULL) { + for (i = 0; i < cnt_stats; i++) + memcpy(&values[i], &values_copy[i].value, + sizeof(values[i])); + + rte_free(values_copy); + return cnt_stats; + } + for (i = 0; i < size; i++) { if (ids[i] >= cnt_stats) { hns3_err(hw, "ids[%u] (%" PRIx64 ") is invalid, " @@ -1005,9 +1011,16 @@ hns3_dev_xstats_get_names_by_id(struct rte_eth_dev *dev, uint64_t len; uint32_t i; - if (ids == NULL || xstats_names == NULL) + if (xstats_names == NULL) return cnt_stats; + if (ids == NULL) { + if (size < cnt_stats) + return cnt_stats; + + return hns3_dev_xstats_get_names(dev, xstats_names, cnt_stats); + } + len = cnt_stats * sizeof(struct rte_eth_xstat_name); names_copy = rte_zmalloc("hns3_xstats_names", len, 0); if (names_copy == NULL) { @@ -1033,6 +1046,38 @@ hns3_dev_xstats_get_names_by_id(struct rte_eth_dev *dev, return size; } +static void +hns3_tqp_dfx_stats_clear(struct rte_eth_dev *dev) +{ + struct hns3_rx_queue *rxq; + struct hns3_tx_queue *txq; + int i; + + /* Clear Rx dfx stats */ + for (i = 0; i < dev->data->nb_rx_queues; ++i) { + rxq = dev->data->rx_queues[i]; + if (rxq) { + rxq->l3_csum_errors = 0; + rxq->l4_csum_errors = 0; + rxq->ol3_csum_errors = 0; + rxq->ol4_csum_errors = 0; + } + } + + /* Clear Tx dfx stats */ + for (i = 0; i < dev->data->nb_tx_queues; ++i) { + txq = dev->data->tx_queues[i]; + if (txq) { + txq->over_length_pkt_cnt = 0; + txq->exceed_limit_bd_pkt_cnt = 0; + txq->exceed_limit_bd_reassem_fail = 0; + txq->unsupported_tunnel_pkt_cnt = 0; + txq->queue_full_cnt = 0; + txq->pkt_padding_fail_cnt = 0; + } + } +} + int hns3_dev_xstats_reset(struct rte_eth_dev *dev) { @@ -1048,6 +1093,8 @@ hns3_dev_xstats_reset(struct rte_eth_dev *dev) /* Clear reset stats */ memset(&hns->hw.reset.stats, 0, sizeof(struct hns3_reset_stats)); + hns3_tqp_dfx_stats_clear(dev); + if (hns->is_vf) return 0; diff --git a/dpdk/drivers/net/hns3/meson.build b/dpdk/drivers/net/hns3/meson.build index 45cee34d9d..5674d986ba 100644 --- a/dpdk/drivers/net/hns3/meson.build +++ b/dpdk/drivers/net/hns3/meson.build @@ -32,7 +32,6 @@ deps += ['hash'] if arch_subdir == 'arm' and 
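The xstats-by-id hunks above bring the driver in line with the ethdev contract: a NULL ids pointer means "all statistics", an undersized buffer returns the required count instead of data, and an out-of-range id is an error. A compact model of that contract:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int xstats_get_by_id(const uint64_t *ids, uint64_t *values,
                            unsigned int size, const uint64_t *all,
                            unsigned int cnt)
{
        unsigned int i;

        if (ids == NULL && values == NULL)
                return (int)cnt;                 /* query: how many exist */
        if (ids == NULL) {
                if (size < cnt)
                        return (int)cnt;         /* buffer too small */
                memcpy(values, all, cnt * sizeof(*all));
                return (int)cnt;
        }
        for (i = 0; i < size; i++) {
                if (ids[i] >= cnt)
                        return -1;               /* -EINVAL */
                values[i] = all[ids[i]];
        }
        return (int)size;
}

int main(void)
{
        uint64_t all[3] = { 10, 20, 30 }, out[3];
        uint64_t ids[2] = { 2, 0 };

        printf("%d %llu %llu\n", xstats_get_by_id(ids, out, 2, all, 3),
               (unsigned long long)out[0], (unsigned long long)out[1]);
        return 0;
}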
dpdk_conf.get('RTE_ARCH_64') sources += files('hns3_rxtx_vec.c') if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != '' - cflags = ['-DCC_SVE_SUPPORT'] sources += files('hns3_rxtx_vec_sve.c') endif endif diff --git a/dpdk/drivers/net/i40e/base/i40e_osdep.h b/dpdk/drivers/net/i40e/base/i40e_osdep.h index 9b5033024f..c9287ff255 100644 --- a/dpdk/drivers/net/i40e/base/i40e_osdep.h +++ b/dpdk/drivers/net/i40e/base/i40e_osdep.h @@ -133,6 +133,14 @@ static inline uint32_t i40e_read_addr(volatile void *addr) return rte_le_to_cpu_32(I40E_PCI_REG(addr)); } +#define I40E_PCI_REG64(reg) rte_read64(reg) +#define I40E_PCI_REG64_ADDR(a, reg) \ + ((volatile uint64_t *)((char *)(a)->hw_addr + (reg))) +static inline uint64_t i40e_read64_addr(volatile void *addr) +{ + return rte_le_to_cpu_64(I40E_PCI_REG64(addr)); +} + #define I40E_PCI_REG_WRITE(reg, value) \ rte_write32((rte_cpu_to_le_32(value)), reg) #define I40E_PCI_REG_WRITE_RELAXED(reg, value) \ @@ -150,6 +158,8 @@ static inline uint32_t i40e_read_addr(volatile void *addr) #define I40E_WRITE_REG(hw, reg, value) \ I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((hw), (reg)), (value)) +#define I40E_READ_REG64(hw, reg) i40e_read64_addr(I40E_PCI_REG64_ADDR((hw), (reg))) + #define rd32(a, reg) i40e_read_addr(I40E_PCI_REG_ADDR((a), (reg))) #define wr32(a, reg, value) \ I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value)) diff --git a/dpdk/drivers/net/i40e/i40e_ethdev.c b/dpdk/drivers/net/i40e/i40e_ethdev.c index f54769c29d..ef4f28fe53 100644 --- a/dpdk/drivers/net/i40e/i40e_ethdev.c +++ b/dpdk/drivers/net/i40e/i40e_ethdev.c @@ -763,6 +763,21 @@ static inline void i40e_config_automask(struct i40e_pf *pf) I40E_WRITE_REG(hw, I40E_GLINT_CTL, val); } +static inline void i40e_clear_automask(struct i40e_pf *pf) +{ + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + uint32_t val; + + val = I40E_READ_REG(hw, I40E_GLINT_CTL); + val &= ~(I40E_GLINT_CTL_DIS_AUTOMASK_PF0_MASK | + I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK); + + if (!pf->support_multi_driver) + val &= ~I40E_GLINT_CTL_DIS_AUTOMASK_N_MASK; + + I40E_WRITE_REG(hw, I40E_GLINT_CTL, val); +} + #define I40E_FLOW_CONTROL_ETHERTYPE 0x8808 /* @@ -1534,8 +1549,9 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) PMD_INIT_LOG(ERR, "Failed to init adminq: %d", ret); return -EIO; } - /* Firmware of SFP x722 does not support adminq option */ - if (hw->device_id == I40E_DEV_ID_SFP_X722) + /* Firmware of SFP x722 does not support 802.1ad frames ability */ + if (hw->device_id == I40E_DEV_ID_SFP_X722 || + hw->device_id == I40E_DEV_ID_SFP_I_X722) hw->flags &= ~I40E_HW_FLAG_802_1AD_CAPABLE; PMD_INIT_LOG(INFO, "FW %d.%d API %d.%d NVM %02d.%02d.%02d eetrack %04x", @@ -2741,6 +2757,8 @@ i40e_dev_close(struct rte_eth_dev *dev) /* Remove all Traffic Manager configuration */ i40e_tm_conf_uninit(dev); + i40e_clear_automask(pf); + hw->adapter_closed = 1; return ret; } @@ -4426,7 +4444,6 @@ i40e_set_rss_lut(struct i40e_vsi *vsi, uint8_t *lut, uint16_t lut_size) { struct i40e_pf *pf; struct i40e_hw *hw; - int ret; if (!vsi || !lut) return -EINVAL; @@ -4435,12 +4452,16 @@ i40e_set_rss_lut(struct i40e_vsi *vsi, uint8_t *lut, uint16_t lut_size) hw = I40E_VSI_TO_HW(vsi); if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) { - ret = i40e_aq_set_rss_lut(hw, vsi->vsi_id, - vsi->type != I40E_VSI_SRIOV, - lut, lut_size); - if (ret) { - PMD_DRV_LOG(ERR, "Failed to set RSS lookup table"); - return ret; + enum i40e_status_code status; + + status = i40e_aq_set_rss_lut(hw, vsi->vsi_id, + vsi->type != I40E_VSI_SRIOV, + lut, lut_size); + if 
(status) { + PMD_DRV_LOG(ERR, + "Failed to update RSS lookup table, error status: %d", + status); + return -EIO; } } else { uint32_t *lut_dw = (uint32_t *)lut; @@ -6603,9 +6624,13 @@ i40e_stat_update_48(struct i40e_hw *hw, { uint64_t new_data; - new_data = (uint64_t)I40E_READ_REG(hw, loreg); - new_data |= ((uint64_t)(I40E_READ_REG(hw, hireg) & - I40E_16_BIT_MASK)) << I40E_32_BIT_WIDTH; + if (hw->device_id == I40E_DEV_ID_QEMU) { + new_data = (uint64_t)I40E_READ_REG(hw, loreg); + new_data |= ((uint64_t)(I40E_READ_REG(hw, hireg) & + I40E_16_BIT_MASK)) << I40E_32_BIT_WIDTH; + } else { + new_data = I40E_READ_REG64(hw, loreg); + } if (!offset_loaded) *offset = new_data; @@ -7591,7 +7616,6 @@ i40e_set_rss_key(struct i40e_vsi *vsi, uint8_t *key, uint8_t key_len) uint16_t key_idx = (vsi->type == I40E_VSI_SRIOV) ? I40E_VFQF_HKEY_MAX_INDEX : I40E_PFQF_HKEY_MAX_INDEX; - int ret = 0; if (!key || key_len == 0) { PMD_DRV_LOG(DEBUG, "No key to be configured"); @@ -7604,11 +7628,16 @@ i40e_set_rss_key(struct i40e_vsi *vsi, uint8_t *key, uint8_t key_len) if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) { struct i40e_aqc_get_set_rss_key_data *key_dw = - (struct i40e_aqc_get_set_rss_key_data *)key; + (struct i40e_aqc_get_set_rss_key_data *)key; + enum i40e_status_code status = + i40e_aq_set_rss_key(hw, vsi->vsi_id, key_dw); - ret = i40e_aq_set_rss_key(hw, vsi->vsi_id, key_dw); - if (ret) - PMD_INIT_LOG(ERR, "Failed to configure RSS key via AQ"); + if (status) { + PMD_DRV_LOG(ERR, + "Failed to configure RSS key via AQ, error status: %d", + status); + return -EIO; + } } else { uint32_t *hash_key = (uint32_t *)key; uint16_t i; @@ -7628,7 +7657,7 @@ i40e_set_rss_key(struct i40e_vsi *vsi, uint8_t *key, uint8_t key_len) I40E_WRITE_FLUSH(hw); } - return ret; + return 0; } static int @@ -11753,7 +11782,7 @@ i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) return -EBUSY; } - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > I40E_ETH_MAX_LEN) dev_data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else diff --git a/dpdk/drivers/net/i40e/i40e_ethdev.h b/dpdk/drivers/net/i40e/i40e_ethdev.h index 696c5aaf7e..20d051db8b 100644 --- a/dpdk/drivers/net/i40e/i40e_ethdev.h +++ b/dpdk/drivers/net/i40e/i40e_ethdev.h @@ -281,6 +281,7 @@ struct rte_flow { */ #define I40E_ETH_OVERHEAD \ (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + I40E_VLAN_TAG_SIZE * 2) +#define I40E_ETH_MAX_LEN (RTE_ETHER_MTU + I40E_ETH_OVERHEAD) #define I40E_RXTX_BYTES_H_16_BIT(bytes) ((bytes) & ~I40E_48_BIT_MASK) #define I40E_RXTX_BYTES_L_48_BIT(bytes) ((bytes) & I40E_48_BIT_MASK) @@ -636,6 +637,7 @@ struct i40e_fdir_flow_ext { bool is_udp; /* ipv4|ipv6 udp flow */ enum i40e_flxpld_layer_idx layer_idx; struct i40e_fdir_flex_pit flex_pit[I40E_MAX_FLXPLD_LAYER * I40E_MAX_FLXPLD_FIED]; + bool is_flex_flow; }; /* A structure used to define the input for a flow director filter entry */ @@ -784,6 +786,8 @@ struct i40e_fdir_info { bool flex_mask_flag[I40E_FILTER_PCTYPE_MAX]; bool inset_flag[I40E_FILTER_PCTYPE_MAX]; /* Mark if input set is set */ + + uint32_t flex_flow_count[I40E_MAX_FLXPLD_LAYER]; }; /* Ethertype filter number HW supports */ diff --git a/dpdk/drivers/net/i40e/i40e_ethdev_vf.c b/dpdk/drivers/net/i40e/i40e_ethdev_vf.c index c26b036b85..bca8cb80e4 100644 --- a/dpdk/drivers/net/i40e/i40e_ethdev_vf.c +++ b/dpdk/drivers/net/i40e/i40e_ethdev_vf.c @@ -1078,8 +1078,18 @@ i40evf_add_vlan(struct rte_eth_dev *dev, uint16_t vlanid) args.out_buffer = vf->aq_resp; args.out_size = I40E_AQ_BUF_SZ; err = i40evf_execute_vf_cmd(dev, &args); - if (err) + if 
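The hunk above lets most devices read a 48-bit statistic with one 64-bit load (I40E_READ_REG64) instead of two 32-bit loads that can tear, keeping the split read only for the QEMU device model. Either way the counter wraps at 2^48, so deltas against the loaded offset are taken modulo 2^48; a sketch of that arithmetic (the wrap handling itself sits just below the lines shown):

#include <stdint.h>
#include <stdio.h>

#define MASK_48 ((1ULL << 48) - 1)

static uint64_t stat_delta_48(uint64_t new_data, uint64_t offset)
{
        if (new_data >= offset)
                return new_data - offset;
        return (new_data + (1ULL << 48)) - offset;   /* counter wrapped */
}

int main(void)
{
        printf("%llu\n",
               (unsigned long long)stat_delta_48(5, MASK_48 - 9)); /* 15 */
        return 0;
}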
(err) { PMD_DRV_LOG(ERR, "fail to execute command OP_ADD_VLAN"); + return err; + } + /** + * In linux kernel driver on receiving ADD_VLAN it enables + * VLAN_STRIP by default. So reconfigure the vlan_offload + * as it was done by the app earlier. + */ + err = i40evf_vlan_offload_set(dev, ETH_VLAN_STRIP_MASK); + if (err) + PMD_DRV_LOG(ERR, "fail to set vlan_strip"); return err; } @@ -1889,22 +1899,22 @@ i40evf_rxq_init(struct rte_eth_dev *dev, struct i40e_rx_queue *rxq) * Check if the jumbo frame and maximum packet length are set correctly */ if (dev_data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { - if (rxq->max_pkt_len <= RTE_ETHER_MAX_LEN || + if (rxq->max_pkt_len <= I40E_ETH_MAX_LEN || rxq->max_pkt_len > I40E_FRAME_SIZE_MAX) { PMD_DRV_LOG(ERR, "maximum packet length must be " "larger than %u and smaller than %u, as jumbo " - "frame is enabled", (uint32_t)RTE_ETHER_MAX_LEN, + "frame is enabled", (uint32_t)I40E_ETH_MAX_LEN, (uint32_t)I40E_FRAME_SIZE_MAX); return I40E_ERR_CONFIG; } } else { if (rxq->max_pkt_len < RTE_ETHER_MIN_LEN || - rxq->max_pkt_len > RTE_ETHER_MAX_LEN) { + rxq->max_pkt_len > I40E_ETH_MAX_LEN) { PMD_DRV_LOG(ERR, "maximum packet length must be " "larger than %u and smaller than %u, as jumbo " "frame is disabled", (uint32_t)RTE_ETHER_MIN_LEN, - (uint32_t)RTE_ETHER_MAX_LEN); + (uint32_t)I40E_ETH_MAX_LEN); return I40E_ERR_CONFIG; } } @@ -2406,6 +2416,7 @@ i40evf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) stats->imissed = pstats->rx_discards; stats->oerrors = pstats->tx_errors + pstats->tx_discards; stats->ibytes = pstats->rx_bytes; + stats->ibytes -= stats->ipackets * RTE_ETHER_CRC_LEN; stats->obytes = pstats->tx_bytes; } else { PMD_DRV_LOG(ERR, "Get statistics failed"); @@ -2825,7 +2836,7 @@ i40evf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) return -EBUSY; } - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > I40E_ETH_MAX_LEN) dev_data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else diff --git a/dpdk/drivers/net/i40e/i40e_fdir.c b/dpdk/drivers/net/i40e/i40e_fdir.c index 50c0eee9f2..f5defcf585 100644 --- a/dpdk/drivers/net/i40e/i40e_fdir.c +++ b/dpdk/drivers/net/i40e/i40e_fdir.c @@ -116,7 +116,7 @@ i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq) #endif rx_ctx.dtype = i40e_header_split_none; rx_ctx.hsplit_0 = I40E_HEADER_SPLIT_NONE; - rx_ctx.rxmax = RTE_ETHER_MAX_LEN; + rx_ctx.rxmax = I40E_ETH_MAX_LEN; rx_ctx.tphrdesc_ena = 1; rx_ctx.tphwdesc_ena = 1; rx_ctx.tphdata_ena = 1; @@ -355,6 +355,7 @@ i40e_init_flx_pld(struct i40e_pf *pf) I40E_PRTQF_FLX_PIT(index + 1), 0x0000FC29);/*non-used*/ I40E_WRITE_REG(hw, I40E_PRTQF_FLX_PIT(index + 2), 0x0000FC2A);/*non-used*/ + pf->fdir.flex_pit_flag[i] = 0; } /* initialize the masks */ @@ -1513,8 +1514,6 @@ i40e_flow_set_fdir_flex_pit(struct i40e_pf *pf, I40E_WRITE_REG(hw, I40E_PRTQF_FLX_PIT(field_idx), flx_pit); min_next_off++; } - - pf->fdir.flex_pit_flag[layer_idx] = 1; } static int @@ -1686,7 +1685,7 @@ i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev, i40e_fdir_filter_convert(filter, &check_filter); if (add) { - if (!filter->input.flow_ext.customized_pctype) { + if (filter->input.flow_ext.is_flex_flow) { for (i = 0; i < filter->input.flow_ext.raw_id; i++) { layer_idx = filter->input.flow_ext.layer_idx; field_idx = layer_idx * I40E_MAX_FLXPLD_FIED + i; @@ -1738,6 +1737,9 @@ i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev, fdir_info->fdir_guarantee_free_space > 0) wait_status = false; } else { + if (filter->input.flow_ext.is_flex_flow) + layer_idx = 
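
Several hunks here replace RTE_ETHER_MAX_LEN (1518) with I40E_ETH_MAX_LEN as the jumbo-frame threshold. The reasoning is plain arithmetic: a standard 1500-byte-MTU frame may still carry two VLAN tags, so it is not yet "jumbo" at 1519 bytes. Worked out as a compile-time check (constants restated locally for illustration):

```c
#include <assert.h>

enum {
        ETHER_MTU     = 1500,   /* RTE_ETHER_MTU */
        ETHER_HDR_LEN = 14,     /* RTE_ETHER_HDR_LEN */
        ETHER_CRC_LEN = 4,      /* RTE_ETHER_CRC_LEN */
        VLAN_TAG_SIZE = 4,      /* I40E_VLAN_TAG_SIZE */
        ETH_OVERHEAD  = ETHER_HDR_LEN + ETHER_CRC_LEN + 2 * VLAN_TAG_SIZE,
        ETH_MAX_LEN   = ETHER_MTU + ETH_OVERHEAD,
};

static_assert(ETH_MAX_LEN == 1526, "1500 + 14 + 4 + 2*4");
```
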
filter->input.flow_ext.layer_idx; + node = i40e_sw_fdir_filter_lookup(fdir_info, &check_filter.fdir.input); if (!node) { @@ -1785,6 +1787,17 @@ i40e_flow_add_del_fdir_filter(struct rte_eth_dev *dev, goto error_op; } + if (filter->input.flow_ext.is_flex_flow) { + if (add) { + fdir_info->flex_flow_count[layer_idx]++; + pf->fdir.flex_pit_flag[layer_idx] = 1; + } else { + fdir_info->flex_flow_count[layer_idx]--; + if (!fdir_info->flex_flow_count[layer_idx]) + pf->fdir.flex_pit_flag[layer_idx] = 0; + } + } + if (add) { fdir_info->fdir_actual_cnt++; if (fdir_info->fdir_invalprio == 1 && diff --git a/dpdk/drivers/net/i40e/i40e_flow.c b/dpdk/drivers/net/i40e/i40e_flow.c index b09ff6590d..bbd666b7a0 100644 --- a/dpdk/drivers/net/i40e/i40e_flow.c +++ b/dpdk/drivers/net/i40e/i40e_flow.c @@ -3069,6 +3069,7 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, &flex_pit, sizeof(struct i40e_fdir_flex_pit)); filter->input.flow_ext.layer_idx = layer_idx; filter->input.flow_ext.raw_id = raw_id; + filter->input.flow_ext.is_flex_flow = true; break; case RTE_FLOW_ITEM_TYPE_VF: vf_spec = item->spec; @@ -5515,6 +5516,9 @@ i40e_flow_flush_fdir_filter(struct i40e_pf *pf) pf->fdir.flex_mask_flag[pctype] = 0; } + for (i = 0; i < I40E_MAX_FLXPLD_LAYER; i++) + pf->fdir.flex_pit_flag[i] = 0; + /* Disable FDIR processing as all FDIR rules are now flushed */ i40e_fdir_rx_proc_enable(dev, 0); } diff --git a/dpdk/drivers/net/i40e/i40e_rxtx.c b/dpdk/drivers/net/i40e/i40e_rxtx.c index 5df9a9df56..b8859bbff2 100644 --- a/dpdk/drivers/net/i40e/i40e_rxtx.c +++ b/dpdk/drivers/net/i40e/i40e_rxtx.c @@ -2797,23 +2797,23 @@ i40e_rx_queue_config(struct i40e_rx_queue *rxq) RTE_MIN((uint32_t)(hw->func_caps.rx_buf_chain_len * rxq->rx_buf_len), data->dev_conf.rxmode.max_rx_pkt_len); if (data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { - if (rxq->max_pkt_len <= RTE_ETHER_MAX_LEN || + if (rxq->max_pkt_len <= I40E_ETH_MAX_LEN || rxq->max_pkt_len > I40E_FRAME_SIZE_MAX) { PMD_DRV_LOG(ERR, "maximum packet length must " "be larger than %u and smaller than %u," "as jumbo frame is enabled", - (uint32_t)RTE_ETHER_MAX_LEN, + (uint32_t)I40E_ETH_MAX_LEN, (uint32_t)I40E_FRAME_SIZE_MAX); return I40E_ERR_CONFIG; } } else { if (rxq->max_pkt_len < RTE_ETHER_MIN_LEN || - rxq->max_pkt_len > RTE_ETHER_MAX_LEN) { + rxq->max_pkt_len > I40E_ETH_MAX_LEN) { PMD_DRV_LOG(ERR, "maximum packet length must be " "larger than %u and smaller than %u, " "as jumbo frame is disabled", (uint32_t)RTE_ETHER_MIN_LEN, - (uint32_t)RTE_ETHER_MAX_LEN); + (uint32_t)I40E_ETH_MAX_LEN); return I40E_ERR_CONFIG; } } diff --git a/dpdk/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/dpdk/drivers/net/i40e/i40e_rxtx_vec_avx2.c index 7a558fc73a..fe6ec7deef 100644 --- a/dpdk/drivers/net/i40e/i40e_rxtx_vec_avx2.c +++ b/dpdk/drivers/net/i40e/i40e_rxtx_vec_avx2.c @@ -342,24 +342,32 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, */ const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, /* shift right 1 bit to make sure it not exceed 255 */ - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1, - PKT_RX_IP_CKSUM_BAD >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + 
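
The flex_flow_count[] changes above turn flex_pit_flag[] into a reference-counted property: the per-layer flex-payload programming stays marked for as long as at least one flex flow uses that layer. A minimal sketch of the scheme (array size illustrative):

```c
#include <stdbool.h>
#include <stdint.h>

#define MAX_FLX_LAYERS 3        /* stands in for I40E_MAX_FLXPLD_LAYER */

static uint32_t flex_flow_count[MAX_FLX_LAYERS];
static bool flex_pit_flag[MAX_FLX_LAYERS];

/* Set the layer flag on first use; clear it only when the last flow
 * referencing the layer is deleted. */
static void
flex_flow_ref(unsigned int layer, bool add)
{
        if (add) {
                flex_flow_count[layer]++;
                flex_pit_flag[layer] = true;
        } else if (flex_flow_count[layer] && --flex_flow_count[layer] == 0) {
                flex_pit_flag[layer] = false;
        }
}
```
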
PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, /* second 128-bits */ 0, 0, 0, 0, 0, 0, 0, 0, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1, - PKT_RX_IP_CKSUM_BAD >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1); + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1); const __m256i cksum_mask = _mm256_set1_epi32( PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | diff --git a/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c b/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c index 4b2b6a28fc..0bcb48e24e 100644 --- a/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c +++ b/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c @@ -254,16 +254,18 @@ desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp, const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, /* shift right 1 bit to make sure it not exceed 255 */ - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD | - PKT_RX_L4_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1, - PKT_RX_IP_CKSUM_BAD >> 1, - (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1); + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1); /* Unpack "status" from quadword 1, bits 0:32 */ vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]); diff --git a/dpdk/drivers/net/i40e/rte_pmd_i40e.c b/dpdk/drivers/net/i40e/rte_pmd_i40e.c index 790d042002..2e34140c5b 100644 --- a/dpdk/drivers/net/i40e/rte_pmd_i40e.c +++ b/dpdk/drivers/net/i40e/rte_pmd_i40e.c @@ -2366,6 +2366,9 @@ rte_pmd_i40e_add_vf_mac_addr(uint16_t port, uint16_t vf_id, struct i40e_mac_filter_info mac_filter; int ret; + if (mac_addr == NULL) + return -EINVAL; + if (i40e_validate_mac_addr((u8 *)mac_addr) != I40E_SUCCESS) return -EINVAL; @@ -3042,6 
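
The rewritten l3_l4 shuffle tables fix the LUT entries so that a clear error bit yields the corresponding *_CKSUM_GOOD flag instead of leaving the good flags out. In scalar form, the mapping the vector shuffle implements is simply the following (flag values here are illustrative placeholders, not the real PKT_RX_* bits):

```c
#include <stdint.h>

#define IP_OK   (1u << 0)
#define IP_BAD  (1u << 1)
#define L4_OK   (1u << 2)
#define L4_BAD  (1u << 3)
#define EIP_BAD (1u << 4)       /* outer IP header checksum error */

/* err = EIPE << 2 | L4E << 1 | IPE: the three descriptor error bits
 * that index the eight live entries of the shuffle table. */
static uint32_t
decode_csum_err(unsigned int err)
{
        uint32_t f = (err & 1) ? IP_BAD : IP_OK;

        f |= (err & 2) ? L4_BAD : L4_OK;
        if (err & 4)
                f |= EIP_BAD;
        return f;
}
```
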
+3045,9 @@ int rte_pmd_i40e_flow_add_del_packet_template( RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + if (conf == NULL) + return -EINVAL; + if (!is_i40e_supported(dev)) return -ENOTSUP; diff --git a/dpdk/drivers/net/iavf/iavf.h b/dpdk/drivers/net/iavf/iavf.h index 6d5912d8c1..3328bd9327 100644 --- a/dpdk/drivers/net/iavf/iavf.h +++ b/dpdk/drivers/net/iavf/iavf.h @@ -66,6 +66,7 @@ #define IAVF_VLAN_TAG_SIZE 4 #define IAVF_ETH_OVERHEAD \ (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + IAVF_VLAN_TAG_SIZE * 2) +#define IAVF_ETH_MAX_LEN (RTE_ETHER_MTU + IAVF_ETH_OVERHEAD) #define IAVF_32_BIT_WIDTH (CHAR_BIT * 4) #define IAVF_48_BIT_WIDTH (CHAR_BIT * 6) diff --git a/dpdk/drivers/net/iavf/iavf_ethdev.c b/dpdk/drivers/net/iavf/iavf_ethdev.c index 7e3c26a94e..ed69ba483e 100644 --- a/dpdk/drivers/net/iavf/iavf_ethdev.c +++ b/dpdk/drivers/net/iavf/iavf_ethdev.c @@ -372,8 +372,10 @@ iavf_dev_configure(struct rte_eth_dev *dev) } else { /* Check if large VF is already enabled. If so, disable and * release redundant queue resource. + * Or check if enough queue pairs. If not, request them from PF. */ - if (vf->lv_enabled) { + if (vf->lv_enabled || + num_queue_pairs > vf->vsi_res->num_queue_pairs) { ret = iavf_queues_req_reset(dev, num_queue_pairs); if (ret) return ret; @@ -418,23 +420,23 @@ iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue *rxq) * correctly. */ if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { - if (max_pkt_len <= RTE_ETHER_MAX_LEN || + if (max_pkt_len <= IAVF_ETH_MAX_LEN || max_pkt_len > IAVF_FRAME_SIZE_MAX) { PMD_DRV_LOG(ERR, "maximum packet length must be " "larger than %u and smaller than %u, " "as jumbo frame is enabled", - (uint32_t)RTE_ETHER_MAX_LEN, + (uint32_t)IAVF_ETH_MAX_LEN, (uint32_t)IAVF_FRAME_SIZE_MAX); return -EINVAL; } } else { if (max_pkt_len < RTE_ETHER_MIN_LEN || - max_pkt_len > RTE_ETHER_MAX_LEN) { + max_pkt_len > IAVF_ETH_MAX_LEN) { PMD_DRV_LOG(ERR, "maximum packet length must be " "larger than %u and smaller than %u, " "as jumbo frame is disabled", (uint32_t)RTE_ETHER_MIN_LEN, - (uint32_t)RTE_ETHER_MAX_LEN); + (uint32_t)IAVF_ETH_MAX_LEN); return -EINVAL; } } @@ -570,15 +572,15 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev, /* If Rx interrupt is reuquired, and we can use * multi interrupts, then the vec is from 1 */ - vf->nb_msix = RTE_MIN(vf->vf_res->max_vectors, - intr_handle->nb_efd); + vf->nb_msix = RTE_MIN(intr_handle->nb_efd, + (uint16_t)(vf->vf_res->max_vectors - 1)); vf->msix_base = IAVF_RX_VEC_START; vec = IAVF_RX_VEC_START; for (i = 0; i < dev->data->nb_rx_queues; i++) { qv_map[i].queue_id = i; qv_map[i].vector_id = vec; intr_handle->intr_vec[i] = vec++; - if (vec >= vf->nb_msix) + if (vec >= vf->nb_msix + IAVF_RX_VEC_START) vec = IAVF_RX_VEC_START; } vf->qv_map = qv_map; @@ -1167,7 +1169,7 @@ iavf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) return -EBUSY; } - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > IAVF_ETH_MAX_LEN) dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else diff --git a/dpdk/drivers/net/iavf/iavf_fdir.c b/dpdk/drivers/net/iavf/iavf_fdir.c index 7054bde0b9..253213f8b5 100644 --- a/dpdk/drivers/net/iavf/iavf_fdir.c +++ b/dpdk/drivers/net/iavf/iavf_fdir.c @@ -25,6 +25,9 @@ #define IAVF_FDIR_IPV6_TC_OFFSET 20 #define IAVF_IPV6_TC_MASK (0xFF << IAVF_FDIR_IPV6_TC_OFFSET) +#define IAVF_GTPU_EH_DWLINK 0 +#define IAVF_GTPU_EH_UPLINK 1 + #define IAVF_FDIR_INSET_ETH (\ IAVF_INSET_ETHERTYPE) @@ -807,7 +810,14 @@ iavf_fdir_parse_pattern(__rte_unused struct iavf_adapter 
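
The iavf_config_rx_queues_irqs() fix above caps the usable vector count at max_vectors - 1 (vector 0 apparently carries non-queue admin events) and makes the round-robin wrap back to the first Rx vector rather than to 0. A standalone sketch of the corrected assignment:

```c
#include <stdint.h>

#define RX_VEC_START 1  /* vector 0 is reserved for non-queue events */

static void
map_rx_vectors(uint16_t nb_rxq, uint16_t nb_efd, uint16_t max_vectors,
               uint16_t *vec_of_queue)
{
        uint16_t nb_msix = nb_efd < (uint16_t)(max_vectors - 1) ?
                           nb_efd : (uint16_t)(max_vectors - 1);
        uint16_t vec = RX_VEC_START;
        uint16_t q;

        for (q = 0; q < nb_rxq; q++) {
                vec_of_queue[q] = vec++;
                if (vec >= nb_msix + RX_VEC_START)  /* wrap to 1, not 0 */
                        vec = RX_VEC_START;
        }
}
```
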
*ad, hdr = &filter->add_fltr.rule_cfg.proto_hdrs.proto_hdr[layer]; - VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, GTPU_EH); + if (!gtp_psc_spec) + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, GTPU_EH); + else if ((gtp_psc_mask->qfi) && !(gtp_psc_mask->pdu_type)) + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, GTPU_EH); + else if (gtp_psc_spec->pdu_type == IAVF_GTPU_EH_UPLINK) + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, GTPU_EH_PDU_UP); + else if (gtp_psc_spec->pdu_type == IAVF_GTPU_EH_DWLINK) + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, GTPU_EH_PDU_DWN); if (gtp_psc_spec && gtp_psc_mask) { if (gtp_psc_mask->qfi == UINT8_MAX) { diff --git a/dpdk/drivers/net/iavf/iavf_hash.c b/dpdk/drivers/net/iavf/iavf_hash.c index c4c73e6644..72b0117230 100644 --- a/dpdk/drivers/net/iavf/iavf_hash.c +++ b/dpdk/drivers/net/iavf/iavf_hash.c @@ -806,7 +806,9 @@ static void iavf_refine_proto_hdrs(struct virtchnl_proto_hdrs *proto_hdrs, static uint64_t invalid_rss_comb[] = { ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_UDP, + ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP, ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_UDP, + ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_TCP, RTE_ETH_RSS_L3_PRE32 | RTE_ETH_RSS_L3_PRE40 | RTE_ETH_RSS_L3_PRE48 | RTE_ETH_RSS_L3_PRE56 | RTE_ETH_RSS_L3_PRE96 @@ -867,6 +869,13 @@ iavf_any_invalid_rss_type(enum rte_eth_hash_function rss_func, if (rss_type & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY | ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) return true; + + if (!(rss_type & + (ETH_RSS_IPV4 | ETH_RSS_IPV6 | + ETH_RSS_NONFRAG_IPV4_UDP | ETH_RSS_NONFRAG_IPV6_UDP | + ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV6_TCP | + ETH_RSS_NONFRAG_IPV4_SCTP | ETH_RSS_NONFRAG_IPV6_SCTP))) + return true; } /* check invalid combination */ diff --git a/dpdk/drivers/net/iavf/iavf_vchnl.c b/dpdk/drivers/net/iavf/iavf_vchnl.c index 33d03af653..c17ae06227 100644 --- a/dpdk/drivers/net/iavf/iavf_vchnl.c +++ b/dpdk/drivers/net/iavf/iavf_vchnl.c @@ -644,12 +644,12 @@ iavf_enable_queues_lv(struct iavf_adapter *adapter) args.out_buffer = vf->aq_resp; args.out_size = IAVF_AQ_BUF_SZ; err = iavf_execute_vf_cmd(adapter, &args); - if (err) { + if (err) PMD_DRV_LOG(ERR, "Failed to execute command of OP_ENABLE_QUEUES_V2"); - return err; - } - return 0; + + rte_free(queue_select); + return err; } int @@ -688,12 +688,12 @@ iavf_disable_queues_lv(struct iavf_adapter *adapter) args.out_buffer = vf->aq_resp; args.out_size = IAVF_AQ_BUF_SZ; err = iavf_execute_vf_cmd(adapter, &args); - if (err) { + if (err) PMD_DRV_LOG(ERR, "Failed to execute command of OP_DISABLE_QUEUES_V2"); - return err; - } - return 0; + + rte_free(queue_select); + return err; } int @@ -737,6 +737,8 @@ iavf_switch_queue_lv(struct iavf_adapter *adapter, uint16_t qid, if (err) PMD_DRV_LOG(ERR, "Failed to execute command of %s", on ? 
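
The GTPU hunk above chooses between the generic extension-header type and the direction-specific ones based on whether the flow item actually matches pdu_type. Condensed into a plain function (enum values illustrative; the fallback for unrecognized PDU types is a sketch choice):

```c
#include <stdint.h>

enum gtpu_hdr { GTPU_EH, GTPU_EH_PDU_UP, GTPU_EH_PDU_DWN };

#define GTPU_EH_DWLINK 0
#define GTPU_EH_UPLINK 1

static enum gtpu_hdr
pick_gtpu_hdr(const uint8_t *pdu_type_spec,     /* NULL if not given */
              uint8_t pdu_type_mask, uint8_t qfi_mask)
{
        if (!pdu_type_spec || (qfi_mask && !pdu_type_mask))
                return GTPU_EH;         /* direction not matched */
        if (*pdu_type_spec == GTPU_EH_UPLINK)
                return GTPU_EH_PDU_UP;
        if (*pdu_type_spec == GTPU_EH_DWLINK)
                return GTPU_EH_PDU_DWN;
        return GTPU_EH;
}
```
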
"OP_ENABLE_QUEUES_V2" : "OP_DISABLE_QUEUES_V2"); + + rte_free(queue_select); return err; } diff --git a/dpdk/drivers/net/ice/base/ice_flex_pipe.c b/dpdk/drivers/net/ice/base/ice_flex_pipe.c index 7594df1696..d74fecbf5b 100644 --- a/dpdk/drivers/net/ice/base/ice_flex_pipe.c +++ b/dpdk/drivers/net/ice/base/ice_flex_pipe.c @@ -2156,7 +2156,7 @@ enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all) u16 count = 0; u16 index; u16 size; - u16 i; + u16 i, j; ice_acquire_lock(&hw->tnl_lock); @@ -2196,30 +2196,31 @@ enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all) size); if (!sect_rx) goto ice_destroy_tunnel_err; - sect_rx->count = CPU_TO_LE16(1); + sect_rx->count = CPU_TO_LE16(count); sect_tx = (struct ice_boost_tcam_section *) ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM, size); if (!sect_tx) goto ice_destroy_tunnel_err; - sect_tx->count = CPU_TO_LE16(1); + sect_tx->count = CPU_TO_LE16(count); /* copy original boost entry to update package buffer, one copy to Rx * section, another copy to the Tx section */ - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) + for (i = 0, j = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && (all || hw->tnl.tbl[i].port == port)) { - ice_memcpy(sect_rx->tcam + i, + ice_memcpy(sect_rx->tcam + j, hw->tnl.tbl[i].boost_entry, sizeof(*sect_rx->tcam), ICE_NONDMA_TO_NONDMA); - ice_memcpy(sect_tx->tcam + i, + ice_memcpy(sect_tx->tcam + j, hw->tnl.tbl[i].boost_entry, sizeof(*sect_tx->tcam), ICE_NONDMA_TO_NONDMA); hw->tnl.tbl[i].marked = true; + j++; } status = ice_update_pkg(hw, ice_pkg_buf(bld), 1); diff --git a/dpdk/drivers/net/ice/base/ice_sched.c b/dpdk/drivers/net/ice/base/ice_sched.c index ac48bbe279..882448671e 100644 --- a/dpdk/drivers/net/ice/base/ice_sched.c +++ b/dpdk/drivers/net/ice/base/ice_sched.c @@ -1345,7 +1345,7 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw) ice_memdup(hw, buf->layer_props, (hw->num_tx_sched_layers * sizeof(*hw->layer_info)), - ICE_DMA_TO_DMA); + ICE_NONDMA_TO_NONDMA); if (!hw->layer_info) { status = ICE_ERR_NO_MEMORY; goto sched_query_out; diff --git a/dpdk/drivers/net/ice/base/ice_switch.c b/dpdk/drivers/net/ice/base/ice_switch.c index dc55d7e3ce..247c3acb67 100644 --- a/dpdk/drivers/net/ice/base/ice_switch.c +++ b/dpdk/drivers/net/ice/base/ice_switch.c @@ -3683,6 +3683,9 @@ ice_add_update_vsi_list(struct ice_hw *hw, ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2, vsi_list_id); + if (!m_entry->vsi_list_info) + return ICE_ERR_NO_MEMORY; + /* If this entry was large action then the large action needs * to be updated to point to FWD to VSI list */ @@ -5016,6 +5019,7 @@ ice_vsi_uses_fltr(struct ice_fltr_mgmt_list_entry *fm_entry, u16 vsi_handle) return ((fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI && fm_entry->fltr_info.vsi_handle == vsi_handle) || (fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST && + fm_entry->vsi_list_info && (ice_is_bit_set(fm_entry->vsi_list_info->vsi_map, vsi_handle)))); } @@ -5090,14 +5094,12 @@ ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle, LIST_FOR_EACH_ENTRY(fm_entry, lkup_list_head, ice_fltr_mgmt_list_entry, list_entry) { - struct ice_fltr_info *fi; - - fi = &fm_entry->fltr_info; - if (!fi || !ice_vsi_uses_fltr(fm_entry, vsi_handle)) + if (!ice_vsi_uses_fltr(fm_entry, vsi_handle)) continue; status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle, - vsi_list_head, fi); + vsi_list_head, + &fm_entry->fltr_info); if (status) return status; } diff 
--git a/dpdk/drivers/net/ice/ice_dcf.c b/dpdk/drivers/net/ice/ice_dcf.c index 44dbd3bb84..294ddcd2e1 100644 --- a/dpdk/drivers/net/ice/ice_dcf.c +++ b/dpdk/drivers/net/ice/ice_dcf.c @@ -504,9 +504,7 @@ ice_dcf_send_aq_cmd(void *dcf_hw, struct ice_aq_desc *desc, } do { - if ((!desc_cmd.pending && !buff_cmd.pending) || - (!desc_cmd.pending && desc_cmd.v_ret != IAVF_SUCCESS) || - (!buff_cmd.pending && buff_cmd.v_ret != IAVF_SUCCESS)) + if (!desc_cmd.pending && !buff_cmd.pending) break; rte_delay_ms(ICE_DCF_ARQ_CHECK_TIME); diff --git a/dpdk/drivers/net/ice/ice_dcf_ethdev.c b/dpdk/drivers/net/ice/ice_dcf_ethdev.c index b0b2ecb0d6..e5c877805f 100644 --- a/dpdk/drivers/net/ice/ice_dcf_ethdev.c +++ b/dpdk/drivers/net/ice/ice_dcf_ethdev.c @@ -60,23 +60,23 @@ ice_dcf_init_rxq(struct rte_eth_dev *dev, struct ice_rx_queue *rxq) * correctly. */ if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { - if (max_pkt_len <= RTE_ETHER_MAX_LEN || + if (max_pkt_len <= ICE_ETH_MAX_LEN || max_pkt_len > ICE_FRAME_SIZE_MAX) { PMD_DRV_LOG(ERR, "maximum packet length must be " "larger than %u and smaller than %u, " "as jumbo frame is enabled", - (uint32_t)RTE_ETHER_MAX_LEN, + (uint32_t)ICE_ETH_MAX_LEN, (uint32_t)ICE_FRAME_SIZE_MAX); return -EINVAL; } } else { if (max_pkt_len < RTE_ETHER_MIN_LEN || - max_pkt_len > RTE_ETHER_MAX_LEN) { + max_pkt_len > ICE_ETH_MAX_LEN) { PMD_DRV_LOG(ERR, "maximum packet length must be " "larger than %u and smaller than %u, " "as jumbo frame is disabled", (uint32_t)RTE_ETHER_MIN_LEN, - (uint32_t)RTE_ETHER_MAX_LEN); + (uint32_t)ICE_ETH_MAX_LEN); return -EINVAL; } } diff --git a/dpdk/drivers/net/ice/ice_ethdev.c b/dpdk/drivers/net/ice/ice_ethdev.c index 9a5d6a559f..70e5f74b2f 100644 --- a/dpdk/drivers/net/ice/ice_ethdev.c +++ b/dpdk/drivers/net/ice/ice_ethdev.c @@ -3182,6 +3182,12 @@ static int ice_init_rss(struct ice_pf *pf) vsi->rss_key_size = ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE; vsi->rss_lut_size = pf->hash_lut_size; + if (nb_q == 0) { + PMD_DRV_LOG(WARNING, + "RSS is not supported as rx queues number is zero\n"); + return 0; + } + if (is_safe_mode) { PMD_DRV_LOG(WARNING, "RSS is not supported in safe mode\n"); return 0; @@ -3268,10 +3274,12 @@ ice_dev_configure(struct rte_eth_dev *dev) if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; - ret = ice_init_rss(pf); - if (ret) { - PMD_DRV_LOG(ERR, "Failed to enable rss for PF"); - return ret; + if (dev->data->nb_rx_queues) { + ret = ice_init_rss(pf); + if (ret) { + PMD_DRV_LOG(ERR, "Failed to enable rss for PF"); + return ret; + } } return 0; @@ -3904,7 +3912,7 @@ ice_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) return -EBUSY; } - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > ICE_ETH_MAX_LEN) dev_data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else diff --git a/dpdk/drivers/net/ice/ice_ethdev.h b/dpdk/drivers/net/ice/ice_ethdev.h index 899f446cde..2b03c59671 100644 --- a/dpdk/drivers/net/ice/ice_ethdev.h +++ b/dpdk/drivers/net/ice/ice_ethdev.h @@ -135,6 +135,7 @@ */ #define ICE_ETH_OVERHEAD \ (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + ICE_VLAN_TAG_SIZE * 2) +#define ICE_ETH_MAX_LEN (RTE_ETHER_MTU + ICE_ETH_OVERHEAD) #define ICE_RXTX_BYTES_HIGH(bytes) ((bytes) & ~ICE_40_BIT_MASK) #define ICE_RXTX_BYTES_LOW(bytes) ((bytes) & ICE_40_BIT_MASK) diff --git a/dpdk/drivers/net/ice/ice_rxtx.c b/dpdk/drivers/net/ice/ice_rxtx.c index 5fbd68eafc..c98328ce0b 100644 --- a/dpdk/drivers/net/ice/ice_rxtx.c +++ 
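
The ice_dcf_send_aq_cmd() change simplifies the completion wait: rather than leaving the loop as soon as one half finishes with an error, it now waits until both the descriptor and the buffer command have completed, then inspects their results. A sketch of the resulting wait loop (types and timeout handling simplified):

```c
#include <stdbool.h>

struct aq_cmd { volatile bool pending; int v_ret; };

static int
wait_both(struct aq_cmd *desc, struct aq_cmd *buff,
          unsigned int retries, void (*delay_ms)(unsigned int))
{
        while ((desc->pending || buff->pending) && retries--)
                delay_ms(1);

        if (desc->pending || buff->pending)
                return -1;      /* timed out */
        return (desc->v_ret || buff->v_ret) ? -1 : 0;
}
```
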
b/dpdk/drivers/net/ice/ice_rxtx.c @@ -246,23 +246,23 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq) dev->data->dev_conf.rxmode.max_rx_pkt_len); if (rxmode->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { - if (rxq->max_pkt_len <= RTE_ETHER_MAX_LEN || + if (rxq->max_pkt_len <= ICE_ETH_MAX_LEN || rxq->max_pkt_len > ICE_FRAME_SIZE_MAX) { PMD_DRV_LOG(ERR, "maximum packet length must " "be larger than %u and smaller than %u," "as jumbo frame is enabled", - (uint32_t)RTE_ETHER_MAX_LEN, + (uint32_t)ICE_ETH_MAX_LEN, (uint32_t)ICE_FRAME_SIZE_MAX); return -EINVAL; } } else { if (rxq->max_pkt_len < RTE_ETHER_MIN_LEN || - rxq->max_pkt_len > RTE_ETHER_MAX_LEN) { + rxq->max_pkt_len > ICE_ETH_MAX_LEN) { PMD_DRV_LOG(ERR, "maximum packet length must be " "larger than %u and smaller than %u, " "as jumbo frame is disabled", (uint32_t)RTE_ETHER_MIN_LEN, - (uint32_t)RTE_ETHER_MAX_LEN); + (uint32_t)ICE_ETH_MAX_LEN); return -EINVAL; } } @@ -701,7 +701,7 @@ ice_fdir_program_hw_rx_queue(struct ice_rx_queue *rxq) rx_ctx.hbuf = rxq->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S; rx_ctx.dtype = 0; /* No Header Split mode */ rx_ctx.dsize = 1; /* 32B descriptors */ - rx_ctx.rxmax = RTE_ETHER_MAX_LEN; + rx_ctx.rxmax = ICE_ETH_MAX_LEN; /* TPH: Transaction Layer Packet (TLP) processing hints */ rx_ctx.tphrdesc_ena = 1; rx_ctx.tphwdesc_ena = 1; @@ -1451,6 +1451,11 @@ ice_rxd_error_to_pkt_flags(uint16_t stat_err0) if (unlikely(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))) flags |= PKT_RX_EIP_CKSUM_BAD; + if (unlikely(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S))) + flags |= PKT_RX_OUTER_L4_CKSUM_BAD; + else + flags |= PKT_RX_OUTER_L4_CKSUM_GOOD; + return flags; } @@ -2319,8 +2324,11 @@ ice_parse_tunneling_params(uint64_t ol_flags, *cd_tunneling |= (tx_offload.l2_len >> 1) << ICE_TXD_CTX_QW0_NATLEN_S; - if ((ol_flags & PKT_TX_OUTER_UDP_CKSUM) && - (ol_flags & PKT_TX_OUTER_IP_CKSUM) && + /** + * Calculate the tunneling UDP checksum. + * Shall be set only if L4TUNT = 01b and EIPT is not zero + */ + if (!(*cd_tunneling & ICE_TX_CTX_EIPT_NONE) && (*cd_tunneling & ICE_TXD_CTX_UDP_TUNNELING)) *cd_tunneling |= ICE_TXD_CTX_QW0_L4T_CS_M; } diff --git a/dpdk/drivers/net/ice/ice_rxtx.h b/dpdk/drivers/net/ice/ice_rxtx.h index 6b16716063..adfae016a9 100644 --- a/dpdk/drivers/net/ice/ice_rxtx.h +++ b/dpdk/drivers/net/ice/ice_rxtx.h @@ -31,7 +31,7 @@ #define ICE_VPMD_RX_BURST 32 #define ICE_VPMD_TX_BURST 32 -#define ICE_RXQ_REARM_THRESH 32 +#define ICE_RXQ_REARM_THRESH 64 #define ICE_MAX_RX_BURST ICE_RXQ_REARM_THRESH #define ICE_TX_MAX_FREE_BUF_SZ 64 #define ICE_DESCS_PER_LOOP 4 diff --git a/dpdk/drivers/net/ice/ice_rxtx_vec_avx2.c b/dpdk/drivers/net/ice/ice_rxtx_vec_avx2.c index b72a9e7025..7838e17787 100644 --- a/dpdk/drivers/net/ice/ice_rxtx_vec_avx2.c +++ b/dpdk/drivers/net/ice/ice_rxtx_vec_avx2.c @@ -251,43 +251,88 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, * bit13 is for VLAN indication. */ const __m256i flags_mask = - _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13)); + _mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13)); /** * data to be shuffled by the result of the flags mask shifted by 4 * bits. This gives use the l3_l4 flags. 
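
ice_rxd_error_to_pkt_flags() now reports the outer L4 (tunnel UDP) checksum status unconditionally, as either GOOD or BAD, so downstream code always sees a definite result. In isolation (the bit position and flag values are placeholders for the real ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S and PKT_RX_OUTER_L4_CKSUM_* definitions):

```c
#include <stdint.h>

#define XSUM_EUDPE_S    6               /* placeholder bit position */
#define OUTER_L4_BAD    (1u << 0)       /* placeholder flag values */
#define OUTER_L4_GOOD   (1u << 1)

static uint32_t
outer_l4_flags(uint16_t stat_err0)
{
        return (stat_err0 & (1u << XSUM_EUDPE_S)) ? OUTER_L4_BAD
                                                  : OUTER_L4_GOOD;
}
```
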
*/ - const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, - /* shift right 1 bit to make sure it not exceed 255 */ - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, - /* second 128-bits */ - 0, 0, 0, 0, 0, 0, 0, 0, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1); + const __m256i l3_l4_flags_shuf = + _mm256_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + /** + * second 128-bits + * shift right 20 bits to use the low two bits to indicate + * outer checksum status + * shift right 1 bit to make sure it not exceed 255 + */ + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 
20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1); const __m256i cksum_mask = - _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | - PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_EIP_CKSUM_BAD); + _mm256_set1_epi32(PKT_RX_IP_CKSUM_MASK | + PKT_RX_L4_CKSUM_MASK | + PKT_RX_EIP_CKSUM_BAD | + PKT_RX_OUTER_L4_CKSUM_MASK); /** * data to be shuffled by result of flag mask, shifted down 12. * If RSS(bit12)/VLAN(bit13) are set, @@ -469,6 +514,15 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts, __m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf, _mm256_srli_epi32(flag_bits, 4)); l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1); + + __m256i l4_outer_mask = _mm256_set1_epi32(0x6); + __m256i l4_outer_flags = + _mm256_and_si256(l3_l4_flags, l4_outer_mask); + l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20); + + __m256i l3_l4_mask = _mm256_set1_epi32(~0x6); + l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask); + l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags); l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask); /* set rss and vlan flags */ const __m256i rss_vlan_flag_bits = diff --git a/dpdk/drivers/net/ice/ice_rxtx_vec_avx512.c b/dpdk/drivers/net/ice/ice_rxtx_vec_avx512.c index df5d2be1e6..fd5d724329 100644 --- a/dpdk/drivers/net/ice/ice_rxtx_vec_avx512.c +++ b/dpdk/drivers/net/ice/ice_rxtx_vec_avx512.c @@ -230,43 +230,88 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq, * bit13 is for VLAN indication. */ const __m256i flags_mask = - _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13)); + _mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13)); /** * data to be shuffled by the result of the flags mask shifted by 4 * bits. This gives use the l3_l4 flags. 
*/ - const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, - /* shift right 1 bit to make sure it not exceed 255 */ - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, - /* 2nd 128-bits */ - 0, 0, 0, 0, 0, 0, 0, 0, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1); + const __m256i l3_l4_flags_shuf = + _mm256_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + /** + * second 128-bits + * shift right 20 bits to use the low two bits to indicate + * outer checksum status + * shift right 1 bit to make sure it not exceed 255 + */ + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | 
PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1); const __m256i cksum_mask = - _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | - PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_EIP_CKSUM_BAD); + _mm256_set1_epi32(PKT_RX_IP_CKSUM_MASK | + PKT_RX_L4_CKSUM_MASK | + PKT_RX_EIP_CKSUM_BAD | + PKT_RX_OUTER_L4_CKSUM_MASK); /** * data to be shuffled by result of flag mask, shifted down 12. * If RSS(bit12)/VLAN(bit13) are set, @@ -451,6 +496,14 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq, __m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf, _mm256_srli_epi32(flag_bits, 4)); l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1); + __m256i l4_outer_mask = _mm256_set1_epi32(0x6); + __m256i l4_outer_flags = + _mm256_and_si256(l3_l4_flags, l4_outer_mask); + l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20); + + __m256i l3_l4_mask = _mm256_set1_epi32(~0x6); + l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask); + l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags); l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask); /* set rss and vlan flags */ const __m256i rss_vlan_flag_bits = diff --git a/dpdk/drivers/net/ice/ice_rxtx_vec_common.h b/dpdk/drivers/net/ice/ice_rxtx_vec_common.h index ae2ac29f2a..c09ac7f667 100644 --- a/dpdk/drivers/net/ice/ice_rxtx_vec_common.h +++ b/dpdk/drivers/net/ice/ice_rxtx_vec_common.h @@ -266,6 +266,7 @@ ice_rx_vec_queue_default(struct ice_rx_queue *rxq) #define ICE_NO_VECTOR_FLAGS ( \ DEV_TX_OFFLOAD_MULTI_SEGS | \ DEV_TX_OFFLOAD_VLAN_INSERT | \ + DEV_TX_OFFLOAD_IPV4_CKSUM | \ DEV_TX_OFFLOAD_SCTP_CKSUM | \ DEV_TX_OFFLOAD_UDP_CKSUM | \ DEV_TX_OFFLOAD_TCP_TSO | \ diff --git a/dpdk/drivers/net/ice/ice_rxtx_vec_sse.c b/dpdk/drivers/net/ice/ice_rxtx_vec_sse.c index 626364719b..87e0c3db2e 100644 --- a/dpdk/drivers/net/ice/ice_rxtx_vec_sse.c +++ b/dpdk/drivers/net/ice/ice_rxtx_vec_sse.c @@ -114,39 +114,67 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4], * bit12 for RSS indication. * bit13 for VLAN indication. 
*/ - const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070, - 0x3070, 0x3070); - + const __m128i desc_mask = _mm_set_epi32(0x30f0, 0x30f0, + 0x30f0, 0x30f0); const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK | PKT_RX_L4_CKSUM_MASK | + PKT_RX_OUTER_L4_CKSUM_MASK | PKT_RX_EIP_CKSUM_BAD, PKT_RX_IP_CKSUM_MASK | PKT_RX_L4_CKSUM_MASK | + PKT_RX_OUTER_L4_CKSUM_MASK | PKT_RX_EIP_CKSUM_BAD, PKT_RX_IP_CKSUM_MASK | PKT_RX_L4_CKSUM_MASK | + PKT_RX_OUTER_L4_CKSUM_MASK | PKT_RX_EIP_CKSUM_BAD, PKT_RX_IP_CKSUM_MASK | PKT_RX_L4_CKSUM_MASK | + PKT_RX_OUTER_L4_CKSUM_MASK | PKT_RX_EIP_CKSUM_BAD); /* map the checksum, rss and vlan fields to the checksum, rss * and vlan flag */ - const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, - /* shift right 1 bit to make sure it not exceed 255 */ - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | - PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, - (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1); + const __m128i cksum_flags = + _mm_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + /** + * shift right 20 bits to use the low two bits to indicate + * outer checksum status + * shift right 1 bit to make sure it not exceed 255 + */ + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1); const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, @@ -166,6 +194,14 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4], flags = _mm_shuffle_epi8(cksum_flags, tmp_desc); /* then we shift left 1 bit */ flags = _mm_slli_epi32(flags, 1); + + __m128i l4_outer_mask = _mm_set_epi32(0x6, 0x6, 0x6, 0x6); + __m128i l4_outer_flags = _mm_and_si128(flags, l4_outer_mask); + l4_outer_flags 
= _mm_slli_epi32(l4_outer_flags, 20); + + __m128i l3_l4_mask = _mm_set_epi32(~0x6, ~0x6, ~0x6, ~0x6); + __m128i l3_l4_flags = _mm_and_si128(flags, l3_l4_mask); + flags = _mm_or_si128(l3_l4_flags, l4_outer_flags); /* we need to mask out the reduntant bits introduced by RSS or * VLAN fields. */ @@ -217,10 +253,10 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4], * appropriate flags means that we have to do a shift and blend for * each mbuf before we do the write. */ - rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10); - rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10); - rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10); - rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10); + rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x30); + rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x30); + rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x30); + rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x30); /* write the rearm data and the olflags in one write */ RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) != diff --git a/dpdk/drivers/net/ionic/ionic.h b/dpdk/drivers/net/ionic/ionic.h index 1538df3092..a6d84036e8 100644 --- a/dpdk/drivers/net/ionic/ionic.h +++ b/dpdk/drivers/net/ionic/ionic.h @@ -48,6 +48,7 @@ struct ionic_hw { struct ionic_adapter { struct ionic_hw hw; struct ionic_dev idev; + const char *name; struct ionic_dev_bar bars[IONIC_BARS_MAX]; struct ionic_identity ident; struct ionic_lif *lifs[IONIC_LIFS_MAX]; diff --git a/dpdk/drivers/net/ionic/ionic_dev.c b/dpdk/drivers/net/ionic/ionic_dev.c index 5c2820b7a1..632ca10cf2 100644 --- a/dpdk/drivers/net/ionic/ionic_dev.c +++ b/dpdk/drivers/net/ionic/ionic_dev.c @@ -103,6 +103,9 @@ ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd) uint32_t cmd_size = sizeof(cmd->words) / sizeof(cmd->words[0]); + IONIC_PRINT(DEBUG, "Sending %s (%d) via dev_cmd", + ionic_opcode_to_str(cmd->cmd.opcode), cmd->cmd.opcode); + for (i = 0; i < cmd_size; i++) iowrite32(cmd->words[i], &idev->dev_cmd->cmd.words[i]); @@ -350,6 +353,8 @@ ionic_dev_cmd_adminq_init(struct ionic_dev *idev, .q_init.cq_ring_base = cq->base_pa, }; + IONIC_PRINT(DEBUG, "adminq.q_init.ver %u", cmd.q_init.ver); + ionic_dev_cmd_go(idev, &cmd); } diff --git a/dpdk/drivers/net/ionic/ionic_dev.h b/dpdk/drivers/net/ionic/ionic_dev.h index 532255a603..6bac96072d 100644 --- a/dpdk/drivers/net/ionic/ionic_dev.h +++ b/dpdk/drivers/net/ionic/ionic_dev.h @@ -208,6 +208,8 @@ struct ionic_qcq; void ionic_intr_init(struct ionic_dev *idev, struct ionic_intr_info *intr, unsigned long index); +const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode); + int ionic_dev_setup(struct ionic_adapter *adapter); void ionic_dev_cmd_go(struct ionic_dev *idev, union ionic_dev_cmd *cmd); diff --git a/dpdk/drivers/net/ionic/ionic_ethdev.c b/dpdk/drivers/net/ionic/ionic_ethdev.c index 600333e20f..fe778043eb 100644 --- a/dpdk/drivers/net/ionic/ionic_ethdev.c +++ b/dpdk/drivers/net/ionic/ionic_ethdev.c @@ -289,7 +289,10 @@ ionic_dev_link_update(struct rte_eth_dev *eth_dev, /* Initialize */ memset(&link, 0, sizeof(link)); - link.link_autoneg = ETH_LINK_AUTONEG; + + if (adapter->idev.port_info->config.an_enable) { + link.link_autoneg = ETH_LINK_AUTONEG; + } if (!adapter->link_up) { /* Interface is down */ @@ -571,7 +574,7 @@ ionic_dev_rss_reta_update(struct rte_eth_dev *eth_dev, if (reta_size != ident->lif.eth.rss_ind_tbl_sz) { IONIC_PRINT(ERR, "The size of hash lookup table configured " - 
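
The repeated `>> 20` in these tables is a packing trick. Each shuffle LUT entry is one byte, but PKT_RX_OUTER_L4_CKSUM_BAD/GOOD live at bits 21/22 of ol_flags, so the tables park them at bits 1-2 (then shift everything right once more to stay under 255). After the vector shuffle and the `<< 1`, the code extracts those two bits with mask 0x6 and shifts them left by 20 to restore their real positions. A scalar model of the unpacking, assuming the 20.11 flag layout (BAD = 1<<21, GOOD = 1<<22):

```c
#include <stdint.h>

#define OUTER_L4_BAD    (UINT64_C(1) << 21)
#define OUTER_L4_GOOD   (UINT64_C(1) << 22)

static uint64_t
unpack_lut_entry(uint8_t entry)
{
        uint32_t flags = (uint32_t)entry << 1;          /* undo the >> 1 */
        uint64_t outer = (uint64_t)(flags & 0x6) << 20; /* bits 1-2 -> 21-22 */

        return (flags & ~0x6u) | outer;
}
```

This also explains the _mm_blend_epi16 mask change from 0x10 to 0x30 in the SSE path: with flags now set above bit 15, the flag word no longer fits in a single 16-bit lane, so two consecutive lanes must be written back into the mbuf rearm data.
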
"(%d) doesn't match the number hardware can supported " + "(%d) does not match the number hardware can support " "(%d)", reta_size, ident->lif.eth.rss_ind_tbl_sz); return -EINVAL; @@ -605,7 +608,7 @@ ionic_dev_rss_reta_query(struct rte_eth_dev *eth_dev, if (reta_size != ident->lif.eth.rss_ind_tbl_sz) { IONIC_PRINT(ERR, "The size of hash lookup table configured " - "(%d) doesn't match the number hardware can supported " + "(%d) does not match the number hardware can support " "(%d)", reta_size, ident->lif.eth.rss_ind_tbl_sz); return -EINVAL; @@ -901,7 +904,8 @@ ionic_dev_start(struct rte_eth_dev *eth_dev) struct ionic_lif *lif = IONIC_ETH_DEV_TO_LIF(eth_dev); struct ionic_adapter *adapter = lif->adapter; struct ionic_dev *idev = &adapter->idev; - uint32_t allowed_speeds; + uint32_t speed = 0, allowed_speeds; + uint8_t an_enable; int err; IONIC_PRINT_CALL(); @@ -925,11 +929,23 @@ ionic_dev_start(struct rte_eth_dev *eth_dev) return err; } - if (eth_dev->data->dev_conf.link_speeds & ETH_LINK_SPEED_FIXED) { - uint32_t speed = ionic_parse_link_speeds(dev_conf->link_speeds); + /* Configure link */ + an_enable = (dev_conf->link_speeds & ETH_LINK_SPEED_FIXED) == 0; - if (speed) - ionic_dev_cmd_port_speed(idev, speed); + ionic_dev_cmd_port_autoneg(idev, an_enable); + err = ionic_dev_cmd_wait_check(idev, IONIC_DEVCMD_TIMEOUT); + if (err) + IONIC_PRINT(WARNING, "Failed to %s autonegotiation", + an_enable ? "enable" : "disable"); + + if (!an_enable) + speed = ionic_parse_link_speeds(dev_conf->link_speeds); + if (speed) { + ionic_dev_cmd_port_speed(idev, speed); + err = ionic_dev_cmd_wait_check(idev, IONIC_DEVCMD_TIMEOUT); + if (err) + IONIC_PRINT(WARNING, "Failed to set link speed %u", + speed); } ionic_dev_link_update(eth_dev, 0); diff --git a/dpdk/drivers/net/ionic/ionic_lif.c b/dpdk/drivers/net/ionic/ionic_lif.c index 60a5f3d537..5894f3505a 100644 --- a/dpdk/drivers/net/ionic/ionic_lif.c +++ b/dpdk/drivers/net/ionic/ionic_lif.c @@ -551,7 +551,7 @@ ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr) /* * Note: interrupt handler is called for index = 0 only * (we use interrupts for the notifyq only anyway, - * which hash index = 0) + * which has index = 0) */ for (index = 0; index < adapter->nintrs; index++) @@ -684,8 +684,8 @@ ionic_qcq_alloc(struct ionic_lif *lif, uint8_t type, ionic_q_sg_map(&new->q, sg_base, sg_base_pa); } - IONIC_PRINT(DEBUG, "Q-Base-PA = %ju CQ-Base-PA = %ju " - "SG-base-PA = %ju", + IONIC_PRINT(DEBUG, "Q-Base-PA = %#jx CQ-Base-PA = %#jx " + "SG-base-PA = %#jx", q_base_pa, cq_base_pa, sg_base_pa); ionic_q_map(&new->q, q_base, q_base_pa); @@ -824,7 +824,13 @@ ionic_lif_alloc(struct ionic_lif *lif) int dbpage_num; int err; - snprintf(lif->name, sizeof(lif->name), "lif%u", lif->index); + /* + * lif->name was zeroed on allocation. + * Copy (sizeof() - 1) bytes to ensure that it is NULL terminated. 
+ */ + memcpy(lif->name, lif->eth_dev->data->name, sizeof(lif->name) - 1); + + IONIC_PRINT(DEBUG, "LIF: %s", lif->name); IONIC_PRINT(DEBUG, "Allocating Lif Info"); @@ -867,8 +873,6 @@ ionic_lif_alloc(struct ionic_lif *lif) IONIC_PRINT(DEBUG, "Allocating Admin Queue"); - IONIC_PRINT(DEBUG, "Allocating Admin Queue"); - err = ionic_admin_qcq_alloc(lif); if (err) { IONIC_PRINT(ERR, "Cannot allocate admin queue"); @@ -1224,6 +1228,7 @@ ionic_lif_notifyq_init(struct ionic_lif *lif) ctx.cmd.q_init.ring_base); IONIC_PRINT(DEBUG, "notifyq_init.ring_size %d", ctx.cmd.q_init.ring_size); + IONIC_PRINT(DEBUG, "notifyq_init.ver %u", ctx.cmd.q_init.ver); err = ionic_adminq_post_wait(lif, &ctx); if (err) @@ -1335,6 +1340,7 @@ ionic_lif_txq_init(struct ionic_qcq *qcq) ctx.cmd.q_init.ring_base); IONIC_PRINT(DEBUG, "txq_init.ring_size %d", ctx.cmd.q_init.ring_size); + IONIC_PRINT(DEBUG, "txq_init.ver %u", ctx.cmd.q_init.ver); err = ionic_adminq_post_wait(qcq->lif, &ctx); if (err) @@ -1383,6 +1389,7 @@ ionic_lif_rxq_init(struct ionic_qcq *qcq) ctx.cmd.q_init.ring_base); IONIC_PRINT(DEBUG, "rxq_init.ring_size %d", ctx.cmd.q_init.ring_size); + IONIC_PRINT(DEBUG, "rxq_init.ver %u", ctx.cmd.q_init.ver); err = ionic_adminq_post_wait(qcq->lif, &ctx); if (err) @@ -1453,8 +1460,8 @@ ionic_lif_set_name(struct ionic_lif *lif) }, }; - snprintf(ctx.cmd.lif_setattr.name, sizeof(ctx.cmd.lif_setattr.name), - "%d", lif->port_id); + memcpy(ctx.cmd.lif_setattr.name, lif->name, + sizeof(ctx.cmd.lif_setattr.name) - 1); ionic_adminq_post_wait(lif, &ctx); } @@ -1685,7 +1692,8 @@ ionic_lifs_size(struct ionic_adapter *adapter) nintrs = nlifs * 1 /* notifyq */; if (nintrs > dev_nintrs) { - IONIC_PRINT(ERR, "At most %d intr queues supported, minimum required is %u", + IONIC_PRINT(ERR, + "At most %d intr supported, minimum req'd is %u", dev_nintrs, nintrs); return -ENOSPC; } diff --git a/dpdk/drivers/net/ionic/ionic_main.c b/dpdk/drivers/net/ionic/ionic_main.c index 2ade213d2d..b963898db0 100644 --- a/dpdk/drivers/net/ionic/ionic_main.c +++ b/dpdk/drivers/net/ionic/ionic_main.c @@ -61,7 +61,7 @@ ionic_error_to_str(enum ionic_status_code code) } } -static const char * +const char * ionic_opcode_to_str(enum ionic_cmd_opcode opcode) { switch (opcode) { @@ -107,6 +107,8 @@ ionic_opcode_to_str(enum ionic_cmd_opcode opcode) return "IONIC_CMD_Q_INIT"; case IONIC_CMD_Q_CONTROL: return "IONIC_CMD_Q_CONTROL"; + case IONIC_CMD_Q_IDENTIFY: + return "IONIC_CMD_Q_IDENTIFY"; case IONIC_CMD_RDMA_RESET_LIF: return "IONIC_CMD_RDMA_RESET_LIF"; case IONIC_CMD_RDMA_CREATE_EQ: @@ -126,8 +128,9 @@ ionic_adminq_check_err(struct ionic_admin_ctx *ctx, bool timeout) const char *name; const char *status; + name = ionic_opcode_to_str(ctx->cmd.cmd.opcode); + if (ctx->comp.comp.status || timeout) { - name = ionic_opcode_to_str(ctx->cmd.cmd.opcode); status = ionic_error_to_str(ctx->comp.comp.status); IONIC_PRINT(ERR, "%s (%d) failed: %s (%d)", name, @@ -137,6 +140,8 @@ ionic_adminq_check_err(struct ionic_admin_ctx *ctx, bool timeout) return -EIO; } + IONIC_PRINT(DEBUG, "%s (%d) succeeded", name, ctx->cmd.cmd.opcode); + return 0; } @@ -174,14 +179,13 @@ ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) bool done; int err; - IONIC_PRINT(DEBUG, "Sending %s to the admin queue", - ionic_opcode_to_str(ctx->cmd.cmd.opcode)); + IONIC_PRINT(DEBUG, "Sending %s (%d) via the admin queue", + ionic_opcode_to_str(ctx->cmd.cmd.opcode), ctx->cmd.cmd.opcode); err = ionic_adminq_post(lif, ctx); if (err) { - IONIC_PRINT(ERR, "Failure posting to the admin queue 
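
Both name fixes above use the same idiom: copy at most sizeof(dst) - 1 bytes into a destination that was zeroed at allocation, so the final byte is guaranteed to remain '\0'. Generalized (assuming, as in the driver, that the source is itself a fixed-size field):

```c
#include <stddef.h>
#include <string.h>

static void
copy_name(char *dst, size_t dst_size, const char *src, size_t src_size)
{
        size_t n = src_size < dst_size - 1 ? src_size : dst_size - 1;

        memset(dst, 0, dst_size);       /* mirrors the zeroed allocation */
        memcpy(dst, src, n);            /* last byte stays '\0' */
}
```
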
%d (%d)", + IONIC_PRINT(ERR, "Failure posting %d to the admin queue (%d)", ctx->cmd.cmd.opcode, err); - return err; } @@ -339,12 +343,12 @@ ionic_port_identify(struct ionic_adapter *adapter) ioread32(&idev->dev_cmd->data[i]); } - IONIC_PRINT(INFO, "speed %d ", ident->port.config.speed); - IONIC_PRINT(INFO, "mtu %d ", ident->port.config.mtu); - IONIC_PRINT(INFO, "state %d ", ident->port.config.state); - IONIC_PRINT(INFO, "an_enable %d ", ident->port.config.an_enable); - IONIC_PRINT(INFO, "fec_type %d ", ident->port.config.fec_type); - IONIC_PRINT(INFO, "pause_type %d ", ident->port.config.pause_type); + IONIC_PRINT(INFO, "speed %d", ident->port.config.speed); + IONIC_PRINT(INFO, "mtu %d", ident->port.config.mtu); + IONIC_PRINT(INFO, "state %d", ident->port.config.state); + IONIC_PRINT(INFO, "an_enable %d", ident->port.config.an_enable); + IONIC_PRINT(INFO, "fec_type %d", ident->port.config.fec_type); + IONIC_PRINT(INFO, "pause_type %d", ident->port.config.pause_type); IONIC_PRINT(INFO, "loopback_mode %d", ident->port.config.loopback_mode); @@ -385,8 +389,7 @@ ionic_port_init(struct ionic_adapter *adapter) idev->port_info_sz = RTE_ALIGN(sizeof(*idev->port_info), PAGE_SIZE); snprintf(z_name, sizeof(z_name), "%s_port_%s_info", - IONIC_DRV_NAME, - adapter->pci_dev->device.name); + IONIC_DRV_NAME, adapter->name); idev->port_info_z = ionic_memzone_reserve(z_name, idev->port_info_sz, SOCKET_ID_ANY); diff --git a/dpdk/drivers/net/ionic/ionic_rxtx.c b/dpdk/drivers/net/ionic/ionic_rxtx.c index 2592f5cab6..9466099352 100644 --- a/dpdk/drivers/net/ionic/ionic_rxtx.c +++ b/dpdk/drivers/net/ionic/ionic_rxtx.c @@ -67,7 +67,7 @@ ionic_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, qinfo->conf.tx_deferred_start = txq->deferred_start; } -static inline void __rte_cold +static __rte_always_inline void ionic_tx_flush(struct ionic_cq *cq) { struct ionic_queue *q = cq->bound_q; @@ -133,7 +133,7 @@ ionic_dev_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id) { struct ionic_qcq *txq; - IONIC_PRINT_CALL(); + IONIC_PRINT(DEBUG, "Stopping TX queue %u", tx_queue_id); txq = eth_dev->data->tx_queues[tx_queue_id]; @@ -156,7 +156,7 @@ ionic_dev_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id) int __rte_cold ionic_dev_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id, - uint16_t nb_desc, uint32_t socket_id __rte_unused, + uint16_t nb_desc, uint32_t socket_id, const struct rte_eth_txconf *tx_conf) { struct ionic_lif *lif = IONIC_ETH_DEV_TO_LIF(eth_dev); @@ -164,11 +164,6 @@ ionic_dev_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id, uint64_t offloads; int err; - IONIC_PRINT_CALL(); - - IONIC_PRINT(DEBUG, "Configuring TX queue %u with %u buffers", - tx_queue_id, nb_desc); - if (tx_queue_id >= lif->ntxqcqs) { IONIC_PRINT(DEBUG, "Queue index %u not available " "(max %u queues)", @@ -177,6 +172,9 @@ ionic_dev_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id, } offloads = tx_conf->offloads | eth_dev->data->dev_conf.txmode.offloads; + IONIC_PRINT(DEBUG, + "Configuring skt %u TX queue %u with %u buffers, offloads %jx", + socket_id, tx_queue_id, nb_desc, offloads); /* Validate number of receive descriptors */ if (!rte_is_power_of_2(nb_desc) || nb_desc < IONIC_MIN_RING_DESC) @@ -214,10 +212,11 @@ ionic_dev_tx_queue_start(struct rte_eth_dev *eth_dev, uint16_t tx_queue_id) struct ionic_qcq *txq; int err; - IONIC_PRINT_CALL(); - txq = eth_dev->data->tx_queues[tx_queue_id]; + IONIC_PRINT(DEBUG, "Starting TX queue %u, %u descs", + tx_queue_id, 
txq->q.num_descs); + err = ionic_lif_txq_init(txq); if (err) return err; @@ -316,7 +315,8 @@ ionic_tx_tso(struct ionic_queue *q, struct rte_mbuf *txm, struct ionic_txq_desc *desc; struct ionic_txq_sg_elem *elem; struct rte_mbuf *txm_seg; - uint64_t desc_addr = 0; + rte_iova_t data_iova; + uint64_t desc_addr = 0, next_addr; uint16_t desc_len = 0; uint8_t desc_nsge; uint32_t hdrlen; @@ -353,6 +353,7 @@ ionic_tx_tso(struct ionic_queue *q, struct rte_mbuf *txm, seglen = hdrlen + mss; left = txm->data_len; + data_iova = rte_mbuf_data_iova(txm); desc = ionic_tx_tso_next(q, &elem); start = true; @@ -362,7 +363,7 @@ ionic_tx_tso(struct ionic_queue *q, struct rte_mbuf *txm, while (left > 0) { len = RTE_MIN(seglen, left); frag_left = seglen - len; - desc_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(txm)); + desc_addr = rte_cpu_to_le_64(data_iova + offset); desc_len = len; desc_nsge = 0; left -= len; @@ -386,24 +387,23 @@ ionic_tx_tso(struct ionic_queue *q, struct rte_mbuf *txm, txm_seg = txm->next; while (txm_seg != NULL) { offset = 0; + data_iova = rte_mbuf_data_iova(txm_seg); left = txm_seg->data_len; stats->frags++; while (left > 0) { - rte_iova_t data_iova; - data_iova = rte_mbuf_data_iova(txm_seg); - elem->addr = rte_cpu_to_le_64(data_iova) + offset; + next_addr = rte_cpu_to_le_64(data_iova + offset); if (frag_left > 0) { len = RTE_MIN(frag_left, left); frag_left -= len; + elem->addr = next_addr; elem->len = len; elem++; desc_nsge++; } else { len = RTE_MIN(mss, left); frag_left = mss - len; - data_iova = rte_mbuf_data_iova(txm_seg); - desc_addr = rte_cpu_to_le_64(data_iova); + desc_addr = next_addr; desc_len = len; desc_nsge = 0; } @@ -411,6 +411,7 @@ ionic_tx_tso(struct ionic_queue *q, struct rte_mbuf *txm, offset += len; if (txm_seg->next != NULL && frag_left > 0) continue; + done = (txm_seg->next == NULL && left == 0); ionic_tx_tso_post(q, desc, txm_seg, desc_addr, desc_nsge, desc_len, @@ -430,7 +431,7 @@ ionic_tx_tso(struct ionic_queue *q, struct rte_mbuf *txm, return 0; } -static int +static __rte_always_inline int ionic_tx(struct ionic_queue *q, struct rte_mbuf *txm, uint64_t offloads, bool not_xmit_more) { @@ -444,23 +445,27 @@ ionic_tx(struct ionic_queue *q, struct rte_mbuf *txm, bool encap; bool has_vlan; uint64_t ol_flags = txm->ol_flags; - uint64_t addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(txm)); + uint64_t addr; uint8_t opcode = IONIC_TXQ_DESC_OPCODE_CSUM_NONE; uint8_t flags = 0; if ((ol_flags & PKT_TX_IP_CKSUM) && - (offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)) { + (offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)) { opcode = IONIC_TXQ_DESC_OPCODE_CSUM_HW; flags |= IONIC_TXQ_DESC_FLAG_CSUM_L3; - if (((ol_flags & PKT_TX_TCP_CKSUM) && - (offloads & DEV_TX_OFFLOAD_TCP_CKSUM)) || - ((ol_flags & PKT_TX_UDP_CKSUM) && - (offloads & DEV_TX_OFFLOAD_UDP_CKSUM))) - flags |= IONIC_TXQ_DESC_FLAG_CSUM_L4; - } else { - stats->no_csum++; } + if (((ol_flags & PKT_TX_TCP_CKSUM) && + (offloads & DEV_TX_OFFLOAD_TCP_CKSUM)) || + ((ol_flags & PKT_TX_UDP_CKSUM) && + (offloads & DEV_TX_OFFLOAD_UDP_CKSUM))) { + opcode = IONIC_TXQ_DESC_OPCODE_CSUM_HW; + flags |= IONIC_TXQ_DESC_FLAG_CSUM_L4; + } + + if (opcode == IONIC_TXQ_DESC_OPCODE_CSUM_NONE) + stats->no_csum++; + has_vlan = (ol_flags & PKT_TX_VLAN_PKT); encap = ((ol_flags & PKT_TX_OUTER_IP_CKSUM) || (ol_flags & PKT_TX_OUTER_UDP_CKSUM)) && @@ -470,6 +475,8 @@ ionic_tx(struct ionic_queue *q, struct rte_mbuf *txm, flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0; flags |= encap ? 
IONIC_TXQ_DESC_FLAG_ENCAP : 0; + addr = rte_cpu_to_le_64(rte_mbuf_data_iova(txm)); + desc->cmd = encode_txq_desc_cmd(opcode, flags, txm->nb_segs - 1, addr); desc->len = txm->data_len; desc->vlan_tci = txm->vlan_tci; @@ -641,7 +648,7 @@ int __rte_cold ionic_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id, uint16_t nb_desc, - uint32_t socket_id __rte_unused, + uint32_t socket_id, const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp) { @@ -650,11 +657,6 @@ ionic_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, uint64_t offloads; int err; - IONIC_PRINT_CALL(); - - IONIC_PRINT(DEBUG, "Configuring RX queue %u with %u buffers", - rx_queue_id, nb_desc); - if (rx_queue_id >= lif->nrxqcqs) { IONIC_PRINT(ERR, "Queue index %u not available (max %u queues)", @@ -663,13 +665,16 @@ ionic_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, } offloads = rx_conf->offloads | eth_dev->data->dev_conf.rxmode.offloads; + IONIC_PRINT(DEBUG, + "Configuring skt %u RX queue %u with %u buffers, offloads %jx", + socket_id, rx_queue_id, nb_desc, offloads); /* Validate number of receive descriptors */ if (!rte_is_power_of_2(nb_desc) || nb_desc < IONIC_MIN_RING_DESC || nb_desc > IONIC_MAX_RING_DESC) { IONIC_PRINT(ERR, - "Bad number of descriptors (%u) for queue %u (min: %u)", + "Bad descriptor count (%u) for queue %u (min: %u)", nb_desc, rx_queue_id, IONIC_MIN_RING_DESC); return -EINVAL; /* or use IONIC_DEFAULT_RING_DESC */ } @@ -686,7 +691,7 @@ ionic_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, err = ionic_rx_qcq_alloc(lif, rx_queue_id, nb_desc, &rxq); if (err) { - IONIC_PRINT(ERR, "Queue allocation failure"); + IONIC_PRINT(ERR, "Queue %d allocation failure", rx_queue_id); return -EINVAL; } @@ -712,7 +717,7 @@ ionic_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, return 0; } -static void +static __rte_always_inline void ionic_rx_clean(struct ionic_queue *q, uint32_t q_desc_index, uint32_t cq_desc_index, void *cb_arg, void *service_cb_arg) @@ -873,7 +878,7 @@ ionic_rx_recycle(struct ionic_queue *q, uint32_t q_desc_index, ionic_q_post(q, true, ionic_rx_clean, mbuf); } -static int __rte_cold +static __rte_always_inline int ionic_rx_fill(struct ionic_qcq *rxq, uint32_t len) { struct ionic_queue *q = &rxq->q; @@ -957,13 +962,11 @@ ionic_dev_rx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) struct ionic_qcq *rxq; int err; - IONIC_PRINT_CALL(); - - IONIC_PRINT(DEBUG, "Allocating RX queue buffers (size: %u)", - frame_size); - rxq = eth_dev->data->rx_queues[rx_queue_id]; + IONIC_PRINT(DEBUG, "Starting RX queue %u, %u descs (size: %u)", + rx_queue_id, rxq->q.num_descs, frame_size); + err = ionic_lif_rxq_init(rxq); if (err) return err; @@ -983,7 +986,7 @@ ionic_dev_rx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) return 0; } -static inline void __rte_cold +static __rte_always_inline void ionic_rxq_service(struct ionic_cq *cq, uint32_t work_to_do, void *service_cb_arg) { @@ -1043,7 +1046,7 @@ ionic_dev_rx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) { struct ionic_qcq *rxq; - IONIC_PRINT_CALL(); + IONIC_PRINT(DEBUG, "Stopping RX queue %u", rx_queue_id); rxq = eth_dev->data->rx_queues[rx_queue_id]; diff --git a/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.h b/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.h index 9b0cf309c8..a6815a9cca 100644 --- a/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.h +++ b/dpdk/drivers/net/ipn3ke/ipn3ke_ethdev.h @@ -640,6 +640,7 @@ ipn3ke_tm_ops_get(struct rte_eth_dev *ethdev, */ #define IPN3KE_ETH_OVERHEAD \ (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + 
IPN3KE_VLAN_TAG_SIZE * 2) +#define IPN3KE_ETH_MAX_LEN (RTE_ETHER_MTU + IPN3KE_ETH_OVERHEAD) #define IPN3KE_MAC_FRAME_SIZE_MAX 9728 #define IPN3KE_MAC_RX_FRAME_MAXLENGTH 0x00AE diff --git a/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c b/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c index 8a53602576..9e15cce34f 100644 --- a/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c +++ b/dpdk/drivers/net/ipn3ke/ipn3ke_representor.c @@ -2801,7 +2801,7 @@ ipn3ke_rpst_mtu_set(struct rte_eth_dev *ethdev, uint16_t mtu) return -EBUSY; } - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > IPN3KE_ETH_MAX_LEN) dev_data->dev_conf.rxmode.offloads |= (uint64_t)(DEV_RX_OFFLOAD_JUMBO_FRAME); else diff --git a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c index 9a47a8b262..fa0f5afd03 100644 --- a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c @@ -5173,7 +5173,7 @@ ixgbe_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); /* switch to jumbo mode if needed */ - if (frame_size > RTE_ETHER_MAX_LEN) { + if (frame_size > IXGBE_ETH_MAX_LEN) { dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; hlreg0 |= IXGBE_HLREG0_JUMBOEN; @@ -6555,7 +6555,8 @@ ixgbevf_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) * prior to 3.11.33 which contains the following change: * "ixgbe: Enable jumbo frames support w/ SR-IOV" */ - ixgbevf_rlpml_set_vf(hw, max_frame); + if (ixgbevf_rlpml_set_vf(hw, max_frame)) + return -EINVAL; /* update max frame size */ dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame; diff --git a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h index 3d35ea791b..a0ce18ca24 100644 --- a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h @@ -104,6 +104,9 @@ /* The overhead from MTU to max frame size. */ #define IXGBE_ETH_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN) +/* The max frame size with default MTU */ +#define IXGBE_ETH_MAX_LEN (RTE_ETHER_MTU + IXGBE_ETH_OVERHEAD) + /* bit of VXLAN tunnel type | 7 bits of zeros | 8 bits of zeros*/ #define IXGBE_FDIR_VXLAN_TUNNEL_TYPE 0x8000 /* bit of NVGRE tunnel type | 7 bits of zeros | 8 bits of zeros*/ diff --git a/dpdk/drivers/net/ixgbe/ixgbe_fdir.c b/dpdk/drivers/net/ixgbe/ixgbe_fdir.c index a0fab5070d..11b9effeba 100644 --- a/dpdk/drivers/net/ixgbe/ixgbe_fdir.c +++ b/dpdk/drivers/net/ixgbe/ixgbe_fdir.c @@ -503,9 +503,30 @@ ixgbe_fdir_set_flexbytes_offset(struct rte_eth_dev *dev, uint16_t offset) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); uint32_t fdirctrl; int i; + if (fdir_info->flex_bytes_offset == offset) + return 0; + + /** + * 82599 adapters flow director init flow cannot be restarted, + * Workaround 82599 silicon errata by performing the following steps + * before re-writing the FDIRCTRL control register with the same value. 
+ * - write 1 to bit 8 of FDIRCMD register & + * - write 0 to bit 8 of FDIRCMD register + */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, + (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) | + IXGBE_FDIRCMD_CLEARHT)); + IXGBE_WRITE_FLUSH(hw); + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, + (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) & + ~IXGBE_FDIRCMD_CLEARHT)); + IXGBE_WRITE_FLUSH(hw); + fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL); fdirctrl &= ~IXGBE_FDIRCTRL_FLEX_MASK; @@ -520,6 +541,14 @@ ixgbe_fdir_set_flexbytes_offset(struct rte_eth_dev *dev, break; msec_delay(1); } + + if (i >= IXGBE_FDIR_INIT_DONE_POLL) { + PMD_DRV_LOG(ERR, "Flow Director poll time exceeded!"); + return -ETIMEDOUT; + } + + fdir_info->flex_bytes_offset = offset; + return 0; } diff --git a/dpdk/drivers/net/ixgbe/ixgbe_flow.c b/dpdk/drivers/net/ixgbe/ixgbe_flow.c index 39f6ed73f6..9aeb2e4a49 100644 --- a/dpdk/drivers/net/ixgbe/ixgbe_flow.c +++ b/dpdk/drivers/net/ixgbe/ixgbe_flow.c @@ -3137,13 +3137,13 @@ ixgbe_flow_create(struct rte_eth_dev *dev, rte_memcpy(&fdir_info->mask, &fdir_rule.mask, sizeof(struct ixgbe_hw_fdir_mask)); - fdir_info->flex_bytes_offset = - fdir_rule.flex_bytes_offset; - if (fdir_rule.mask.flex_bytes_mask) - ixgbe_fdir_set_flexbytes_offset(dev, + if (fdir_rule.mask.flex_bytes_mask) { + ret = ixgbe_fdir_set_flexbytes_offset(dev, fdir_rule.flex_bytes_offset); - + if (ret) + goto out; + } ret = ixgbe_fdir_set_input_mask(dev); if (ret) goto out; @@ -3161,8 +3161,9 @@ ixgbe_flow_create(struct rte_eth_dev *dev, if (ret) goto out; - if (fdir_info->flex_bytes_offset != - fdir_rule.flex_bytes_offset) + if (fdir_rule.mask.flex_bytes_mask && + fdir_info->flex_bytes_offset != + fdir_rule.flex_bytes_offset) goto out; } } diff --git a/dpdk/drivers/net/ixgbe/ixgbe_pf.c b/dpdk/drivers/net/ixgbe/ixgbe_pf.c index 833863af5a..15982af8da 100644 --- a/dpdk/drivers/net/ixgbe/ixgbe_pf.c +++ b/dpdk/drivers/net/ixgbe/ixgbe_pf.c @@ -552,20 +552,47 @@ ixgbe_vf_set_vlan(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf) } static int -ixgbe_set_vf_lpe(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32_t *msgbuf) +ixgbe_set_vf_lpe(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - uint32_t new_mtu = msgbuf[1]; + uint32_t max_frame = msgbuf[1]; uint32_t max_frs; uint32_t hlreg0; - int max_frame = new_mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN; /* X540 and X550 support jumbo frames in IOV mode */ if (hw->mac.type != ixgbe_mac_X540 && hw->mac.type != ixgbe_mac_X550 && hw->mac.type != ixgbe_mac_X550EM_x && - hw->mac.type != ixgbe_mac_X550EM_a) - return -1; + hw->mac.type != ixgbe_mac_X550EM_a) { + struct ixgbe_vf_info *vfinfo = + *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private); + + switch (vfinfo[vf].api_version) { + case ixgbe_mbox_api_11: + case ixgbe_mbox_api_12: + case ixgbe_mbox_api_13: + /** + * Version 1.1&1.2&1.3 supports jumbo frames on VFs + * if PF has jumbo frames enabled which means legacy + * VFs are disabled. + */ + if (dev->data->dev_conf.rxmode.max_rx_pkt_len > + IXGBE_ETH_MAX_LEN) + break; + /* fall through */ + default: + /** + * If the PF or VF are running w/ jumbo frames enabled, + * we return -1 as we cannot support jumbo frames on + * legacy VFs. 
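+			 * For reference, with the default 1500-byte MTU:
+			 *   IXGBE_ETH_MAX_LEN = RTE_ETHER_MTU + IXGBE_ETH_OVERHEAD
+			 *                     = 1500 + (14 + 4) = 1518 bytes,
+			 * the largest frame a legacy VF can accept here.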
+ */ + if (max_frame > IXGBE_ETH_MAX_LEN || + dev->data->dev_conf.rxmode.max_rx_pkt_len > + IXGBE_ETH_MAX_LEN) + return -1; + break; + } + } if (max_frame < RTE_ETHER_MIN_LEN || max_frame > RTE_ETHER_MAX_JUMBO_FRAME_LEN) @@ -573,9 +600,9 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32_t *ms max_frs = (IXGBE_READ_REG(hw, IXGBE_MAXFRS) & IXGBE_MHADD_MFS_MASK) >> IXGBE_MHADD_MFS_SHIFT; - if (max_frs < new_mtu) { + if (max_frs < max_frame) { hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); - if (new_mtu > RTE_ETHER_MAX_LEN) { + if (max_frame > IXGBE_ETH_MAX_LEN) { dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; hlreg0 |= IXGBE_HLREG0_JUMBOEN; @@ -586,7 +613,7 @@ ixgbe_set_vf_lpe(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32_t *ms } IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); - max_frs = new_mtu << IXGBE_MHADD_MFS_SHIFT; + max_frs = max_frame << IXGBE_MHADD_MFS_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, max_frs); } diff --git a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c index 6cfbb582e2..3b893b0df0 100644 --- a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c @@ -1441,7 +1441,8 @@ rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags) } static inline uint64_t -rx_desc_error_to_pkt_flags(uint32_t rx_status) +rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info, + uint8_t rx_udp_csum_zero_err) { uint64_t pkt_flags; @@ -1458,6 +1459,15 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status) pkt_flags = error_to_pkt_flags_map[(rx_status >> IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK]; + /* Mask out the bad UDP checksum error if the hardware has UDP zero + * checksum error issue, so that the software application will then + * have to recompute the checksum itself if needed. 
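+	 * The flag is cleared only when all three conditions hold: the
+	 * descriptor reports an L4 checksum error (TCPE), the parsed
+	 * packet type is UDP, and the queue sits on an affected (82599)
+	 * device.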
+ */ + if ((rx_status & IXGBE_RXDADV_ERR_TCPE) && + (pkt_info & IXGBE_RXDADV_PKTTYPE_UDP) && + rx_udp_csum_zero_err) + pkt_flags &= ~PKT_RX_L4_CKSUM_BAD; + if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) && (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) { pkt_flags |= PKT_RX_EIP_CKSUM_BAD; @@ -1544,7 +1554,9 @@ ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq) /* convert descriptor fields to rte mbuf flags */ pkt_flags = rx_desc_status_to_pkt_flags(s[j], vlan_flags); - pkt_flags |= rx_desc_error_to_pkt_flags(s[j]); + pkt_flags |= rx_desc_error_to_pkt_flags(s[j], + (uint16_t)pkt_info[j], + rxq->rx_udp_csum_zero_err); pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags ((uint16_t)pkt_info[j]); mb->ol_flags = pkt_flags; @@ -1877,7 +1889,9 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan); pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags); - pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr); + pkt_flags = pkt_flags | + rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info, + rxq->rx_udp_csum_zero_err); pkt_flags = pkt_flags | ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info); rxm->ol_flags = pkt_flags; @@ -1970,7 +1984,8 @@ ixgbe_fill_cluster_head_buf( head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan); pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data); pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags); - pkt_flags |= rx_desc_error_to_pkt_flags(staterr); + pkt_flags |= rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info, + rxq->rx_udp_csum_zero_err); pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info); head->ol_flags = pkt_flags; head->packet_type = @@ -3091,6 +3106,13 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev, else rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599; + /* + * 82599 errata, UDP frames with a 0 checksum can be marked as checksum + * errors. + */ + if (hw->mac.type == ixgbe_mac_82599EB) + rxq->rx_udp_csum_zero_err = 1; + /* * Allocate RX ring hardware descriptors. A memzone large enough to * handle the maximum ring size is allocated in order to allow for @@ -4898,15 +4920,11 @@ ixgbe_set_rsc(struct rte_eth_dev *dev) /* RFCTL configuration */ rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL); if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) - /* - * Since NFS packets coalescing is not supported - clear - * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is - * enabled. - */ - rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS | - IXGBE_RFCTL_NFSR_DIS); + rfctl &= ~IXGBE_RFCTL_RSC_DIS; else rfctl |= IXGBE_RFCTL_RSC_DIS; + /* disable NFS filtering */ + rfctl |= IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS; IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl); /* If LRO hasn't been requested - we are done here. */ @@ -5634,8 +5652,12 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev) * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way, * VF packets received can work in all cases. 
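	 * If the hardware rejects the requested length, initialization
	 * fails with -EINVAL rather than continuing with a mismatched limit.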
*/ - ixgbevf_rlpml_set_vf(hw, - (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len); + if (ixgbevf_rlpml_set_vf(hw, + (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len)) { + PMD_INIT_LOG(ERR, "Set max packet length to %d failed.", + dev->data->dev_conf.rxmode.max_rx_pkt_len); + return -EINVAL; + } /* * Assume no header split and no VLAN strip support diff --git a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h index 6d2f7c9da3..bcadaf79ce 100644 --- a/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h +++ b/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h @@ -129,6 +129,8 @@ struct ixgbe_rx_queue { uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */ uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */ uint8_t rx_deferred_start; /**< not in global dev start. */ + /** UDP frames with a 0 checksum can be marked as checksum errors. */ + uint8_t rx_udp_csum_zero_err; /** flags to set in mbuf when a vlan is detected. */ uint64_t vlan_flags; uint64_t offloads; /**< Rx offloads with DEV_RX_OFFLOAD_* */ diff --git a/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c b/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c index 90c076825a..52add17b5d 100644 --- a/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c +++ b/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c @@ -132,9 +132,9 @@ desc_to_olflags_v_ipsec(__m128i descs[4], struct rte_mbuf **rx_pkts) static inline void desc_to_olflags_v(__m128i descs[4], __m128i mbuf_init, uint8_t vlan_flags, - struct rte_mbuf **rx_pkts) + uint16_t udp_p_flag, struct rte_mbuf **rx_pkts) { - __m128i ptype0, ptype1, vtag0, vtag1, csum; + __m128i ptype0, ptype1, vtag0, vtag1, csum, udp_csum_skip; __m128i rearm0, rearm1, rearm2, rearm3; /* mask everything except rss type */ @@ -161,6 +161,7 @@ desc_to_olflags_v(__m128i descs[4], __m128i mbuf_init, uint8_t vlan_flags, (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16, IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP); + /* map vlan present (0x8), IPE (0x2), L4E (0x1) to ol_flags */ const __m128i vlan_csum_map_lo = _mm_set_epi8( 0, 0, 0, 0, @@ -182,12 +183,23 @@ desc_to_olflags_v(__m128i descs[4], __m128i mbuf_init, uint8_t vlan_flags, 0, PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0, PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t)); + /* mask everything except UDP header present if specified */ + const __m128i udp_hdr_p_msk = _mm_set_epi16 + (0, 0, 0, 0, + udp_p_flag, udp_p_flag, udp_p_flag, udp_p_flag); + + const __m128i udp_csum_bad_shuf = _mm_set_epi8 + (0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, ~(uint8_t)PKT_RX_L4_CKSUM_BAD, 0xFF); + ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]); ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]); vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]); vtag1 = _mm_unpackhi_epi16(descs[2], descs[3]); ptype0 = _mm_unpacklo_epi32(ptype0, ptype1); + /* save the UDP header present information */ + udp_csum_skip = _mm_and_si128(ptype0, udp_hdr_p_msk); ptype0 = _mm_and_si128(ptype0, rsstype_msk); ptype0 = _mm_shuffle_epi8(rss_flags, ptype0); @@ -215,6 +227,15 @@ desc_to_olflags_v(__m128i descs[4], __m128i mbuf_init, uint8_t vlan_flags, vtag1 = _mm_or_si128(ptype0, vtag1); + /* convert the UDP header present 0x200 to 0x1 for aligning with each + * PKT_RX_L4_CKSUM_BAD value in low byte of 16 bits word ol_flag in + * vtag1 (4x16). Then mask out the bad checksum value by shuffle and + * bit-mask. 
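+	 * For example, a lane carrying IXGBE_RXDADV_PKTTYPE_UDP (0x200)
+	 * becomes 0x0001 after the shift; its low byte then selects
+	 * ~PKT_RX_L4_CKSUM_BAD from udp_csum_bad_shuf, while non-UDP
+	 * lanes (0x0000) select 0xFF and leave the flags untouched.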
+ */ + udp_csum_skip = _mm_srli_epi16(udp_csum_skip, 9); + udp_csum_skip = _mm_shuffle_epi8(udp_csum_bad_shuf, udp_csum_skip); + vtag1 = _mm_and_si128(vtag1, udp_csum_skip); + /* * At this point, we have the 4 sets of flags in the low 64-bits * of vtag1 (4x16). @@ -341,6 +362,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, __m128i dd_check, eop_check; __m128i mbuf_init; uint8_t vlan_flags; + uint16_t udp_p_flag = 0; /* Rx Descriptor UDP header present */ /* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */ nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP); @@ -365,6 +387,9 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) return 0; + if (rxq->rx_udp_csum_zero_err) + udp_p_flag = IXGBE_RXDADV_PKTTYPE_UDP; + /* 4 packets DD mask */ dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL); @@ -477,7 +502,8 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]); /* set ol_flags with vlan packet type */ - desc_to_olflags_v(descs, mbuf_init, vlan_flags, &rx_pkts[pos]); + desc_to_olflags_v(descs, mbuf_init, vlan_flags, udp_p_flag, + &rx_pkts[pos]); #ifdef RTE_LIB_SECURITY if (unlikely(use_ipsec)) diff --git a/dpdk/drivers/net/liquidio/lio_ethdev.c b/dpdk/drivers/net/liquidio/lio_ethdev.c index d4dd3768cd..eb0fdab45a 100644 --- a/dpdk/drivers/net/liquidio/lio_ethdev.c +++ b/dpdk/drivers/net/liquidio/lio_ethdev.c @@ -481,7 +481,7 @@ lio_dev_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu) return -1; } - if (frame_len > RTE_ETHER_MAX_LEN) + if (frame_len > LIO_ETH_MAX_LEN) eth_dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else diff --git a/dpdk/drivers/net/liquidio/lio_ethdev.h b/dpdk/drivers/net/liquidio/lio_ethdev.h index 74cd2fb6c6..d33be1c44d 100644 --- a/dpdk/drivers/net/liquidio/lio_ethdev.h +++ b/dpdk/drivers/net/liquidio/lio_ethdev.h @@ -13,6 +13,9 @@ #define LIO_LSC_TIMEOUT 100000 /* 100000us (100ms) */ #define LIO_MAX_CMD_TIMEOUT 10000 /* 10000ms (10s) */ +/* The max frame size with default MTU */ +#define LIO_ETH_MAX_LEN (RTE_ETHER_MTU + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN) + #define LIO_DEV(_eth_dev) ((_eth_dev)->data->dev_private) /* LIO Response condition variable */ diff --git a/dpdk/drivers/net/mlx4/meson.build b/dpdk/drivers/net/mlx4/meson.build index 0cf9938a88..d7602b748e 100644 --- a/dpdk/drivers/net/mlx4/meson.build +++ b/dpdk/drivers/net/mlx4/meson.build @@ -24,7 +24,8 @@ endif libnames = [ 'mlx4', 'ibverbs' ] libs = [] foreach libname:libnames - lib = dependency('lib' + libname, static:static_ibverbs, required:false) + lib = dependency('lib' + libname, static:static_ibverbs, + required:false, method: 'pkg-config') if not lib.found() and not static_ibverbs lib = cc.find_library(libname, required:false) endif diff --git a/dpdk/drivers/net/mlx4/mlx4.c b/dpdk/drivers/net/mlx4/mlx4.c index d5d8c96351..919a9347f9 100644 --- a/dpdk/drivers/net/mlx4/mlx4.c +++ b/dpdk/drivers/net/mlx4/mlx4.c @@ -195,7 +195,7 @@ mlx4_free_verbs_buf(void *ptr, void *data __rte_unused) * @return * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ -static int +int mlx4_proc_priv_init(struct rte_eth_dev *dev) { struct mlx4_proc_priv *ppriv; @@ -207,13 +207,13 @@ mlx4_proc_priv_init(struct rte_eth_dev *dev) */ ppriv_size = sizeof(struct mlx4_proc_priv) + dev->data->nb_tx_queues * sizeof(void *); - ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size, - RTE_CACHE_LINE_SIZE, dev->device->numa_node); + ppriv = rte_zmalloc_socket("mlx4_proc_priv", ppriv_size, + RTE_CACHE_LINE_SIZE, dev->device->numa_node); if (!ppriv) { rte_errno = ENOMEM; return -rte_errno; } - ppriv->uar_table_sz = ppriv_size; + ppriv->uar_table_sz = dev->data->nb_tx_queues; dev->process_private = ppriv; return 0; } @@ -224,7 +224,7 @@ mlx4_proc_priv_init(struct rte_eth_dev *dev) * @param dev * Pointer to Ethernet device structure. */ -static void +void mlx4_proc_priv_uninit(struct rte_eth_dev *dev) { if (!dev->process_private) @@ -375,8 +375,10 @@ mlx4_dev_close(struct rte_eth_dev *dev) struct mlx4_priv *priv = dev->data->dev_private; unsigned int i; - if (rte_eal_process_type() != RTE_PROC_PRIMARY) + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + rte_eth_dev_release_port(dev); return 0; + } DEBUG("%p: closing device \"%s\"", (void *)dev, ((priv->ctx != NULL) ? priv->ctx->device->name : "")); @@ -764,6 +766,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) struct ibv_context *attr_ctx = NULL; struct ibv_device_attr device_attr; struct ibv_device_attr_ex device_attr_ex; + struct rte_eth_dev *prev_dev = NULL; struct mlx4_conf conf = { .ports.present = 0, .mr_ext_memseg_en = 1, @@ -878,7 +881,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) ERROR("can not attach rte ethdev"); rte_errno = ENOMEM; err = rte_errno; - goto error; + goto err_secondary; } priv = eth_dev->data->dev_private; if (!priv->verbs_alloc_ctx.enabled) { @@ -887,24 +890,24 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) " from Verbs"); rte_errno = ENOTSUP; err = rte_errno; - goto error; + goto err_secondary; } eth_dev->device = &pci_dev->device; eth_dev->dev_ops = &mlx4_dev_sec_ops; err = mlx4_proc_priv_init(eth_dev); if (err) - goto error; + goto err_secondary; /* Receive command fd from primary process. */ err = mlx4_mp_req_verbs_cmd_fd(eth_dev); if (err < 0) { err = rte_errno; - goto error; + goto err_secondary; } /* Remap UAR for Tx queues. */ err = mlx4_tx_uar_init_secondary(eth_dev, err); if (err) { err = rte_errno; - goto error; + goto err_secondary; } /* * Ethdev pointer is still required as input since @@ -916,7 +919,14 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) claim_zero(mlx4_glue->close_device(ctx)); rte_eth_copy_pci_info(eth_dev, pci_dev); rte_eth_dev_probing_finish(eth_dev); + prev_dev = eth_dev; continue; +err_secondary: + claim_zero(mlx4_glue->close_device(ctx)); + rte_eth_dev_release_port(eth_dev); + if (prev_dev) + rte_eth_dev_release_port(prev_dev); + break; } /* Check port status. 
*/ err = mlx4_glue->query_port(ctx, port, &port_attr); @@ -1091,6 +1101,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv, mem_event_cb); rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock); rte_eth_dev_probing_finish(eth_dev); + prev_dev = eth_dev; continue; port_error: rte_free(priv); @@ -1105,14 +1116,10 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) eth_dev->data->mac_addrs = NULL; rte_eth_dev_release_port(eth_dev); } + if (prev_dev) + mlx4_dev_close(prev_dev); break; } - /* - * XXX if something went wrong in the loop above, there is a resource - * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as - * long as the dpdk does not provide a way to deallocate a ethdev and a - * way to enumerate the registered ethdevs to free the previous ones. - */ error: if (attr_ctx) claim_zero(mlx4_glue->close_device(attr_ctx)); @@ -1123,6 +1130,36 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) return -err; } +/** + * DPDK callback to remove a PCI device. + * + * This function removes all Ethernet devices belong to a given PCI device. + * + * @param[in] pci_dev + * Pointer to the PCI device. + * + * @return + * 0 on success, the function cannot fail. + */ +static int +mlx4_pci_remove(struct rte_pci_device *pci_dev) +{ + uint16_t port_id; + int ret = 0; + + RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) { + /* + * mlx4_dev_close() is not registered to secondary process, + * call the close function explicitly for secondary process. + */ + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + ret |= mlx4_dev_close(&rte_eth_devices[port_id]); + else + ret |= rte_eth_dev_close(port_id); + } + return ret == 0 ? 0 : -EIO; +} + static const struct rte_pci_id mlx4_pci_id_map[] = { { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, @@ -1147,6 +1184,7 @@ static struct rte_pci_driver mlx4_driver = { }, .id_table = mlx4_pci_id_map, .probe = mlx4_pci_probe, + .remove = mlx4_pci_remove, .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV, }; diff --git a/dpdk/drivers/net/mlx4/mlx4.h b/dpdk/drivers/net/mlx4/mlx4.h index c6cb29493e..87710d3996 100644 --- a/dpdk/drivers/net/mlx4/mlx4.h +++ b/dpdk/drivers/net/mlx4/mlx4.h @@ -197,6 +197,10 @@ struct mlx4_priv { #define PORT_ID(priv) ((priv)->dev_data->port_id) #define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)]) +int mlx4_proc_priv_init(struct rte_eth_dev *dev); +void mlx4_proc_priv_uninit(struct rte_eth_dev *dev); + + /* mlx4_ethdev.c */ int mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE]); diff --git a/dpdk/drivers/net/mlx4/mlx4_mp.c b/dpdk/drivers/net/mlx4/mlx4_mp.c index eca0c20a8a..3622d61075 100644 --- a/dpdk/drivers/net/mlx4/mlx4_mp.c +++ b/dpdk/drivers/net/mlx4/mlx4_mp.c @@ -111,6 +111,9 @@ mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer) const struct mlx4_mp_param *param = (const struct mlx4_mp_param *)mp_msg->param; struct rte_eth_dev *dev; +#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET + struct mlx4_proc_priv *ppriv; +#endif int ret; MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY); @@ -126,6 +129,21 @@ mp_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer) rte_mb(); dev->tx_pkt_burst = mlx4_tx_burst; dev->rx_pkt_burst = mlx4_rx_burst; +#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET + ppriv = (struct mlx4_proc_priv *)dev->process_private; + if (ppriv->uar_table_sz != dev->data->nb_tx_queues) { + mlx4_tx_uar_uninit_secondary(dev); + mlx4_proc_priv_uninit(dev); + ret = 
mlx4_proc_priv_init(dev); + if (ret) + return -rte_errno; + ret = mlx4_tx_uar_init_secondary(dev, mp_msg->fds[0]); + if (ret) { + mlx4_proc_priv_uninit(dev); + return -rte_errno; + } + } +#endif mp_init_msg(dev, &mp_res, param->type); res->result = 0; ret = rte_mp_reply(&mp_res, peer); @@ -163,6 +181,7 @@ mp_req_on_rxtx(struct rte_eth_dev *dev, enum mlx4_mp_req_type type) struct rte_mp_reply mp_rep; struct mlx4_mp_param *res __rte_unused; struct timespec ts = {.tv_sec = MLX4_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0}; + struct mlx4_priv *priv; int ret; int i; @@ -175,6 +194,11 @@ mp_req_on_rxtx(struct rte_eth_dev *dev, enum mlx4_mp_req_type type) return; } mp_init_msg(dev, &mp_req, type); + if (type == MLX4_MP_REQ_START_RXTX) { + priv = dev->data->dev_private; + mp_req.num_fds = 1; + mp_req.fds[0] = priv->ctx->cmd_fd; + } ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); if (ret) { if (rte_errno != ENOTSUP) diff --git a/dpdk/drivers/net/mlx4/mlx4_rxtx.h b/dpdk/drivers/net/mlx4/mlx4_rxtx.h index 9de6c59411..136ca56ca4 100644 --- a/dpdk/drivers/net/mlx4/mlx4_rxtx.h +++ b/dpdk/drivers/net/mlx4/mlx4_rxtx.h @@ -157,6 +157,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts, /* mlx4_txq.c */ int mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); +void mlx4_tx_uar_uninit_secondary(struct rte_eth_dev *dev); uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv); int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, unsigned int socket, diff --git a/dpdk/drivers/net/mlx4/mlx4_txq.c b/dpdk/drivers/net/mlx4/mlx4_txq.c index 37b84413fb..60560d9545 100644 --- a/dpdk/drivers/net/mlx4/mlx4_txq.c +++ b/dpdk/drivers/net/mlx4/mlx4_txq.c @@ -157,6 +157,27 @@ mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd) } while (i--); return -rte_errno; } + +void +mlx4_tx_uar_uninit_secondary(struct rte_eth_dev *dev) +{ + struct mlx4_proc_priv *ppriv = + (struct mlx4_proc_priv *)dev->process_private; + const size_t page_size = sysconf(_SC_PAGESIZE); + void *addr; + size_t i; + + if (page_size == (size_t)-1) { + ERROR("Failed to get mem page size"); + return; + } + for (i = 0; i < ppriv->uar_table_sz; i++) { + addr = ppriv->uar_table[i]; + if (addr) + munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size); + } +} + #else int mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev __rte_unused, @@ -167,6 +188,13 @@ mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev __rte_unused, rte_errno = ENOTSUP; return -rte_errno; } + +void +mlx4_tx_uar_uninit_secondary(struct rte_eth_dev *dev __rte_unused) +{ + assert(rte_eal_process_type() == RTE_PROC_SECONDARY); + ERROR("UAR remap is not supported"); +} #endif /** diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/dpdk/drivers/net/mlx5/linux/mlx5_ethdev_os.c index 128845cb52..e36a78091c 100644 --- a/dpdk/drivers/net/mlx5/linux/mlx5_ethdev_os.c +++ b/dpdk/drivers/net/mlx5/linux/mlx5_ethdev_os.c @@ -143,7 +143,7 @@ struct ethtool_link_settings { * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ int -mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) +mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE]) { struct mlx5_priv *priv = dev->data->dev_private; unsigned int ifindex; @@ -151,7 +151,7 @@ mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) MLX5_ASSERT(priv); MLX5_ASSERT(priv->sh); if (priv->bond_ifindex > 0) { - memcpy(ifname, priv->bond_name, IF_NAMESIZE); + memcpy(ifname, priv->bond_name, MLX5_NAMESIZE); return 0; } ifindex = mlx5_ifindex(dev); diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_flow_os.h b/dpdk/drivers/net/mlx5/linux/mlx5_flow_os.h index a6bd2c01e1..73ed62655e 100644 --- a/dpdk/drivers/net/mlx5/linux/mlx5_flow_os.h +++ b/dpdk/drivers/net/mlx5/linux/mlx5_flow_os.h @@ -350,6 +350,32 @@ mlx5_flow_os_create_flow_action_drop(void **action) return (*action) ? 0 : -1; } +/** + * Create flow action: dest_devx_tir + * + * @param[in] tir + * Pointer to DevX tir object + * @param[out] action + * Pointer to a valid action on success, NULL otherwise. + * + * @return + * 0 on success, or -1 on failure and errno is set. + */ +static inline int +mlx5_flow_os_create_flow_action_dest_devx_tir(struct mlx5_devx_obj *tir, + void **action) +{ +#ifdef HAVE_IBV_FLOW_DV_SUPPORT + *action = mlx5_glue->dv_create_flow_action_dest_devx_tir(tir->obj); + return (*action) ? 0 : -1; +#else + /* If no DV support - skip the operation and return success */ + RTE_SET_USED(tir); + *action = 0; + return 0; +#endif +} + /** * Destroy flow action. * diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_mp_os.c b/dpdk/drivers/net/mlx5/linux/mlx5_mp_os.c index 08ade75799..95372e2084 100644 --- a/dpdk/drivers/net/mlx5/linux/mlx5_mp_os.c +++ b/dpdk/drivers/net/mlx5/linux/mlx5_mp_os.c @@ -115,6 +115,7 @@ struct rte_mp_msg mp_res; const struct mlx5_mp_param *param = (const struct mlx5_mp_param *)mp_msg->param; struct rte_eth_dev *dev; + struct mlx5_proc_priv *ppriv; struct mlx5_priv *priv; int ret; @@ -132,6 +133,20 @@ struct rte_mp_msg mp_res; rte_mb(); dev->rx_pkt_burst = mlx5_select_rx_function(dev); dev->tx_pkt_burst = mlx5_select_tx_function(dev); + ppriv = (struct mlx5_proc_priv *)dev->process_private; + /* If Tx queue number changes, re-initialize UAR. 
*/ + if (ppriv->uar_table_sz != priv->txqs_n) { + mlx5_tx_uar_uninit_secondary(dev); + mlx5_proc_priv_uninit(dev); + ret = mlx5_proc_priv_init(dev); + if (ret) + return -rte_errno; + ret = mlx5_tx_uar_init_secondary(dev, mp_msg->fds[0]); + if (ret) { + mlx5_proc_priv_uninit(dev); + return -rte_errno; + } + } mp_init_msg(&priv->mp_id, &mp_res, param->type); res->result = 0; ret = rte_mp_reply(&mp_res, peer); @@ -183,6 +198,10 @@ mp_req_on_rxtx(struct rte_eth_dev *dev, enum mlx5_mp_req_type type) return; } mp_init_msg(&priv->mp_id, &mp_req, type); + if (type == MLX5_MP_REQ_START_RXTX) { + mp_req.num_fds = 1; + mp_req.fds[0] = ((struct ibv_context *)priv->sh->ctx)->cmd_fd; + } ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); if (ret) { if (rte_errno != ENOTSUP) diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_os.c b/dpdk/drivers/net/mlx5/linux/mlx5_os.c index 4c863db1a7..91001473b0 100644 --- a/dpdk/drivers/net/mlx5/linux/mlx5_os.c +++ b/dpdk/drivers/net/mlx5/linux/mlx5_os.c @@ -168,9 +168,8 @@ mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *device_attr) static void * mlx5_alloc_verbs_buf(size_t size, void *data) { - struct mlx5_priv *priv = data; + struct mlx5_dev_ctx_shared *sh = data; void *ret; - unsigned int socket = SOCKET_ID_ANY; size_t alignment = rte_mem_page_size(); if (alignment == (size_t)-1) { DRV_LOG(ERR, "Failed to get mem page size"); @@ -178,18 +177,8 @@ mlx5_alloc_verbs_buf(size_t size, void *data) return NULL; } - if (priv->verbs_alloc_ctx.type == MLX5_VERBS_ALLOC_TYPE_TX_QUEUE) { - const struct mlx5_txq_ctrl *ctrl = priv->verbs_alloc_ctx.obj; - - socket = ctrl->socket; - } else if (priv->verbs_alloc_ctx.type == - MLX5_VERBS_ALLOC_TYPE_RX_QUEUE) { - const struct mlx5_rxq_ctrl *ctrl = priv->verbs_alloc_ctx.obj; - - socket = ctrl->socket; - } MLX5_ASSERT(data != NULL); - ret = mlx5_malloc(0, size, alignment, socket); + ret = mlx5_malloc(0, size, alignment, sh->numa_node); if (!ret && size) rte_errno = ENOMEM; return ret; @@ -681,8 +670,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, int err = 0; unsigned int hw_padding = 0; unsigned int mps; - unsigned int cqe_comp; - unsigned int cqe_pad = 0; unsigned int tunnel_en = 0; unsigned int mpls_en = 0; unsigned int swp = 0; @@ -762,7 +749,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, rte_eth_devices[priv->sh->bond_dev].device; else eth_dev->device = dpdk_dev; - eth_dev->dev_ops = &mlx5_os_dev_sec_ops; + eth_dev->dev_ops = &mlx5_dev_sec_ops; eth_dev->rx_descriptor_status = mlx5_rx_descriptor_status; eth_dev->tx_descriptor_status = mlx5_tx_descriptor_status; err = mlx5_proc_priv_init(eth_dev); @@ -874,17 +861,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, mprq_caps.max_single_wqe_log_num_of_strides; } #endif - if (RTE_CACHE_LINE_SIZE == 128 && - !(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP)) - cqe_comp = 0; - else - cqe_comp = 1; - config->cqe_comp = cqe_comp; -#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD - /* Whether device supports 128B Rx CQE padding. */ - cqe_pad = RTE_CACHE_LINE_SIZE == 128 && - (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD); -#endif + /* Rx CQE compression is enabled by default. 
*/ + config->cqe_comp = 1; #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) { tunnel_en = ((dv_attr.tunnel_offloads_caps & @@ -941,8 +919,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, priv->dev_port = spawn->phys_port; priv->pci_dev = spawn->pci_dev; priv->mtu = RTE_ETHER_MTU; - priv->mp_id.port_id = port_id; - strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN); /* Some internal functions rely on Netlink sockets, open them now. */ priv->nl_socket_rdma = mlx5_nl_init(NETLINK_RDMA); priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE); @@ -1117,16 +1093,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, config->mps == MLX5_MPW_ENHANCED ? "enhanced " : config->mps == MLX5_MPW ? "legacy " : "", config->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled"); - if (config->cqe_comp && !cqe_comp) { - DRV_LOG(WARNING, "Rx CQE compression isn't supported"); - config->cqe_comp = 0; - } - if (config->cqe_pad && !cqe_pad) { - DRV_LOG(WARNING, "Rx CQE padding isn't supported"); - config->cqe_pad = 0; - } else if (config->cqe_pad) { - DRV_LOG(INFO, "Rx CQE padding is enabled"); - } if (config->devx) { err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config->hca_attr); if (err) { @@ -1225,6 +1191,25 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, } #endif } + if (config->cqe_comp && RTE_CACHE_LINE_SIZE == 128 && + !(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP)) { + DRV_LOG(WARNING, "Rx CQE 128B compression is not supported"); + config->cqe_comp = 0; + } + if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX && + (!config->devx || !config->hca_attr.mini_cqe_resp_flow_tag)) { + DRV_LOG(WARNING, "Flow Tag CQE compression" + " format isn't supported."); + config->cqe_comp = 0; + } + if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_L34H_STRIDX && + (!config->devx || !config->hca_attr.mini_cqe_resp_l3_l4_tag)) { + DRV_LOG(WARNING, "L3/L4 Header CQE compression" + " format isn't supported."); + config->cqe_comp = 0; + } + DRV_LOG(DEBUG, "Rx CQE compression is %ssupported", + config->cqe_comp ? "" : "not "); if (config->tx_pp) { DRV_LOG(DEBUG, "Timestamp counter frequency %u kHz", config->hca_attr.dev_freq_khz); @@ -1364,6 +1349,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR; eth_dev->data->representor_id = priv->representor_id; } + priv->mp_id.port_id = eth_dev->data->port_id; + strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN); /* * Store associated network device interface index. This index * is permanent throughout the lifetime of device. So, we may store @@ -1416,7 +1403,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, mac.addr_bytes[4], mac.addr_bytes[5]); #ifdef RTE_LIBRTE_MLX5_DEBUG { - char ifname[IF_NAMESIZE]; + char ifname[MLX5_NAMESIZE]; if (mlx5_get_ifname(eth_dev, &ifname) == 0) DRV_LOG(DEBUG, "port %u ifname is \"%s\"", @@ -1437,7 +1424,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, /* Initialize burst functions to prevent crashes before link-up. 
*/ eth_dev->rx_pkt_burst = removed_rx_burst; eth_dev->tx_pkt_burst = removed_tx_burst; - eth_dev->dev_ops = &mlx5_os_dev_ops; + eth_dev->dev_ops = &mlx5_dev_ops; eth_dev->rx_descriptor_status = mlx5_rx_descriptor_status; eth_dev->tx_descriptor_status = mlx5_tx_descriptor_status; eth_dev->rx_queue_count = mlx5_rx_queue_count; @@ -1459,7 +1446,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, (void *)((uintptr_t)&(struct mlx5dv_ctx_allocators){ .alloc = &mlx5_alloc_verbs_buf, .free = &mlx5_free_verbs_buf, - .data = priv, + .data = sh, })); /* Bring Ethernet device up. */ DRV_LOG(DEBUG, "port %u forcing Ethernet interface up", @@ -2324,6 +2311,16 @@ mlx5_os_open_device(const struct mlx5_dev_spawn_data *spawn, DRV_LOG(DEBUG, "DevX is NOT supported"); err = 0; } + if (!err && sh->ctx) { + /* Hint libmlx5 to use PMD allocator for data plane resources */ + mlx5_glue->dv_set_context_attr(sh->ctx, + MLX5DV_CTX_ATTR_BUF_ALLOCATORS, + (void *)((uintptr_t)&(struct mlx5dv_ctx_allocators){ + .alloc = &mlx5_alloc_verbs_buf, + .free = &mlx5_free_verbs_buf, + .data = sh, + })); + } return err; } @@ -2606,153 +2603,3 @@ mlx5_os_mac_addr_flush(struct rte_eth_dev *dev) dev->data->mac_addrs, MLX5_MAX_MAC_ADDRESSES, priv->mac_own); } - -const struct eth_dev_ops mlx5_os_dev_ops = { - .dev_configure = mlx5_dev_configure, - .dev_start = mlx5_dev_start, - .dev_stop = mlx5_dev_stop, - .dev_set_link_down = mlx5_set_link_down, - .dev_set_link_up = mlx5_set_link_up, - .dev_close = mlx5_dev_close, - .promiscuous_enable = mlx5_promiscuous_enable, - .promiscuous_disable = mlx5_promiscuous_disable, - .allmulticast_enable = mlx5_allmulticast_enable, - .allmulticast_disable = mlx5_allmulticast_disable, - .link_update = mlx5_link_update, - .stats_get = mlx5_stats_get, - .stats_reset = mlx5_stats_reset, - .xstats_get = mlx5_xstats_get, - .xstats_reset = mlx5_xstats_reset, - .xstats_get_names = mlx5_xstats_get_names, - .fw_version_get = mlx5_fw_version_get, - .dev_infos_get = mlx5_dev_infos_get, - .read_clock = mlx5_txpp_read_clock, - .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, - .vlan_filter_set = mlx5_vlan_filter_set, - .rx_queue_setup = mlx5_rx_queue_setup, - .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, - .tx_queue_setup = mlx5_tx_queue_setup, - .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, - .rx_queue_release = mlx5_rx_queue_release, - .tx_queue_release = mlx5_tx_queue_release, - .rx_queue_start = mlx5_rx_queue_start, - .rx_queue_stop = mlx5_rx_queue_stop, - .tx_queue_start = mlx5_tx_queue_start, - .tx_queue_stop = mlx5_tx_queue_stop, - .flow_ctrl_get = mlx5_dev_get_flow_ctrl, - .flow_ctrl_set = mlx5_dev_set_flow_ctrl, - .mac_addr_remove = mlx5_mac_addr_remove, - .mac_addr_add = mlx5_mac_addr_add, - .mac_addr_set = mlx5_mac_addr_set, - .set_mc_addr_list = mlx5_set_mc_addr_list, - .mtu_set = mlx5_dev_set_mtu, - .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, - .vlan_offload_set = mlx5_vlan_offload_set, - .reta_update = mlx5_dev_rss_reta_update, - .reta_query = mlx5_dev_rss_reta_query, - .rss_hash_update = mlx5_rss_hash_update, - .rss_hash_conf_get = mlx5_rss_hash_conf_get, - .filter_ctrl = mlx5_dev_filter_ctrl, - .rxq_info_get = mlx5_rxq_info_get, - .txq_info_get = mlx5_txq_info_get, - .rx_burst_mode_get = mlx5_rx_burst_mode_get, - .tx_burst_mode_get = mlx5_tx_burst_mode_get, - .rx_queue_intr_enable = mlx5_rx_intr_enable, - .rx_queue_intr_disable = mlx5_rx_intr_disable, - .is_removed = mlx5_is_removed, - .udp_tunnel_port_add = mlx5_udp_tunnel_port_add, - .get_module_info = 
mlx5_get_module_info, - .get_module_eeprom = mlx5_get_module_eeprom, - .hairpin_cap_get = mlx5_hairpin_cap_get, - .mtr_ops_get = mlx5_flow_meter_ops_get, - .hairpin_bind = mlx5_hairpin_bind, - .hairpin_unbind = mlx5_hairpin_unbind, - .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, - .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, - .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, - .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, -}; - -/* Available operations from secondary process. */ -const struct eth_dev_ops mlx5_os_dev_sec_ops = { - .stats_get = mlx5_stats_get, - .stats_reset = mlx5_stats_reset, - .xstats_get = mlx5_xstats_get, - .xstats_reset = mlx5_xstats_reset, - .xstats_get_names = mlx5_xstats_get_names, - .fw_version_get = mlx5_fw_version_get, - .dev_infos_get = mlx5_dev_infos_get, - .read_clock = mlx5_txpp_read_clock, - .rx_queue_start = mlx5_rx_queue_start, - .rx_queue_stop = mlx5_rx_queue_stop, - .tx_queue_start = mlx5_tx_queue_start, - .tx_queue_stop = mlx5_tx_queue_stop, - .rxq_info_get = mlx5_rxq_info_get, - .txq_info_get = mlx5_txq_info_get, - .rx_burst_mode_get = mlx5_rx_burst_mode_get, - .tx_burst_mode_get = mlx5_tx_burst_mode_get, - .get_module_info = mlx5_get_module_info, - .get_module_eeprom = mlx5_get_module_eeprom, -}; - -/* Available operations in flow isolated mode. */ -const struct eth_dev_ops mlx5_os_dev_ops_isolate = { - .dev_configure = mlx5_dev_configure, - .dev_start = mlx5_dev_start, - .dev_stop = mlx5_dev_stop, - .dev_set_link_down = mlx5_set_link_down, - .dev_set_link_up = mlx5_set_link_up, - .dev_close = mlx5_dev_close, - .promiscuous_enable = mlx5_promiscuous_enable, - .promiscuous_disable = mlx5_promiscuous_disable, - .allmulticast_enable = mlx5_allmulticast_enable, - .allmulticast_disable = mlx5_allmulticast_disable, - .link_update = mlx5_link_update, - .stats_get = mlx5_stats_get, - .stats_reset = mlx5_stats_reset, - .xstats_get = mlx5_xstats_get, - .xstats_reset = mlx5_xstats_reset, - .xstats_get_names = mlx5_xstats_get_names, - .fw_version_get = mlx5_fw_version_get, - .dev_infos_get = mlx5_dev_infos_get, - .read_clock = mlx5_txpp_read_clock, - .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, - .vlan_filter_set = mlx5_vlan_filter_set, - .rx_queue_setup = mlx5_rx_queue_setup, - .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, - .tx_queue_setup = mlx5_tx_queue_setup, - .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, - .rx_queue_release = mlx5_rx_queue_release, - .tx_queue_release = mlx5_tx_queue_release, - .rx_queue_start = mlx5_rx_queue_start, - .rx_queue_stop = mlx5_rx_queue_stop, - .tx_queue_start = mlx5_tx_queue_start, - .tx_queue_stop = mlx5_tx_queue_stop, - .flow_ctrl_get = mlx5_dev_get_flow_ctrl, - .flow_ctrl_set = mlx5_dev_set_flow_ctrl, - .mac_addr_remove = mlx5_mac_addr_remove, - .mac_addr_add = mlx5_mac_addr_add, - .mac_addr_set = mlx5_mac_addr_set, - .set_mc_addr_list = mlx5_set_mc_addr_list, - .mtu_set = mlx5_dev_set_mtu, - .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, - .vlan_offload_set = mlx5_vlan_offload_set, - .filter_ctrl = mlx5_dev_filter_ctrl, - .rxq_info_get = mlx5_rxq_info_get, - .txq_info_get = mlx5_txq_info_get, - .rx_burst_mode_get = mlx5_rx_burst_mode_get, - .tx_burst_mode_get = mlx5_tx_burst_mode_get, - .rx_queue_intr_enable = mlx5_rx_intr_enable, - .rx_queue_intr_disable = mlx5_rx_intr_disable, - .is_removed = mlx5_is_removed, - .get_module_info = mlx5_get_module_info, - .get_module_eeprom = mlx5_get_module_eeprom, - .hairpin_cap_get = 
mlx5_hairpin_cap_get, - .mtr_ops_get = mlx5_flow_meter_ops_get, - .hairpin_bind = mlx5_hairpin_bind, - .hairpin_unbind = mlx5_hairpin_unbind, - .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, - .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, - .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, - .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, -}; diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_os.h b/dpdk/drivers/net/mlx5/linux/mlx5_os.h index 759def2f4b..7dbacceabe 100644 --- a/dpdk/drivers/net/mlx5/linux/mlx5_os.h +++ b/dpdk/drivers/net/mlx5/linux/mlx5_os.h @@ -14,11 +14,9 @@ enum { DEV_SYSFS_PATH_MAX = IBV_SYSFS_PATH_MAX + 1 }; +#define MLX5_NAMESIZE IF_NAMESIZE + #define PCI_DRV_FLAGS (RTE_PCI_DRV_INTR_LSC | \ RTE_PCI_DRV_INTR_RMV | \ RTE_PCI_DRV_PROBE_AGAIN) - -/* mlx5_ethdev_os.c */ - -int mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]); #endif /* RTE_PMD_MLX5_OS_H_ */ diff --git a/dpdk/drivers/net/mlx5/linux/mlx5_verbs.c b/dpdk/drivers/net/mlx5/linux/mlx5_verbs.c index 540ce32990..6b98a4c166 100644 --- a/dpdk/drivers/net/mlx5/linux/mlx5_verbs.c +++ b/dpdk/drivers/net/mlx5/linux/mlx5_verbs.c @@ -213,13 +213,22 @@ mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t idx) if (priv->config.cqe_comp && !rxq_data->hw_timestamp) { cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; + rxq_data->byte_mask = UINT32_MAX; #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT - cq_attr.mlx5.cqe_comp_res_format = - mlx5_rxq_mprq_enabled(rxq_data) ? - MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX : - MLX5DV_CQE_RES_FORMAT_HASH; + if (mlx5_rxq_mprq_enabled(rxq_data)) { + cq_attr.mlx5.cqe_comp_res_format = + MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX; + rxq_data->mcqe_format = + MLX5_CQE_RESP_FORMAT_CSUM_STRIDX; + } else { + cq_attr.mlx5.cqe_comp_res_format = + MLX5DV_CQE_RES_FORMAT_HASH; + rxq_data->mcqe_format = + MLX5_CQE_RESP_FORMAT_HASH; + } #else cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH; + rxq_data->mcqe_format = MLX5_CQE_RESP_FORMAT_HASH; #endif /* * For vectorized Rx, it must not be doubled in order to @@ -234,7 +243,7 @@ mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t idx) dev->data->port_id); } #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD - if (priv->config.cqe_pad) { + if (RTE_CACHE_LINE_SIZE == 128) { cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS; cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD; } @@ -366,8 +375,6 @@ mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) MLX5_ASSERT(rxq_data); MLX5_ASSERT(tmpl); - priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE; - priv->verbs_alloc_ctx.obj = rxq_ctrl; tmpl->rxq_ctrl = rxq_ctrl; if (rxq_ctrl->irq) { tmpl->ibv_channel = @@ -438,7 +445,6 @@ mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) rxq_data->cq_arm_sn = 0; mlx5_rxq_initialize(rxq_data); rxq_data->cq_ci = 0; - priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num; return 0; @@ -451,7 +457,6 @@ mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) if (tmpl->ibv_channel) claim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel)); rte_errno = ret; /* Restore rte_errno. 
*/ - priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; return -rte_errno; } @@ -932,8 +937,6 @@ mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) MLX5_ASSERT(txq_data); MLX5_ASSERT(txq_obj); txq_obj->txq_ctrl = txq_ctrl; - priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE; - priv->verbs_alloc_ctx.obj = txq_ctrl; if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) { DRV_LOG(ERR, "Port %u MLX5_ENABLE_CQE_COMPRESSION " "must never be set.", dev->data->port_id); @@ -1039,7 +1042,6 @@ mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) } txq_uar_init(txq_ctrl); dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED; - priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; return 0; error: ret = rte_errno; /* Save rte_errno before cleanup. */ @@ -1047,7 +1049,6 @@ mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx) claim_zero(mlx5_glue->destroy_cq(txq_obj->cq)); if (txq_obj->qp) claim_zero(mlx5_glue->destroy_qp(txq_obj->qp)); - priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; rte_errno = ret; /* Restore rte_errno. */ return -rte_errno; } diff --git a/dpdk/drivers/net/mlx5/mlx5.c b/dpdk/drivers/net/mlx5/mlx5.c index ca3667a469..bdb446d2d2 100644 --- a/dpdk/drivers/net/mlx5/mlx5.c +++ b/dpdk/drivers/net/mlx5/mlx5.c @@ -37,14 +37,12 @@ #include "mlx5_autoconf.h" #include "mlx5_mr.h" #include "mlx5_flow.h" +#include "mlx5_flow_os.h" #include "rte_pmd_mlx5.h" /* Device parameter to enable RX completion queue compression. */ #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en" -/* Device parameter to enable RX completion entry padding to 128B. */ -#define MLX5_RXQ_CQE_PAD_EN "rxq_cqe_pad_en" - /* Device parameter to enable padding Rx packet to cacheline size. */ #define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en" @@ -413,8 +411,8 @@ mlx5_flow_aso_age_mng_close(struct mlx5_dev_ctx_shared *sh) for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) if (pool->actions[j].dr_action) claim_zero - (mlx5_glue->destroy_flow_action - (pool->actions[j].dr_action)); + (mlx5_flow_os_destroy_flow_action + (pool->actions[j].dr_action)); mlx5_free(pool); } mlx5_free(sh->aso_age_mng->pools); @@ -521,7 +519,7 @@ mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh) if (cnt->action) claim_zero - (mlx5_glue->destroy_flow_action + (mlx5_flow_os_destroy_flow_action (cnt->action)); if (fallback && MLX5_POOL_GET_CNT (pool, j)->dcs_when_free) @@ -1249,13 +1247,13 @@ mlx5_proc_priv_init(struct rte_eth_dev *dev) */ ppriv_size = sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *); - ppriv = mlx5_malloc(MLX5_MEM_RTE, ppriv_size, RTE_CACHE_LINE_SIZE, - dev->device->numa_node); + ppriv = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size, + RTE_CACHE_LINE_SIZE, dev->device->numa_node); if (!ppriv) { rte_errno = ENOMEM; return -rte_errno; } - ppriv->uar_table_sz = ppriv_size; + ppriv->uar_table_sz = priv->txqs_n; dev->process_private = ppriv; return 0; } @@ -1266,7 +1264,7 @@ mlx5_proc_priv_init(struct rte_eth_dev *dev) * @param dev * Pointer to Ethernet device structure. 
*/ -static void +void mlx5_proc_priv_uninit(struct rte_eth_dev *dev) { if (!dev->process_private) @@ -1426,6 +1424,156 @@ mlx5_dev_close(struct rte_eth_dev *dev) return 0; } +const struct eth_dev_ops mlx5_dev_ops = { + .dev_configure = mlx5_dev_configure, + .dev_start = mlx5_dev_start, + .dev_stop = mlx5_dev_stop, + .dev_set_link_down = mlx5_set_link_down, + .dev_set_link_up = mlx5_set_link_up, + .dev_close = mlx5_dev_close, + .promiscuous_enable = mlx5_promiscuous_enable, + .promiscuous_disable = mlx5_promiscuous_disable, + .allmulticast_enable = mlx5_allmulticast_enable, + .allmulticast_disable = mlx5_allmulticast_disable, + .link_update = mlx5_link_update, + .stats_get = mlx5_stats_get, + .stats_reset = mlx5_stats_reset, + .xstats_get = mlx5_xstats_get, + .xstats_reset = mlx5_xstats_reset, + .xstats_get_names = mlx5_xstats_get_names, + .fw_version_get = mlx5_fw_version_get, + .dev_infos_get = mlx5_dev_infos_get, + .read_clock = mlx5_txpp_read_clock, + .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, + .vlan_filter_set = mlx5_vlan_filter_set, + .rx_queue_setup = mlx5_rx_queue_setup, + .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, + .tx_queue_setup = mlx5_tx_queue_setup, + .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, + .rx_queue_release = mlx5_rx_queue_release, + .tx_queue_release = mlx5_tx_queue_release, + .rx_queue_start = mlx5_rx_queue_start, + .rx_queue_stop = mlx5_rx_queue_stop, + .tx_queue_start = mlx5_tx_queue_start, + .tx_queue_stop = mlx5_tx_queue_stop, + .flow_ctrl_get = mlx5_dev_get_flow_ctrl, + .flow_ctrl_set = mlx5_dev_set_flow_ctrl, + .mac_addr_remove = mlx5_mac_addr_remove, + .mac_addr_add = mlx5_mac_addr_add, + .mac_addr_set = mlx5_mac_addr_set, + .set_mc_addr_list = mlx5_set_mc_addr_list, + .mtu_set = mlx5_dev_set_mtu, + .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, + .vlan_offload_set = mlx5_vlan_offload_set, + .reta_update = mlx5_dev_rss_reta_update, + .reta_query = mlx5_dev_rss_reta_query, + .rss_hash_update = mlx5_rss_hash_update, + .rss_hash_conf_get = mlx5_rss_hash_conf_get, + .filter_ctrl = mlx5_dev_filter_ctrl, + .rxq_info_get = mlx5_rxq_info_get, + .txq_info_get = mlx5_txq_info_get, + .rx_burst_mode_get = mlx5_rx_burst_mode_get, + .tx_burst_mode_get = mlx5_tx_burst_mode_get, + .rx_queue_intr_enable = mlx5_rx_intr_enable, + .rx_queue_intr_disable = mlx5_rx_intr_disable, + .is_removed = mlx5_is_removed, + .udp_tunnel_port_add = mlx5_udp_tunnel_port_add, + .get_module_info = mlx5_get_module_info, + .get_module_eeprom = mlx5_get_module_eeprom, + .hairpin_cap_get = mlx5_hairpin_cap_get, + .mtr_ops_get = mlx5_flow_meter_ops_get, + .hairpin_bind = mlx5_hairpin_bind, + .hairpin_unbind = mlx5_hairpin_unbind, + .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, + .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, + .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, + .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, +}; + +/* Available operations from secondary process. 
*/ +const struct eth_dev_ops mlx5_dev_sec_ops = { + .stats_get = mlx5_stats_get, + .stats_reset = mlx5_stats_reset, + .xstats_get = mlx5_xstats_get, + .xstats_reset = mlx5_xstats_reset, + .xstats_get_names = mlx5_xstats_get_names, + .fw_version_get = mlx5_fw_version_get, + .dev_infos_get = mlx5_dev_infos_get, + .read_clock = mlx5_txpp_read_clock, + .rx_queue_start = mlx5_rx_queue_start, + .rx_queue_stop = mlx5_rx_queue_stop, + .tx_queue_start = mlx5_tx_queue_start, + .tx_queue_stop = mlx5_tx_queue_stop, + .rxq_info_get = mlx5_rxq_info_get, + .txq_info_get = mlx5_txq_info_get, + .rx_burst_mode_get = mlx5_rx_burst_mode_get, + .tx_burst_mode_get = mlx5_tx_burst_mode_get, + .get_module_info = mlx5_get_module_info, + .get_module_eeprom = mlx5_get_module_eeprom, +}; + +/* Available operations in flow isolated mode. */ +const struct eth_dev_ops mlx5_dev_ops_isolate = { + .dev_configure = mlx5_dev_configure, + .dev_start = mlx5_dev_start, + .dev_stop = mlx5_dev_stop, + .dev_set_link_down = mlx5_set_link_down, + .dev_set_link_up = mlx5_set_link_up, + .dev_close = mlx5_dev_close, + .promiscuous_enable = mlx5_promiscuous_enable, + .promiscuous_disable = mlx5_promiscuous_disable, + .allmulticast_enable = mlx5_allmulticast_enable, + .allmulticast_disable = mlx5_allmulticast_disable, + .link_update = mlx5_link_update, + .stats_get = mlx5_stats_get, + .stats_reset = mlx5_stats_reset, + .xstats_get = mlx5_xstats_get, + .xstats_reset = mlx5_xstats_reset, + .xstats_get_names = mlx5_xstats_get_names, + .fw_version_get = mlx5_fw_version_get, + .dev_infos_get = mlx5_dev_infos_get, + .read_clock = mlx5_txpp_read_clock, + .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, + .vlan_filter_set = mlx5_vlan_filter_set, + .rx_queue_setup = mlx5_rx_queue_setup, + .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup, + .tx_queue_setup = mlx5_tx_queue_setup, + .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup, + .rx_queue_release = mlx5_rx_queue_release, + .tx_queue_release = mlx5_tx_queue_release, + .rx_queue_start = mlx5_rx_queue_start, + .rx_queue_stop = mlx5_rx_queue_stop, + .tx_queue_start = mlx5_tx_queue_start, + .tx_queue_stop = mlx5_tx_queue_stop, + .flow_ctrl_get = mlx5_dev_get_flow_ctrl, + .flow_ctrl_set = mlx5_dev_set_flow_ctrl, + .mac_addr_remove = mlx5_mac_addr_remove, + .mac_addr_add = mlx5_mac_addr_add, + .mac_addr_set = mlx5_mac_addr_set, + .set_mc_addr_list = mlx5_set_mc_addr_list, + .mtu_set = mlx5_dev_set_mtu, + .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, + .vlan_offload_set = mlx5_vlan_offload_set, + .filter_ctrl = mlx5_dev_filter_ctrl, + .rxq_info_get = mlx5_rxq_info_get, + .txq_info_get = mlx5_txq_info_get, + .rx_burst_mode_get = mlx5_rx_burst_mode_get, + .tx_burst_mode_get = mlx5_tx_burst_mode_get, + .rx_queue_intr_enable = mlx5_rx_intr_enable, + .rx_queue_intr_disable = mlx5_rx_intr_disable, + .is_removed = mlx5_is_removed, + .get_module_info = mlx5_get_module_info, + .get_module_eeprom = mlx5_get_module_eeprom, + .hairpin_cap_get = mlx5_hairpin_cap_get, + .mtr_ops_get = mlx5_flow_meter_ops_get, + .hairpin_bind = mlx5_hairpin_bind, + .hairpin_unbind = mlx5_hairpin_unbind, + .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports, + .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update, + .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind, + .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind, +}; + /** * Verify and store value for device argument. 
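mlx5_dev_ops, mlx5_dev_sec_ops and mlx5_dev_ops_isolate above are plain const tables of function pointers; which one a port uses is a runtime decision (see the mlx5_flow_isolate() hunk further down, which swaps between the first and the third). A reduced, self-contained illustration of the same dispatch idea, not the real eth_dev_ops layout:

    #include <stdio.h>

    struct dev_ops {
        int (*dev_start)(void);   /* left NULL in the secondary table */
        int (*stats_get)(void);
    };

    static int do_start(void)     { return printf("start\n") < 0 ? -1 : 0; }
    static int do_stats_get(void) { return printf("stats\n") < 0 ? -1 : 0; }

    static const struct dev_ops primary_ops   = { do_start, do_stats_get };
    static const struct dev_ops secondary_ops = { NULL,     do_stats_get };

    int main(int argc, char **argv)
    {
        const struct dev_ops *ops = argc > 1 ? &secondary_ops : &primary_ops;

        (void)argv;
        if (ops->dev_start != NULL)   /* control path: primary only */
            ops->dev_start();
        return ops->stats_get();
    }

A secondary process is handed the reduced table, so control-path entry points simply do not exist for it; the ethdev layer refuses such calls when the pointer is NULL (typically with -ENOTSUP).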
* @@ -1472,8 +1620,6 @@ mlx5_args_check(const char *key, const char *val, void *opaque) } config->cqe_comp = !!tmp; config->cqe_comp_fmt = tmp; - } else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) { - config->cqe_pad = !!tmp; } else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) { config->hw_padding = !!tmp; } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) { @@ -1602,7 +1748,6 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) { const char **params = (const char *[]){ MLX5_RXQ_CQE_COMP_EN, - MLX5_RXQ_CQE_PAD_EN, MLX5_RXQ_PKT_PAD_EN, MLX5_RX_MPRQ_EN, MLX5_RX_MPRQ_LOG_STRIDE_NUM, diff --git a/dpdk/drivers/net/mlx5/mlx5.h b/dpdk/drivers/net/mlx5/mlx5.h index 041240e6fe..9bf1bf3146 100644 --- a/dpdk/drivers/net/mlx5/mlx5.h +++ b/dpdk/drivers/net/mlx5/mlx5.h @@ -135,9 +135,9 @@ struct mlx5_local_data { extern struct mlx5_shared_data *mlx5_shared_data; /* Dev ops structs */ -extern const struct eth_dev_ops mlx5_os_dev_ops; -extern const struct eth_dev_ops mlx5_os_dev_sec_ops; -extern const struct eth_dev_ops mlx5_os_dev_ops_isolate; +extern const struct eth_dev_ops mlx5_dev_ops; +extern const struct eth_dev_ops mlx5_dev_sec_ops; +extern const struct eth_dev_ops mlx5_dev_ops_isolate; struct mlx5_counter_ctrl { /* Name of the counter. */ @@ -207,7 +207,6 @@ struct mlx5_dev_config { unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */ unsigned int cqe_comp:1; /* CQE compression is enabled. */ unsigned int cqe_comp_fmt:3; /* CQE compression format. */ - unsigned int cqe_pad:1; /* CQE padding is enabled. */ unsigned int tso:1; /* Whether TSO is supported. */ unsigned int rx_vec_en:1; /* Rx vector is enabled. */ unsigned int mr_ext_memseg_en:1; @@ -258,30 +257,12 @@ struct mlx5_dev_config { }; -/** - * Type of object being allocated. - */ -enum mlx5_verbs_alloc_type { - MLX5_VERBS_ALLOC_TYPE_NONE, - MLX5_VERBS_ALLOC_TYPE_TX_QUEUE, - MLX5_VERBS_ALLOC_TYPE_RX_QUEUE, -}; - /* Structure for VF VLAN workaround. */ struct mlx5_vf_vlan { uint32_t tag:12; uint32_t created:1; }; -/** - * Verbs allocator needs a context to know in the callback which kind of - * resources it is allocating. - */ -struct mlx5_verbs_alloc_ctx { - enum mlx5_verbs_alloc_type type; /* Kind of object being allocated. */ - const void *obj; /* Pointer to the DPDK object. */ -}; - /* Flow drop context necessary due to Verbs API. */ struct mlx5_drop { struct mlx5_hrxq *hrxq; /* Hash Rx queue queue. */ @@ -768,7 +749,10 @@ struct mlx5_dev_ctx_shared { struct mlx5_dev_shared_port port[]; /* per device port data array. */ }; -/* Per-process private structure. */ +/* + * Per-process private structure. + * Caution, secondary process may rebuild the struct during port start. + */ struct mlx5_proc_priv { size_t uar_table_sz; /* Size of UAR register table. */ @@ -957,7 +941,7 @@ struct mlx5_priv { int32_t pf_bond; /* >=0 means PF index in bonding configuration. */ unsigned int if_index; /* Associated kernel network device index. */ uint32_t bond_ifindex; /**< Bond interface index. */ - char bond_name[IF_NAMESIZE]; /**< Bond interface name. */ + char bond_name[MLX5_NAMESIZE]; /**< Bond interface name. */ /* RX/TX queues. */ unsigned int rxqs_n; /* RX queues array size. */ unsigned int txqs_n; /* TX queues array size. */ @@ -989,7 +973,6 @@ struct mlx5_priv { struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */ struct mlx5_stats_ctrl stats_ctrl; /* Stats control. */ struct mlx5_dev_config config; /* Device configuration. */ - struct mlx5_verbs_alloc_ctx verbs_alloc_ctx; /* Context for Verbs allocator. 
*/ int nl_socket_rdma; /* Netlink socket (NETLINK_RDMA). */ int nl_socket_route; /* Netlink socket (NETLINK_ROUTE). */ @@ -1024,6 +1007,7 @@ struct rte_hairpin_peer_info { int mlx5_getenv_int(const char *); int mlx5_proc_priv_init(struct rte_eth_dev *dev); +void mlx5_proc_priv_uninit(struct rte_eth_dev *dev); int mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev, struct rte_eth_udp_tunnel *udp_tunnel); uint16_t mlx5_eth_find_next(uint16_t port_id, struct rte_pci_device *pci_dev); @@ -1075,6 +1059,8 @@ int mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev); /* mlx5_ethdev_os.c */ +int mlx5_get_ifname(const struct rte_eth_dev *dev, + char (*ifname)[MLX5_NAMESIZE]); unsigned int mlx5_ifindex(const struct rte_eth_dev *dev); int mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[RTE_ETHER_ADDR_LEN]); int mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu); diff --git a/dpdk/drivers/net/mlx5/mlx5_devx.c b/dpdk/drivers/net/mlx5/mlx5_devx.c index de9b204075..9970a58156 100644 --- a/dpdk/drivers/net/mlx5/mlx5_devx.c +++ b/dpdk/drivers/net/mlx5/mlx5_devx.c @@ -23,7 +23,7 @@ #include "mlx5_utils.h" #include "mlx5_devx.h" #include "mlx5_flow.h" - +#include "mlx5_flow_os.h" /** * Modify RQ vlan stripping offload @@ -486,8 +486,6 @@ mlx5_rxq_create_devx_cq_resources(struct rte_eth_dev *dev, uint16_t idx) "Port %u Rx CQE compression is disabled for LRO.", dev->data->port_id); } - if (priv->config.cqe_pad) - cq_attr.cqe_size = MLX5_CQE_SIZE_128B; log_cqe_n = log2above(cqe_n); cq_size = sizeof(struct mlx5_cqe) * (1 << log_cqe_n); buf = rte_calloc_socket(__func__, 1, cq_size, page_size, @@ -942,9 +940,8 @@ mlx5_devx_hrxq_new(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq, goto error; } #ifdef HAVE_IBV_FLOW_DV_SUPPORT - hrxq->action = mlx5_glue->dv_create_flow_action_dest_devx_tir - (hrxq->tir->obj); - if (!hrxq->action) { + if (mlx5_flow_os_create_flow_action_dest_devx_tir(hrxq->tir, + &hrxq->action)) { rte_errno = errno; goto error; } @@ -1263,8 +1260,6 @@ mlx5_txq_create_devx_cq_resources(struct rte_eth_dev *dev, uint16_t idx) DRV_LOG(ERR, "Failed to allocate CQ door-bell."); goto error; } - cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ? - MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B; cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->sh->tx_uar); cq_attr.eqn = priv->sh->eqn; cq_attr.q_umem_valid = 1; @@ -1304,12 +1299,15 @@ mlx5_txq_create_devx_cq_resources(struct rte_eth_dev *dev, uint16_t idx) * Pointer to Ethernet device. * @param idx * Queue index in DPDK Tx queue array. + * @param[in] log_desc_n + * Log of number of descriptors in queue. * * @return * Number of WQEs in SQ, 0 otherwise and rte_errno is set. 
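The scattered replacements of direct mlx5_glue calls with mlx5_flow_os_* wrappers in the mlx5.c and mlx5_devx.c hunks above (destroy_flow_action, create_flow_action_dest_devx_tir) route Verbs-specific operations through an OS abstraction layer, so non-Linux backends can supply their own implementations. The wrapper convention, sketched with stand-in glue calls; the real signatures live in mlx5_flow_os.h and may differ in detail:

    #include <stddef.h>

    /* Stand-in for the Verbs glue call: returns the action or NULL. */
    static void *
    glue_create_action_dest_tir(void *tir_obj)
    {
        return tir_obj;   /* pretend creation always succeeds */
    }

    /* Wrapper style seen above: errno-style int return, with the
     * created object handed back through an out parameter, so the
     * call site reads "if (wrapper(..., &action)) goto error;". */
    static inline int
    flow_os_create_flow_action_dest_devx_tir(void *tir, void **action)
    {
        *action = glue_create_action_dest_tir(tir);
        return *action == NULL ? -1 : 0;
    }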
*/ static uint32_t -mlx5_txq_create_devx_sq_resources(struct rte_eth_dev *dev, uint16_t idx) +mlx5_txq_create_devx_sq_resources(struct rte_eth_dev *dev, uint16_t idx, + uint16_t log_desc_n) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_txq_data *txq_data = (*priv->txqs)[idx]; @@ -1329,7 +1327,7 @@ mlx5_txq_create_devx_sq_resources(struct rte_eth_dev *dev, uint16_t idx) rte_errno = ENOMEM; return 0; } - wqe_n = RTE_MIN(1UL << txq_data->elts_n, + wqe_n = RTE_MIN(1UL << log_desc_n, (uint32_t)priv->sh->device_attr.max_qp_wr); txq_obj->sq_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, wqe_n * sizeof(struct mlx5_wqe), @@ -1431,8 +1429,8 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx) struct mlx5_dev_ctx_shared *sh = priv->sh; struct mlx5_txq_obj *txq_obj = txq_ctrl->obj; void *reg_addr; - uint32_t cqe_n; - uint32_t wqe_n; + uint32_t cqe_n, log_desc_n; + uint32_t wqe_n, wqe_size; int ret = 0; MLX5_ASSERT(txq_data); @@ -1453,8 +1451,29 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx) txq_data->cq_db = (volatile uint32_t *)(txq_obj->cq_dbrec_page->dbrs + txq_obj->cq_dbrec_offset); *txq_data->cq_db = 0; + /* + * Adjust the amount of WQEs depending on inline settings. + * The number of descriptors should be enough to handle + * the specified number of packets. If queue is being created + * with Verbs the rdma-core does queue size adjustment + * internally in the mlx5_calc_sq_size(), we do the same + * for the queue being created with DevX at this point. + */ + wqe_size = txq_data->tso_en ? + RTE_ALIGN(txq_ctrl->max_tso_header, MLX5_WSEG_SIZE) : 0; + wqe_size += sizeof(struct mlx5_wqe_cseg) + + sizeof(struct mlx5_wqe_eseg) + + sizeof(struct mlx5_wqe_dseg); + if (txq_data->inlen_send) + wqe_size = RTE_MAX(wqe_size, sizeof(struct mlx5_wqe_cseg) + + sizeof(struct mlx5_wqe_eseg) + + RTE_ALIGN(txq_data->inlen_send + + sizeof(uint32_t), + MLX5_WSEG_SIZE)); + wqe_size = RTE_ALIGN(wqe_size, MLX5_WQE_SIZE) / MLX5_WQE_SIZE; /* Create Send Queue object with DevX. */ - wqe_n = mlx5_txq_create_devx_sq_resources(dev, idx); + log_desc_n = log2above((1UL << txq_data->elts_n) * wqe_size); + wqe_n = mlx5_txq_create_devx_sq_resources(dev, idx, log_desc_n); if (!wqe_n) { rte_errno = errno; goto error; diff --git a/dpdk/drivers/net/mlx5/mlx5_ethdev.c b/dpdk/drivers/net/mlx5/mlx5_ethdev.c index a3910cf922..45ee7e4488 100644 --- a/dpdk/drivers/net/mlx5/mlx5_ethdev.c +++ b/dpdk/drivers/net/mlx5/mlx5_ethdev.c @@ -310,8 +310,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES; info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); info->rx_seg_capa.max_nseg = MLX5_MAX_RXQ_NSEG; - info->rx_seg_capa.multi_pools = 1; - info->rx_seg_capa.offset_allowed = 1; + info->rx_seg_capa.multi_pools = !config->mprq.enabled; + info->rx_seg_capa.offset_allowed = !config->mprq.enabled; info->rx_seg_capa.offset_align_log2 = 0; info->rx_offload_capa = (mlx5_get_rx_port_offloads() | info->rx_queue_offload_capa); diff --git a/dpdk/drivers/net/mlx5/mlx5_flow.c b/dpdk/drivers/net/mlx5/mlx5_flow.c index 52ade39a42..cda3ca557c 100644 --- a/dpdk/drivers/net/mlx5/mlx5_flow.c +++ b/dpdk/drivers/net/mlx5/mlx5_flow.c @@ -212,6 +212,8 @@ mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item) return ret; } +#define MLX5_RSS_EXP_ELT_N 8 + /** * Expand RSS flows into several possible flows according to the RSS hash * fields requested and the driver capabilities. 
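The SQ sizing logic added to mlx5_txq_devx_obj_new() above mirrors what rdma-core's mlx5_calc_sq_size() already did for Verbs-created queues: the requested descriptor count is scaled by how many WQEs a single descriptor may occupy once TSO headers and inline data are taken into account. A rough standalone recomputation; the constants (16-byte WQE segments, 64-byte WQEs, 16 bytes each for cseg/eseg/dseg) are assumptions matching common mlx5 conventions, not values taken from this patch:

    #include <stdint.h>

    #define WSEG_SIZE 16u
    #define WQE_SIZE  64u
    #define ALIGN_UP(v, a) (((v) + (a) - 1) / (a) * (a))

    static uint32_t
    wqes_per_descriptor(uint32_t inlen_send, uint32_t max_tso_header,
                        int tso_en)
    {
        uint32_t wqe_size = tso_en ? ALIGN_UP(max_tso_header, WSEG_SIZE) : 0;

        wqe_size += 3 * WSEG_SIZE;              /* cseg + eseg + dseg */
        if (inlen_send) {
            /* inline data is preceded by a 4-byte length word */
            uint32_t inl = 2 * WSEG_SIZE +
                           ALIGN_UP(inlen_send + 4u, WSEG_SIZE);

            if (inl > wqe_size)
                wqe_size = inl;
        }
        return ALIGN_UP(wqe_size, WQE_SIZE) / WQE_SIZE;
    }

Worked example: inlen_send = 192 with TSO off gives 32 + ALIGN_UP(196, 16) = 240 bytes, i.e. 4 WQEs per descriptor, so a 1024-descriptor ring requests log2above(1024 * 4) = 12 as log_desc_n.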
@@ -242,13 +244,12 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, const struct mlx5_flow_expand_node graph[], int graph_root_index) { - const int elt_n = 8; const struct rte_flow_item *item; const struct mlx5_flow_expand_node *node = &graph[graph_root_index]; const int *next_node; - const int *stack[elt_n]; + const int *stack[MLX5_RSS_EXP_ELT_N]; int stack_pos = 0; - struct rte_flow_item flow_items[elt_n]; + struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N]; unsigned int i; size_t lsize; size_t user_pattern_size = 0; @@ -261,10 +262,10 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, memset(&missed_item, 0, sizeof(missed_item)); lsize = offsetof(struct mlx5_flow_expand_rss, entry) + - elt_n * sizeof(buf->entry[0]); + MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]); if (lsize <= size) { buf->entry[0].priority = 0; - buf->entry[0].pattern = (void *)&buf->entry[elt_n]; + buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N]; buf->entries = 0; addr = buf->entry[0].pattern; } @@ -367,7 +368,7 @@ mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, /* Go deeper. */ if (node->next) { next_node = node->next; - if (stack_pos++ == elt_n) { + if (stack_pos++ == MLX5_RSS_EXP_ELT_N) { rte_errno = E2BIG; return -rte_errno; } @@ -797,7 +798,7 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev, start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 : (priv->mtr_reg_share ? REG_C_3 : REG_C_4); skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2); - if (id > (REG_C_7 - start_reg)) + if (id > (uint32_t)(REG_C_7 - start_reg)) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, NULL, "invalid tag id"); @@ -813,7 +814,7 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev, */ if (skip_mtr_reg && config->flow_mreg_c [id + start_reg - REG_C_0] >= priv->mtr_color_reg) { - if (id >= (REG_C_7 - start_reg)) + if (id >= (uint32_t)(REG_C_7 - start_reg)) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, NULL, "invalid tag id"); @@ -1001,17 +1002,29 @@ flow_drv_rxq_flags_set(struct rte_eth_dev *dev, struct mlx5_priv *priv = dev->data->dev_private; const int mark = dev_handle->mark; const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); - struct mlx5_hrxq *hrxq; + struct mlx5_ind_table_obj *ind_tbl = NULL; unsigned int i; - if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE) - return; - hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], + if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) { + struct mlx5_hrxq *hrxq; + + hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], dev_handle->rix_hrxq); - if (!hrxq) + if (hrxq) + ind_tbl = hrxq->ind_table; + } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) { + struct mlx5_shared_action_rss *shared_rss; + + shared_rss = mlx5_ipool_get + (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], + dev_handle->rix_srss); + if (shared_rss) + ind_tbl = shared_rss->ind_tbl; + } + if (!ind_tbl) return; - for (i = 0; i != hrxq->ind_table->queues_n; ++i) { - int idx = hrxq->ind_table->queues[i]; + for (i = 0; i != ind_tbl->queues_n; ++i) { + int idx = ind_tbl->queues[i]; struct mlx5_rxq_ctrl *rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); @@ -1083,18 +1096,30 @@ flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, struct mlx5_priv *priv = dev->data->dev_private; const int mark = dev_handle->mark; const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL); - struct mlx5_hrxq *hrxq; + struct mlx5_ind_table_obj *ind_tbl = NULL; unsigned int i; 
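The two (uint32_t)(REG_C_7 - start_reg) casts above are sign-compare fixes: id is unsigned while the enum difference is a plain int, so the comparison was already performed in unsigned arithmetic after the usual conversions; the cast makes that explicit and silences -Wsign-compare. A minimal demonstration with the register values reduced to a bare enum:

    #include <stdint.h>
    #include <stdio.h>

    enum modify_reg { REG_C_0, REG_C_1, REG_C_2, REG_C_3,
                      REG_C_4, REG_C_5, REG_C_6, REG_C_7 };

    int main(void)
    {
        uint32_t id = 6;                     /* requested tag id */
        enum modify_reg start_reg = REG_C_2;

        /* REG_C_7 - start_reg is int; comparing it with the unsigned id
         * converts it to uint32_t anyway. Harmless while the difference
         * is non-negative, but the implicit conversion is exactly what
         * -Wsign-compare warns about; the cast documents the intent. */
        if (id > (uint32_t)(REG_C_7 - start_reg))
            puts("invalid tag id");          /* 6 > 5: rejected */
        else
            puts("tag id in range");
        return 0;
    }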
- if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE) - return; - hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], + if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) { + struct mlx5_hrxq *hrxq; + + hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], dev_handle->rix_hrxq); - if (!hrxq) + if (hrxq) + ind_tbl = hrxq->ind_table; + } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) { + struct mlx5_shared_action_rss *shared_rss; + + shared_rss = mlx5_ipool_get + (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], + dev_handle->rix_srss); + if (shared_rss) + ind_tbl = shared_rss->ind_tbl; + } + if (!ind_tbl) return; MLX5_ASSERT(dev->data->dev_started); - for (i = 0; i != hrxq->ind_table->queues_n; ++i) { - int idx = hrxq->ind_table->queues[i]; + for (i = 0; i != ind_tbl->queues_n; ++i) { + int idx = ind_tbl->queues[i]; struct mlx5_rxq_ctrl *rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); @@ -3523,7 +3548,7 @@ flow_check_hairpin_split(struct rte_eth_dev *dev, if (queue == NULL) return 0; conf = mlx5_rxq_get_hairpin_conf(dev, queue->index); - if (conf != NULL && !!conf->tx_explicit) + if (conf == NULL || conf->tx_explicit != 0) return 0; queue_action = 1; action_n++; @@ -3533,7 +3558,7 @@ flow_check_hairpin_split(struct rte_eth_dev *dev, if (rss == NULL || rss->queue_num == 0) return 0; conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]); - if (conf != NULL && !!conf->tx_explicit) + if (conf == NULL || conf->tx_explicit != 0) return 0; queue_action = 1; action_n++; @@ -5243,7 +5268,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, struct mlx5_priv *priv = dev->data->dev_private; struct rte_flow *flow = NULL; struct mlx5_flow *dev_flow; - const struct rte_flow_action_rss *rss; + const struct rte_flow_action_rss *rss = NULL; struct mlx5_translated_shared_action shared_actions[MLX5_MAX_SHARED_ACTIONS]; int shared_actions_n = MLX5_MAX_SHARED_ACTIONS; @@ -5321,7 +5346,9 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && flow->drv_type < MLX5_FLOW_TYPE_MAX); memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue)); - rss = flow_get_rss_action(p_actions_rx); + /* RSS Action only works on NIC RX domain */ + if (attr->ingress && !attr->transfer) + rss = flow_get_rss_action(p_actions_rx); if (rss) { if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num)) return 0; @@ -6124,9 +6151,9 @@ mlx5_flow_isolate(struct rte_eth_dev *dev, } priv->isolated = !!enable; if (enable) - dev->dev_ops = &mlx5_os_dev_ops_isolate; + dev->dev_ops = &mlx5_dev_ops_isolate; else - dev->dev_ops = &mlx5_os_dev_ops; + dev->dev_ops = &mlx5_dev_ops; dev->rx_descriptor_status = mlx5_rx_descriptor_status; dev->tx_descriptor_status = mlx5_tx_descriptor_status; @@ -7150,12 +7177,12 @@ mlx5_shared_action_flush(struct rte_eth_dev *dev) { struct rte_flow_error error; struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_shared_action_rss *action; + struct mlx5_shared_action_rss *shared_rss; int ret = 0; uint32_t idx; ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], - priv->rss_shared_actions, idx, action, next) { + priv->rss_shared_actions, idx, shared_rss, next) { ret |= mlx5_shared_action_destroy(dev, (struct rte_flow_shared_action *)(uintptr_t)idx, &error); } diff --git a/dpdk/drivers/net/mlx5/mlx5_flow.h b/dpdk/drivers/net/mlx5/mlx5_flow.h index a249c292e3..91f48923c0 100644 --- a/dpdk/drivers/net/mlx5/mlx5_flow.h +++ b/dpdk/drivers/net/mlx5/mlx5_flow.h @@ -552,7 +552,6 @@ struct 
mlx5_flow_sub_actions_list { struct mlx5_flow_sub_actions_idx { uint32_t rix_hrxq; /**< Hash Rx queue object index. */ uint32_t rix_tag; /**< Index to the tag action. */ - uint32_t cnt; uint32_t rix_port_id_action; /**< Index to port ID action resource. */ uint32_t rix_encap_decap; /**< Index to encap/decap resource. */ }; @@ -1049,17 +1048,17 @@ struct rte_flow { #define MLX5_RSS_HASH_IPV4 (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4) #define MLX5_RSS_HASH_IPV4_TCP \ (MLX5_RSS_HASH_IPV4 | \ - IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_SRC_PORT_TCP) + IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP) #define MLX5_RSS_HASH_IPV4_UDP \ (MLX5_RSS_HASH_IPV4 | \ - IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_SRC_PORT_UDP) + IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP) #define MLX5_RSS_HASH_IPV6 (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6) #define MLX5_RSS_HASH_IPV6_TCP \ (MLX5_RSS_HASH_IPV6 | \ - IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_SRC_PORT_TCP) + IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP) #define MLX5_RSS_HASH_IPV6_UDP \ (MLX5_RSS_HASH_IPV6 | \ - IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_SRC_PORT_UDP) + IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP) #define MLX5_RSS_HASH_NONE 0ULL /* array of valid combinations of RX Hash fields for RSS */ diff --git a/dpdk/drivers/net/mlx5/mlx5_flow_age.c b/dpdk/drivers/net/mlx5/mlx5_flow_age.c index cea2cf769d..0ea61be4eb 100644 --- a/dpdk/drivers/net/mlx5/mlx5_flow_age.c +++ b/dpdk/drivers/net/mlx5/mlx5_flow_age.c @@ -278,7 +278,8 @@ mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, sizeof(*sq->db_rec) * 2, 4096, socket); if (!sq->umem_buf) { DRV_LOG(ERR, "Can't allocate wqe buffer."); - return -ENOMEM; + rte_errno = ENOMEM; + goto error; } sq->wqe_umem = mlx5_glue->devx_umem_reg(ctx, (void *)(uintptr_t)sq->umem_buf, diff --git a/dpdk/drivers/net/mlx5/mlx5_flow_dv.c b/dpdk/drivers/net/mlx5/mlx5_flow_dv.c index aa21ff9613..3fdc3ffe16 100644 --- a/dpdk/drivers/net/mlx5/mlx5_flow_dv.c +++ b/dpdk/drivers/net/mlx5/mlx5_flow_dv.c @@ -955,7 +955,7 @@ flow_dv_convert_action_set_reg RTE_FLOW_ERROR_TYPE_ACTION, NULL, "too many items to modify"); MLX5_ASSERT(conf->id != REG_NON); - MLX5_ASSERT(conf->id < RTE_DIM(reg_to_field)); + MLX5_ASSERT(conf->id < (enum modify_reg)RTE_DIM(reg_to_field)); actions[i] = (struct mlx5_modification_cmd) { .action_type = MLX5_MODIFICATION_TYPE_SET, .field = reg_to_field[conf->id], @@ -2375,6 +2375,11 @@ flow_dv_validate_action_mark(struct rte_eth_dev *dev, const struct rte_flow_action_mark *mark = action->conf; int ret; + if (is_tunnel_offload_active(dev)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "no mark action " + "if tunnel offload active"); /* Fall back if no extended metadata register support. */ if (config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY) return mlx5_flow_validate_action_mark(action, action_flags, @@ -2537,6 +2542,10 @@ flow_dv_validate_action_set_tag(struct rte_eth_dev *dev, * * @param[in] dev * Pointer to rte_eth_dev structure. + * @param[in] action + * Pointer to the action structure. + * @param[in] action_flags + * Holds the actions detected until now. * @param[out] error * Pointer to error structure. 
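The MLX5_RSS_HASH_*_TCP/UDP fix above is easy to read past: the old definitions OR-ed IBV_RX_HASH_SRC_PORT_* with itself, and OR is idempotent, so the destination-port bit was silently missing and L4 RSS hashed on the source port alone. A two-assert proof of the failure mode with illustrative flag values:

    #include <assert.h>

    #define HASH_SRC_PORT_TCP (1u << 0)
    #define HASH_DST_PORT_TCP (1u << 1)

    int main(void)
    {
        unsigned int broken = HASH_SRC_PORT_TCP | HASH_SRC_PORT_TCP;
        unsigned int fixed  = HASH_SRC_PORT_TCP | HASH_DST_PORT_TCP;

        assert(broken == HASH_SRC_PORT_TCP);   /* dst bit never set */
        assert(fixed  == 0x3u);                /* both ports hashed */
        return 0;
    }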
* @@ -2545,12 +2554,25 @@ flow_dv_validate_action_set_tag(struct rte_eth_dev *dev, */ static int flow_dv_validate_action_count(struct rte_eth_dev *dev, + const struct rte_flow_action *action, + uint64_t action_flags, struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; + const struct rte_flow_action_count *count; if (!priv->config.devx) goto notsup_err; + if (action_flags & MLX5_FLOW_ACTION_COUNT) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "duplicate count actions set"); + count = (const struct rte_flow_action_count *)action->conf; + if (count && count->shared && (action_flags & MLX5_FLOW_ACTION_AGE) && + !priv->sh->flow_hit_aso_en) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "old age and shared count combination is not supported"); #ifdef HAVE_IBV_FLOW_DEVX_COUNTERS return 0; #endif @@ -2612,6 +2634,10 @@ flow_dv_validate_action_l2_encap(struct rte_eth_dev *dev, * Pointer to the rte_eth_dev structure. * @param[in] action_flags * Holds the actions detected until now. + * @param[in] action + * Pointer to the action structure. + * @param[in] item_flags + * Holds the items detected. * @param[in] attr * Pointer to flow attributes * @param[out] error @@ -2623,6 +2649,8 @@ flow_dv_validate_action_l2_encap(struct rte_eth_dev *dev, static int flow_dv_validate_action_decap(struct rte_eth_dev *dev, uint64_t action_flags, + const struct rte_flow_action *action, + const uint64_t item_flags, const struct rte_flow_attr *attr, struct rte_flow_error *error) { @@ -2656,6 +2684,11 @@ flow_dv_validate_action_decap(struct rte_eth_dev *dev, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "decap action for VF representor " "not supported on NIC table"); + if (action->type == RTE_FLOW_ACTION_TYPE_VXLAN_DECAP && + !(item_flags & MLX5_FLOW_LAYER_VXLAN)) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "VXLAN item should be present for VXLAN decap"); return 0; } @@ -2676,6 +2709,10 @@ const struct rte_flow_action_raw_decap empty_decap = {.data = NULL, .size = 0,}; * Holds the actions detected until now. * @param[out] actions_n * pointer to the number of actions counter. + * @param[in] action + * Pointer to the action structure. + * @param[in] item_flags + * Holds the items detected. * @param[out] error * Pointer to error structure. * @@ -2688,7 +2725,8 @@ flow_dv_validate_action_raw_encap_decap const struct rte_flow_action_raw_decap *decap, const struct rte_flow_action_raw_encap *encap, const struct rte_flow_attr *attr, uint64_t *action_flags, - int *actions_n, struct rte_flow_error *error) + int *actions_n, const struct rte_flow_action *action, + uint64_t item_flags, struct rte_flow_error *error) { const struct mlx5_priv *priv = dev->data->dev_private; int ret; @@ -2723,8 +2761,8 @@ flow_dv_validate_action_raw_encap_decap "encap combination"); } if (decap) { - ret = flow_dv_validate_action_decap(dev, *action_flags, attr, - error); + ret = flow_dv_validate_action_decap(dev, *action_flags, action, + item_flags, attr, error); if (ret < 0) return ret; *action_flags |= MLX5_FLOW_ACTION_DECAP; @@ -4321,7 +4359,7 @@ flow_dv_modify_create_cb(struct mlx5_hlist *list, uint64_t key __rte_unused, /** * Validate the sample action. * - * @param[in] action_flags + * @param[in, out] action_flags * Holds the actions detected until now. * @param[in] action * Pointer to the sample action. 
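flow_dv_validate_action_count() above grows from a pure capability check into a stateful one: the accumulated action_flags let it reject a second COUNT action, and the action conf lets it veto a shared counter combined with the legacy counter-based AGE action when ASO flow-hit support is absent. The accumulate-then-test pattern in isolation (in the driver the flag is set by the callers; folding it in keeps the sketch short, and the flag names are made up):

    #include <stdint.h>

    #define ACT_COUNT (UINT64_C(1) << 0)
    #define ACT_AGE   (UINT64_C(1) << 1)

    static int
    validate_count(uint64_t *action_flags, int shared, int aso_age_supported)
    {
        if (*action_flags & ACT_COUNT)
            return -1;   /* "duplicate count actions set" */
        if (shared && (*action_flags & ACT_AGE) && !aso_age_supported)
            return -1;   /* old AGE + shared COUNT rejected */
        *action_flags |= ACT_COUNT;
        return 0;
    }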
@@ -4329,6 +4367,10 @@ flow_dv_modify_create_cb(struct mlx5_hlist *list, uint64_t key __rte_unused, * Pointer to the Ethernet device structure. * @param[in] attr * Attributes of flow that includes this action. + * @param[in] item_flags + * Holds the items detected. + * @param[out] count + * Pointer to the COUNT action in sample action list. * @param[out] error * Pointer to error structure. * @@ -4336,10 +4378,12 @@ flow_dv_modify_create_cb(struct mlx5_hlist *list, uint64_t key __rte_unused, * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -flow_dv_validate_action_sample(uint64_t action_flags, +flow_dv_validate_action_sample(uint64_t *action_flags, const struct rte_flow_action *action, struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + const uint64_t item_flags, + const struct rte_flow_action_count **count, struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; @@ -4365,17 +4409,17 @@ flow_dv_validate_action_sample(uint64_t action_flags, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "sample action not supported"); - if (action_flags & MLX5_FLOW_ACTION_SAMPLE) + if (*action_flags & MLX5_FLOW_ACTION_SAMPLE) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, NULL, "Multiple sample actions not " "supported"); - if (action_flags & MLX5_FLOW_ACTION_METER) + if (*action_flags & MLX5_FLOW_ACTION_METER) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, action, "wrong action order, meter should " "be after sample action"); - if (action_flags & MLX5_FLOW_ACTION_JUMP) + if (*action_flags & MLX5_FLOW_ACTION_JUMP) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, action, "wrong action order, jump should " @@ -4413,10 +4457,15 @@ flow_dv_validate_action_sample(uint64_t action_flags, ++actions_n; break; case RTE_FLOW_ACTION_TYPE_COUNT: - ret = flow_dv_validate_action_count(dev, error); + ret = flow_dv_validate_action_count + (dev, act, + *action_flags | sub_action_flags, + error); if (ret < 0) return ret; + *count = act->conf; sub_action_flags |= MLX5_FLOW_ACTION_COUNT; + *action_flags |= MLX5_FLOW_ACTION_COUNT; ++actions_n; break; case RTE_FLOW_ACTION_TYPE_PORT_ID: @@ -4433,7 +4482,7 @@ flow_dv_validate_action_sample(uint64_t action_flags, case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: ret = flow_dv_validate_action_raw_encap_decap (dev, NULL, act->conf, attr, &sub_action_flags, - &actions_n, error); + &actions_n, action, item_flags, error); if (ret < 0) return ret; ++actions_n; @@ -5224,6 +5273,8 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, const struct rte_flow_action_raw_decap *decap; const struct rte_flow_action_raw_encap *encap; const struct rte_flow_action_rss *rss; + const struct rte_flow_action_count *count = NULL; + const struct rte_flow_action_count *sample_count = NULL; const struct rte_flow_item_tcp nic_tcp_mask = { .hdr = { .tcp_flags = 0xFF, @@ -5282,6 +5333,11 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, } else { tunnel = NULL; } + if (tunnel && priv->representor) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "decap not supported " + "for VF representor"); grp_info.std_tbl_fix = tunnel_use_standard_attr_group_translate (dev, tunnel, attr, items, actions); ret = flow_dv_validate_attributes(dev, tunnel, attr, &grp_info, error); @@ -5702,9 +5758,12 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, ++actions_n; break; case RTE_FLOW_ACTION_TYPE_COUNT: 
- ret = flow_dv_validate_action_count(dev, error); + ret = flow_dv_validate_action_count(dev, actions, + action_flags, + error); if (ret < 0) return ret; + count = actions->conf; action_flags |= MLX5_FLOW_ACTION_COUNT; ++actions_n; break; @@ -5761,6 +5820,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP: case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP: ret = flow_dv_validate_action_decap(dev, action_flags, + actions, item_flags, attr, error); if (ret < 0) return ret; @@ -5770,7 +5830,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: ret = flow_dv_validate_action_raw_encap_decap (dev, NULL, actions->conf, attr, &action_flags, - &actions_n, error); + &actions_n, actions, item_flags, error); if (ret < 0) return ret; break; @@ -5788,7 +5848,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, (dev, decap ? decap : &empty_decap, encap, attr, &action_flags, &actions_n, - error); + actions, item_flags, error); if (ret < 0) return ret; break; @@ -5955,7 +6015,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, rw_act_num += MLX5_ACT_NUM_SET_TAG; break; case MLX5_RTE_FLOW_ACTION_TYPE_AGE: - if (!attr->group) + if (!attr->transfer && !attr->group) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -5969,6 +6029,24 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, error); if (ret < 0) return ret; + /* + * Validate the regular AGE action (using counter) + * mutual exclusion with share counter actions. + */ + if (!priv->sh->flow_hit_aso_en) { + if (count && count->shared) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "old age and shared count combination is not supported"); + if (sample_count) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "old age action and count must be in the same sub flow"); + } action_flags |= MLX5_FLOW_ACTION_AGE; ++actions_n; break; @@ -6001,9 +6079,11 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, rw_act_num += MLX5_ACT_NUM_SET_DSCP; break; case RTE_FLOW_ACTION_TYPE_SAMPLE: - ret = flow_dv_validate_action_sample(action_flags, + ret = flow_dv_validate_action_sample(&action_flags, actions, dev, - attr, error); + attr, item_flags, + &sample_count, + error); if (ret < 0) return ret; action_flags |= MLX5_FLOW_ACTION_SAMPLE; @@ -6079,8 +6159,11 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, * Validate the drop action mutual exclusion with other actions. * Drop action is mutually-exclusive with any other action, except for * Count action. + * Drop action compatibility with tunnel offload was already validated. 
*/ - if ((action_flags & MLX5_FLOW_ACTION_DROP) && + if (action_flags & (MLX5_FLOW_ACTION_TUNNEL_MATCH | + MLX5_FLOW_ACTION_TUNNEL_MATCH)); + else if ((action_flags & MLX5_FLOW_ACTION_DROP) && (action_flags & ~(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_COUNT))) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, NULL, @@ -6232,8 +6315,9 @@ flow_dv_prepare(struct rte_eth_dev *dev, "not enough memory to create flow handle"); return NULL; } - MLX5_ASSERT(wks->flow_idx + 1 < RTE_DIM(wks->flows)); + MLX5_ASSERT(wks->flow_idx < RTE_DIM(wks->flows)); dev_flow = &wks->flows[wks->flow_idx++]; + memset(dev_flow, 0, sizeof(*dev_flow)); dev_flow->handle = dev_handle; dev_flow->handle_idx = handle_idx; /* @@ -6245,12 +6329,6 @@ flow_dv_prepare(struct rte_eth_dev *dev, */ dev_flow->dv.value.size = MLX5_ST_SZ_BYTES(fte_match_param) - MLX5_ST_SZ_BYTES(fte_match_set_misc4); - /* - * The matching value needs to be cleared to 0 before using. In the - * past, it will be automatically cleared when using rte_*alloc - * API. The time consumption will be almost the same as before. - */ - memset(dev_flow->dv.value.buf, 0, MLX5_ST_SZ_BYTES(fte_match_param)); dev_flow->ingress = attr->ingress; dev_flow->dv.transfer = attr->transfer; return dev_flow; @@ -7659,11 +7737,15 @@ flow_dv_translate_item_port_id(struct rte_eth_dev *dev, void *matcher, priv->pf_bond < 0 && attr->transfer) flow_dv_translate_item_source_vport (matcher, key, priv->vport_id, mask); - else - flow_dv_translate_item_meta_vport - (matcher, key, - priv->vport_meta_tag, - priv->vport_meta_mask); + /* + * We should always set the vport metadata register, + * otherwise the SW steering library can drop + * the rule if wire vport metadata value is not zero, + * it depends on kernel configuration. + */ + flow_dv_translate_item_meta_vport(matcher, key, + priv->vport_meta_tag, + priv->vport_meta_mask); } else { flow_dv_translate_item_source_vport(matcher, key, priv->vport_id, mask); @@ -8656,10 +8738,6 @@ flow_dv_sample_sub_actions_release(struct rte_eth_dev *dev, flow_dv_tag_release(dev, act_res->rix_tag); act_res->rix_tag = 0; } - if (act_res->cnt) { - flow_dv_counter_free(dev, act_res->cnt); - act_res->cnt = 0; - } } int @@ -9038,6 +9116,7 @@ flow_dv_translate_action_sample(struct rte_eth_dev *dev, struct mlx5_flow_sub_actions_list *sample_act; struct mlx5_flow_sub_actions_idx *sample_idx; struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace(); + struct rte_flow *flow = dev_flow->flow; struct mlx5_flow_rss_desc *rss_desc; uint64_t action_flags = 0; @@ -9109,21 +9188,22 @@ flow_dv_translate_action_sample(struct rte_eth_dev *dev, } case RTE_FLOW_ACTION_TYPE_COUNT: { - uint32_t counter; - - counter = flow_dv_translate_create_counter(dev, - dev_flow, sub_actions->conf, 0); - if (!counter) - return rte_flow_error_set + if (!flow->counter) { + flow->counter = + flow_dv_translate_create_counter(dev, + dev_flow, sub_actions->conf, + 0); + if (!flow->counter) + return rte_flow_error_set (error, rte_errno, - RTE_FLOW_ERROR_TYPE_ACTION, - NULL, - "cannot create counter" - " object."); - sample_idx->cnt = counter; + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "cannot create counter" + " object."); + } sample_act->dr_cnt_action = (flow_dv_counter_get_by_idx(dev, - counter, NULL))->action; + flow->counter, NULL))->action; sample_actions[sample_act->actions_num++] = sample_act->dr_cnt_action; action_flags |= MLX5_FLOW_ACTION_COUNT; @@ -9876,14 +9956,22 @@ flow_dv_translate(struct rte_eth_dev *dev, break; case RTE_FLOW_ACTION_TYPE_AGE: if 
(priv->sh->flow_hit_aso_en && attr->group) { - flow->age = flow_dv_translate_create_aso_age - (dev, action->conf, error); - if (!flow->age) - return rte_flow_error_set + /* + * Create one shared age action, to be used + * by all sub-flows. + */ + if (!flow->age) { + flow->age = + flow_dv_translate_create_aso_age + (dev, action->conf, + error); + if (!flow->age) + return rte_flow_error_set (error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION, NULL, "can't create ASO age action"); + } dev_flow->dv.actions[actions_n++] = (flow_aso_age_get_by_idx (dev, flow->age))->dr_action; @@ -10214,17 +10302,22 @@ flow_dv_translate(struct rte_eth_dev *dev, handle->dvh.modify_hdr->action; } if (action_flags & MLX5_FLOW_ACTION_COUNT) { - flow->counter = - flow_dv_translate_create_counter(dev, - dev_flow, count, age); - - if (!flow->counter) - return rte_flow_error_set + /* + * Create one count action, to be used + * by all sub-flows. + */ + if (!flow->counter) { + flow->counter = + flow_dv_translate_create_counter + (dev, dev_flow, count, + age); + if (!flow->counter) + return rte_flow_error_set (error, rte_errno, - RTE_FLOW_ERROR_TYPE_ACTION, - NULL, - "cannot create counter" - " object."); + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "cannot create counter" + " object."); + } dev_flow->dv.actions[actions_n] = (flow_dv_counter_get_by_idx(dev, flow->counter, NULL))->action; @@ -10652,47 +10745,6 @@ __flow_dv_action_rss_hrxq_lookup(struct rte_eth_dev *dev, uint32_t idx, } } -/** - * Retrieves hash RX queue suitable for the *flow*. - * If shared action configured for *flow* suitable hash RX queue will be - * retrieved from attached shared action. - * - * @param[in] dev - * Pointer to the Ethernet device structure. - * @param[in] dev_flow - * Pointer to the sub flow. - * @param[in] rss_desc - * Pointer to the RSS descriptor. - * @param[out] hrxq - * Pointer to retrieved hash RX queue object. - * - * @return - * Valid hash RX queue index, otherwise 0 and rte_errno is set. - */ -static uint32_t -__flow_dv_rss_get_hrxq(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, - struct mlx5_flow_rss_desc *rss_desc, - struct mlx5_hrxq **hrxq) -{ - struct mlx5_priv *priv = dev->data->dev_private; - uint32_t hrxq_idx; - - if (rss_desc->shared_rss) { - hrxq_idx = __flow_dv_action_rss_hrxq_lookup - (dev, rss_desc->shared_rss, - dev_flow->hash_fields, - !!(dev_flow->handle->layers & - MLX5_FLOW_LAYER_TUNNEL)); - if (hrxq_idx) - *hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ], - hrxq_idx); - } else { - *hrxq = flow_dv_hrxq_prepare(dev, dev_flow, rss_desc, - &hrxq_idx); - } - return hrxq_idx; -} - /** * Apply the flow to the NIC, lock free, * (mutex should be acquired by caller). 
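Both flow_dv_translate() hunks above enforce the same create-once rule: a flow may expand into several sub-flows, and the ASO age action and the counter must be allocated by whichever sub-flow is translated first and then shared, not duplicated per sub-flow. (As an aside, the drop/tunnel guard added a little earlier ORs MLX5_FLOW_ACTION_TUNNEL_MATCH with itself, which is a no-op; a second, distinct tunnel flag was presumably intended there.) The lazy-sharing shape, stripped of the DPDK specifics and with a stub allocator:

    struct flow { void *counter; /* shared by all sub-flows */ };

    static void *
    create_counter(void)
    {
        static int counter_obj;   /* stand-in for the DevX counter */
        return &counter_obj;
    }

    static int
    attach_counter(struct flow *flow, void **dr_action)
    {
        if (flow->counter == NULL) {
            flow->counter = create_counter();
            if (flow->counter == NULL)
                return -1;        /* rte_flow_error_set() upstream */
        }
        *dr_action = flow->counter;   /* later sub-flows reuse it */
        return 0;
    }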
@@ -10724,11 +10776,6 @@ flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, struct mlx5_flow_rss_desc *rss_desc = &wks->rss_desc; MLX5_ASSERT(wks); - if (rss_desc->shared_rss) { - dh = wks->flows[wks->flow_idx - 1].handle; - MLX5_ASSERT(dh->fate_action == MLX5_FLOW_FATE_SHARED_RSS); - dh->rix_srss = rss_desc->shared_rss; - } for (idx = wks->flow_idx - 1; idx >= 0; idx--) { dev_flow = &wks->flows[idx]; dv = &dev_flow->dv; @@ -10744,11 +10791,34 @@ flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, priv->drop_queue.hrxq->action; } } else if ((dh->fate_action == MLX5_FLOW_FATE_QUEUE && - !dv_h->rix_sample && !dv_h->rix_dest_array) || - (dh->fate_action == MLX5_FLOW_FATE_SHARED_RSS)) { + !dv_h->rix_sample && !dv_h->rix_dest_array)) { + struct mlx5_hrxq *hrxq; + uint32_t hrxq_idx; + + hrxq = flow_dv_hrxq_prepare(dev, dev_flow, rss_desc, + &hrxq_idx); + if (!hrxq) { + rte_flow_error_set + (error, rte_errno, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "cannot get hash queue"); + goto error; + } + dh->rix_hrxq = hrxq_idx; + dv->actions[n++] = hrxq->action; + } else if (dh->fate_action == MLX5_FLOW_FATE_SHARED_RSS) { struct mlx5_hrxq *hrxq = NULL; - uint32_t hrxq_idx = __flow_dv_rss_get_hrxq - (dev, dev_flow, rss_desc, &hrxq); + uint32_t hrxq_idx; + + hrxq_idx = __flow_dv_action_rss_hrxq_lookup(dev, + rss_desc->shared_rss, + dev_flow->hash_fields, + !!(dh->layers & + MLX5_FLOW_LAYER_TUNNEL)); + if (hrxq_idx) + hrxq = mlx5_ipool_get + (priv->sh->ipool[MLX5_IPOOL_HRXQ], + hrxq_idx); if (!hrxq) { rte_flow_error_set (error, rte_errno, @@ -10756,8 +10826,7 @@ flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, "cannot get hash queue"); goto error; } - if (dh->fate_action == MLX5_FLOW_FATE_QUEUE) - dh->rix_hrxq = hrxq_idx; + dh->rix_srss = rss_desc->shared_rss; dv->actions[n++] = hrxq->action; } else if (dh->fate_action == MLX5_FLOW_FATE_DEFAULT_MISS) { if (!priv->sh->default_miss_action) { @@ -10799,12 +10868,12 @@ flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, if (dh->fate_action == MLX5_FLOW_FATE_QUEUE && dh->rix_hrxq) { mlx5_hrxq_release(dev, dh->rix_hrxq); dh->rix_hrxq = 0; + } else if (dh->fate_action == MLX5_FLOW_FATE_SHARED_RSS) { + dh->rix_srss = 0; } if (dh->vf_vlan.tag && dh->vf_vlan.created) mlx5_vlan_vmwa_release(dev, &dh->vf_vlan); } - if (rss_desc->shared_rss) - wks->flows[wks->flow_idx - 1].handle->rix_srss = 0; rte_errno = err; /* Restore rte_errno. 
*/ return -rte_errno; } @@ -11072,9 +11141,6 @@ flow_dv_fate_resource_release(struct rte_eth_dev *dev, flow_dv_port_id_action_resource_release(dev, handle->rix_port_id_action); break; - case MLX5_FLOW_FATE_SHARED_RSS: - flow_dv_shared_rss_action_release(dev, handle->rix_srss); - break; default: DRV_LOG(DEBUG, "Incorrect fate action:%d", handle->fate_action); break; @@ -11092,11 +11158,11 @@ flow_dv_sample_remove_cb(struct mlx5_cache_list *list __rte_unused, struct mlx5_priv *priv = dev->data->dev_private; if (cache_resource->verbs_action) - claim_zero(mlx5_glue->destroy_flow_action + claim_zero(mlx5_flow_os_destroy_flow_action (cache_resource->verbs_action)); if (cache_resource->ft_type == MLX5DV_FLOW_TABLE_TYPE_FDB) { if (cache_resource->default_miss) - claim_zero(mlx5_glue->destroy_flow_action + claim_zero(mlx5_flow_os_destroy_flow_action (cache_resource->default_miss)); } if (cache_resource->normal_path_tbl) @@ -11149,7 +11215,7 @@ flow_dv_dest_array_remove_cb(struct mlx5_cache_list *list __rte_unused, MLX5_ASSERT(cache_resource->action); if (cache_resource->action) - claim_zero(mlx5_glue->destroy_flow_action + claim_zero(mlx5_flow_os_destroy_flow_action (cache_resource->action)); for (; i < cache_resource->num_of_dest; i++) flow_dv_sample_sub_actions_release(dev, @@ -11237,6 +11303,7 @@ flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) { struct mlx5_flow_handle *dev_handle; struct mlx5_priv *priv = dev->data->dev_private; + uint32_t srss = 0; if (!flow) return; @@ -11281,10 +11348,15 @@ flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) if (dev_handle->dvh.rix_tag) flow_dv_tag_release(dev, dev_handle->dvh.rix_tag); - flow_dv_fate_resource_release(dev, dev_handle); + if (dev_handle->fate_action != MLX5_FLOW_FATE_SHARED_RSS) + flow_dv_fate_resource_release(dev, dev_handle); + else if (!srss) + srss = dev_handle->rix_srss; mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], tmp_idx); } + if (srss) + flow_dv_shared_rss_action_release(dev, srss); } /** @@ -11332,10 +11404,10 @@ __flow_dv_hrxqs_release(struct rte_eth_dev *dev, */ static int __flow_dv_action_rss_hrxqs_release(struct rte_eth_dev *dev, - struct mlx5_shared_action_rss *action) + struct mlx5_shared_action_rss *shared_rss) { - return __flow_dv_hrxqs_release(dev, &action->hrxq) + - __flow_dv_hrxqs_release(dev, &action->hrxq_tunnel); + return __flow_dv_hrxqs_release(dev, &shared_rss->hrxq) + + __flow_dv_hrxqs_release(dev, &shared_rss->hrxq_tunnel); } /** @@ -11359,25 +11431,25 @@ __flow_dv_action_rss_hrxqs_release(struct rte_eth_dev *dev, static int __flow_dv_action_rss_setup(struct rte_eth_dev *dev, uint32_t action_idx, - struct mlx5_shared_action_rss *action, + struct mlx5_shared_action_rss *shared_rss, struct rte_flow_error *error) { struct mlx5_flow_rss_desc rss_desc = { 0 }; size_t i; int err; - if (mlx5_ind_table_obj_setup(dev, action->ind_tbl)) { + if (mlx5_ind_table_obj_setup(dev, shared_rss->ind_tbl)) { return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "cannot setup indirection table"); } - memcpy(rss_desc.key, action->origin.key, MLX5_RSS_HASH_KEY_LEN); + memcpy(rss_desc.key, shared_rss->origin.key, MLX5_RSS_HASH_KEY_LEN); rss_desc.key_len = MLX5_RSS_HASH_KEY_LEN; - rss_desc.const_q = action->origin.queue; - rss_desc.queue_num = action->origin.queue_num; + rss_desc.const_q = shared_rss->origin.queue; + rss_desc.queue_num = shared_rss->origin.queue_num; /* Set non-zero value to indicate a shared RSS. 
*/ rss_desc.shared_rss = action_idx; - rss_desc.ind_tbl = action->ind_tbl; + rss_desc.ind_tbl = shared_rss->ind_tbl; for (i = 0; i < MLX5_RSS_HASH_FIELDS_LEN; i++) { uint32_t hrxq_idx; uint64_t hash_fields = mlx5_rss_hash_fields[i]; @@ -11395,16 +11467,16 @@ __flow_dv_action_rss_setup(struct rte_eth_dev *dev, goto error_hrxq_new; } err = __flow_dv_action_rss_hrxq_set - (action, hash_fields, tunnel, hrxq_idx); + (shared_rss, hash_fields, tunnel, hrxq_idx); MLX5_ASSERT(!err); } } return 0; error_hrxq_new: err = rte_errno; - __flow_dv_action_rss_hrxqs_release(dev, action); - if (!mlx5_ind_table_obj_release(dev, action->ind_tbl, true)) - action->ind_tbl = NULL; + __flow_dv_action_rss_hrxqs_release(dev, shared_rss); + if (!mlx5_ind_table_obj_release(dev, shared_rss->ind_tbl, true)) + shared_rss->ind_tbl = NULL; rte_errno = err; return -rte_errno; } @@ -11433,7 +11505,7 @@ __flow_dv_action_rss_create(struct rte_eth_dev *dev, struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_shared_action_rss *shared_action = NULL; + struct mlx5_shared_action_rss *shared_rss = NULL; void *queue = NULL; struct rte_flow_action_rss *origin; const uint8_t *rss_key; @@ -11443,9 +11515,9 @@ __flow_dv_action_rss_create(struct rte_eth_dev *dev, RTE_SET_USED(conf); queue = mlx5_malloc(0, RTE_ALIGN_CEIL(queue_size, sizeof(void *)), 0, SOCKET_ID_ANY); - shared_action = mlx5_ipool_zmalloc + shared_rss = mlx5_ipool_zmalloc (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], &idx); - if (!shared_action || !queue) { + if (!shared_rss || !queue) { rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "cannot allocate resource memory"); @@ -11457,43 +11529,43 @@ __flow_dv_action_rss_create(struct rte_eth_dev *dev, "rss action number out of range"); goto error_rss_init; } - shared_action->ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, - sizeof(*shared_action->ind_tbl), - 0, SOCKET_ID_ANY); - if (!shared_action->ind_tbl) { + shared_rss->ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, + sizeof(*shared_rss->ind_tbl), + 0, SOCKET_ID_ANY); + if (!shared_rss->ind_tbl) { rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "cannot allocate resource memory"); goto error_rss_init; } memcpy(queue, rss->queue, queue_size); - shared_action->ind_tbl->queues = queue; - shared_action->ind_tbl->queues_n = rss->queue_num; - origin = &shared_action->origin; + shared_rss->ind_tbl->queues = queue; + shared_rss->ind_tbl->queues_n = rss->queue_num; + origin = &shared_rss->origin; origin->func = rss->func; origin->level = rss->level; /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */ origin->types = !rss->types ? ETH_RSS_IP : rss->types; /* NULL RSS key indicates default RSS key. */ rss_key = !rss->key ? 
rss_hash_default_key : rss->key; - memcpy(shared_action->key, rss_key, MLX5_RSS_HASH_KEY_LEN); - origin->key = &shared_action->key[0]; + memcpy(shared_rss->key, rss_key, MLX5_RSS_HASH_KEY_LEN); + origin->key = &shared_rss->key[0]; origin->key_len = MLX5_RSS_HASH_KEY_LEN; origin->queue = queue; origin->queue_num = rss->queue_num; - if (__flow_dv_action_rss_setup(dev, idx, shared_action, error)) + if (__flow_dv_action_rss_setup(dev, idx, shared_rss, error)) goto error_rss_init; - rte_spinlock_init(&shared_action->action_rss_sl); - __atomic_add_fetch(&shared_action->refcnt, 1, __ATOMIC_RELAXED); + rte_spinlock_init(&shared_rss->action_rss_sl); + __atomic_add_fetch(&shared_rss->refcnt, 1, __ATOMIC_RELAXED); rte_spinlock_lock(&priv->shared_act_sl); ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], - &priv->rss_shared_actions, idx, shared_action, next); + &priv->rss_shared_actions, idx, shared_rss, next); rte_spinlock_unlock(&priv->shared_act_sl); return idx; error_rss_init: - if (shared_action) { - if (shared_action->ind_tbl) - mlx5_free(shared_action->ind_tbl); + if (shared_rss) { + if (shared_rss->ind_tbl) + mlx5_free(shared_rss->ind_tbl); mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx); } @@ -11538,6 +11610,13 @@ __flow_dv_action_rss_release(struct rte_eth_dev *dev, uint32_t idx, RTE_FLOW_ERROR_TYPE_ACTION, NULL, "shared rss hrxq has references"); + if (!__atomic_compare_exchange_n(&shared_rss->refcnt, &old_refcnt, + 0, 0, __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED)) + return rte_flow_error_set(error, EBUSY, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "shared rss has references"); queue = shared_rss->ind_tbl->queues; remaining = mlx5_ind_table_obj_release(dev, shared_rss->ind_tbl, true); if (remaining) @@ -11546,13 +11625,6 @@ __flow_dv_action_rss_release(struct rte_eth_dev *dev, uint32_t idx, NULL, "shared rss indirection table has" " references"); - if (!__atomic_compare_exchange_n(&shared_rss->refcnt, &old_refcnt, - 0, 0, __ATOMIC_ACQUIRE, - __ATOMIC_RELAXED)) - return rte_flow_error_set(error, EBUSY, - RTE_FLOW_ERROR_TYPE_ACTION, - NULL, - "shared rss has references"); mlx5_free(queue); rte_spinlock_lock(&priv->shared_act_sl); ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], @@ -11700,6 +11772,10 @@ __flow_dv_action_rss_update(struct rte_eth_dev *dev, uint32_t idx, return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, NULL, "invalid shared action to update"); + if (priv->obj_ops.ind_table_modify == NULL) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "cannot modify indirection table"); queue = mlx5_malloc(MLX5_MEM_ZERO, RTE_ALIGN_CEIL(queue_size, sizeof(void *)), 0, SOCKET_ID_ANY); @@ -12582,6 +12658,20 @@ flow_dv_action_validate(struct rte_eth_dev *dev, RTE_SET_USED(conf); switch (action->type) { case RTE_FLOW_ACTION_TYPE_RSS: + /* + * priv->obj_ops is set according to driver capabilities. + * When DevX capabilities are + * sufficient, it is set to devx_obj_ops. + * Otherwise, it is set to ibv_obj_ops. + * ibv_obj_ops doesn't support ind_table_modify operation. + * In this case the shared RSS action can't be used. 
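The reshuffle in __flow_dv_action_rss_release() above is an ordering fix: the reference-count CAS (1 -> 0) now happens before the indirection table is released, so a concurrent holder can no longer find a live refcnt on an action whose table is already being torn down. The claiming step in isolation, using the same GCC atomic builtin as the driver:

    #include <stdbool.h>
    #include <stdint.h>

    static bool
    claim_last_ref(uint32_t *refcnt)
    {
        uint32_t expected = 1;

        /* Succeeds only for the last holder. On failure the caller
         * bails out with EBUSY ("shared rss has references") and the
         * indirection table is left untouched. */
        return __atomic_compare_exchange_n(refcnt, &expected, 0,
                                           false /* strong */,
                                           __ATOMIC_ACQUIRE,
                                           __ATOMIC_RELAXED);
    }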
+ */ + if (priv->obj_ops.ind_table_modify == NULL) + return rte_flow_error_set + (err, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "shared RSS action not supported"); return mlx5_validate_action_rss(dev, action, err); case RTE_FLOW_ACTION_TYPE_AGE: if (!priv->sh->aso_age_mng) diff --git a/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c b/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c index 59291fbd09..bd060e9d44 100644 --- a/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c @@ -1247,6 +1247,7 @@ flow_verbs_validate(struct rte_eth_dev *dev, uint64_t last_item = 0; uint8_t next_protocol = 0xff; uint16_t ether_type = 0; + bool is_empty_vlan = false; if (items == NULL) return -1; @@ -1274,6 +1275,8 @@ flow_verbs_validate(struct rte_eth_dev *dev, ether_type &= ((const struct rte_flow_item_eth *) items->mask)->type; + if (ether_type == RTE_BE16(RTE_ETHER_TYPE_VLAN)) + is_empty_vlan = true; ether_type = rte_be_to_cpu_16(ether_type); } else { ether_type = 0; @@ -1299,6 +1302,7 @@ flow_verbs_validate(struct rte_eth_dev *dev, } else { ether_type = 0; } + is_empty_vlan = false; break; case RTE_FLOW_ITEM_TYPE_IPV4: ret = mlx5_flow_validate_item_ipv4 @@ -1410,6 +1414,10 @@ flow_verbs_validate(struct rte_eth_dev *dev, } item_flags |= last_item; } + if (is_empty_vlan) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "VLAN matching without vid specification is not supported"); for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { switch (actions->type) { case RTE_FLOW_ACTION_TYPE_VOID: diff --git a/dpdk/drivers/net/mlx5/mlx5_rxq.c b/dpdk/drivers/net/mlx5/mlx5_rxq.c index da7a8b3cd7..1a5cf99d51 100644 --- a/dpdk/drivers/net/mlx5/mlx5_rxq.c +++ b/dpdk/drivers/net/mlx5/mlx5_rxq.c @@ -346,7 +346,9 @@ rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) (1 << rxq->elts_n) * (1 << rxq->strd_num_n) : (1 << rxq->elts_n); const uint16_t q_mask = q_n - 1; - uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi); + uint16_t elts_ci = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 
+ rxq->elts_ci : rxq->rq_ci; + uint16_t used = q_n - (elts_ci - rxq->rq_pi); uint16_t i; DRV_LOG(DEBUG, "port %u Rx queue %u freeing %d WRs", @@ -359,8 +361,8 @@ rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) */ if (mlx5_rxq_check_vec_support(rxq) > 0) { for (i = 0; i < used; ++i) - (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL; - rxq->rq_pi = rxq->rq_ci; + (*rxq->elts)[(elts_ci + i) & q_mask] = NULL; + rxq->rq_pi = elts_ci; } for (i = 0; i != q_n; ++i) { if ((*rxq->elts)[i] != NULL) @@ -402,14 +404,14 @@ mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev) struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_dev_config *config = &priv->config; uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER | - RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT | DEV_RX_OFFLOAD_TIMESTAMP | DEV_RX_OFFLOAD_JUMBO_FRAME | DEV_RX_OFFLOAD_RSS_HASH); + if (!config->mprq.enabled) + offloads |= RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT; if (config->hw_fcs_strip) offloads |= DEV_RX_OFFLOAD_KEEP_CRC; - if (config->hw_csum) offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM | @@ -1689,6 +1691,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); return tmpl; error: + mlx5_mr_btree_free(&tmpl->rxq.mr_ctrl.cache_bh); mlx5_free(tmpl); return NULL; } @@ -2421,7 +2424,9 @@ uint32_t mlx5_hrxq_get(struct rte_eth_dev *dev, return 0; hrxq = container_of(entry, typeof(*hrxq), entry); } - return hrxq->idx; + if (hrxq) + return hrxq->idx; + return 0; } /** diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx.h b/dpdk/drivers/net/mlx5/mlx5_rxtx.h index 7989a50403..c57ccc32ed 100644 --- a/dpdk/drivers/net/mlx5/mlx5_rxtx.h +++ b/dpdk/drivers/net/mlx5/mlx5_rxtx.h @@ -126,7 +126,7 @@ struct mlx5_rxq_data { unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */ unsigned int lro:1; /* Enable LRO. */ unsigned int dynf_meta:1; /* Dynamic metadata is configured. */ - unsigned int mcqe_format:3; /* Dynamic metadata is configured. */ + unsigned int mcqe_format:3; /* CQE compression format. */ volatile uint32_t *rq_db; volatile uint32_t *cq_db; uint16_t port_id; diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h index d4df9816aa..0b3f240e10 100644 --- a/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +++ b/dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h @@ -197,8 +197,8 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, const __m128i flow_mark_adj = _mm_set_epi32(-1, -1, -1, -1); const __m128i flow_mark_shuf = - _mm_set_epi8(-1, 1, 0, 4, - -1, 9, 8, 12, + _mm_set_epi8(-1, 9, 8, 12, + -1, 1, 0, 4, -1, -1, -1, -1, -1, -1, -1, -1); const __m128i ft_mask = diff --git a/dpdk/drivers/net/mlx5/mlx5_txpp.c b/dpdk/drivers/net/mlx5/mlx5_txpp.c index 2438bf1f1d..28afda28cb 100644 --- a/dpdk/drivers/net/mlx5/mlx5_txpp.c +++ b/dpdk/drivers/net/mlx5/mlx5_txpp.c @@ -57,11 +57,16 @@ mlx5_txpp_create_event_channel(struct mlx5_dev_ctx_shared *sh) static void mlx5_txpp_free_pp_index(struct mlx5_dev_ctx_shared *sh) { +#ifdef HAVE_MLX5DV_PP_ALLOC if (sh->txpp.pp) { mlx5_glue->dv_free_pp(sh->txpp.pp); sh->txpp.pp = NULL; sh->txpp.pp_id = 0; } +#else + RTE_SET_USED(sh); + DRV_LOG(ERR, "Freeing pacing index is not supported."); +#endif } /* Allocate Packet Pacing index from kernel via mlx5dv call. */ @@ -270,8 +275,6 @@ mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh) goto error; } /* Create completion queue object for Rearm Queue. */ - cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ? 
- MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B; cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar); cq_attr.eqn = sh->eqn; cq_attr.q_umem_valid = 1; @@ -508,8 +511,6 @@ mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh) goto error; } /* Create completion queue object for Clock Queue. */ - cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ? - MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B; cq_attr.use_first_only = 1; cq_attr.overrun_ignore = 1; cq_attr.uar_page_id = mlx5_os_get_devx_uar_page_id(sh->tx_uar); diff --git a/dpdk/drivers/net/mlx5/mlx5_txq.c b/dpdk/drivers/net/mlx5/mlx5_txq.c index d96abef883..c53af10d58 100644 --- a/dpdk/drivers/net/mlx5/mlx5_txq.c +++ b/dpdk/drivers/net/mlx5/mlx5_txq.c @@ -634,18 +634,23 @@ txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl) void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev) { - struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_txq_data *txq; - struct mlx5_txq_ctrl *txq_ctrl; + struct mlx5_proc_priv *ppriv = (struct mlx5_proc_priv *) + dev->process_private; + const size_t page_size = rte_mem_page_size(); + void *addr; unsigned int i; + if (page_size == (size_t)-1) { + DRV_LOG(ERR, "Failed to get mem page size"); + return; + } MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY); - for (i = 0; i != priv->txqs_n; ++i) { - if (!(*priv->txqs)[i]) + for (i = 0; i != ppriv->uar_table_sz; ++i) { - if (!ppriv->uar_table[i]) continue; - txq = (*priv->txqs)[i]; - txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); - txq_uar_uninit_secondary(txq_ctrl); + addr = ppriv->uar_table[i]; + rte_mem_unmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size); + } } @@ -1146,6 +1151,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next); return tmpl; error: + mlx5_mr_btree_free(&tmpl->txq.mr_ctrl.cache_bh); mlx5_free(tmpl); return NULL; } diff --git a/dpdk/drivers/net/mvneta/mvneta_rxtx.c b/dpdk/drivers/net/mvneta/mvneta_rxtx.c index 10b6f57584..dfa7ecc090 100644 --- a/dpdk/drivers/net/mvneta/mvneta_rxtx.c +++ b/dpdk/drivers/net/mvneta/mvneta_rxtx.c @@ -872,7 +872,17 @@ mvneta_rx_queue_flush(struct mvneta_rxq *rxq) int ret, i; descs = rte_malloc("rxdesc", MRVL_NETA_RXD_MAX * sizeof(*descs), 0); + if (descs == NULL) { + MVNETA_LOG(ERR, "Failed to allocate descs."); + return; + } + bufs = rte_malloc("buffs", MRVL_NETA_RXD_MAX * sizeof(*bufs), 0); + if (bufs == NULL) { + MVNETA_LOG(ERR, "Failed to allocate bufs."); + rte_free(descs); + return; + } do { num = MRVL_NETA_RXD_MAX; diff --git a/dpdk/drivers/net/mvpp2/mrvl_ethdev.c b/dpdk/drivers/net/mvpp2/mrvl_ethdev.c index f25cf9e46d..6cd5acd337 100644 --- a/dpdk/drivers/net/mvpp2/mrvl_ethdev.c +++ b/dpdk/drivers/net/mvpp2/mrvl_ethdev.c @@ -441,8 +441,8 @@ mrvl_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) * when this feature has not been enabled/supported so far * (TODO check scattered_rx flag here once scattered RX is supported). */ - if (mru + MRVL_PKT_OFFS > mbuf_data_size) { - mru = mbuf_data_size - MRVL_PKT_OFFS; + if (mru - RTE_ETHER_CRC_LEN + MRVL_PKT_OFFS > mbuf_data_size) { + mru = mbuf_data_size + RTE_ETHER_CRC_LEN - MRVL_PKT_OFFS; mtu = MRVL_PP2_MRU_TO_MTU(mru); MRVL_LOG(WARNING, "MTU too big, max MTU possible limited " "by current mbuf size: %u. 
Set MTU to %u, MRU to %u", @@ -671,18 +671,6 @@ mrvl_dev_start(struct rte_eth_dev *dev) priv->uc_mc_flushed = 1; } - if (!priv->vlan_flushed) { - ret = pp2_ppio_flush_vlan(priv->ppio); - if (ret) { - MRVL_LOG(ERR, "Failed to flush vlan list"); - /* - * TODO - * once pp2_ppio_flush_vlan() is supported jump to out - * goto out; - */ - } - priv->vlan_flushed = 1; - } ret = mrvl_mtu_set(dev, dev->data->mtu); if (ret) MRVL_LOG(ERR, "Failed to set MTU to %d", dev->data->mtu); @@ -1614,8 +1602,8 @@ mrvl_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) static int mrvl_fill_bpool(struct mrvl_rxq *rxq, int num) { - struct buff_release_entry entries[MRVL_PP2_RXD_MAX]; - struct rte_mbuf *mbufs[MRVL_PP2_RXD_MAX]; + struct buff_release_entry entries[num]; + struct rte_mbuf *mbufs[num]; int i, ret; unsigned int core_id; struct pp2_hif *hif; @@ -1711,7 +1699,8 @@ mrvl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, return -EFAULT; } - frame_size = buf_size - RTE_PKTMBUF_HEADROOM - MRVL_PKT_EFFEC_OFFS; + frame_size = buf_size - RTE_PKTMBUF_HEADROOM - + MRVL_PKT_EFFEC_OFFS + RTE_ETHER_CRC_LEN; if (frame_size < max_rx_pkt_len) { MRVL_LOG(WARNING, "Mbuf size must be increased to %u bytes to hold up " @@ -2171,7 +2160,6 @@ mrvl_desc_to_packet_type_and_offset(struct pp2_ppio_desc *desc, *l4_offset = *l3_offset + MRVL_ARP_LENGTH; break; default: - MRVL_LOG(DEBUG, "Failed to recognise l3 packet type"); break; } @@ -2183,7 +2171,6 @@ mrvl_desc_to_packet_type_and_offset(struct pp2_ppio_desc *desc, packet_type |= RTE_PTYPE_L4_UDP; break; default: - MRVL_LOG(DEBUG, "Failed to recognise l4 packet type"); break; } @@ -2253,10 +2240,9 @@ mrvl_rx_pkt_burst(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) ret = pp2_ppio_recv(q->priv->ppio, q->priv->rxq_map[q->queue_id].tc, q->priv->rxq_map[q->queue_id].inq, descs, &nb_pkts); - if (unlikely(ret < 0)) { - MRVL_LOG(ERR, "Failed to receive packets"); + if (unlikely(ret < 0)) return 0; - } + mrvl_port_bpool_size[bpool->pp2_id][bpool->id][core_id] -= nb_pkts; for (i = 0; i < nb_pkts; i++) { @@ -2319,21 +2305,13 @@ mrvl_rx_pkt_burst(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) if (unlikely(num <= q->priv->bpool_min_size || (!rx_done && num < q->priv->bpool_init_size))) { - ret = mrvl_fill_bpool(q, MRVL_BURST_SIZE); - if (ret) - MRVL_LOG(ERR, "Failed to fill bpool"); + mrvl_fill_bpool(q, MRVL_BURST_SIZE); } else if (unlikely(num > q->priv->bpool_max_size)) { int i; int pkt_to_remove = num - q->priv->bpool_init_size; struct rte_mbuf *mbuf; struct pp2_buff_inf buff; - MRVL_LOG(DEBUG, - "port-%d:%d: bpool %d oversize - remove %d buffers (pool size: %d -> %d)", - bpool->pp2_id, q->priv->ppio->port_id, - bpool->id, pkt_to_remove, num, - q->priv->bpool_init_size); - for (i = 0; i < pkt_to_remove; i++) { ret = pp2_bpool_get_buff(hif, bpool, &buff); if (ret) @@ -2526,12 +2504,8 @@ mrvl_tx_pkt_burst(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) sq, q->queue_id, 0); sq_free_size = MRVL_PP2_TX_SHADOWQ_SIZE - sq->size - 1; - if (unlikely(nb_pkts > sq_free_size)) { - MRVL_LOG(DEBUG, - "No room in shadow queue for %d packets! %d packets will be sent.", - nb_pkts, sq_free_size); + if (unlikely(nb_pkts > sq_free_size)) nb_pkts = sq_free_size; - } for (i = 0; i < nb_pkts; i++) { struct rte_mbuf *mbuf = tx_pkts[i]; @@ -2648,10 +2622,6 @@ mrvl_tx_sg_pkt_burst(void *txq, struct rte_mbuf **tx_pkts, */ if (unlikely(total_descs > sq_free_size)) { total_descs -= nb_segs; - RTE_LOG(DEBUG, PMD, - "No room in shadow queue for %d packets! 
" - "%d packets will be sent.\n", - nb_pkts, i); break; } diff --git a/dpdk/drivers/net/mvpp2/mrvl_ethdev.h b/dpdk/drivers/net/mvpp2/mrvl_ethdev.h index db6632f5b6..eee5182ce8 100644 --- a/dpdk/drivers/net/mvpp2/mrvl_ethdev.h +++ b/dpdk/drivers/net/mvpp2/mrvl_ethdev.h @@ -186,7 +186,6 @@ struct mrvl_priv { uint8_t bpool_bit; uint8_t rss_hf_tcp; uint8_t uc_mc_flushed; - uint8_t vlan_flushed; uint8_t isolated; uint8_t multiseg; diff --git a/dpdk/drivers/net/netvsc/hn_nvs.c b/dpdk/drivers/net/netvsc/hn_nvs.c index eeb82ab9ee..03b6cc1551 100644 --- a/dpdk/drivers/net/netvsc/hn_nvs.c +++ b/dpdk/drivers/net/netvsc/hn_nvs.c @@ -97,8 +97,13 @@ __hn_nvs_execute(struct hn_data *hv, hdr = (struct hn_nvs_hdr *)buffer; /* Silently drop received packets while waiting for response */ - if (hdr->type == NVS_TYPE_RNDIS) { + switch (hdr->type) { + case NVS_TYPE_RNDIS: hn_nvs_ack_rxbuf(chan, xactid); + /* fallthrough */ + + case NVS_TYPE_TXTBL_NOTE: + PMD_DRV_LOG(DEBUG, "discard packet type 0x%x", hdr->type); goto retry; } diff --git a/dpdk/drivers/net/nfb/meson.build b/dpdk/drivers/net/nfb/meson.build index d53e8eca7d..995c44c61c 100644 --- a/dpdk/drivers/net/nfb/meson.build +++ b/dpdk/drivers/net/nfb/meson.build @@ -3,7 +3,7 @@ # Copyright(c) 2019 Netcope Technologies, a.s. # All rights reserved. -dep = dependency('netcope-common', required: false) +dep = dependency('netcope-common', required: false, method: 'pkg-config') reason = 'missing dependency, "libnfb"' build = dep.found() ext_deps += dep diff --git a/dpdk/drivers/net/nfp/nfp_net.c b/dpdk/drivers/net/nfp/nfp_net.c index 1608bf5ea1..9ea24e5bda 100644 --- a/dpdk/drivers/net/nfp/nfp_net.c +++ b/dpdk/drivers/net/nfp/nfp_net.c @@ -1508,7 +1508,7 @@ nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) } /* switch to jumbo mode if needed */ - if ((uint32_t)mtu > RTE_ETHER_MAX_LEN) + if ((uint32_t)mtu > RTE_ETHER_MTU) dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else dev->data->dev_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME; diff --git a/dpdk/drivers/net/nfp/nfpcore/nfp_cpp.h b/dpdk/drivers/net/nfp/nfpcore/nfp_cpp.h index 1427954c17..08d656da14 100644 --- a/dpdk/drivers/net/nfp/nfpcore/nfp_cpp.h +++ b/dpdk/drivers/net/nfp/nfpcore/nfp_cpp.h @@ -170,7 +170,7 @@ void *nfp_cpp_priv(struct nfp_cpp *cpp); */ void *nfp_cpp_area_priv(struct nfp_cpp_area *cpp_area); -uint32_t __nfp_cpp_model_autodetect(struct nfp_cpp *cpp); +uint32_t __nfp_cpp_model_autodetect(struct nfp_cpp *cpp, uint32_t *model); /* * NFP CPP core interface for CPP clients. 
diff --git a/dpdk/drivers/net/nfp/nfpcore/nfp_cppcore.c b/dpdk/drivers/net/nfp/nfpcore/nfp_cppcore.c index dec4a8b6d1..6d629430d4 100644 --- a/dpdk/drivers/net/nfp/nfpcore/nfp_cppcore.c +++ b/dpdk/drivers/net/nfp/nfpcore/nfp_cppcore.c @@ -22,8 +22,9 @@ #define NFP_PL_DEVICE_ID 0x00000004 #define NFP_PL_DEVICE_ID_MASK 0xff - -#define NFP6000_ARM_GCSR_SOFTMODEL0 0x00400144 +#define NFP_PL_DEVICE_PART_MASK 0xffff0000 +#define NFP_PL_DEVICE_MODEL_MASK (NFP_PL_DEVICE_PART_MASK | \ + NFP_PL_DEVICE_ID_MASK) void nfp_cpp_priv_set(struct nfp_cpp *cpp, void *priv) @@ -46,13 +47,18 @@ nfp_cpp_model_set(struct nfp_cpp *cpp, uint32_t model) uint32_t nfp_cpp_model(struct nfp_cpp *cpp) { + int err; + uint32_t model; + if (!cpp) return NFP_CPP_MODEL_INVALID; - if (cpp->model == 0) - cpp->model = __nfp_cpp_model_autodetect(cpp); + err = __nfp_cpp_model_autodetect(cpp, &model); - return cpp->model; + if (err < 0) + return err; + + return model; } void @@ -389,9 +395,6 @@ nfp_xpb_to_cpp(struct nfp_cpp *cpp, uint32_t *xpb_addr) uint32_t xpb; int island; - if (!NFP_CPP_MODEL_IS_6000(cpp->model)) - return 0; - xpb = NFP_CPP_ID(14, NFP_CPP_ACTION_RW, 0); /* @@ -796,29 +799,21 @@ nfp_cpp_area_fill(struct nfp_cpp_area *area, unsigned long offset, * as those are model-specific */ uint32_t -__nfp_cpp_model_autodetect(struct nfp_cpp *cpp) +__nfp_cpp_model_autodetect(struct nfp_cpp *cpp, uint32_t *model) { - uint32_t arm_id = NFP_CPP_ID(NFP_CPP_TARGET_ARM, 0, 0); - uint32_t model = 0; - - if (nfp_cpp_readl(cpp, arm_id, NFP6000_ARM_GCSR_SOFTMODEL0, &model)) - return 0; - - if (NFP_CPP_MODEL_IS_6000(model)) { - uint32_t tmp; - - nfp_cpp_model_set(cpp, model); + uint32_t reg; + int err; - /* The PL's PluDeviceID revision code is authoratative */ - model &= ~0xff; - if (nfp_xpb_readl(cpp, NFP_XPB_DEVICE(1, 1, 16) + - NFP_PL_DEVICE_ID, &tmp)) - return 0; + err = nfp_xpb_readl(cpp, NFP_XPB_DEVICE(1, 1, 16) + NFP_PL_DEVICE_ID, + ®); + if (err < 0) + return err; - model |= (NFP_PL_DEVICE_ID_MASK & tmp) - 0x10; - } + *model = reg & NFP_PL_DEVICE_MODEL_MASK; + if (*model & NFP_PL_DEVICE_ID_MASK) + *model -= 0x10; - return model; + return 0; } /* diff --git a/dpdk/drivers/net/octeontx/base/octeontx_io.h b/dpdk/drivers/net/octeontx/base/octeontx_io.h index 04b9ce1910..d0b9cfbc67 100644 --- a/dpdk/drivers/net/octeontx/base/octeontx_io.h +++ b/dpdk/drivers/net/octeontx/base/octeontx_io.h @@ -52,6 +52,11 @@ do { \ #endif #if defined(RTE_ARCH_ARM64) +#if defined(__ARM_FEATURE_SVE) +#define __LSE_PREAMBLE " .cpu generic+lse+sve\n" +#else +#define __LSE_PREAMBLE " .cpu generic+lse\n" +#endif /** * Perform an atomic fetch-and-add operation. 
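A note on the preamble above: the .cpu directive overrides whatever target the compiler handed to the assembler, so when the unit is built with SVE enabled (__ARM_FEATURE_SVE) the directive must keep "+sve" or the assembler will reject SVE code later in the file; __LSE_PREAMBLE selects the right variant. Ordering semantics aside, a portable sketch of this operation would be __atomic_fetch_add((uint64_t *)addr, off, __ATOMIC_RELAXED), mentioned only for comparison with the LDADD instruction used here.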
*/ @@ -61,7 +66,7 @@ octeontx_reg_ldadd_u64(void *addr, int64_t off) uint64_t old_val; __asm__ volatile( - " .cpu generic+lse\n" + __LSE_PREAMBLE " ldadd %1, %0, [%2]\n" : "=r" (old_val) : "r" (off), "r" (addr) : "memory"); @@ -98,12 +103,13 @@ octeontx_reg_lmtst(void *lmtline_va, void *ioreg_va, const uint64_t cmdbuf[], /* LDEOR initiates atomic transfer to I/O device */ __asm__ volatile( - " .cpu generic+lse\n" + __LSE_PREAMBLE " ldeor xzr, %0, [%1]\n" : "=r" (result) : "r" (ioreg_va) : "memory"); } while (!result); } +#undef __LSE_PREAMBLE #else static inline uint64_t diff --git a/dpdk/drivers/net/octeontx/octeontx_ethdev.c b/dpdk/drivers/net/octeontx/octeontx_ethdev.c index 3ee7b043fd..5836dbe09e 100644 --- a/dpdk/drivers/net/octeontx/octeontx_ethdev.c +++ b/dpdk/drivers/net/octeontx/octeontx_ethdev.c @@ -552,7 +552,7 @@ octeontx_dev_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu) if (rc) return rc; - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > OCCTX_L2_MAX_LEN) nic->rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else nic->rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME; @@ -867,7 +867,6 @@ octeontx_dev_info(struct rte_eth_dev *dev, dev_info->max_mac_addrs = octeontx_bgx_port_mac_entries_get(nic->port_id); - dev_info->max_rx_pktlen = PKI_MAX_PKTLEN; dev_info->max_rx_queues = 1; dev_info->max_tx_queues = PKO_MAX_NUM_DQ; dev_info->min_rx_bufsize = 0; diff --git a/dpdk/drivers/net/octeontx/octeontx_ethdev.h b/dpdk/drivers/net/octeontx/octeontx_ethdev.h index 7246fb6d1d..780a094ffa 100644 --- a/dpdk/drivers/net/octeontx/octeontx_ethdev.h +++ b/dpdk/drivers/net/octeontx/octeontx_ethdev.h @@ -44,6 +44,7 @@ /* ETH_HLEN+ETH_FCS+2*VLAN_HLEN */ #define OCCTX_L2_OVERHEAD (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + \ OCCTX_MAX_VTAG_ACT_SIZE) +#define OCCTX_L2_MAX_LEN (RTE_ETHER_MTU + OCCTX_L2_OVERHEAD) /* Since HW FRS includes NPC VTAG insertion space, user has reduced FRS */ #define OCCTX_MAX_FRS \ diff --git a/dpdk/drivers/net/octeontx2/otx2_ethdev.h b/dpdk/drivers/net/octeontx2/otx2_ethdev.h index 3b9871f4dc..99f0469d89 100644 --- a/dpdk/drivers/net/octeontx2/otx2_ethdev.h +++ b/dpdk/drivers/net/octeontx2/otx2_ethdev.h @@ -51,6 +51,8 @@ /* ETH_HLEN+ETH_FCS+2*VLAN_HLEN */ #define NIX_L2_OVERHEAD \ (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + 8) +#define NIX_L2_MAX_LEN \ + (RTE_ETHER_MTU + NIX_L2_OVERHEAD) /* HW config of frame size doesn't include FCS */ #define NIX_MAX_HW_FRS 9212 diff --git a/dpdk/drivers/net/octeontx2/otx2_ethdev_ops.c b/dpdk/drivers/net/octeontx2/otx2_ethdev_ops.c index b36d37b9f7..963cc285ed 100644 --- a/dpdk/drivers/net/octeontx2/otx2_ethdev_ops.c +++ b/dpdk/drivers/net/octeontx2/otx2_ethdev_ops.c @@ -58,7 +58,7 @@ otx2_nix_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu) if (rc) return rc; - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > NIX_L2_MAX_LEN) dev->rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else dev->rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME; diff --git a/dpdk/drivers/net/octeontx2/otx2_flow_parse.c b/dpdk/drivers/net/octeontx2/otx2_flow_parse.c index 476195d634..e9b940f6c0 100644 --- a/dpdk/drivers/net/octeontx2/otx2_flow_parse.c +++ b/dpdk/drivers/net/octeontx2/otx2_flow_parse.c @@ -1090,7 +1090,10 @@ otx2_flow_parse_actions(struct rte_eth_dev *dev, set_pf_func: /* Ideally AF must ensure that correct pf_func is set */ - flow->npc_action |= (uint64_t)pf_func << 4; + if (attr->egress) + flow->npc_action |= (uint64_t)pf_func << 48; + else + flow->npc_action |= (uint64_t)pf_func << 4; return 0; diff --git a/dpdk/drivers/net/octeontx2/otx2_flow_utils.c 
b/dpdk/drivers/net/octeontx2/otx2_flow_utils.c index 9a0a5f9fb4..7ed86ba742 100644 --- a/dpdk/drivers/net/octeontx2/otx2_flow_utils.c +++ b/dpdk/drivers/net/octeontx2/otx2_flow_utils.c @@ -944,7 +944,7 @@ otx2_flow_mcam_alloc_and_write(struct rte_flow *flow, struct otx2_mbox *mbox, req->entry_data.kw[0] |= flow_info->channel; req->entry_data.kw_mask[0] |= (BIT_ULL(12) - 1); } else { - uint16_t pf_func = (flow->npc_action >> 4) & 0xffff; + uint16_t pf_func = (flow->npc_action >> 48) & 0xffff; pf_func = htons(pf_func); req->entry_data.kw[0] |= ((uint64_t)pf_func << 32); diff --git a/dpdk/drivers/net/octeontx2/otx2_rx.c b/dpdk/drivers/net/octeontx2/otx2_rx.c index 2da8efe77c..ffeade5952 100644 --- a/dpdk/drivers/net/octeontx2/otx2_rx.c +++ b/dpdk/drivers/net/octeontx2/otx2_rx.c @@ -279,6 +279,12 @@ nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts, vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2); vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3); + /* Update that no more segments */ + mbuf0->next = NULL; + mbuf1->next = NULL; + mbuf2->next = NULL; + mbuf3->next = NULL; + /* Store the mbufs to rx_pkts */ vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01); vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23); diff --git a/dpdk/drivers/net/octeontx2/otx2_rx.h b/dpdk/drivers/net/octeontx2/otx2_rx.h index 926f614a4e..0ba3d3d96c 100644 --- a/dpdk/drivers/net/octeontx2/otx2_rx.h +++ b/dpdk/drivers/net/octeontx2/otx2_rx.h @@ -215,6 +215,7 @@ nix_cqe_xtract_mseg(const struct nix_rx_parse_s *rx, iova_list = (const rte_iova_t *)(iova_list + 1); } } + mbuf->next = NULL; } static __rte_always_inline uint16_t @@ -330,10 +331,12 @@ otx2_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, *(uint64_t *)(&mbuf->rearm_data) = val; mbuf->pkt_len = len; - if (flag & NIX_RX_MULTI_SEG_F) + if (flag & NIX_RX_MULTI_SEG_F) { nix_cqe_xtract_mseg(rx, mbuf, val); - else + } else { mbuf->data_len = len; + mbuf->next = NULL; + } } #define CKSUM_F NIX_RX_OFFLOAD_CHECKSUM_F diff --git a/dpdk/drivers/net/pcap/rte_eth_pcap.c b/dpdk/drivers/net/pcap/rte_eth_pcap.c index 4930d7d382..40f4fa9021 100644 --- a/dpdk/drivers/net/pcap/rte_eth_pcap.c +++ b/dpdk/drivers/net/pcap/rte_eth_pcap.c @@ -386,7 +386,7 @@ eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) return 0; for (i = 0; i < nb_pkts; i++) { - tx_bytes += bufs[i]->data_len; + tx_bytes += bufs[i]->pkt_len; rte_pktmbuf_free(bufs[i]); } @@ -735,6 +735,17 @@ eth_stats_reset(struct rte_eth_dev *dev) return 0; } +static inline void +infinite_rx_ring_free(struct rte_ring *pkts) +{ + struct rte_mbuf *bufs; + + while (!rte_ring_dequeue(pkts, (void **)&bufs)) + rte_pktmbuf_free(bufs); + + rte_ring_free(pkts); +} + static int eth_dev_close(struct rte_eth_dev *dev) { @@ -753,7 +764,6 @@ eth_dev_close(struct rte_eth_dev *dev) if (internals->infinite_rx) { for (i = 0; i < dev->data->nb_rx_queues; i++) { struct pcap_rx_queue *pcap_q = &internals->rx_queue[i]; - struct rte_mbuf *pcap_buf; /* * 'pcap_q->pkts' can be NULL if 'eth_dev_close()' @@ -762,11 +772,7 @@ eth_dev_close(struct rte_eth_dev *dev) if (pcap_q->pkts == NULL) continue; - while (!rte_ring_dequeue(pcap_q->pkts, - (void **)&pcap_buf)) - rte_pktmbuf_free(pcap_buf); - - rte_ring_free(pcap_q->pkts); + infinite_rx_ring_free(pcap_q->pkts); } } @@ -835,21 +841,25 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, while (eth_pcap_rx(pcap_q, bufs, 1)) { /* Check for multiseg mbufs. 
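Infinite Rx replays packets from a ring of mbufs copied out of the pcap file at setup time, so a frame spanning more than one segment cannot be represented in that ring and queue setup has to fail instead.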
*/ if (bufs[0]->nb_segs != 1) { - rte_pktmbuf_free(*bufs); - - while (!rte_ring_dequeue(pcap_q->pkts, - (void **)bufs)) - rte_pktmbuf_free(*bufs); - - rte_ring_free(pcap_q->pkts); - PMD_LOG(ERR, "Multiseg mbufs are not supported in infinite_rx " - "mode."); + infinite_rx_ring_free(pcap_q->pkts); + PMD_LOG(ERR, + "Multiseg mbufs are not supported in infinite_rx mode."); return -EINVAL; } rte_ring_enqueue_bulk(pcap_q->pkts, (void * const *)bufs, 1, NULL); } + + if (rte_ring_count(pcap_q->pkts) < pcap_pkt_count) { + infinite_rx_ring_free(pcap_q->pkts); + PMD_LOG(ERR, + "Not enough mbufs to accommodate packets in pcap file. " + "At least %" PRIu64 " mbufs per queue are required.", + pcap_pkt_count); + return -EINVAL; + } + /* * Reset the stats for this queue since eth_pcap_rx calls above * didn't result in the application receiving packets. @@ -1324,9 +1334,8 @@ eth_from_pcaps(struct rte_vdev_device *vdev, /* phy_mac arg is applied only if "iface" devarg is provided */ if (rx_queues->phy_mac) { - int ret = eth_pcap_update_mac(rx_queues->queue[0].name, - eth_dev, vdev->device.numa_node); - if (ret == 0) + if (eth_pcap_update_mac(rx_queues->queue[0].name, + eth_dev, vdev->device.numa_node) == 0) internals->phy_mac = 1; } } diff --git a/dpdk/drivers/net/qede/qede_ethdev.c b/dpdk/drivers/net/qede/qede_ethdev.c index 549013557c..ab5f5b1065 100644 --- a/dpdk/drivers/net/qede/qede_ethdev.c +++ b/dpdk/drivers/net/qede/qede_ethdev.c @@ -1885,6 +1885,8 @@ static int qede_allmulticast_enable(struct rte_eth_dev *eth_dev) QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC; enum _ecore_status_t ecore_status; + if (rte_eth_promiscuous_get(eth_dev->data->port_id) == 1) + type = QED_FILTER_RX_MODE_TYPE_PROMISC; ecore_status = qed_configure_filter_rx_mode(eth_dev, type); return ecore_status >= ECORE_SUCCESS ? 0 : -EAGAIN; @@ -2367,7 +2369,7 @@ static int qede_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) fp->rxq->rx_buf_size = rc; } } - if (max_rx_pkt_len > RTE_ETHER_MAX_LEN) + if (frame_size > QEDE_ETH_MAX_LEN) dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else dev->data->dev_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME; diff --git a/dpdk/drivers/net/qede/qede_rxtx.h b/dpdk/drivers/net/qede/qede_rxtx.h index d7ff870b20..fcb564a1bb 100644 --- a/dpdk/drivers/net/qede/qede_rxtx.h +++ b/dpdk/drivers/net/qede/qede_rxtx.h @@ -71,6 +71,7 @@ + (QEDE_LLC_SNAP_HDR_LEN) + 2) #define QEDE_MAX_ETHER_HDR_LEN (RTE_ETHER_HDR_LEN + QEDE_ETH_OVERHEAD) +#define QEDE_ETH_MAX_LEN (RTE_ETHER_MTU + QEDE_MAX_ETHER_HDR_LEN) #define QEDE_RSS_OFFLOAD_ALL (ETH_RSS_IPV4 |\ ETH_RSS_NONFRAG_IPV4_TCP |\ diff --git a/dpdk/drivers/net/sfc/sfc_ef10_tx.c b/dpdk/drivers/net/sfc/sfc_ef10_tx.c index 87fa40f3eb..33d2d637c2 100644 --- a/dpdk/drivers/net/sfc/sfc_ef10_tx.c +++ b/dpdk/drivers/net/sfc/sfc_ef10_tx.c @@ -481,6 +481,25 @@ sfc_ef10_xmit_tso_pkt(struct sfc_ef10_txq * const txq, struct rte_mbuf *m_seg, needed_desc--; } + /* + * 8000-series EF10 hardware requires that innermost IP length + * be greater than or equal to the value which each segment is + * supposed to have; otherwise, TCP checksum will be incorrect. + * + * The same concern applies to outer UDP datagram length field. 
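For tunnelled frames that means the outer UDP header's dgram_len has to be rewritten per segment as well, which is why VXLAN and GENEVE are special-cased in the switch just below.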
+ */ + switch (m_seg->ol_flags & PKT_TX_TUNNEL_MASK) { + case PKT_TX_TUNNEL_VXLAN: + /* FALLTHROUGH */ + case PKT_TX_TUNNEL_GENEVE: + sfc_tso_outer_udp_fix_len(first_m_seg, hdr_addr); + break; + default: + break; + } + + sfc_tso_innermost_ip_fix_len(first_m_seg, hdr_addr, iph_off); + /* * Tx prepare has debug-only checks that offload flags are correctly * filled in the TSO mbuf. Use zero IPID if there is no IPv4 flag. diff --git a/dpdk/drivers/net/sfc/sfc_ethdev.c b/dpdk/drivers/net/sfc/sfc_ethdev.c index 93fc7baa0d..a002e2c037 100644 --- a/dpdk/drivers/net/sfc/sfc_ethdev.c +++ b/dpdk/drivers/net/sfc/sfc_ethdev.c @@ -640,10 +640,19 @@ sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) mac_stats[EFX_MAC_VADAPTER_TX_BROADCAST_BYTES]; stats->imissed = mac_stats[EFX_MAC_VADAPTER_RX_BAD_PACKETS]; stats->oerrors = mac_stats[EFX_MAC_VADAPTER_TX_BAD_PACKETS]; + + /* CRC is included in these stats, but shouldn't be */ + stats->ibytes -= stats->ipackets * RTE_ETHER_CRC_LEN; + stats->obytes -= stats->opackets * RTE_ETHER_CRC_LEN; } else { stats->opackets = mac_stats[EFX_MAC_TX_PKTS]; stats->ibytes = mac_stats[EFX_MAC_RX_OCTETS]; stats->obytes = mac_stats[EFX_MAC_TX_OCTETS]; + + /* CRC is included in these stats, but shouldn't be */ + stats->ibytes -= mac_stats[EFX_MAC_RX_PKTS] * RTE_ETHER_CRC_LEN; + stats->obytes -= mac_stats[EFX_MAC_TX_PKTS] * RTE_ETHER_CRC_LEN; + /* * Take into account stats which are whenever supported * on EF10. If some stat is not supported by current @@ -1017,7 +1026,7 @@ sfc_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) * The driver does not use it, but other PMDs update jumbo frame * flag and max_rx_pkt_len when MTU is set. */ - if (mtu > RTE_ETHER_MAX_LEN) { + if (mtu > RTE_ETHER_MTU) { struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; rxmode->offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; } diff --git a/dpdk/drivers/net/sfc/sfc_tso.c b/dpdk/drivers/net/sfc/sfc_tso.c index d6f1119890..b090ef14db 100644 --- a/dpdk/drivers/net/sfc/sfc_tso.c +++ b/dpdk/drivers/net/sfc/sfc_tso.c @@ -140,6 +140,13 @@ sfc_efx_tso_do(struct sfc_efx_txq *txq, unsigned int idx, tsoh = rte_pktmbuf_mtod(m, uint8_t *); } + /* + * 8000-series EF10 hardware requires that innermost IP length + * be greater than or equal to the value which each segment is + * supposed to have; otherwise, TCP checksum will be incorrect. + */ + sfc_tso_innermost_ip_fix_len(m, tsoh, nh_off); + /* * Handle IP header. Tx prepare has debug-only checks that offload flags * are correctly filled in the TSO mbuf. 
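The call just added performs that fix-up: per the sfc_tso.h helper below, sfc_tso_innermost_ip_fix_len() rewrites the innermost IPv4 total_length to l3_len + l4_len + tso_segsz, or the IPv6 payload_len to l4_len + tso_segsz, i.e. the length each emitted segment is supposed to carry.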
Use zero IPID if there is no diff --git a/dpdk/drivers/net/sfc/sfc_tso.h b/dpdk/drivers/net/sfc/sfc_tso.h index 8597c2868a..361aa22192 100644 --- a/dpdk/drivers/net/sfc/sfc_tso.h +++ b/dpdk/drivers/net/sfc/sfc_tso.h @@ -38,6 +38,36 @@ sfc_tso_ip4_get_ipid(const uint8_t *pkt_hdrp, size_t ip_hdr_off) return rte_be_to_cpu_16(ipid); } +static inline void +sfc_tso_outer_udp_fix_len(const struct rte_mbuf *m, uint8_t *tsoh) +{ + rte_be16_t len = rte_cpu_to_be_16(m->l2_len + m->l3_len + m->l4_len + + m->tso_segsz); + + rte_memcpy(tsoh + m->outer_l2_len + m->outer_l3_len + + offsetof(struct rte_udp_hdr, dgram_len), + &len, sizeof(len)); +} + +static inline void +sfc_tso_innermost_ip_fix_len(const struct rte_mbuf *m, uint8_t *tsoh, + size_t iph_ofst) +{ + size_t ip_payload_len = m->l4_len + m->tso_segsz; + size_t field_ofst; + rte_be16_t len; + + if (m->ol_flags & PKT_TX_IPV4) { + field_ofst = offsetof(struct rte_ipv4_hdr, total_length); + len = rte_cpu_to_be_16(m->l3_len + ip_payload_len); + } else { + field_ofst = offsetof(struct rte_ipv6_hdr, payload_len); + len = rte_cpu_to_be_16(ip_payload_len); + } + + rte_memcpy(tsoh + iph_ofst + field_ofst, &len, sizeof(len)); +} + unsigned int sfc_tso_prepare_header(uint8_t *tsoh, size_t header_len, struct rte_mbuf **in_seg, size_t *in_off); diff --git a/dpdk/drivers/net/szedata2/meson.build b/dpdk/drivers/net/szedata2/meson.build index b53fcbc591..77a5b0ed80 100644 --- a/dpdk/drivers/net/szedata2/meson.build +++ b/dpdk/drivers/net/szedata2/meson.build @@ -1,7 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2018 Intel Corporation -dep = dependency('libsze2', required: false) +dep = dependency('libsze2', required: false, method: 'pkg-config') build = dep.found() reason = 'missing dependency, "libsze2"' ext_deps += dep diff --git a/dpdk/drivers/net/thunderx/base/nicvf_hw_defs.h b/dpdk/drivers/net/thunderx/base/nicvf_hw_defs.h index b12c8ec50a..adc8ec943d 100644 --- a/dpdk/drivers/net/thunderx/base/nicvf_hw_defs.h +++ b/dpdk/drivers/net/thunderx/base/nicvf_hw_defs.h @@ -176,6 +176,7 @@ #define NIC_HW_MAX_MTU (9190) #define NIC_HW_MAX_FRS (NIC_HW_MAX_MTU + NIC_HW_L2_OVERHEAD) #define NIC_HW_MAX_SEGS (12) +#define NIC_HW_L2_MAX_LEN (RTE_ETHER_MTU + NIC_HW_L2_OVERHEAD) /* Descriptor alignments */ #define NICVF_RBDR_BASE_ALIGN_BYTES (128) /* 7 bits */ diff --git a/dpdk/drivers/net/thunderx/nicvf_ethdev.c b/dpdk/drivers/net/thunderx/nicvf_ethdev.c index b6bb05e500..c2e7c334d4 100644 --- a/dpdk/drivers/net/thunderx/nicvf_ethdev.c +++ b/dpdk/drivers/net/thunderx/nicvf_ethdev.c @@ -176,7 +176,7 @@ nicvf_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) (frame_size + 2 * VLAN_TAG_SIZE > buffsz * NIC_HW_MAX_SEGS)) return -EINVAL; - if (frame_size > RTE_ETHER_MAX_LEN) + if (frame_size > NIC_HW_L2_MAX_LEN) rxmode->offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; else rxmode->offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME; diff --git a/dpdk/drivers/net/virtio/virtio_user/vhost.h b/dpdk/drivers/net/virtio/virtio_user/vhost.h index 210a3704e7..be286173b0 100644 --- a/dpdk/drivers/net/virtio/virtio_user/vhost.h +++ b/dpdk/drivers/net/virtio/virtio_user/vhost.h @@ -86,6 +86,14 @@ enum vhost_user_request { VHOST_USER_MAX }; +#ifndef VHOST_BACKEND_F_IOTLB_MSG_V2 +#define VHOST_BACKEND_F_IOTLB_MSG_V2 1 +#endif + +#ifndef VHOST_BACKEND_F_IOTLB_BATCH +#define VHOST_BACKEND_F_IOTLB_BATCH 2 +#endif + extern const char * const vhost_msg_strings[VHOST_USER_MAX]; struct vhost_memory_region { diff --git a/dpdk/drivers/net/virtio/virtio_user/vhost_user.c 
b/dpdk/drivers/net/virtio/virtio_user/vhost_user.c index b93e65c60b..350eed4182 100644 --- a/dpdk/drivers/net/virtio/virtio_user/vhost_user.c +++ b/dpdk/drivers/net/virtio/virtio_user/vhost_user.c @@ -297,13 +297,18 @@ vhost_user_sock(struct virtio_user_dev *dev, if (has_reply_ack) msg.flags |= VHOST_USER_NEED_REPLY_MASK; /* Fallthrough */ - case VHOST_USER_SET_FEATURES: case VHOST_USER_SET_PROTOCOL_FEATURES: case VHOST_USER_SET_LOG_BASE: msg.payload.u64 = *((__u64 *)arg); msg.size = sizeof(m.payload.u64); break; + case VHOST_USER_SET_FEATURES: + msg.payload.u64 = *((__u64 *)arg) | (dev->device_features & + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)); + msg.size = sizeof(m.payload.u64); + break; + case VHOST_USER_SET_OWNER: case VHOST_USER_RESET_OWNER: break; diff --git a/dpdk/drivers/net/virtio/virtio_user/vhost_vdpa.c b/dpdk/drivers/net/virtio/virtio_user/vhost_vdpa.c index c7b9349fc8..269bab2f8e 100644 --- a/dpdk/drivers/net/virtio/virtio_user/vhost_vdpa.c +++ b/dpdk/drivers/net/virtio/virtio_user/vhost_vdpa.c @@ -35,6 +35,8 @@ #define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8) #define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \ struct vhost_vring_state) +#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) +#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) static uint64_t vhost_req_user_to_vdpa[] = { [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER, @@ -51,6 +53,8 @@ static uint64_t vhost_req_user_to_vdpa[] = { [VHOST_USER_SET_STATUS] = VHOST_VDPA_SET_STATUS, [VHOST_USER_GET_STATUS] = VHOST_VDPA_GET_STATUS, [VHOST_USER_SET_VRING_ENABLE] = VHOST_VDPA_SET_VRING_ENABLE, + [VHOST_USER_GET_PROTOCOL_FEATURES] = VHOST_GET_BACKEND_FEATURES, + [VHOST_USER_SET_PROTOCOL_FEATURES] = VHOST_SET_BACKEND_FEATURES, }; /* no alignment requirement */ @@ -66,6 +70,8 @@ struct vhost_iotlb_msg { #define VHOST_IOTLB_UPDATE 2 #define VHOST_IOTLB_INVALIDATE 3 #define VHOST_IOTLB_ACCESS_FAIL 4 +#define VHOST_IOTLB_BATCH_BEGIN 5 +#define VHOST_IOTLB_BATCH_END 6 uint8_t type; }; @@ -80,12 +86,67 @@ struct vhost_msg { }; }; +static int +vhost_vdpa_iotlb_batch_begin(struct virtio_user_dev *dev) +{ + struct vhost_msg msg = {}; + + if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) + return 0; + + if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) { + PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend."); + return -1; + } + + msg.type = VHOST_IOTLB_MSG_V2; + msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN; + + if (write(dev->vhostfd, &msg, sizeof(msg)) != sizeof(msg)) { + PMD_DRV_LOG(ERR, "Failed to send IOTLB batch begin (%s)", + strerror(errno)); + return -1; + } + + return 0; +} + +static int +vhost_vdpa_iotlb_batch_end(struct virtio_user_dev *dev) +{ + struct vhost_msg msg = {}; + + if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) + return 0; + + if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) { + PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend."); + return -1; + } + + msg.type = VHOST_IOTLB_MSG_V2; + msg.iotlb.type = VHOST_IOTLB_BATCH_END; + + if (write(dev->vhostfd, &msg, sizeof(msg)) != sizeof(msg)) { + PMD_DRV_LOG(ERR, "Failed to send IOTLB batch end (%s)", + strerror(errno)); + return -1; + } + + return 0; +} + static int vhost_vdpa_dma_map(struct virtio_user_dev *dev, void *addr, uint64_t iova, size_t len) { struct vhost_msg msg = {}; + if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) { + PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the 
backend."); + return -1; + } + msg.type = VHOST_IOTLB_MSG_V2; msg.iotlb.type = VHOST_IOTLB_UPDATE; msg.iotlb.iova = iova; @@ -108,6 +169,11 @@ vhost_vdpa_dma_unmap(struct virtio_user_dev *dev, __rte_unused void *addr, { struct vhost_msg msg = {}; + if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2))) { + PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend."); + return -1; + } + msg.type = VHOST_IOTLB_MSG_V2; msg.iotlb.type = VHOST_IOTLB_INVALIDATE; msg.iotlb.iova = iova; @@ -122,6 +188,39 @@ vhost_vdpa_dma_unmap(struct virtio_user_dev *dev, __rte_unused void *addr, return 0; } +static int +vhost_vdpa_dma_map_batch(struct virtio_user_dev *dev, void *addr, + uint64_t iova, size_t len) +{ + int ret; + + if (vhost_vdpa_iotlb_batch_begin(dev) < 0) + return -1; + + ret = vhost_vdpa_dma_map(dev, addr, iova, len); + + if (vhost_vdpa_iotlb_batch_end(dev) < 0) + return -1; + + return ret; +} + +static int +vhost_vdpa_dma_unmap_batch(struct virtio_user_dev *dev, void *addr, + uint64_t iova, size_t len) +{ + int ret; + + if (vhost_vdpa_iotlb_batch_begin(dev) < 0) + return -1; + + ret = vhost_vdpa_dma_unmap(dev, addr, iova, len); + + if (vhost_vdpa_iotlb_batch_end(dev) < 0) + return -1; + + return ret; +} static int vhost_vdpa_map_contig(const struct rte_memseg_list *msl, @@ -159,21 +258,32 @@ vhost_vdpa_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms, static int vhost_vdpa_dma_map_all(struct virtio_user_dev *dev) { + int ret; + + if (vhost_vdpa_iotlb_batch_begin(dev) < 0) + return -1; + vhost_vdpa_dma_unmap(dev, NULL, 0, SIZE_MAX); if (rte_eal_iova_mode() == RTE_IOVA_VA) { /* with IOVA as VA mode, we can get away with mapping contiguous * chunks rather than going page-by-page. */ - int ret = rte_memseg_contig_walk_thread_unsafe( + ret = rte_memseg_contig_walk_thread_unsafe( vhost_vdpa_map_contig, dev); if (ret) - return ret; + goto batch_end; /* we have to continue the walk because we've skipped the * external segments during the config walk. 
*/ } - return rte_memseg_walk_thread_unsafe(vhost_vdpa_map, dev); + ret = rte_memseg_walk_thread_unsafe(vhost_vdpa_map, dev); + +batch_end: + if (vhost_vdpa_iotlb_batch_end(dev) < 0) + return -1; + + return ret; } /* with below features, vhost vdpa does not need to do the checksum and TSO, @@ -293,6 +403,6 @@ struct virtio_user_backend_ops virtio_ops_vdpa = { .setup = vhost_vdpa_setup, .send_request = vhost_vdpa_ioctl, .enable_qp = vhost_vdpa_enable_queue_pair, - .dma_map = vhost_vdpa_dma_map, - .dma_unmap = vhost_vdpa_dma_unmap, + .dma_map = vhost_vdpa_dma_map_batch, + .dma_unmap = vhost_vdpa_dma_unmap_batch, }; diff --git a/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c b/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c index 053f0267ca..202431ca22 100644 --- a/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -276,6 +276,7 @@ virtio_user_dev_init_notify(struct virtio_user_dev *dev) } kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); if (kickfd < 0) { + close(callfd); PMD_DRV_LOG(ERR, "kickfd error, %s", strerror(errno)); break; } @@ -284,7 +285,7 @@ virtio_user_dev_init_notify(struct virtio_user_dev *dev) } if (i < VIRTIO_MAX_VIRTQUEUES) { - for (j = 0; j <= i; ++j) { + for (j = 0; j < i; ++j) { close(dev->callfds[j]); close(dev->kickfds[j]); } @@ -439,11 +440,14 @@ virtio_user_dev_setup(struct virtio_user_dev *dev) 1ULL << VIRTIO_F_RING_PACKED | \ 1ULL << VHOST_USER_F_PROTOCOL_FEATURES) -#define VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES \ +#define VHOST_USER_SUPPORTED_PROTOCOL_FEATURES \ (1ULL << VHOST_USER_PROTOCOL_F_MQ | \ 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK | \ 1ULL << VHOST_USER_PROTOCOL_F_STATUS) +#define VHOST_VDPA_SUPPORTED_PROTOCOL_FEATURES \ + (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | \ + 1ULL << VHOST_BACKEND_F_IOTLB_BATCH) int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, int cq, int queue_size, const char *mac, char **ifname, @@ -462,9 +466,13 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, dev->mac_specified = 0; dev->frontend_features = 0; dev->unsupported_features = ~VIRTIO_USER_SUPPORTED_FEATURES; - dev->protocol_features = VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES; dev->backend_type = backend_type; + if (dev->backend_type == VIRTIO_USER_BACKEND_VHOST_USER) + dev->protocol_features = VHOST_USER_SUPPORTED_PROTOCOL_FEATURES; + else if (dev->backend_type == VIRTIO_USER_BACKEND_VHOST_VDPA) + dev->protocol_features = VHOST_VDPA_SUPPORTED_PROTOCOL_FEATURES; + parse_mac(dev, mac); if (*ifname) { @@ -497,8 +505,8 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, } - if (dev->device_features & - (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)) { + if ((dev->device_features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)) || + (dev->backend_type == VIRTIO_USER_BACKEND_VHOST_VDPA)) { if (dev->ops->send_request(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &protocol_features)) diff --git a/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h b/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h index e053897d8f..3b5b6bc3ae 100644 --- a/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h +++ b/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -48,9 +48,7 @@ struct virtio_user_dev { uint64_t device_features; /* supported features by device */ uint64_t frontend_features; /* enabled frontend features */ uint64_t unsupported_features; /* unsupported features mask */ - uint64_t protocol_features; /* negotiated protocol features - * 
(Vhost-user only) - */ + uint64_t protocol_features; /* negotiated protocol features */ uint8_t status; uint16_t net_status; uint16_t port_id; diff --git a/dpdk/drivers/net/virtio/virtio_user_ethdev.c b/dpdk/drivers/net/virtio/virtio_user_ethdev.c index 40345193e6..78998427cc 100644 --- a/dpdk/drivers/net/virtio/virtio_user_ethdev.c +++ b/dpdk/drivers/net/virtio/virtio_user_ethdev.c @@ -77,7 +77,7 @@ virtio_user_server_reconnect(struct virtio_user_dev *dev) return -1; dev->vhostfd = connectfd; - old_status = vtpci_get_status(hw); + old_status = dev->status; vtpci_reset(hw); diff --git a/dpdk/drivers/regex/mlx5/mlx5_regex_fastpath.c b/dpdk/drivers/regex/mlx5/mlx5_regex_fastpath.c index 5857617282..8d134ac98e 100644 --- a/dpdk/drivers/regex/mlx5/mlx5_regex_fastpath.c +++ b/dpdk/drivers/regex/mlx5/mlx5_regex_fastpath.c @@ -105,7 +105,21 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp, { size_t wqe_offset = (sq->pi & (sq_size_get(sq) - 1)) * MLX5_SEND_WQE_BB; uint32_t lkey; - + uint16_t group0 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID0_VALID_F ? + op->group_id0 : 0; + uint16_t group1 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID1_VALID_F ? + op->group_id1 : 0; + uint16_t group2 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID2_VALID_F ? + op->group_id2 : 0; + uint16_t group3 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID3_VALID_F ? + op->group_id3 : 0; + + /* For backward compatibility. */ + if (!(op->req_flags & (RTE_REGEX_OPS_REQ_GROUP_ID0_VALID_F | + RTE_REGEX_OPS_REQ_GROUP_ID1_VALID_F | + RTE_REGEX_OPS_REQ_GROUP_ID2_VALID_F | + RTE_REGEX_OPS_REQ_GROUP_ID3_VALID_F))) + group0 = op->group_id0; lkey = mlx5_mr_addr2mr_bh(priv->pd, 0, &priv->mr_scache, &qp->mr_ctrl, rte_pktmbuf_mtod(op->mbuf, uintptr_t), @@ -116,9 +130,8 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp, set_wqe_ctrl_seg((struct mlx5_wqe_ctrl_seg *)wqe, sq->pi, MLX5_OPCODE_MMO, MLX5_OPC_MOD_MMO_REGEX, sq->obj->id, 0, ds, 0, 0); - set_regex_ctrl_seg(wqe + 12, 0, op->group_id0, op->group_id1, - op->group_id2, - op->group_id3, 0); + set_regex_ctrl_seg(wqe + 12, 0, group0, group1, group2, group3, + 0); struct mlx5_wqe_data_seg *input_seg = (struct mlx5_wqe_data_seg *)(wqe + MLX5_REGEX_WQE_GATHER_OFFSET); diff --git a/dpdk/drivers/regex/mlx5/mlx5_rxp.c b/dpdk/drivers/regex/mlx5/mlx5_rxp.c index fcbc766441..0753ab3bdc 100644 --- a/dpdk/drivers/regex/mlx5/mlx5_rxp.c +++ b/dpdk/drivers/regex/mlx5/mlx5_rxp.c @@ -115,11 +115,10 @@ mlx5_regex_info_get(struct rte_regexdev *dev __rte_unused, info->max_payload_size = MLX5_REGEX_MAX_PAYLOAD_SIZE; info->max_rules_per_group = MLX5_REGEX_MAX_RULES_PER_GROUP; info->max_groups = MLX5_REGEX_MAX_GROUPS; - info->max_queue_pairs = 1; info->regexdev_capa = RTE_REGEXDEV_SUPP_PCRE_GREEDY_F | RTE_REGEXDEV_CAPA_QUEUE_PAIR_OOS_F; info->rule_flags = 0; - info->max_queue_pairs = 10; + info->max_queue_pairs = UINT16_MAX; return 0; } @@ -892,7 +891,7 @@ rxp_db_setup(struct mlx5_regex_priv *priv) /* Setup database memories for both RXP engines + reprogram memory. 
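The allocation below now asks rte_malloc() for 2 MiB alignment (1 << 21 bytes) instead of the default, presumably so that each database can start on a hugepage boundary.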
*/ for (i = 0; i < (priv->nb_engines + MLX5_RXP_EM_COUNT); i++) { - priv->db[i].ptr = rte_malloc("", MLX5_MAX_DB_SIZE, 0); + priv->db[i].ptr = rte_malloc("", MLX5_MAX_DB_SIZE, 1 << 21); if (!priv->db[i].ptr) { DRV_LOG(ERR, "Failed to alloc db memory!"); ret = ENODEV; diff --git a/dpdk/drivers/regex/octeontx2/otx2_regexdev.c b/dpdk/drivers/regex/octeontx2/otx2_regexdev.c index 39eed7a20d..b6e55853e9 100644 --- a/dpdk/drivers/regex/octeontx2/otx2_regexdev.c +++ b/dpdk/drivers/regex/octeontx2/otx2_regexdev.c @@ -988,6 +988,9 @@ static struct rte_pci_id pci_id_ree_table[] = { RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_RVU_REE_PF) }, + { + .vendor_id = 0, + } }; static struct rte_pci_driver otx2_regexdev_pmd = { diff --git a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.c b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.c index b64f364eb7..0b2f1ab68e 100644 --- a/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.c +++ b/dpdk/drivers/vdpa/mlx5/mlx5_vdpa.c @@ -295,6 +295,8 @@ mlx5_vdpa_dev_close(int vid) } priv->configured = 0; priv->vid = 0; + /* The mutex may stay locked after event thread cancel - initiate it. */ + pthread_mutex_init(&priv->vq_config_lock, NULL); DRV_LOG(INFO, "vDPA device %d was closed.", vid); return ret; } diff --git a/dpdk/examples/eventdev_pipeline/main.c b/dpdk/examples/eventdev_pipeline/main.c index 823f8b51c2..3dbef6ed45 100644 --- a/dpdk/examples/eventdev_pipeline/main.c +++ b/dpdk/examples/eventdev_pipeline/main.c @@ -22,6 +22,32 @@ struct config_data cdata = { .worker_cq_depth = 16 }; +static void +dump_core_info(unsigned int lcore_id, struct worker_data *data, + unsigned int worker_idx) +{ + if (fdata->rx_core[lcore_id]) + printf( + "[%s()] lcore %d executing NIC Rx\n", + __func__, lcore_id); + + if (fdata->tx_core[lcore_id]) + printf( + "[%s()] lcore %d executing NIC Tx\n", + __func__, lcore_id); + + if (fdata->sched_core[lcore_id]) + printf( + "[%s()] lcore %d executing scheduler\n", + __func__, lcore_id); + + if (fdata->worker_core[lcore_id]) + printf( + "[%s()] lcore %d executing worker, using eventdev port %u\n", + __func__, lcore_id, + data[worker_idx].port_id); +} + static bool core_in_use(unsigned int lcore_id) { return (fdata->rx_core[lcore_id] || fdata->sched_core[lcore_id] || @@ -239,8 +265,15 @@ parse_app_args(int argc, char **argv) if (fdata->worker_core[i]) cdata.num_workers++; - if (core_in_use(i)) + if (core_in_use(i)) { + if (!rte_lcore_is_enabled(i)) { + printf("lcore %d is not enabled in lcore list\n", + i); + rte_exit(EXIT_FAILURE, + "check lcore params failed\n"); + } cdata.active_cores++; + } } } @@ -280,7 +313,6 @@ static void signal_handler(int signum) { static uint8_t once; - uint16_t portid; if (fdata->done) rte_exit(1, "Exiting on signal %d\n", signum); @@ -291,17 +323,6 @@ signal_handler(int signum) rte_event_dev_dump(0, stdout); once = 1; fdata->done = 1; - rte_smp_wmb(); - - RTE_ETH_FOREACH_DEV(portid) { - rte_event_eth_rx_adapter_stop(portid); - rte_event_eth_tx_adapter_stop(portid); - if (rte_eth_dev_stop(portid) < 0) - printf("Failed to stop port %u", portid); - } - - rte_eal_mp_wait_lcore(); - } if (signum == SIGTSTP) rte_event_dev_dump(0, stdout); @@ -406,25 +427,7 @@ main(int argc, char **argv) !fdata->sched_core[lcore_id]) continue; - if (fdata->rx_core[lcore_id]) - printf( - "[%s()] lcore %d executing NIC Rx\n", - __func__, lcore_id); - - if (fdata->tx_core[lcore_id]) - printf( - "[%s()] lcore %d executing NIC Tx\n", - __func__, lcore_id); - - if (fdata->sched_core[lcore_id]) - printf("[%s()] lcore %d executing scheduler\n", - __func__, lcore_id); - - if 
(fdata->worker_core[lcore_id]) - printf( - "[%s()] lcore %d executing worker, using eventdev port %u\n", - __func__, lcore_id, - worker_data[worker_idx].port_id); + dump_core_info(lcore_id, worker_data, worker_idx); err = rte_eal_remote_launch(fdata->cap.worker, &worker_data[worker_idx], lcore_id); @@ -439,8 +442,13 @@ main(int argc, char **argv) lcore_id = rte_lcore_id(); - if (core_in_use(lcore_id)) - fdata->cap.worker(&worker_data[worker_idx++]); + if (core_in_use(lcore_id)) { + dump_core_info(lcore_id, worker_data, worker_idx); + fdata->cap.worker(&worker_data[worker_idx]); + + if (fdata->worker_core[lcore_id]) + worker_idx++; + } rte_eal_mp_wait_lcore(); @@ -465,6 +473,10 @@ main(int argc, char **argv) } RTE_ETH_FOREACH_DEV(portid) { + rte_event_eth_rx_adapter_stop(portid); + rte_event_eth_tx_adapter_stop(portid); + if (rte_eth_dev_stop(portid) < 0) + printf("Failed to stop port %u", portid); rte_eth_dev_close(portid); } diff --git a/dpdk/examples/l3fwd/main.c b/dpdk/examples/l3fwd/main.c index d62dec434c..bb49e5faff 100644 --- a/dpdk/examples/l3fwd/main.c +++ b/dpdk/examples/l3fwd/main.c @@ -48,7 +48,7 @@ #include "l3fwd.h" #include "l3fwd_event.h" -#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS +#define MAX_TX_QUEUE_PER_PORT RTE_MAX_LCORE #define MAX_RX_QUEUE_PER_PORT 128 #define MAX_LCORE_PARAMS 1024 diff --git a/dpdk/examples/meson.build b/dpdk/examples/meson.build index 46ec80919e..b9ab24223f 100644 --- a/dpdk/examples/meson.build +++ b/dpdk/examples/meson.build @@ -63,6 +63,10 @@ default_cflags = machine_args if cc.has_argument('-Wno-format-truncation') default_cflags += '-Wno-format-truncation' endif +default_ldflags = dpdk_extra_ldflags +if get_option('default_library') == 'static' and not is_windows + default_ldflags += ['-Wl,--export-dynamic'] +endif foreach example: examples name = example.split('/')[-1] @@ -70,6 +74,7 @@ foreach example: examples sources = [] allow_experimental_apis = false cflags = default_cflags + ldflags = default_ldflags ext_deps = [execinfo] includes = [include_directories(example)] @@ -91,7 +96,7 @@ foreach example: examples executable('dpdk-' + name, sources, include_directories: includes, link_whole: link_whole_libs, - link_args: dpdk_extra_ldflags, + link_args: ldflags, c_args: cflags, dependencies: dep_objs) elif not allow_skips diff --git a/dpdk/examples/pipeline/cli.c b/dpdk/examples/pipeline/cli.c index d0150cfcf6..e97e120606 100644 --- a/dpdk/examples/pipeline/cli.c +++ b/dpdk/examples/pipeline/cli.c @@ -1294,7 +1294,7 @@ cli_process(char *in, char *out, size_t out_size, void *obj) } if (strcmp(tokens[0], "link") == 0) { - if (strcmp(tokens[1], "show") == 0) { + if ((n_tokens >= 2) && (strcmp(tokens[1], "show") == 0)) { cmd_link_show(tokens, n_tokens, out, out_size, obj); return; } diff --git a/dpdk/examples/pipeline/examples/vxlan_table.py b/dpdk/examples/pipeline/examples/vxlan_table.py old mode 100644 new mode 100755 diff --git a/dpdk/examples/vm_power_manager/channel_manager.c b/dpdk/examples/vm_power_manager/channel_manager.c index a26315051b..0a28cb643b 100644 --- a/dpdk/examples/vm_power_manager/channel_manager.c +++ b/dpdk/examples/vm_power_manager/channel_manager.c @@ -27,7 +27,6 @@ #include #include "channel_manager.h" -#include "channel_commands.h" #include "channel_monitor.h" #include "power_manager.h" diff --git a/dpdk/examples/vm_power_manager/channel_monitor.c b/dpdk/examples/vm_power_manager/channel_monitor.c index 228f06803d..99f81544d7 100644 --- a/dpdk/examples/vm_power_manager/channel_monitor.c +++ 
b/dpdk/examples/vm_power_manager/channel_monitor.c @@ -35,7 +35,6 @@ #include #include "channel_monitor.h" -#include "channel_commands.h" #include "channel_manager.h" #include "power_manager.h" #include "oob_monitor.h" @@ -108,7 +107,7 @@ str_to_ether_addr(const char *a, struct rte_ether_addr *ether_addr) } static int -set_policy_mac(struct channel_packet *pkt, int idx, char *mac) +set_policy_mac(struct rte_power_channel_packet *pkt, int idx, char *mac) { union PFID pfid; int ret; @@ -165,7 +164,7 @@ get_resource_id_from_vmname(const char *vm_name) } static int -parse_json_to_pkt(json_t *element, struct channel_packet *pkt, +parse_json_to_pkt(json_t *element, struct rte_power_channel_packet *pkt, const char *vm_name) { const char *key; @@ -173,14 +172,14 @@ parse_json_to_pkt(json_t *element, struct channel_packet *pkt, int ret; int resource_id; - memset(pkt, 0, sizeof(struct channel_packet)); + memset(pkt, 0, sizeof(*pkt)); pkt->nb_mac_to_monitor = 0; pkt->t_boost_status.tbEnabled = false; - pkt->workload = LOW; - pkt->policy_to_use = TIME; - pkt->command = PKT_POLICY; - pkt->core_type = CORE_TYPE_PHYSICAL; + pkt->workload = RTE_POWER_WL_LOW; + pkt->policy_to_use = RTE_POWER_POLICY_TIME; + pkt->command = RTE_POWER_PKT_POLICY; + pkt->core_type = RTE_POWER_CORE_TYPE_PHYSICAL; if (vm_name == NULL) { RTE_LOG(ERR, CHANNEL_MONITOR, @@ -203,11 +202,11 @@ parse_json_to_pkt(json_t *element, struct channel_packet *pkt, char command[32]; strlcpy(command, json_string_value(value), 32); if (!strcmp(command, "power")) { - pkt->command = CPU_POWER; + pkt->command = RTE_POWER_CPU_POWER; } else if (!strcmp(command, "create")) { - pkt->command = PKT_POLICY; + pkt->command = RTE_POWER_PKT_POLICY; } else if (!strcmp(command, "destroy")) { - pkt->command = PKT_POLICY_REMOVE; + pkt->command = RTE_POWER_PKT_POLICY_REMOVE; } else { RTE_LOG(ERR, CHANNEL_MONITOR, "Invalid command received in JSON\n"); @@ -217,13 +216,17 @@ parse_json_to_pkt(json_t *element, struct channel_packet *pkt, char command[32]; strlcpy(command, json_string_value(value), 32); if (!strcmp(command, "TIME")) { - pkt->policy_to_use = TIME; + pkt->policy_to_use = + RTE_POWER_POLICY_TIME; } else if (!strcmp(command, "TRAFFIC")) { - pkt->policy_to_use = TRAFFIC; + pkt->policy_to_use = + RTE_POWER_POLICY_TRAFFIC; } else if (!strcmp(command, "WORKLOAD")) { - pkt->policy_to_use = WORKLOAD; + pkt->policy_to_use = + RTE_POWER_POLICY_WORKLOAD; } else if (!strcmp(command, "BRANCH_RATIO")) { - pkt->policy_to_use = BRANCH_RATIO; + pkt->policy_to_use = + RTE_POWER_POLICY_BRANCH_RATIO; } else { RTE_LOG(ERR, CHANNEL_MONITOR, "Wrong policy_type received in JSON\n"); @@ -233,11 +236,11 @@ parse_json_to_pkt(json_t *element, struct channel_packet *pkt, char command[32]; strlcpy(command, json_string_value(value), 32); if (!strcmp(command, "HIGH")) { - pkt->workload = HIGH; + pkt->workload = RTE_POWER_WL_HIGH; } else if (!strcmp(command, "MEDIUM")) { - pkt->workload = MEDIUM; + pkt->workload = RTE_POWER_WL_MEDIUM; } else if (!strcmp(command, "LOW")) { - pkt->workload = LOW; + pkt->workload = RTE_POWER_WL_LOW; } else { RTE_LOG(ERR, CHANNEL_MONITOR, "Wrong workload received in JSON\n"); @@ -283,17 +286,17 @@ parse_json_to_pkt(json_t *element, struct channel_packet *pkt, char unit[32]; strlcpy(unit, json_string_value(value), 32); if (!strcmp(unit, "SCALE_UP")) { - pkt->unit = CPU_POWER_SCALE_UP; + pkt->unit = RTE_POWER_SCALE_UP; } else if (!strcmp(unit, "SCALE_DOWN")) { - pkt->unit = CPU_POWER_SCALE_DOWN; + pkt->unit = RTE_POWER_SCALE_DOWN; } else if (!strcmp(unit, 
"SCALE_MAX")) { - pkt->unit = CPU_POWER_SCALE_MAX; + pkt->unit = RTE_POWER_SCALE_MAX; } else if (!strcmp(unit, "SCALE_MIN")) { - pkt->unit = CPU_POWER_SCALE_MIN; + pkt->unit = RTE_POWER_SCALE_MIN; } else if (!strcmp(unit, "ENABLE_TURBO")) { - pkt->unit = CPU_POWER_ENABLE_TURBO; + pkt->unit = RTE_POWER_ENABLE_TURBO; } else if (!strcmp(unit, "DISABLE_TURBO")) { - pkt->unit = CPU_POWER_DISABLE_TURBO; + pkt->unit = RTE_POWER_DISABLE_TURBO; } else { RTE_LOG(ERR, CHANNEL_MONITOR, "Invalid command received in JSON\n"); @@ -312,7 +315,7 @@ parse_json_to_pkt(json_t *element, struct channel_packet *pkt, vm_name); return -1; } - strlcpy(pkt->vm_name, vm_name, VM_MAX_NAME_SZ); + strlcpy(pkt->vm_name, vm_name, RTE_POWER_VM_MAX_NAME_SZ); pkt->resource_id = resource_id; } return 0; @@ -367,7 +370,7 @@ pcpu_monitor(struct policy *pol, struct core_info *ci, int pcpu, int count) { int ret = 0; - if (pol->pkt.policy_to_use == BRANCH_RATIO) { + if (pol->pkt.policy_to_use == RTE_POWER_POLICY_BRANCH_RATIO) { ci->cd[pcpu].oob_enabled = 1; ret = add_core_to_monitor(pcpu); if (ret == 0) @@ -407,7 +410,7 @@ get_pcpu_to_control(struct policy *pol) * differenciate between them when adding them to the branch monitor. * Virtual cores need to be converted to physical cores. */ - if (pol->pkt.core_type == CORE_TYPE_VIRTUAL) { + if (pol->pkt.core_type == RTE_POWER_CORE_TYPE_VIRTUAL) { /* * If the cores in the policy are virtual, we need to map them * to physical core. We look up the vm info and use that for @@ -463,7 +466,7 @@ get_pfid(struct policy *pol) } static int -update_policy(struct channel_packet *pkt) +update_policy(struct rte_power_channel_packet *pkt) { unsigned int updated = 0; @@ -479,7 +482,8 @@ update_policy(struct channel_packet *pkt) policies[i].pkt = *pkt; get_pcpu_to_control(&policies[i]); /* Check Eth dev only for Traffic policy */ - if (policies[i].pkt.policy_to_use == TRAFFIC) { + if (policies[i].pkt.policy_to_use == + RTE_POWER_POLICY_TRAFFIC) { if (get_pfid(&policies[i]) < 0) { updated = 1; break; @@ -496,7 +500,8 @@ update_policy(struct channel_packet *pkt) policies[i].pkt = *pkt; get_pcpu_to_control(&policies[i]); /* Check Eth dev only for Traffic policy */ - if (policies[i].pkt.policy_to_use == TRAFFIC) { + if (policies[i].pkt.policy_to_use == + RTE_POWER_POLICY_TRAFFIC) { if (get_pfid(&policies[i]) < 0) { updated = 1; break; @@ -512,7 +517,7 @@ update_policy(struct channel_packet *pkt) } static int -remove_policy(struct channel_packet *pkt __rte_unused) +remove_policy(struct rte_power_channel_packet *pkt __rte_unused) { unsigned int i; @@ -615,7 +620,7 @@ apply_time_profile(struct policy *pol) /* Format the date and time, down to a single second. 
*/ strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm); - for (x = 0; x < HOURS; x++) { + for (x = 0; x < RTE_POWER_HOURS_PER_DAY; x++) { if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) { for (count = 0; count < pol->pkt.num_vcpu; count++) { @@ -648,19 +653,19 @@ apply_workload_profile(struct policy *pol) int count; - if (pol->pkt.workload == HIGH) { + if (pol->pkt.workload == RTE_POWER_WL_HIGH) { for (count = 0; count < pol->pkt.num_vcpu; count++) { if (pol->core_share[count].status != 1) power_manager_scale_core_max( pol->core_share[count].pcpu); } - } else if (pol->pkt.workload == MEDIUM) { + } else if (pol->pkt.workload == RTE_POWER_WL_MEDIUM) { for (count = 0; count < pol->pkt.num_vcpu; count++) { if (pol->core_share[count].status != 1) power_manager_scale_core_med( pol->core_share[count].pcpu); } - } else if (pol->pkt.workload == LOW) { + } else if (pol->pkt.workload == RTE_POWER_WL_LOW) { for (count = 0; count < pol->pkt.num_vcpu; count++) { if (pol->core_share[count].status != 1) power_manager_scale_core_min( @@ -673,14 +678,14 @@ static void apply_policy(struct policy *pol) { - struct channel_packet *pkt = &pol->pkt; + struct rte_power_channel_packet *pkt = &pol->pkt; /*Check policy to use*/ - if (pkt->policy_to_use == TRAFFIC) + if (pkt->policy_to_use == RTE_POWER_POLICY_TRAFFIC) apply_traffic_profile(pol); - else if (pkt->policy_to_use == TIME) + else if (pkt->policy_to_use == RTE_POWER_POLICY_TIME) apply_time_profile(pol); - else if (pkt->policy_to_use == WORKLOAD) + else if (pkt->policy_to_use == RTE_POWER_POLICY_WORKLOAD) apply_workload_profile(pol); } @@ -715,24 +720,24 @@ write_binary_packet(void *buffer, } static int -send_freq(struct channel_packet *pkt, +send_freq(struct rte_power_channel_packet *pkt, struct channel_info *chan_info, bool freq_list) { unsigned int vcore_id = pkt->resource_id; - struct channel_packet_freq_list channel_pkt_freq_list; + struct rte_power_channel_packet_freq_list channel_pkt_freq_list; struct vm_info info; if (get_info_vm(pkt->vm_name, &info) != 0) return -1; - if (!freq_list && vcore_id >= MAX_VCPU_PER_VM) + if (!freq_list && vcore_id >= RTE_POWER_MAX_VCPU_PER_VM) return -1; if (!info.allow_query) return -1; - channel_pkt_freq_list.command = CPU_POWER_FREQ_LIST; + channel_pkt_freq_list.command = RTE_POWER_FREQ_LIST; channel_pkt_freq_list.num_vcpu = info.num_vcpus; if (freq_list) { @@ -751,12 +756,12 @@ send_freq(struct channel_packet *pkt, } static int -send_capabilities(struct channel_packet *pkt, +send_capabilities(struct rte_power_channel_packet *pkt, struct channel_info *chan_info, bool list_requested) { unsigned int vcore_id = pkt->resource_id; - struct channel_packet_caps_list channel_pkt_caps_list; + struct rte_power_channel_packet_caps_list channel_pkt_caps_list; struct vm_info info; struct rte_power_core_capabilities caps; int ret; @@ -764,13 +769,13 @@ send_capabilities(struct channel_packet *pkt, if (get_info_vm(pkt->vm_name, &info) != 0) return -1; - if (!list_requested && vcore_id >= MAX_VCPU_PER_VM) + if (!list_requested && vcore_id >= RTE_POWER_MAX_VCPU_PER_VM) return -1; if (!info.allow_query) return -1; - channel_pkt_caps_list.command = CPU_POWER_CAPS_LIST; + channel_pkt_caps_list.command = RTE_POWER_CAPS_LIST; channel_pkt_caps_list.num_vcpu = info.num_vcpus; if (list_requested) { @@ -805,18 +810,19 @@ send_capabilities(struct channel_packet *pkt, } static int -send_ack_for_received_cmd(struct channel_packet *pkt, +send_ack_for_received_cmd(struct rte_power_channel_packet *pkt, struct channel_info 
*chan_info, uint32_t command) { pkt->command = command; return write_binary_packet(pkt, - sizeof(struct channel_packet), + sizeof(*pkt), chan_info); } static int -process_request(struct channel_packet *pkt, struct channel_info *chan_info) +process_request(struct rte_power_channel_packet *pkt, + struct channel_info *chan_info) { int ret; @@ -827,10 +833,10 @@ process_request(struct channel_packet *pkt, struct channel_info *chan_info) CHANNEL_MGR_CHANNEL_PROCESSING) == 0) return -1; - if (pkt->command == CPU_POWER) { + if (pkt->command == RTE_POWER_CPU_POWER) { unsigned int core_num; - if (pkt->core_type == CORE_TYPE_VIRTUAL) + if (pkt->core_type == RTE_POWER_CORE_TYPE_VIRTUAL) core_num = get_pcpu(chan_info, pkt->resource_id); else core_num = pkt->resource_id; @@ -842,22 +848,22 @@ process_request(struct channel_packet *pkt, struct channel_info *chan_info) bool valid_unit = true; switch (pkt->unit) { - case(CPU_POWER_SCALE_MIN): + case(RTE_POWER_SCALE_MIN): scale_res = power_manager_scale_core_min(core_num); break; - case(CPU_POWER_SCALE_MAX): + case(RTE_POWER_SCALE_MAX): scale_res = power_manager_scale_core_max(core_num); break; - case(CPU_POWER_SCALE_DOWN): + case(RTE_POWER_SCALE_DOWN): scale_res = power_manager_scale_core_down(core_num); break; - case(CPU_POWER_SCALE_UP): + case(RTE_POWER_SCALE_UP): scale_res = power_manager_scale_core_up(core_num); break; - case(CPU_POWER_ENABLE_TURBO): + case(RTE_POWER_ENABLE_TURBO): scale_res = power_manager_enable_turbo_core(core_num); break; - case(CPU_POWER_DISABLE_TURBO): + case(RTE_POWER_DISABLE_TURBO): scale_res = power_manager_disable_turbo_core(core_num); break; default: @@ -869,8 +875,8 @@ process_request(struct channel_packet *pkt, struct channel_info *chan_info) ret = send_ack_for_received_cmd(pkt, chan_info, scale_res >= 0 ? 
- CPU_POWER_CMD_ACK : - CPU_POWER_CMD_NACK); + RTE_POWER_CMD_ACK : + RTE_POWER_CMD_NACK); if (ret < 0) RTE_LOG(ERR, CHANNEL_MONITOR, "Error during sending ack command.\n"); } else @@ -878,19 +884,19 @@ process_request(struct channel_packet *pkt, struct channel_info *chan_info) } - if (pkt->command == PKT_POLICY) { + if (pkt->command == RTE_POWER_PKT_POLICY) { RTE_LOG(INFO, CHANNEL_MONITOR, "Processing policy request %s\n", pkt->vm_name); int ret = send_ack_for_received_cmd(pkt, chan_info, - CPU_POWER_CMD_ACK); + RTE_POWER_CMD_ACK); if (ret < 0) RTE_LOG(ERR, CHANNEL_MONITOR, "Error during sending ack command.\n"); update_policy(pkt); policy_is_set = 1; } - if (pkt->command == PKT_POLICY_REMOVE) { + if (pkt->command == RTE_POWER_PKT_POLICY_REMOVE) { ret = remove_policy(pkt); if (ret == 0) RTE_LOG(INFO, CHANNEL_MONITOR, @@ -900,26 +906,26 @@ process_request(struct channel_packet *pkt, struct channel_info *chan_info) "Policy %s does not exist\n", pkt->vm_name); } - if (pkt->command == CPU_POWER_QUERY_FREQ_LIST || - pkt->command == CPU_POWER_QUERY_FREQ) { + if (pkt->command == RTE_POWER_QUERY_FREQ_LIST || + pkt->command == RTE_POWER_QUERY_FREQ) { RTE_LOG(INFO, CHANNEL_MONITOR, "Frequency for %s requested.\n", pkt->vm_name); int ret = send_freq(pkt, chan_info, - pkt->command == CPU_POWER_QUERY_FREQ_LIST); + pkt->command == RTE_POWER_QUERY_FREQ_LIST); if (ret < 0) RTE_LOG(ERR, CHANNEL_MONITOR, "Error during frequency sending.\n"); } - if (pkt->command == CPU_POWER_QUERY_CAPS_LIST || - pkt->command == CPU_POWER_QUERY_CAPS) { + if (pkt->command == RTE_POWER_QUERY_CAPS_LIST || + pkt->command == RTE_POWER_QUERY_CAPS) { RTE_LOG(INFO, CHANNEL_MONITOR, "Capabilities for %s requested.\n", pkt->vm_name); int ret = send_capabilities(pkt, chan_info, - pkt->command == CPU_POWER_QUERY_CAPS_LIST); + pkt->command == RTE_POWER_QUERY_CAPS_LIST); if (ret < 0) RTE_LOG(ERR, CHANNEL_MONITOR, "Error during sending capabilities.\n"); } @@ -988,7 +994,7 @@ channel_monitor_init(void) static void read_binary_packet(struct channel_info *chan_info) { - struct channel_packet pkt; + struct rte_power_channel_packet pkt; void *buffer = &pkt; int buffer_len = sizeof(pkt); int n_bytes, err = 0; @@ -1019,7 +1025,7 @@ read_binary_packet(struct channel_info *chan_info) static void read_json_packet(struct channel_info *chan_info) { - struct channel_packet pkt; + struct rte_power_channel_packet pkt; int n_bytes, ret; json_t *root; json_error_t error; @@ -1063,7 +1069,7 @@ read_json_packet(struct channel_info *chan_info) /* * Because our data is now in the json * object, we can overwrite the pkt - * with a channel_packet struct, using + * with a rte_power_channel_packet struct, using * parse_json_to_pkt() */ ret = parse_json_to_pkt(root, &pkt, resource_name); diff --git a/dpdk/examples/vm_power_manager/channel_monitor.h b/dpdk/examples/vm_power_manager/channel_monitor.h index 7362a80d26..2b38c554b5 100644 --- a/dpdk/examples/vm_power_manager/channel_monitor.h +++ b/dpdk/examples/vm_power_manager/channel_monitor.h @@ -5,8 +5,9 @@ #ifndef CHANNEL_MONITOR_H_ #define CHANNEL_MONITOR_H_ +#include + #include "channel_manager.h" -#include "channel_commands.h" struct core_share { unsigned int pcpu; @@ -18,11 +19,11 @@ struct core_share { }; struct policy { - struct channel_packet pkt; - uint32_t pfid[MAX_VFS]; - uint32_t port[MAX_VFS]; + struct rte_power_channel_packet pkt; + uint32_t pfid[RTE_POWER_MAX_VFS]; + uint32_t port[RTE_POWER_MAX_VFS]; unsigned int enabled; - struct core_share core_share[MAX_VCPU_PER_VM]; + struct core_share 
core_share[RTE_POWER_MAX_VCPU_PER_VM]; }; #ifdef __cplusplus diff --git a/dpdk/examples/vm_power_manager/guest_cli/main.c b/dpdk/examples/vm_power_manager/guest_cli/main.c index f63b3c988a..4e17f7fb90 100644 --- a/dpdk/examples/vm_power_manager/guest_cli/main.c +++ b/dpdk/examples/vm_power_manager/guest_cli/main.c @@ -48,10 +48,10 @@ parse_args(int argc, char **argv) { "policy", required_argument, 0, 'o'}, {NULL, 0, 0, 0} }; - struct channel_packet *policy; + struct rte_power_channel_packet *policy; unsigned short int hours[MAX_HOURS]; - unsigned short int cores[MAX_VCPU_PER_VM]; - unsigned short int ports[MAX_VCPU_PER_VM]; + unsigned short int cores[RTE_POWER_MAX_VCPU_PER_VM]; + unsigned short int ports[RTE_POWER_MAX_VCPU_PER_VM]; int i, cnt, idx; policy = get_policy(); @@ -69,7 +69,8 @@ parse_args(int argc, char **argv) switch (opt) { /* portmask */ case 'n': - strlcpy(policy->vm_name, optarg, VM_MAX_NAME_SZ); + strlcpy(policy->vm_name, optarg, + RTE_POWER_VM_MAX_NAME_SZ); printf("Setting VM Name to [%s]\n", policy->vm_name); break; case 'b': @@ -97,14 +98,15 @@ parse_args(int argc, char **argv) } break; case 'l': - cnt = parse_set(optarg, cores, MAX_VCPU_PER_VM); + cnt = parse_set(optarg, cores, + RTE_POWER_MAX_VCPU_PER_VM); if (cnt < 0) { printf("Invalid value passed to vcpu-list - [%s]\n", optarg); break; } idx = 0; - for (i = 0; i < MAX_VCPU_PER_VM; i++) { + for (i = 0; i < RTE_POWER_MAX_VCPU_PER_VM; i++) { if (cores[i]) { printf("***Using core %d\n", i); policy->vcpu_to_control[idx++] = i; @@ -114,14 +116,15 @@ parse_args(int argc, char **argv) printf("Total cores: %d\n", idx); break; case 'p': - cnt = parse_set(optarg, ports, MAX_VCPU_PER_VM); + cnt = parse_set(optarg, ports, + RTE_POWER_MAX_VCPU_PER_VM); if (cnt < 0) { printf("Invalid value passed to port-list - [%s]\n", optarg); break; } idx = 0; - for (i = 0; i < MAX_VCPU_PER_VM; i++) { + for (i = 0; i < RTE_POWER_MAX_VCPU_PER_VM; i++) { if (ports[i]) { printf("***Using port %d\n", i); if (set_policy_mac(i, idx++) != 0) { @@ -135,13 +138,17 @@ parse_args(int argc, char **argv) break; case 'o': if (!strcmp(optarg, "TRAFFIC")) - policy->policy_to_use = TRAFFIC; + policy->policy_to_use = + RTE_POWER_POLICY_TRAFFIC; else if (!strcmp(optarg, "TIME")) - policy->policy_to_use = TIME; + policy->policy_to_use = + RTE_POWER_POLICY_TIME; else if (!strcmp(optarg, "WORKLOAD")) - policy->policy_to_use = WORKLOAD; + policy->policy_to_use = + RTE_POWER_POLICY_WORKLOAD; else if (!strcmp(optarg, "BRANCH_RATIO")) - policy->policy_to_use = BRANCH_RATIO; + policy->policy_to_use = + RTE_POWER_POLICY_BRANCH_RATIO; else { printf("Invalid policy specified: %s\n", optarg); diff --git a/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c b/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c index cf1636e784..0bf5774ffc 100644 --- a/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c +++ b/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c @@ -19,7 +19,6 @@ #include #include -#include #include "vm_power_cli_guest.h" @@ -38,9 +37,9 @@ union PFID { uint64_t pfid; }; -static struct channel_packet policy; +static struct rte_power_channel_packet policy; -struct channel_packet * +struct rte_power_channel_packet * get_policy(void) { return &policy; @@ -49,7 +48,7 @@ get_policy(void) int set_policy_mac(int port, int idx) { - struct channel_packet *policy; + struct rte_power_channel_packet *policy; union PFID pfid; int ret; @@ -73,7 +72,7 @@ set_policy_mac(int port, int idx) } int -set_policy_defaults(struct channel_packet 
*pkt) +set_policy_defaults(struct rte_power_channel_packet *pkt) { int ret; @@ -103,10 +102,10 @@ set_policy_defaults(struct channel_packet *pkt) pkt->timer_policy.hours_to_use_traffic_profile[0] = 8; pkt->timer_policy.hours_to_use_traffic_profile[1] = 10; - pkt->core_type = CORE_TYPE_VIRTUAL; - pkt->workload = LOW; - pkt->policy_to_use = TIME; - pkt->command = PKT_POLICY; + pkt->core_type = RTE_POWER_CORE_TYPE_VIRTUAL; + pkt->workload = RTE_POWER_WL_LOW; + pkt->policy_to_use = RTE_POWER_POLICY_TIME; + pkt->command = RTE_POWER_PKT_POLICY; strlcpy(pkt->vm_name, "ubuntu2", sizeof(pkt->vm_name)); return 0; @@ -145,7 +144,7 @@ struct cmd_freq_list_result { }; static int -query_data(struct channel_packet *pkt, unsigned int lcore_id) +query_data(struct rte_power_channel_packet *pkt, unsigned int lcore_id) { int ret; ret = rte_power_guest_channel_send_msg(pkt, lcore_id); @@ -157,19 +156,19 @@ query_data(struct channel_packet *pkt, unsigned int lcore_id) } static int -receive_freq_list(struct channel_packet_freq_list *pkt_freq_list, +receive_freq_list(struct rte_power_channel_packet_freq_list *pkt_freq_list, unsigned int lcore_id) { int ret; ret = rte_power_guest_channel_receive_msg(pkt_freq_list, - sizeof(struct channel_packet_freq_list), + sizeof(*pkt_freq_list), lcore_id); if (ret < 0) { RTE_LOG(ERR, GUEST_CLI, "Error receiving message.\n"); return -1; } - if (pkt_freq_list->command != CPU_POWER_FREQ_LIST) { + if (pkt_freq_list->command != RTE_POWER_FREQ_LIST) { RTE_LOG(ERR, GUEST_CLI, "Unexpected message received.\n"); return -1; } @@ -183,14 +182,14 @@ cmd_query_freq_list_parsed(void *parsed_result, { struct cmd_freq_list_result *res = parsed_result; unsigned int lcore_id; - struct channel_packet_freq_list pkt_freq_list; - struct channel_packet pkt; + struct rte_power_channel_packet_freq_list pkt_freq_list; + struct rte_power_channel_packet pkt; bool query_list = false; int ret; char *ep; - memset(&pkt, 0, sizeof(struct channel_packet)); - memset(&pkt_freq_list, 0, sizeof(struct channel_packet_freq_list)); + memset(&pkt, 0, sizeof(pkt)); + memset(&pkt_freq_list, 0, sizeof(pkt_freq_list)); if (!strcmp(res->cpu_num, "all")) { @@ -203,18 +202,18 @@ cmd_query_freq_list_parsed(void *parsed_result, return; } - pkt.command = CPU_POWER_QUERY_FREQ_LIST; + pkt.command = RTE_POWER_QUERY_FREQ_LIST; strlcpy(pkt.vm_name, policy.vm_name, sizeof(pkt.vm_name)); query_list = true; } else { errno = 0; lcore_id = (unsigned int)strtol(res->cpu_num, &ep, 10); - if (errno != 0 || lcore_id >= MAX_VCPU_PER_VM || + if (errno != 0 || lcore_id >= RTE_POWER_MAX_VCPU_PER_VM || ep == res->cpu_num) { cmdline_printf(cl, "Invalid parameter provided.\n"); return; } - pkt.command = CPU_POWER_QUERY_FREQ; + pkt.command = RTE_POWER_QUERY_FREQ; strlcpy(pkt.vm_name, policy.vm_name, sizeof(pkt.vm_name)); pkt.resource_id = lcore_id; } @@ -267,19 +266,19 @@ struct cmd_query_caps_result { }; static int -receive_capabilities(struct channel_packet_caps_list *pkt_caps_list, +receive_capabilities(struct rte_power_channel_packet_caps_list *pkt_caps_list, unsigned int lcore_id) { int ret; ret = rte_power_guest_channel_receive_msg(pkt_caps_list, - sizeof(struct channel_packet_caps_list), + sizeof(*pkt_caps_list), lcore_id); if (ret < 0) { RTE_LOG(ERR, GUEST_CLI, "Error receiving message.\n"); return -1; } - if (pkt_caps_list->command != CPU_POWER_CAPS_LIST) { + if (pkt_caps_list->command != RTE_POWER_CAPS_LIST) { RTE_LOG(ERR, GUEST_CLI, "Unexpected message received.\n"); return -1; } @@ -293,14 +292,14 @@ cmd_query_caps_list_parsed(void 
*parsed_result, { struct cmd_query_caps_result *res = parsed_result; unsigned int lcore_id; - struct channel_packet_caps_list pkt_caps_list; - struct channel_packet pkt; + struct rte_power_channel_packet_caps_list pkt_caps_list; + struct rte_power_channel_packet pkt; bool query_list = false; int ret; char *ep; - memset(&pkt, 0, sizeof(struct channel_packet)); - memset(&pkt_caps_list, 0, sizeof(struct channel_packet_caps_list)); + memset(&pkt, 0, sizeof(pkt)); + memset(&pkt_caps_list, 0, sizeof(pkt_caps_list)); if (!strcmp(res->cpu_num, "all")) { @@ -313,18 +312,18 @@ cmd_query_caps_list_parsed(void *parsed_result, return; } - pkt.command = CPU_POWER_QUERY_CAPS_LIST; + pkt.command = RTE_POWER_QUERY_CAPS_LIST; strlcpy(pkt.vm_name, policy.vm_name, sizeof(pkt.vm_name)); query_list = true; } else { errno = 0; lcore_id = (unsigned int)strtol(res->cpu_num, &ep, 10); - if (errno != 0 || lcore_id >= MAX_VCPU_PER_VM || + if (errno != 0 || lcore_id >= RTE_POWER_MAX_VCPU_PER_VM || ep == res->cpu_num) { cmdline_printf(cl, "Invalid parameter provided.\n"); return; } - pkt.command = CPU_POWER_QUERY_CAPS; + pkt.command = RTE_POWER_QUERY_CAPS; strlcpy(pkt.vm_name, policy.vm_name, sizeof(pkt.vm_name)); pkt.resource_id = lcore_id; } @@ -380,7 +379,7 @@ cmdline_parse_inst_t cmd_query_caps_list = { static int check_response_cmd(unsigned int lcore_id, int *result) { - struct channel_packet pkt; + struct rte_power_channel_packet pkt; int ret; ret = rte_power_guest_channel_receive_msg(&pkt, sizeof pkt, lcore_id); @@ -388,10 +387,10 @@ check_response_cmd(unsigned int lcore_id, int *result) return -1; switch (pkt.command) { - case(CPU_POWER_CMD_ACK): + case(RTE_POWER_CMD_ACK): *result = 1; break; - case(CPU_POWER_CMD_NACK): + case(RTE_POWER_CMD_NACK): *result = 0; break; default: @@ -473,7 +472,7 @@ struct cmd_send_policy_result { }; static inline int -send_policy(struct channel_packet *pkt, struct cmdline *cl) +send_policy(struct rte_power_channel_packet *pkt, struct cmdline *cl) { int ret; diff --git a/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h b/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h index 6ad14a3dea..b578ec0723 100644 --- a/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h +++ b/dpdk/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h @@ -9,13 +9,11 @@ extern "C" { #endif -#include "channel_commands.h" - -struct channel_packet *get_policy(void); +struct rte_power_channel_packet *get_policy(void); int set_policy_mac(int port, int idx); -int set_policy_defaults(struct channel_packet *pkt); +int set_policy_defaults(struct rte_power_channel_packet *pkt); void run_cli(__rte_unused void *arg); diff --git a/dpdk/examples/vm_power_manager/main.c b/dpdk/examples/vm_power_manager/main.c index 75d5b5364f..799d7b9bc3 100644 --- a/dpdk/examples/vm_power_manager/main.c +++ b/dpdk/examples/vm_power_manager/main.c @@ -394,7 +394,7 @@ main(int argc, char **argv) "Cannot init port %"PRIu8 "\n", portid); - for (w = 0; w < MAX_VFS; w++) { + for (w = 0; w < RTE_POWER_MAX_VFS; w++) { eth.addr_bytes[5] = w + 0xf0; ret = -ENOTSUP; diff --git a/dpdk/examples/vm_power_manager/meson.build b/dpdk/examples/vm_power_manager/meson.build index 1f813fbe87..637bd23235 100644 --- a/dpdk/examples/vm_power_manager/meson.build +++ b/dpdk/examples/vm_power_manager/meson.build @@ -41,7 +41,7 @@ opt_dep = cc.find_library('virt', required : false) build = opt_dep.found() ext_deps += opt_dep -opt_dep = dependency('jansson', required : false) +opt_dep = dependency('jansson', required : false, 
method: 'pkg-config') if opt_dep.found() ext_deps += opt_dep cflags += '-DUSE_JANSSON' diff --git a/dpdk/examples/vm_power_manager/vm_power_cli.c b/dpdk/examples/vm_power_manager/vm_power_cli.c index ed0623a41d..1a55e553b9 100644 --- a/dpdk/examples/vm_power_manager/vm_power_cli.c +++ b/dpdk/examples/vm_power_manager/vm_power_cli.c @@ -21,7 +21,6 @@ #include "channel_manager.h" #include "channel_monitor.h" #include "power_manager.h" -#include "channel_commands.h" struct cmd_quit_result { cmdline_fixed_string_t quit; diff --git a/dpdk/lib/librte_bitratestats/rte_bitrate.h b/dpdk/lib/librte_bitratestats/rte_bitrate.h index 4865929e8f..fcd1564ddc 100644 --- a/dpdk/lib/librte_bitratestats/rte_bitrate.h +++ b/dpdk/lib/librte_bitratestats/rte_bitrate.h @@ -7,6 +7,8 @@ #include +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/dpdk/lib/librte_bpf/meson.build b/dpdk/lib/librte_bpf/meson.build index 48460e9505..614277effd 100644 --- a/dpdk/lib/librte_bpf/meson.build +++ b/dpdk/lib/librte_bpf/meson.build @@ -19,7 +19,7 @@ headers = files('bpf_def.h', deps += ['mbuf', 'net', 'ethdev'] -dep = dependency('libelf', required: false) +dep = dependency('libelf', required: false, method: 'pkg-config') if dep.found() dpdk_conf.set('RTE_LIBRTE_BPF_ELF', 1) sources += files('bpf_load_elf.c') diff --git a/dpdk/lib/librte_compressdev/rte_compressdev_pmd.h b/dpdk/lib/librte_compressdev/rte_compressdev_pmd.h index d5898a5b71..16b6bc6b35 100644 --- a/dpdk/lib/librte_compressdev/rte_compressdev_pmd.h +++ b/dpdk/lib/librte_compressdev/rte_compressdev_pmd.h @@ -138,6 +138,8 @@ typedef void (*compressdev_stats_reset_t)(struct rte_compressdev *dev); * * @param dev * Compress device + * @param dev_info + * Compress device information to populate */ typedef void (*compressdev_info_get_t)(struct rte_compressdev *dev, struct rte_compressdev_info *dev_info); diff --git a/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.h b/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.h index 9a8a7e632b..1274436870 100644 --- a/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.h +++ b/dpdk/lib/librte_cryptodev/rte_cryptodev_pmd.h @@ -121,7 +121,7 @@ extern struct rte_cryptodev *rte_cryptodevs; * Function used to configure device. * * @param dev Crypto device pointer - * config Crypto device configurations + * @param config Crypto device configurations * * @return Returns 0 on success */ @@ -176,7 +176,8 @@ typedef void (*cryptodev_stats_reset_t)(struct rte_cryptodev *dev); /** * Function used to get specific information of a device. * - * @param dev Crypto device pointer + * @param dev Crypto device pointer + * @param dev_info Pointer to infos structure to populate */ typedef void (*cryptodev_info_get_t)(struct rte_cryptodev *dev, struct rte_cryptodev_info *dev_info); @@ -213,7 +214,7 @@ typedef int (*cryptodev_queue_pair_release_t)(struct rte_cryptodev *dev, * * @param dev Crypto device pointer * @param nb_objs number of sessions objects in mempool - * @param obj_cache l-core object cache size, see *rte_ring_create* + * @param obj_cache_size l-core object cache size, see *rte_ring_create* * @param socket_id Socket Id to allocate mempool on. 
* * @return @@ -253,7 +254,7 @@ typedef unsigned int (*cryptodev_asym_get_session_private_size_t)( * * @param dev Crypto device pointer * @param xform Single or chain of crypto xforms - * @param priv_sess Pointer to cryptodev's private session structure + * @param session Pointer to cryptodev's private session structure * @param mp Mempool where the private session is allocated * * @return @@ -271,7 +272,7 @@ typedef int (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev, * * @param dev Crypto device pointer * @param xform Single or chain of crypto xforms - * @param priv_sess Pointer to cryptodev's private session structure + * @param session Pointer to cryptodev's private session structure * @param mp Mempool where the private session is allocated * * @return @@ -333,7 +334,6 @@ typedef int (*cryptodev_sym_get_raw_dp_ctx_size_t)(struct rte_cryptodev *dev); * * @param dev Crypto device pointer. * @param qp_id Crypto device queue pair index. - * @param service_type Type of the service requested. * @param ctx The raw data-path context data. * @param sess_type session type. * @param session_ctx Session context data. If NULL the driver diff --git a/dpdk/lib/librte_eal/arm/include/rte_atomic_64.h b/dpdk/lib/librte_eal/arm/include/rte_atomic_64.h index 467d32a455..fa6f334c0d 100644 --- a/dpdk/lib/librte_eal/arm/include/rte_atomic_64.h +++ b/dpdk/lib/librte_eal/arm/include/rte_atomic_64.h @@ -53,15 +53,15 @@ rte_atomic_thread_fence(int memorder) #endif #define __ATOMIC128_CAS_OP(cas_op_name, op_string) \ -static __rte_noinline rte_int128_t \ -cas_op_name(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated) \ +static __rte_noinline void \ +cas_op_name(rte_int128_t *dst, rte_int128_t *old, rte_int128_t updated) \ { \ /* caspX instructions register pair must start from even-numbered * register at operand 1. * So, specify registers for local variables here. 
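
The rte_atomic_64.h change around here reworks the __cas_128_* helpers to take the old value by pointer and store the observed value back through it, instead of returning an rte_int128_t by value; rte_atomic128_cmp_exchange() then copies *exp once after the CAS. A minimal sketch (not from the patch) of the same in/out "expected" convention, shown with the GCC/Clang __atomic builtin on a 64-bit value for brevity:

    /* Sketch of the in/out "expected" convention the patched helpers
     * adopt, on a 64-bit value so it builds anywhere the GCC/Clang
     * __atomic builtins exist. */
    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool
    cas64(uint64_t *dst, uint64_t *expected, uint64_t desired)
    {
        /* On failure the observed value is written back into *expected,
         * so a retry loop never has to reload *dst by hand. */
        return __atomic_compare_exchange_n(dst, expected, desired,
                false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
    }

    int
    main(void)
    {
        uint64_t val = 1, exp = 2;

        if (!cas64(&val, &exp, 3))
            printf("CAS failed, saw %" PRIu64 "\n", exp);  /* prints 1 */
        return 0;
    }
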
*/ \ - register uint64_t x0 __asm("x0") = (uint64_t)old.val[0]; \ - register uint64_t x1 __asm("x1") = (uint64_t)old.val[1]; \ + register uint64_t x0 __asm("x0") = (uint64_t)old->val[0]; \ + register uint64_t x1 __asm("x1") = (uint64_t)old->val[1]; \ register uint64_t x2 __asm("x2") = (uint64_t)updated.val[0]; \ register uint64_t x3 __asm("x3") = (uint64_t)updated.val[1]; \ asm volatile( \ @@ -73,9 +73,8 @@ cas_op_name(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated) \ [upd1] "r" (x3), \ [dst] "r" (dst) \ : "memory"); \ - old.val[0] = x0; \ - old.val[1] = x1; \ - return old; \ + old->val[0] = x0; \ + old->val[1] = x1; \ } __ATOMIC128_CAS_OP(__cas_128_relaxed, "casp") @@ -113,13 +112,14 @@ rte_atomic128_cmp_exchange(rte_int128_t *dst, rte_int128_t *exp, #if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS) if (success == __ATOMIC_RELAXED) - old = __cas_128_relaxed(dst, expected, desired); + __cas_128_relaxed(dst, exp, desired); else if (success == __ATOMIC_ACQUIRE) - old = __cas_128_acquire(dst, expected, desired); + __cas_128_acquire(dst, exp, desired); else if (success == __ATOMIC_RELEASE) - old = __cas_128_release(dst, expected, desired); + __cas_128_release(dst, exp, desired); else - old = __cas_128_acq_rel(dst, expected, desired); + __cas_128_acq_rel(dst, exp, desired); + old = *exp; #else #define __HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE) #define __HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || (mo) == __ATOMIC_ACQ_REL || \ @@ -183,12 +183,12 @@ rte_atomic128_cmp_exchange(rte_int128_t *dst, rte_int128_t *exp, #undef __STORE_128 } while (unlikely(ret)); -#endif - /* Unconditionally updating expected removes an 'if' statement. - * expected should already be in register if not in the cache. + /* Unconditionally updating the value of exp removes an 'if' statement. + * The value of exp should already be in register if not in the cache. */ *exp = old; +#endif return (old.int128 == expected.int128); } diff --git a/dpdk/lib/librte_eal/common/eal_common_fbarray.c b/dpdk/lib/librte_eal/common/eal_common_fbarray.c index 1220e2bae9..d974f3dab7 100644 --- a/dpdk/lib/librte_eal/common/eal_common_fbarray.c +++ b/dpdk/lib/librte_eal/common/eal_common_fbarray.c @@ -110,7 +110,7 @@ overlap(const struct mem_area *ma, const void *start, size_t len) if (start >= ma_start && start < ma_end) return 1; /* end overlap? 
*/ - if (end >= ma_start && end < ma_end) + if (end > ma_start && end < ma_end) return 1; return 0; } diff --git a/dpdk/lib/librte_eal/common/eal_common_options.c b/dpdk/lib/librte_eal/common/eal_common_options.c index 424e8bcf87..622c7bc429 100644 --- a/dpdk/lib/librte_eal/common/eal_common_options.c +++ b/dpdk/lib/librte_eal/common/eal_common_options.c @@ -494,6 +494,39 @@ eal_dlopen(const char *pathname) return retval; } +static int +is_shared_build(void) +{ +#define EAL_SO "librte_eal.so" + char soname[32]; + size_t len, minlen = strlen(EAL_SO); + + len = strlcpy(soname, EAL_SO"."ABI_VERSION, sizeof(soname)); + if (len > sizeof(soname)) { + RTE_LOG(ERR, EAL, "Shared lib name too long in shared build check\n"); + len = sizeof(soname) - 1; + } + + while (len >= minlen) { + /* check if we have this .so loaded, if so - shared build */ + RTE_LOG(DEBUG, EAL, "Checking presence of .so '%s'\n", soname); + if (dlopen(soname, RTLD_LAZY | RTLD_NOLOAD) != NULL) { + RTE_LOG(INFO, EAL, "Detected shared linkage of DPDK\n"); + return 1; + } + + /* remove any version numbers off the end to retry */ + while (len-- > 0) + if (soname[len] == '.') { + soname[len] = '\0'; + break; + } + } + + RTE_LOG(INFO, EAL, "Detected static linkage of DPDK\n"); + return 0; +} + int eal_plugins_init(void) { @@ -505,7 +538,7 @@ eal_plugins_init(void) * (Using dlopen with NOLOAD flag on EAL, will return NULL if the EAL * shared library is not already loaded i.e. it's statically linked.) */ - if (dlopen("librte_eal.so."ABI_VERSION, RTLD_LAZY | RTLD_NOLOAD) != NULL && + if (is_shared_build() && *default_solib_dir != '\0' && stat(default_solib_dir, &sb) == 0 && S_ISDIR(sb.st_mode)) diff --git a/dpdk/lib/librte_eal/freebsd/eal.c b/dpdk/lib/librte_eal/freebsd/eal.c index d6ea023750..51478358c7 100644 --- a/dpdk/lib/librte_eal/freebsd/eal.c +++ b/dpdk/lib/librte_eal/freebsd/eal.c @@ -906,7 +906,7 @@ rte_eal_init(int argc, char **argv) ret = rte_service_init(); if (ret) { rte_eal_init_alert("rte_service_init() failed"); - rte_errno = ENOEXEC; + rte_errno = -ret; return -1; } @@ -922,7 +922,7 @@ rte_eal_init(int argc, char **argv) */ ret = rte_service_start_with_defaults(); if (ret < 0 && ret != -ENOTSUP) { - rte_errno = ENOEXEC; + rte_errno = -ret; return -1; } diff --git a/dpdk/lib/librte_eal/include/generic/rte_mcslock.h b/dpdk/lib/librte_eal/include/generic/rte_mcslock.h index d370bef17a..9f323bd2a2 100644 --- a/dpdk/lib/librte_eal/include/generic/rte_mcslock.h +++ b/dpdk/lib/librte_eal/include/generic/rte_mcslock.h @@ -22,6 +22,7 @@ #include #include #include +#include /** * The rte_mcslock_t type. 
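
The new is_shared_build() helper above replaces the single dlopen() probe: it asks the dynamic linker, via RTLD_NOLOAD, whether any versioned librte_eal.so is already mapped into the process, trimming version components off the soname until it finds one or runs out. A standalone sketch of that probe; the "21" version suffix is illustrative, and the program links with -ldl:

    /* Sketch of the RTLD_NOLOAD probe behind is_shared_build().
     * Version suffix illustrative; build with: cc probe.c -ldl */
    #include <dlfcn.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* RTLD_NOLOAD never loads anything new; dlopen() returns a
         * handle only if the object is already resident, i.e. the
         * process was dynamically linked against it. */
        void *h = dlopen("librte_eal.so.21", RTLD_LAZY | RTLD_NOLOAD);

        printf("DPDK linkage: %s\n", h ? "shared" : "static");
        if (h)
            dlclose(h);
        return 0;
    }
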
diff --git a/dpdk/lib/librte_eal/include/rte_compat.h b/dpdk/lib/librte_eal/include/rte_compat.h index 4cd8f68d68..2718612cce 100644 --- a/dpdk/lib/librte_eal/include/rte_compat.h +++ b/dpdk/lib/librte_eal/include/rte_compat.h @@ -19,12 +19,23 @@ __attribute__((section(".text.experimental"))) #endif -#ifndef ALLOW_INTERNAL_API +#ifndef __has_attribute +/* if no has_attribute assume no support for attribute too */ +#define __has_attribute(x) 0 +#endif + +#if !defined ALLOW_INTERNAL_API && __has_attribute(error) /* For GCC */ #define __rte_internal \ __attribute__((error("Symbol is not public ABI"), \ section(".text.internal"))) +#elif !defined ALLOW_INTERNAL_API && __has_attribute(diagnose_if) /* For clang */ + +#define __rte_internal \ +__attribute__((diagnose_if(1, "Symbol is not public ABI", "error"), \ +section(".text.internal"))) + #else #define __rte_internal \ diff --git a/dpdk/lib/librte_eal/include/rte_keepalive.h b/dpdk/lib/librte_eal/include/rte_keepalive.h index 4bda7ca56f..bd25508da8 100644 --- a/dpdk/lib/librte_eal/include/rte_keepalive.h +++ b/dpdk/lib/librte_eal/include/rte_keepalive.h @@ -52,7 +52,7 @@ typedef void (*rte_keepalive_failure_callback_t)( * @param data Data pointer passed to rte_keepalive_register_relay_callback() * @param id_core ID of the core for which state is being reported * @param core_state The current state of the core - * @param Timestamp of when core was last seen alive + * @param last_seen Timestamp of when core was last seen alive */ typedef void (*rte_keepalive_relay_callback_t)( void *data, diff --git a/dpdk/lib/librte_eal/include/rte_reciprocal.h b/dpdk/lib/librte_eal/include/rte_reciprocal.h index 63e16fde0a..735adb029b 100644 --- a/dpdk/lib/librte_eal/include/rte_reciprocal.h +++ b/dpdk/lib/librte_eal/include/rte_reciprocal.h @@ -27,6 +27,8 @@ #include +#include + struct rte_reciprocal { uint32_t m; uint8_t sh1, sh2; diff --git a/dpdk/lib/librte_eal/linux/eal.c b/dpdk/lib/librte_eal/linux/eal.c index a4161be630..32b48c3de9 100644 --- a/dpdk/lib/librte_eal/linux/eal.c +++ b/dpdk/lib/librte_eal/linux/eal.c @@ -1273,7 +1273,7 @@ rte_eal_init(int argc, char **argv) ret = rte_service_init(); if (ret) { rte_eal_init_alert("rte_service_init() failed"); - rte_errno = ENOEXEC; + rte_errno = -ret; return -1; } @@ -1295,7 +1295,7 @@ rte_eal_init(int argc, char **argv) */ ret = rte_service_start_with_defaults(); if (ret < 0 && ret != -ENOTSUP) { - rte_errno = ENOEXEC; + rte_errno = -ret; return -1; } diff --git a/dpdk/lib/librte_eal/linux/eal_interrupts.c b/dpdk/lib/librte_eal/linux/eal_interrupts.c index 2f03a61254..1dd994bd1f 100644 --- a/dpdk/lib/librte_eal/linux/eal_interrupts.c +++ b/dpdk/lib/librte_eal/linux/eal_interrupts.c @@ -1241,7 +1241,7 @@ eal_epoll_process_event(struct epoll_event *evs, unsigned int n, events[count].status = RTE_EPOLL_VALID; events[count].fd = rev->fd; events[count].epfd = rev->epfd; - events[count].epdata.event = rev->epdata.event; + events[count].epdata.event = evs[i].events; events[count].epdata.data = rev->epdata.data; if (rev->epdata.cb_fun) rev->epdata.cb_fun(rev->fd, diff --git a/dpdk/lib/librte_eal/windows/eal.c b/dpdk/lib/librte_eal/windows/eal.c index 105549de1b..1e5f6576f0 100644 --- a/dpdk/lib/librte_eal/windows/eal.c +++ b/dpdk/lib/librte_eal/windows/eal.c @@ -264,6 +264,7 @@ rte_eal_init(int argc, char **argv) const struct rte_config *config = rte_eal_get_configuration(); struct internal_config *internal_conf = eal_get_internal_configuration(); + int ret; rte_eal_log_init(NULL, 0); @@ -387,9 +388,10 @@ 
rte_eal_init(int argc, char **argv) } /* Initialize services so drivers can register services during probe. */ - if (rte_service_init()) { + ret = rte_service_init(); + if (ret) { rte_eal_init_alert("rte_service_init() failed"); - rte_errno = ENOEXEC; + rte_errno = -ret; return -1; } diff --git a/dpdk/lib/librte_eal/windows/eal_lcore.c b/dpdk/lib/librte_eal/windows/eal_lcore.c index d5ff721e03..a85149be95 100644 --- a/dpdk/lib/librte_eal/windows/eal_lcore.c +++ b/dpdk/lib/librte_eal/windows/eal_lcore.c @@ -38,6 +38,7 @@ static struct cpu_map cpu_map = { 0 }; /* eal_create_cpu_map() is called before logging is initialized */ static void +__rte_format_printf(1, 2) log_early(const char *format, ...) { va_list va; diff --git a/dpdk/lib/librte_eal/windows/eal_memory.c b/dpdk/lib/librte_eal/windows/eal_memory.c index 7f8d3c2fa4..2cf5a5e649 100644 --- a/dpdk/lib/librte_eal/windows/eal_memory.c +++ b/dpdk/lib/librte_eal/windows/eal_memory.c @@ -18,13 +18,12 @@ #include /* MinGW-w64 headers lack VirtualAlloc2() in some distributions. - * Provide a copy of definitions and code to load it dynamically. * Note: definitions are copied verbatim from Microsoft documentation * and don't follow DPDK code style. - * - * MEM_RESERVE_PLACEHOLDER being defined means VirtualAlloc2() is present too. */ -#ifndef MEM_PRESERVE_PLACEHOLDER +#ifndef MEM_EXTENDED_PARAMETER_TYPE_BITS + +#define MEM_EXTENDED_PARAMETER_TYPE_BITS 4 /* https://docs.microsoft.com/en-us/windows/win32/api/winnt/ne-winnt-mem_extended_parameter_type */ typedef enum MEM_EXTENDED_PARAMETER_TYPE { @@ -37,8 +36,6 @@ typedef enum MEM_EXTENDED_PARAMETER_TYPE { MemExtendedParameterMax } *PMEM_EXTENDED_PARAMETER_TYPE; -#define MEM_EXTENDED_PARAMETER_TYPE_BITS 4 - /* https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-mem_extended_parameter */ typedef struct MEM_EXTENDED_PARAMETER { struct { @@ -54,6 +51,8 @@ typedef struct MEM_EXTENDED_PARAMETER { } DUMMYUNIONNAME; } MEM_EXTENDED_PARAMETER, *PMEM_EXTENDED_PARAMETER; +#endif /* defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) */ + /* https://docs.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualalloc2 */ typedef PVOID (*VirtualAlloc2_type)( HANDLE Process, @@ -65,17 +64,19 @@ typedef PVOID (*VirtualAlloc2_type)( ULONG ParameterCount ); -/* VirtualAlloc2() flags. */ +/* MinGW-w64 distributions, even those that declare VirtualAlloc2(), + * lack it in import libraries, which results in a failure at link time. + * Link it dynamically in such case. + */ +static VirtualAlloc2_type VirtualAlloc2_ptr; + +#ifdef RTE_TOOLCHAIN_GCC + #define MEM_COALESCE_PLACEHOLDERS 0x00000001 #define MEM_PRESERVE_PLACEHOLDER 0x00000002 #define MEM_REPLACE_PLACEHOLDER 0x00004000 #define MEM_RESERVE_PLACEHOLDER 0x00040000 -/* Named exactly as the function, so that user code does not depend - * on it being found at compile time or dynamically. - */ -static VirtualAlloc2_type VirtualAlloc2; - int eal_mem_win32api_init(void) { @@ -89,7 +90,7 @@ eal_mem_win32api_init(void) int ret = 0; /* Already done. 
*/ - if (VirtualAlloc2 != NULL) + if (VirtualAlloc2_ptr != NULL) return 0; library = LoadLibraryA(library_name); @@ -98,9 +99,9 @@ eal_mem_win32api_init(void) return -1; } - VirtualAlloc2 = (VirtualAlloc2_type)( + VirtualAlloc2_ptr = (VirtualAlloc2_type)( (void *)GetProcAddress(library, function)); - if (VirtualAlloc2 == NULL) { + if (VirtualAlloc2_ptr == NULL) { RTE_LOG_WIN32_ERR("GetProcAddress(\"%s\", \"%s\")\n", library_name, function); @@ -117,14 +118,15 @@ eal_mem_win32api_init(void) #else -/* Stub in case VirtualAlloc2() is provided by the compiler. */ +/* Stub in case VirtualAlloc2() is provided by the toolchain. */ int eal_mem_win32api_init(void) { + VirtualAlloc2_ptr = VirtualAlloc2; return 0; } -#endif /* defined(MEM_RESERVE_PLACEHOLDER) */ +#endif /* defined(RTE_TOOLCHAIN_GCC) */ static HANDLE virt2phys_device = INVALID_HANDLE_VALUE; @@ -278,7 +280,7 @@ eal_mem_reserve(void *requested_addr, size_t size, int flags) process = GetCurrentProcess(); - virt = VirtualAlloc2(process, requested_addr, size, + virt = VirtualAlloc2_ptr(process, requested_addr, size, MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, NULL, 0); if (virt == NULL) { @@ -364,7 +366,7 @@ eal_mem_commit(void *requested_addr, size_t size, int socket_id) } flags = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES; - addr = VirtualAlloc2(process, requested_addr, size, + addr = VirtualAlloc2_ptr(process, requested_addr, size, flags, PAGE_READWRITE, ¶m, param_count); if (addr == NULL) { /* Logging may overwrite GetLastError() result. */ @@ -406,7 +408,7 @@ eal_mem_decommit(void *addr, size_t size) } flags = MEM_RESERVE | MEM_RESERVE_PLACEHOLDER; - stub = VirtualAlloc2( + stub = VirtualAlloc2_ptr( process, addr, size, flags, PAGE_NOACCESS, NULL, 0); if (stub == NULL) { /* We lost the race for the VA. */ diff --git a/dpdk/lib/librte_eal/windows/include/rte_os.h b/dpdk/lib/librte_eal/windows/include/rte_os.h index 569ed92d51..7ef38ff06c 100644 --- a/dpdk/lib/librte_eal/windows/include/rte_os.h +++ b/dpdk/lib/librte_eal/windows/include/rte_os.h @@ -25,22 +25,42 @@ extern "C" { #define PATH_MAX _MAX_PATH #endif +#ifndef sleep #define sleep(x) Sleep(1000 * (x)) +#endif +#ifndef strerror_r #define strerror_r(a, b, c) strerror_s(b, c, a) +#endif +#ifndef strdup /* strdup is deprecated in Microsoft libc and _strdup is preferred */ #define strdup(str) _strdup(str) +#endif +#ifndef strtok_r #define strtok_r(str, delim, saveptr) strtok_s(str, delim, saveptr) +#endif +#ifndef index #define index(a, b) strchr(a, b) +#endif + +#ifndef rindex #define rindex(a, b) strrchr(a, b) +#endif +#ifndef strncasecmp #define strncasecmp(s1, s2, count) _strnicmp(s1, s2, count) +#endif +#ifndef close #define close _close +#endif + +#ifndef unlink #define unlink _unlink +#endif /* cpu_set macros implementation */ #define RTE_CPU_AND(dst, src1, src2) CPU_AND(dst, src1, src2) @@ -66,7 +86,7 @@ asprintf(char **buffer, const char *format, ...) return -1; size++; - *buffer = malloc(size); + *buffer = (char *)malloc(size); if (*buffer == NULL) return -1; @@ -89,7 +109,9 @@ eal_strerror(int code) return buffer; } +#ifndef strerror #define strerror eal_strerror +#endif #endif /* RTE_TOOLCHAIN_GCC */ diff --git a/dpdk/lib/librte_ethdev/rte_eth_ctrl.h b/dpdk/lib/librte_ethdev/rte_eth_ctrl.h index 1cca522fa3..8a50dbfef9 100644 --- a/dpdk/lib/librte_ethdev/rte_eth_ctrl.h +++ b/dpdk/lib/librte_ethdev/rte_eth_ctrl.h @@ -9,6 +9,7 @@ #include #include #include "rte_flow.h" +#include "rte_ethdev.h" /** * @deprecated Please use rte_flow API instead of this legacy one. 
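
The Windows memory hunk above stops naming the loaded pointer exactly like the API and resolves it into VirtualAlloc2_ptr instead, so MinGW-w64 import libraries that declare VirtualAlloc2() but do not export it no longer break the link. A condensed sketch of the pattern; that VirtualAlloc2 is exported by kernelbase.dll is an assumption here, and the extended-parameter argument is reduced to void *:

    /* Sketch: resolve an API at run time into a distinctly named
     * pointer. kernelbase.dll as the home of VirtualAlloc2 is assumed;
     * the MEM_EXTENDED_PARAMETER argument is reduced to void *. */
    #include <windows.h>
    #include <stdio.h>

    typedef PVOID (WINAPI *VirtualAlloc2_type)(HANDLE, PVOID, SIZE_T,
            ULONG, ULONG, void *, ULONG);

    static VirtualAlloc2_type VirtualAlloc2_ptr;

    int
    main(void)
    {
        HMODULE lib = LoadLibraryA("kernelbase.dll");

        if (lib != NULL)
            VirtualAlloc2_ptr = (VirtualAlloc2_type)(void *)
                    GetProcAddress(lib, "VirtualAlloc2");
        printf("VirtualAlloc2 %s\n",
               VirtualAlloc2_ptr ? "resolved" : "unavailable");
        return 0;
    }
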
diff --git a/dpdk/lib/librte_ethdev/rte_ethdev.c b/dpdk/lib/librte_ethdev/rte_ethdev.c index 17ddacc78d..ecd46ac01f 100644 --- a/dpdk/lib/librte_ethdev/rte_ethdev.c +++ b/dpdk/lib/librte_ethdev/rte_ethdev.c @@ -1292,8 +1292,10 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; struct rte_eth_conf orig_conf; + uint16_t overhead_len; int diag; int ret; + uint16_t old_mtu; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); @@ -1319,10 +1321,20 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, memcpy(&dev->data->dev_conf, dev_conf, sizeof(dev->data->dev_conf)); + /* Backup mtu for rollback */ + old_mtu = dev->data->mtu; + ret = rte_eth_dev_info_get(port_id, &dev_info); if (ret != 0) goto rollback; + /* Get the real Ethernet overhead length */ + if (dev_info.max_mtu != UINT16_MAX && + dev_info.max_rx_pktlen > dev_info.max_mtu) + overhead_len = dev_info.max_rx_pktlen - dev_info.max_mtu; + else + overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN; + /* If number of queues specified by application for both Rx and Tx is * zero, use driver preferred values. This cannot be done individually * as it is valid for either Tx or Rx (but not both) to be zero. @@ -1409,12 +1421,17 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, ret = -EINVAL; goto rollback; } + + /* Scale the MTU size to adapt max_rx_pkt_len */ + dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len - + overhead_len; } else { - if (dev_conf->rxmode.max_rx_pkt_len < RTE_ETHER_MIN_LEN || - dev_conf->rxmode.max_rx_pkt_len > RTE_ETHER_MAX_LEN) + uint16_t pktlen = dev_conf->rxmode.max_rx_pkt_len; + if (pktlen < RTE_ETHER_MIN_MTU + overhead_len || + pktlen > RTE_ETHER_MTU + overhead_len) /* Use default value */ dev->data->dev_conf.rxmode.max_rx_pkt_len = - RTE_ETHER_MAX_LEN; + RTE_ETHER_MTU + overhead_len; } /* @@ -1549,6 +1566,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, eth_dev_tx_queue_config(dev, 0); rollback: memcpy(&dev->data->dev_conf, &orig_conf, sizeof(dev->data->dev_conf)); + if (old_mtu != dev->data->mtu) + dev->data->mtu = old_mtu; rte_ethdev_trace_configure(port_id, nb_rx_q, nb_tx_q, dev_conf, ret); return ret; @@ -1801,7 +1820,7 @@ rte_eth_dev_close(uint16_t port_id) rte_ethdev_trace_close(port_id); *lasterr = rte_eth_dev_release_port(dev); - return eth_err(port_id, firsterr); + return firsterr; } int @@ -5692,7 +5711,7 @@ eth_dev_handle_port_link_status(const char *cmd __rte_unused, if (!rte_eth_dev_is_valid_port(port_id)) return -1; - ret = rte_eth_link_get(port_id, &link); + ret = rte_eth_link_get_nowait(port_id, &link); if (ret < 0) return -1; diff --git a/dpdk/lib/librte_eventdev/rte_eventdev_pmd.h b/dpdk/lib/librte_eventdev/rte_eventdev_pmd.h index 27be376ed1..7eb9a77393 100644 --- a/dpdk/lib/librte_eventdev/rte_eventdev_pmd.h +++ b/dpdk/lib/librte_eventdev/rte_eventdev_pmd.h @@ -158,9 +158,6 @@ rte_event_pmd_is_valid_dev(uint8_t dev_id) * Event device pointer * @param dev_info * Event device information structure - * - * @return - * Returns 0 on success */ typedef void (*eventdev_info_get_t)(struct rte_eventdev *dev, struct rte_event_dev_info *dev_info); @@ -297,7 +294,7 @@ typedef void (*eventdev_port_release_t)(void *port); * Event device pointer * @param port * Event port pointer - * @param link + * @param queues * Points to an array of *nb_links* event queues to be linked * to the event port. 
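
The rte_eth_dev_configure() hunk above derives the real Ethernet overhead from driver capabilities instead of hard-coding RTE_ETHER_MAX_LEN, then keeps dev->data->mtu consistent with max_rx_pkt_len and restores the old MTU on rollback. The arithmetic, as a standalone sketch with illustrative device values:

    /* Sketch of the overhead/MTU arithmetic; device numbers are
     * illustrative. */
    #include <stdint.h>
    #include <stdio.h>

    #define RTE_ETHER_HDR_LEN 14
    #define RTE_ETHER_CRC_LEN 4

    int
    main(void)
    {
        uint32_t max_rx_pktlen = 9728;  /* dev_info.max_rx_pktlen */
        uint16_t max_mtu = 9710;        /* dev_info.max_mtu */
        uint32_t max_rx_pkt_len = 1518; /* application request */
        uint32_t overhead_len;

        /* Prefer the driver-reported difference; fall back to L2 + CRC. */
        if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
            overhead_len = max_rx_pktlen - max_mtu;
        else
            overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;

        /* 9728 - 9710 = 18 bytes; MTU scales to 1518 - 18 = 1500. */
        printf("overhead %u, mtu %u\n", (unsigned)overhead_len,
               (unsigned)(max_rx_pkt_len - overhead_len));
        return 0;
    }
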
* @param priorities @@ -383,6 +380,10 @@ typedef void (*eventdev_dump_t)(struct rte_eventdev *dev, FILE *f); * * @param dev * Event device pointer + * @param mode + * Level (device, port or queue) + * @param queue_port_id + * Queue or port number depending on mode * @param ids * The stat ids to retrieve * @param values @@ -410,8 +411,14 @@ typedef int (*eventdev_xstats_reset_t)(struct rte_eventdev *dev, * * @param dev * Event device pointer + * @param mode + * Level (device, port or queue) + * @param queue_port_id + * Queue or port number depending on mode * @param xstats_names * Array of name values to be filled in + * @param ids + * The stat ids to retrieve * @param size * Number of values in the xstats_names array * @return diff --git a/dpdk/lib/librte_fib/rte_fib.h b/dpdk/lib/librte_fib/rte_fib.h index fef0749525..acad20963c 100644 --- a/dpdk/lib/librte_fib/rte_fib.h +++ b/dpdk/lib/librte_fib/rte_fib.h @@ -19,6 +19,8 @@ * for IPv4 Longest Prefix Match */ +#include + #include #ifdef __cplusplus diff --git a/dpdk/lib/librte_fib/rte_fib6.h b/dpdk/lib/librte_fib/rte_fib6.h index 668bffb2ba..0e193b8e7b 100644 --- a/dpdk/lib/librte_fib/rte_fib6.h +++ b/dpdk/lib/librte_fib/rte_fib6.h @@ -19,6 +19,8 @@ * for IPv6 Longest Prefix Match */ +#include + #include #ifdef __cplusplus diff --git a/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c b/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c index 1dda8aca02..69666c8b82 100644 --- a/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c +++ b/dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c @@ -104,6 +104,7 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, const unaligned_uint64_t *psd; uint16_t flag_offset, ip_ofs, ip_flag; int32_t ip_len; + int32_t trim; flag_offset = rte_be_to_cpu_16(ip_hdr->fragment_offset); ip_ofs = (uint16_t)(flag_offset & RTE_IPV4_HDR_OFFSET_MASK); @@ -117,14 +118,15 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, ip_ofs *= RTE_IPV4_HDR_OFFSET_UNITS; ip_len = rte_be_to_cpu_16(ip_hdr->total_length) - mb->l3_len; + trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len); IP_FRAG_LOG(DEBUG, "%s:%d:\n" - "mbuf: %p, tms: %" PRIu64 - ", key: <%" PRIx64 ", %#x>, ofs: %u, len: %d, flags: %#x\n" + "mbuf: %p, tms: %" PRIu64 ", key: <%" PRIx64 ", %#x>" + "ofs: %u, len: %d, padding: %d, flags: %#x\n" "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " "max_entries: %u, use_entries: %u\n\n", __func__, __LINE__, - mb, tms, key.src_dst[0], key.id, ip_ofs, ip_len, ip_flag, + mb, tms, key.src_dst[0], key.id, ip_ofs, ip_len, trim, ip_flag, tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, tbl->use_entries); @@ -134,6 +136,9 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, return NULL; } + if (unlikely(trim > 0)) + rte_pktmbuf_trim(mb, trim); + /* try to find/add entry into the fragment's table. */ if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) { IP_FRAG_MBUF2DR(dr, mb); diff --git a/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c b/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c index ad01055184..6bc0bf792a 100644 --- a/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c +++ b/dpdk/lib/librte_ip_frag/rte_ipv6_reassembly.c @@ -142,6 +142,7 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, struct ip_frag_key key; uint16_t ip_ofs; int32_t ip_len; + int32_t trim; rte_memcpy(&key.src_dst[0], ip_hdr->src_addr, 16); rte_memcpy(&key.src_dst[2], ip_hdr->dst_addr, 16); @@ -158,16 +159,17 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, * this is what we remove from the payload len. 
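
The reassembly hunks here add a trim step: an Ethernet frame may carry padding past the end of the IP datagram (the 60-byte minimum frame after CRC stripping is the usual culprit), and those bytes must be removed with rte_pktmbuf_trim() before fragments are stitched together. The padding arithmetic on a worked IPv4 example:

    /* Worked example of the new "trim" computation (IPv4 case). */
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* A 20-byte IPv4 header plus 8 bytes of payload arrives padded
         * to the 60-byte Ethernet minimum (CRC already stripped). */
        int32_t pkt_len = 60;             /* mb->pkt_len */
        int32_t l2_len = 14, l3_len = 20; /* mb->l2_len, mb->l3_len */
        int32_t ip_len = 28 - l3_len;     /* total_length - l3_len */
        int32_t trim = pkt_len - (ip_len + l3_len + l2_len);

        printf("trim %d padding bytes before reassembly\n", trim); /* 18 */
        return 0;
    }
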
*/ ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr); + trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len); IP_FRAG_LOG(DEBUG, "%s:%d:\n" "mbuf: %p, tms: %" PRIu64 ", key: <" IPv6_KEY_BYTES_FMT ", %#x>, " - "ofs: %u, len: %d, flags: %#x\n" + "ofs: %u, len: %d, padding: %d, flags: %#x\n" "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " "max_entries: %u, use_entries: %u\n\n", __func__, __LINE__, mb, tms, IPv6_KEY_BYTES(key.src_dst), key.id, ip_ofs, ip_len, - RTE_IPV6_GET_MF(frag_hdr->frag_data), + trim, RTE_IPV6_GET_MF(frag_hdr->frag_data), tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, tbl->use_entries); @@ -177,6 +179,9 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, return NULL; } + if (unlikely(trim > 0)) + rte_pktmbuf_trim(mb, trim); + /* try to find/add entry into the fragment's table. */ fp = ip_frag_find(tbl, dr, &key, tms); if (fp == NULL) { diff --git a/dpdk/lib/librte_ipsec/rte_ipsec_sad.h b/dpdk/lib/librte_ipsec/rte_ipsec_sad.h index 3e67ab1e4b..b65d295831 100644 --- a/dpdk/lib/librte_ipsec/rte_ipsec_sad.h +++ b/dpdk/lib/librte_ipsec/rte_ipsec_sad.h @@ -6,6 +6,8 @@ #ifndef _RTE_IPSEC_SAD_H_ #define _RTE_IPSEC_SAD_H_ +#include + #include /** diff --git a/dpdk/lib/librte_lpm/rte_lpm_altivec.h b/dpdk/lib/librte_lpm/rte_lpm_altivec.h index 228c41b38e..4fbc1b595d 100644 --- a/dpdk/lib/librte_lpm/rte_lpm_altivec.h +++ b/dpdk/lib/librte_lpm/rte_lpm_altivec.h @@ -88,28 +88,28 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[0] = i8.u32[0] + - (uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[0] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]]; tbl[0] = *ptbl; } if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[1] = i8.u32[1] + - (uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[1] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]]; tbl[1] = *ptbl; } if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[2] = i8.u32[2] + - (uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[2] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]]; tbl[2] = *ptbl; } if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[3] = i8.u32[3] + - (uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[3] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]]; tbl[3] = *ptbl; } diff --git a/dpdk/lib/librte_lpm/rte_lpm_neon.h b/dpdk/lib/librte_lpm/rte_lpm_neon.h index 6c131d3125..4642a866f1 100644 --- a/dpdk/lib/librte_lpm/rte_lpm_neon.h +++ b/dpdk/lib/librte_lpm/rte_lpm_neon.h @@ -81,28 +81,28 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[0] = i8.u32[0] + - (uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[0] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]]; tbl[0] = *ptbl; } if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[1] = i8.u32[1] + - (uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[1] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t 
*)&lpm->tbl8[i8.u32[1]]; tbl[1] = *ptbl; } if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[2] = i8.u32[2] + - (uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[2] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]]; tbl[2] = *ptbl; } if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[3] = i8.u32[3] + - (uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[3] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]]; tbl[3] = *ptbl; } diff --git a/dpdk/lib/librte_lpm/rte_lpm_sse.h b/dpdk/lib/librte_lpm/rte_lpm_sse.h index 44770b6ff8..eaa863c522 100644 --- a/dpdk/lib/librte_lpm/rte_lpm_sse.h +++ b/dpdk/lib/librte_lpm/rte_lpm_sse.h @@ -82,28 +82,28 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[0] = i8.u32[0] + - (uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[0] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]]; tbl[0] = *ptbl; } if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[1] = i8.u32[1] + - (uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[1] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]]; tbl[1] = *ptbl; } if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[2] = i8.u32[2] + - (uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[2] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]]; tbl[2] = *ptbl; } if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { i8.u32[3] = i8.u32[3] + - (uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + (tbl[3] & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]]; tbl[3] = *ptbl; } diff --git a/dpdk/lib/librte_mbuf/rte_mbuf_core.h b/dpdk/lib/librte_mbuf/rte_mbuf_core.h index 567551deab..9d1609336a 100644 --- a/dpdk/lib/librte_mbuf/rte_mbuf_core.h +++ b/dpdk/lib/librte_mbuf/rte_mbuf_core.h @@ -17,8 +17,9 @@ */ #include + #include -#include +#include #ifdef __cplusplus extern "C" { diff --git a/dpdk/lib/librte_mbuf/rte_mbuf_dyn.h b/dpdk/lib/librte_mbuf/rte_mbuf_dyn.h index d88e7bacc5..13f06d8ed2 100644 --- a/dpdk/lib/librte_mbuf/rte_mbuf_dyn.h +++ b/dpdk/lib/librte_mbuf/rte_mbuf_dyn.h @@ -66,7 +66,16 @@ * - any name that does not start with "rte_" in an application */ +#include +#include #include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + /** * Maximum length of the dynamic field or flag string. 
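
The LPM hunks above replace a (uint8_t) cast with a 0x00FFFFFF mask when computing the tbl8 offset: the tbl8 group index occupies the low 24 bits of the entry, so the cast silently wrapped once a table held more than 256 groups. A hedged sketch of the truncation, with the flag layout abbreviated from rte_lpm.h:

    /* Sketch of the truncation the 0x00FFFFFF mask fixes; the
     * 0x03000000 valid+ext flag layout is abbreviated from rte_lpm.h. */
    #include <stdint.h>
    #include <stdio.h>

    #define RTE_LPM_TBL8_GROUP_NUM_ENTRIES 256

    int
    main(void)
    {
        uint32_t entry = 0x03000000 | 300; /* valid+ext flags, group 300 */

        printf("cast : %u\n", (unsigned)
               ((uint8_t)entry * RTE_LPM_TBL8_GROUP_NUM_ENTRIES));
               /* 44 * 256 = 11264: wrong group */
        printf("mask : %u\n", (unsigned)
               ((entry & 0x00FFFFFF) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES));
               /* 300 * 256 = 76800: correct offset */
        return 0;
    }
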
*/ @@ -326,4 +335,8 @@ int rte_mbuf_dyn_rx_timestamp_register(int *field_offset, uint64_t *rx_flag); __rte_experimental int rte_mbuf_dyn_tx_timestamp_register(int *field_offset, uint64_t *tx_flag); +#ifdef __cplusplus +} #endif + +#endif /* _RTE_MBUF_DYN_H_ */ diff --git a/dpdk/lib/librte_mempool/rte_mempool.c b/dpdk/lib/librte_mempool/rte_mempool.c index b9f3fbd614..afb1239c8d 100644 --- a/dpdk/lib/librte_mempool/rte_mempool.c +++ b/dpdk/lib/librte_mempool/rte_mempool.c @@ -1167,7 +1167,7 @@ mempool_audit_cache(const struct rte_mempool *mp) for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { const struct rte_mempool_cache *cache; cache = &mp->local_cache[lcore_id]; - if (cache->len > cache->flushthresh) { + if (cache->len > RTE_DIM(cache->objs)) { RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n", lcore_id); rte_panic("MEMPOOL: invalid cache len\n"); diff --git a/dpdk/lib/librte_metrics/meson.build b/dpdk/lib/librte_metrics/meson.build index eed27b880a..28a8cc1155 100644 --- a/dpdk/lib/librte_metrics/meson.build +++ b/dpdk/lib/librte_metrics/meson.build @@ -4,7 +4,7 @@ sources = files('rte_metrics.c') headers = files('rte_metrics.h') -jansson = dependency('jansson', required: false) +jansson = dependency('jansson', required: false, method: 'pkg-config') if jansson.found() ext_deps += jansson sources += files('rte_metrics_telemetry.c') diff --git a/dpdk/lib/librte_metrics/rte_metrics_telemetry.c b/dpdk/lib/librte_metrics/rte_metrics_telemetry.c index 901cbeb0a3..b8ee56ef01 100644 --- a/dpdk/lib/librte_metrics/rte_metrics_telemetry.c +++ b/dpdk/lib/librte_metrics/rte_metrics_telemetry.c @@ -13,6 +13,8 @@ #include "rte_metrics.h" #include "rte_metrics_telemetry.h" +struct telemetry_metrics_data tel_met_data; + int metrics_log_level; /* Logging Macros */ diff --git a/dpdk/lib/librte_metrics/rte_metrics_telemetry.h b/dpdk/lib/librte_metrics/rte_metrics_telemetry.h index 3435a55425..5dbb32ca0c 100644 --- a/dpdk/lib/librte_metrics/rte_metrics_telemetry.h +++ b/dpdk/lib/librte_metrics/rte_metrics_telemetry.h @@ -34,8 +34,6 @@ struct telemetry_metrics_data { int metrics_register_done; }; -struct telemetry_metrics_data tel_met_data; - __rte_experimental int32_t rte_metrics_tel_reg_all_ethdev(int *metrics_register_done, int *reg_index_list); diff --git a/dpdk/lib/librte_net/rte_geneve.h b/dpdk/lib/librte_net/rte_geneve.h index bb67724c31..3bbc561847 100644 --- a/dpdk/lib/librte_net/rte_geneve.h +++ b/dpdk/lib/librte_net/rte_geneve.h @@ -12,6 +12,8 @@ */ #include +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/dpdk/lib/librte_node/rte_node_ip4_api.h b/dpdk/lib/librte_node/rte_node_ip4_api.h index eb9ebd5f89..46d0d8976b 100644 --- a/dpdk/lib/librte_node/rte_node_ip4_api.h +++ b/dpdk/lib/librte_node/rte_node_ip4_api.h @@ -21,6 +21,7 @@ extern "C" { #endif #include +#include /** * IP4 lookup next nodes. 
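
Moving the tel_met_data definition out of rte_metrics_telemetry.h and into the .c file above fixes the classic multiple-definition problem: every translation unit that included the header previously defined its own copy of the object. The pattern, condensed into a single-file sketch with the struct fields abbreviated:

    /* One-definition pattern; condensed to one file, fields abbreviated. */
    #include <stdio.h>

    struct telemetry_metrics_data {
        int metrics_register_done;
    };

    /* What belongs in the header: a declaration only. */
    extern struct telemetry_metrics_data tel_met_data;

    /* What belongs in exactly one .c file: the definition. */
    struct telemetry_metrics_data tel_met_data;

    int
    main(void)
    {
        printf("register done: %d\n", tel_met_data.metrics_register_done);
        return 0;
    }
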
diff --git a/dpdk/lib/librte_pipeline/rte_swx_ctl.h b/dpdk/lib/librte_pipeline/rte_swx_ctl.h index bab1894944..32815b69e2 100644 --- a/dpdk/lib/librte_pipeline/rte_swx_ctl.h +++ b/dpdk/lib/librte_pipeline/rte_swx_ctl.h @@ -15,6 +15,7 @@ extern "C" { #include #include +#include #include diff --git a/dpdk/lib/librte_pipeline/rte_swx_pipeline.h b/dpdk/lib/librte_pipeline/rte_swx_pipeline.h index d0a3439edf..f0a2cef777 100644 --- a/dpdk/lib/librte_pipeline/rte_swx_pipeline.h +++ b/dpdk/lib/librte_pipeline/rte_swx_pipeline.h @@ -15,6 +15,7 @@ extern "C" { #include #include +#include #include diff --git a/dpdk/lib/librte_port/rte_port.h b/dpdk/lib/librte_port/rte_port.h index 7f156ef47d..6b6a2cdd17 100644 --- a/dpdk/lib/librte_port/rte_port.h +++ b/dpdk/lib/librte_port/rte_port.h @@ -186,7 +186,7 @@ typedef int (*rte_port_out_op_tx)( */ typedef int (*rte_port_out_op_tx_bulk)( void *port, - struct rte_mbuf **pkt, + struct rte_mbuf **pkts, uint64_t pkts_mask); /** diff --git a/dpdk/lib/librte_port/rte_swx_port.h b/dpdk/lib/librte_port/rte_swx_port.h index 4beb59991f..ecf109d2ca 100644 --- a/dpdk/lib/librte_port/rte_swx_port.h +++ b/dpdk/lib/librte_port/rte_swx_port.h @@ -50,7 +50,7 @@ typedef void * /** * Input port free * - * @param[in] args + * @param[in] port * Input port handle. */ typedef void @@ -129,7 +129,7 @@ typedef void * /** * Output port free * - * @param[in] args + * @param[in] port * Output port handle. */ typedef void diff --git a/dpdk/lib/librte_power/channel_commands.h b/dpdk/lib/librte_power/channel_commands.h deleted file mode 100644 index adc8e5ca27..0000000000 --- a/dpdk/lib/librte_power/channel_commands.h +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation - */ - -#ifndef CHANNEL_COMMANDS_H_ -#define CHANNEL_COMMANDS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -/* --- Incoming messages --- */ - -/* Valid Commands */ -#define CPU_POWER 1 -#define CPU_POWER_CONNECT 2 -#define PKT_POLICY 3 -#define PKT_POLICY_REMOVE 4 - -/* CPU Power Command Scaling */ -#define CPU_POWER_SCALE_UP 1 -#define CPU_POWER_SCALE_DOWN 2 -#define CPU_POWER_SCALE_MAX 3 -#define CPU_POWER_SCALE_MIN 4 -#define CPU_POWER_ENABLE_TURBO 5 -#define CPU_POWER_DISABLE_TURBO 6 - -/* CPU Power Queries */ -#define CPU_POWER_QUERY_FREQ_LIST 7 -#define CPU_POWER_QUERY_FREQ 8 -#define CPU_POWER_QUERY_CAPS_LIST 9 -#define CPU_POWER_QUERY_CAPS 10 - -/* --- Outgoing messages --- */ - -/* Generic Power Command Response */ -#define CPU_POWER_CMD_ACK 1 -#define CPU_POWER_CMD_NACK 2 - -/* CPU Power Query Responses */ -#define CPU_POWER_FREQ_LIST 3 -#define CPU_POWER_CAPS_LIST 4 - -#define HOURS 24 - -#define MAX_VFS 10 -#define VM_MAX_NAME_SZ 32 - -#define MAX_VCPU_PER_VM 8 - -struct t_boost_status { - bool tbEnabled; -}; - -struct timer_profile { - int busy_hours[HOURS]; - int quiet_hours[HOURS]; - int hours_to_use_traffic_profile[HOURS]; -}; - -enum workload {HIGH, MEDIUM, LOW}; -enum policy_to_use { - TRAFFIC, - TIME, - WORKLOAD, - BRANCH_RATIO -}; - -struct traffic { - uint32_t min_packet_thresh; - uint32_t avg_max_packet_thresh; - uint32_t max_max_packet_thresh; -}; - -#define CORE_TYPE_VIRTUAL 0 -#define CORE_TYPE_PHYSICAL 1 - -struct channel_packet { - uint64_t resource_id; /**< core_num, device */ - uint32_t unit; /**< scale down/up/min/max */ - uint32_t command; /**< Power, IO, etc */ - char vm_name[VM_MAX_NAME_SZ]; - - uint64_t vfid[MAX_VFS]; - int nb_mac_to_monitor; - struct traffic traffic_policy; - uint8_t 
vcpu_to_control[MAX_VCPU_PER_VM]; - uint8_t num_vcpu; - struct timer_profile timer_policy; - bool core_type; - enum workload workload; - enum policy_to_use policy_to_use; - struct t_boost_status t_boost_status; -}; - -struct channel_packet_freq_list { - uint64_t resource_id; /**< core_num, device */ - uint32_t unit; /**< scale down/up/min/max */ - uint32_t command; /**< Power, IO, etc */ - char vm_name[VM_MAX_NAME_SZ]; - - uint32_t freq_list[MAX_VCPU_PER_VM]; - uint8_t num_vcpu; -}; - -struct channel_packet_caps_list { - uint64_t resource_id; /**< core_num, device */ - uint32_t unit; /**< scale down/up/min/max */ - uint32_t command; /**< Power, IO, etc */ - char vm_name[VM_MAX_NAME_SZ]; - - uint64_t turbo[MAX_VCPU_PER_VM]; - uint64_t priority[MAX_VCPU_PER_VM]; - uint8_t num_vcpu; -}; - - -#ifdef __cplusplus -} -#endif - -#endif /* CHANNEL_COMMANDS_H_ */ diff --git a/dpdk/lib/librte_power/guest_channel.c b/dpdk/lib/librte_power/guest_channel.c index 7b5926e5c4..2f7507a03c 100644 --- a/dpdk/lib/librte_power/guest_channel.c +++ b/dpdk/lib/librte_power/guest_channel.c @@ -15,9 +15,9 @@ #include +#include #include "guest_channel.h" -#include "channel_commands.h" #define RTE_LOGTYPE_GUEST_CHANNEL RTE_LOGTYPE_USER1 @@ -55,7 +55,7 @@ int guest_channel_host_connect(const char *path, unsigned int lcore_id) { int flags, ret; - struct channel_packet pkt; + struct rte_power_channel_packet pkt; char fd_path[PATH_MAX]; int fd = -1; @@ -100,7 +100,7 @@ guest_channel_host_connect(const char *path, unsigned int lcore_id) /* Send a test packet, this command is ignored by the host, but a successful * send indicates that the host endpoint is monitoring. */ - pkt.command = CPU_POWER_CONNECT; + pkt.command = RTE_POWER_CPU_POWER_CONNECT; global_fds[lcore_id] = fd; ret = guest_channel_send_msg(&pkt, lcore_id); if (ret != 0) { @@ -119,7 +119,8 @@ guest_channel_host_connect(const char *path, unsigned int lcore_id) } int -guest_channel_send_msg(struct channel_packet *pkt, unsigned int lcore_id) +guest_channel_send_msg(struct rte_power_channel_packet *pkt, + unsigned int lcore_id) { int ret, buffer_len = sizeof(*pkt); void *buffer = pkt; @@ -149,7 +150,7 @@ guest_channel_send_msg(struct channel_packet *pkt, unsigned int lcore_id) return 0; } -int rte_power_guest_channel_send_msg(struct channel_packet *pkt, +int rte_power_guest_channel_send_msg(struct rte_power_channel_packet *pkt, unsigned int lcore_id) { return guest_channel_send_msg(pkt, lcore_id); diff --git a/dpdk/lib/librte_power/guest_channel.h b/dpdk/lib/librte_power/guest_channel.h index e15db46fc7..43d532a5aa 100644 --- a/dpdk/lib/librte_power/guest_channel.h +++ b/dpdk/lib/librte_power/guest_channel.h @@ -8,8 +8,6 @@ extern "C" { #endif -#include - /** * Check if any Virtio-Serial VM end-points exist in path. * @@ -63,31 +61,16 @@ void guest_channel_host_disconnect(unsigned int lcore_id); * - Negative on channel not connected. * - errno on write to channel error. */ -int guest_channel_send_msg(struct channel_packet *pkt, unsigned int lcore_id); - -/** - * Send a message contained in pkt over the Virtio-Serial to the host endpoint. - * - * @param pkt - * Pointer to a populated struct channel_packet - * - * @param lcore_id - * lcore_id. - * - * @return - * - 0 on success. - * - Negative on error. 
- */ -int rte_power_guest_channel_send_msg(struct channel_packet *pkt, - unsigned int lcore_id); +int guest_channel_send_msg(struct rte_power_channel_packet *pkt, + unsigned int lcore_id); /** * Read a message contained in pkt over the Virtio-Serial * from the host endpoint. * * @param pkt - * Pointer to channel_packet or - * channel_packet_freq_list struct. + * Pointer to rte_power_channel_packet or + * rte_power_channel_packet_freq_list struct. * * @param pkt_len * Size of expected data packet. @@ -103,30 +86,6 @@ int power_guest_channel_read_msg(void *pkt, size_t pkt_len, unsigned int lcore_id); -/** - * Receive a message contained in pkt over the Virtio-Serial - * from the host endpoint. - * - * @param pkt - * Pointer to channel_packet or - * channel_packet_freq_list struct. - * - * @param pkt_len - * Size of expected data packet. - * - * @param lcore_id - * lcore_id. - * - * @return - * - 0 on success. - * - Negative on error. - */ -__rte_experimental -int -rte_power_guest_channel_receive_msg(void *pkt, - size_t pkt_len, - unsigned int lcore_id); - #ifdef __cplusplus } diff --git a/dpdk/lib/librte_power/meson.build b/dpdk/lib/librte_power/meson.build index 4b4cf1b90b..5415695281 100644 --- a/dpdk/lib/librte_power/meson.build +++ b/dpdk/lib/librte_power/meson.build @@ -10,5 +10,6 @@ sources = files('rte_power.c', 'power_acpi_cpufreq.c', 'rte_power_empty_poll.c', 'power_pstate_cpufreq.c', 'power_common.c') -headers = files('rte_power.h','rte_power_empty_poll.h') +headers = files('rte_power.h','rte_power_empty_poll.h', + 'rte_power_guest_channel.h') deps += ['timer'] diff --git a/dpdk/lib/librte_power/power_kvm_vm.c b/dpdk/lib/librte_power/power_kvm_vm.c index 409c3e03ab..ab7d4b8cee 100644 --- a/dpdk/lib/librte_power/power_kvm_vm.c +++ b/dpdk/lib/librte_power/power_kvm_vm.c @@ -6,14 +6,14 @@ #include +#include "rte_power_guest_channel.h" #include "guest_channel.h" -#include "channel_commands.h" #include "power_kvm_vm.h" #include "power_common.h" #define FD_PATH "/dev/virtio-ports/virtio.serial.port.poweragent" -static struct channel_packet pkt[RTE_MAX_LCORE]; +static struct rte_power_channel_packet pkt[RTE_MAX_LCORE]; int power_kvm_vm_check_supported(void) @@ -29,7 +29,7 @@ power_kvm_vm_init(unsigned int lcore_id) lcore_id, RTE_MAX_LCORE-1); return -1; } - pkt[lcore_id].command = CPU_POWER; + pkt[lcore_id].command = RTE_POWER_CPU_POWER; pkt[lcore_id].resource_id = lcore_id; return guest_channel_host_connect(FD_PATH, lcore_id); } @@ -90,25 +90,25 @@ send_msg(unsigned int lcore_id, uint32_t scale_direction) int power_kvm_vm_freq_up(unsigned int lcore_id) { - return send_msg(lcore_id, CPU_POWER_SCALE_UP); + return send_msg(lcore_id, RTE_POWER_SCALE_UP); } int power_kvm_vm_freq_down(unsigned int lcore_id) { - return send_msg(lcore_id, CPU_POWER_SCALE_DOWN); + return send_msg(lcore_id, RTE_POWER_SCALE_DOWN); } int power_kvm_vm_freq_max(unsigned int lcore_id) { - return send_msg(lcore_id, CPU_POWER_SCALE_MAX); + return send_msg(lcore_id, RTE_POWER_SCALE_MAX); } int power_kvm_vm_freq_min(unsigned int lcore_id) { - return send_msg(lcore_id, CPU_POWER_SCALE_MIN); + return send_msg(lcore_id, RTE_POWER_SCALE_MIN); } int @@ -121,13 +121,13 @@ power_kvm_vm_turbo_status(__rte_unused unsigned int lcore_id) int power_kvm_vm_enable_turbo(unsigned int lcore_id) { - return send_msg(lcore_id, CPU_POWER_ENABLE_TURBO); + return send_msg(lcore_id, RTE_POWER_ENABLE_TURBO); } int power_kvm_vm_disable_turbo(unsigned int lcore_id) { - return send_msg(lcore_id, CPU_POWER_DISABLE_TURBO); + return send_msg(lcore_id, 
RTE_POWER_DISABLE_TURBO); } struct rte_power_core_capabilities; diff --git a/dpdk/lib/librte_power/rte_power.h b/dpdk/lib/librte_power/rte_power.h index bbbde4dfb4..c8086bf6ba 100644 --- a/dpdk/lib/librte_power/rte_power.h +++ b/dpdk/lib/librte_power/rte_power.h @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { diff --git a/dpdk/lib/librte_power/rte_power_guest_channel.h b/dpdk/lib/librte_power/rte_power_guest_channel.h new file mode 100644 index 0000000000..ed4fbfdcd3 --- /dev/null +++ b/dpdk/lib/librte_power/rte_power_guest_channel.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2021 Intel Corporation + */ +#ifndef RTE_POWER_GUEST_CHANNEL_H +#define RTE_POWER_GUEST_CHANNEL_H + +#include +#include +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_POWER_MAX_VFS 10 +#define RTE_POWER_VM_MAX_NAME_SZ 32 +#define RTE_POWER_MAX_VCPU_PER_VM 8 +#define RTE_POWER_HOURS_PER_DAY 24 + +/* Valid Commands */ +#define RTE_POWER_CPU_POWER 1 +#define RTE_POWER_CPU_POWER_CONNECT 2 +#define RTE_POWER_PKT_POLICY 3 +#define RTE_POWER_PKT_POLICY_REMOVE 4 + +#define RTE_POWER_CORE_TYPE_VIRTUAL 0 +#define RTE_POWER_CORE_TYPE_PHYSICAL 1 + +/* CPU Power Command Scaling */ +#define RTE_POWER_SCALE_UP 1 +#define RTE_POWER_SCALE_DOWN 2 +#define RTE_POWER_SCALE_MAX 3 +#define RTE_POWER_SCALE_MIN 4 +#define RTE_POWER_ENABLE_TURBO 5 +#define RTE_POWER_DISABLE_TURBO 6 + +/* CPU Power Queries */ +#define RTE_POWER_QUERY_FREQ_LIST 7 +#define RTE_POWER_QUERY_FREQ 8 +#define RTE_POWER_QUERY_CAPS_LIST 9 +#define RTE_POWER_QUERY_CAPS 10 + +/* Generic Power Command Response */ +#define RTE_POWER_CMD_ACK 1 +#define RTE_POWER_CMD_NACK 2 + +/* CPU Power Query Responses */ +#define RTE_POWER_FREQ_LIST 3 +#define RTE_POWER_CAPS_LIST 4 + +struct rte_power_traffic_policy { + uint32_t min_packet_thresh; + uint32_t avg_max_packet_thresh; + uint32_t max_max_packet_thresh; +}; + +struct rte_power_timer_profile { + int busy_hours[RTE_POWER_HOURS_PER_DAY]; + int quiet_hours[RTE_POWER_HOURS_PER_DAY]; + int hours_to_use_traffic_profile[RTE_POWER_HOURS_PER_DAY]; +}; + +enum rte_power_workload_level { + RTE_POWER_WL_HIGH, + RTE_POWER_WL_MEDIUM, + RTE_POWER_WL_LOW +}; + +enum rte_power_policy { + RTE_POWER_POLICY_TRAFFIC, + RTE_POWER_POLICY_TIME, + RTE_POWER_POLICY_WORKLOAD, + RTE_POWER_POLICY_BRANCH_RATIO +}; + +struct rte_power_turbo_status { + bool tbEnabled; +}; + +struct rte_power_channel_packet { + uint64_t resource_id; /**< core_num, device */ + uint32_t unit; /**< scale down/up/min/max */ + uint32_t command; /**< Power, IO, etc */ + char vm_name[RTE_POWER_VM_MAX_NAME_SZ]; + + uint64_t vfid[RTE_POWER_MAX_VFS]; + int nb_mac_to_monitor; + struct rte_power_traffic_policy traffic_policy; + uint8_t vcpu_to_control[RTE_POWER_MAX_VCPU_PER_VM]; + uint8_t num_vcpu; + struct rte_power_timer_profile timer_policy; + bool core_type; + enum rte_power_workload_level workload; + enum rte_power_policy policy_to_use; + struct rte_power_turbo_status t_boost_status; +}; + +struct rte_power_channel_packet_freq_list { + uint64_t resource_id; /**< core_num, device */ + uint32_t unit; /**< scale down/up/min/max */ + uint32_t command; /**< Power, IO, etc */ + char vm_name[RTE_POWER_VM_MAX_NAME_SZ]; + + uint32_t freq_list[RTE_POWER_MAX_VCPU_PER_VM]; + uint8_t num_vcpu; +}; + +struct rte_power_channel_packet_caps_list { + uint64_t resource_id; /**< core_num, device */ + uint32_t unit; /**< scale down/up/min/max */ + uint32_t command; /**< Power, IO, etc */ + char 
vm_name[RTE_POWER_VM_MAX_NAME_SZ]; + + uint64_t turbo[RTE_POWER_MAX_VCPU_PER_VM]; + uint64_t priority[RTE_POWER_MAX_VCPU_PER_VM]; + uint8_t num_vcpu; +}; + +/** + * @internal + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Send a message contained in pkt over the Virtio-Serial to the host endpoint. + * + * @param pkt + * Pointer to a populated struct channel_packet. + * + * @param lcore_id + * Use channel specific to this lcore_id. + * + * @return + * - 0 on success. + * - Negative on error. + */ +__rte_experimental +int rte_power_guest_channel_send_msg(struct rte_power_channel_packet *pkt, + unsigned int lcore_id); + +/** + * @internal + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Receive a message contained in pkt over the Virtio-Serial + * from the host endpoint. + * + * @param pkt + * Pointer to channel_packet or + * channel_packet_freq_list struct. + * + * @param pkt_len + * Size of expected data packet. + * + * @param lcore_id + * Use channel specific to this lcore_id. + * + * @return + * - 0 on success. + * - Negative on error. + */ +__rte_experimental +int rte_power_guest_channel_receive_msg(void *pkt, + size_t pkt_len, + unsigned int lcore_id); + + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_POWER_GUEST_CHANNEL_H_ */ diff --git a/dpdk/lib/librte_power/version.map b/dpdk/lib/librte_power/version.map index 69ca9af616..13f0af3b2d 100644 --- a/dpdk/lib/librte_power/version.map +++ b/dpdk/lib/librte_power/version.map @@ -34,4 +34,8 @@ EXPERIMENTAL { rte_power_guest_channel_receive_msg; rte_power_poll_stat_fetch; rte_power_poll_stat_update; + + # added in 21.02 + rte_power_guest_channel_receive_msg; + rte_power_guest_channel_send_msg; }; diff --git a/dpdk/lib/librte_rawdev/rte_rawdev_pmd.h b/dpdk/lib/librte_rawdev/rte_rawdev_pmd.h index 34dd7181b4..b1bed13ee2 100644 --- a/dpdk/lib/librte_rawdev/rte_rawdev_pmd.h +++ b/dpdk/lib/librte_rawdev/rte_rawdev_pmd.h @@ -155,6 +155,8 @@ typedef int (*rawdev_info_get_t)(struct rte_rawdev *dev, * Raw device pointer * @param config * Void object containing device specific configuration + * @param config_size + * Size of the memory allocated for the configuration * * @return * Returns 0 on success @@ -214,6 +216,8 @@ typedef int (*rawdev_reset_t)(struct rte_rawdev *dev); * Raw device queue index * @param[out] queue_conf * Raw device queue configuration structure + * @param queue_conf_size + * Size of the memory allocated for the configuration * * @return * Returns 0 on success, negative errno on failure @@ -232,6 +236,8 @@ typedef int (*rawdev_queue_conf_get_t)(struct rte_rawdev *dev, * Rawqueue index * @param queue_conf * Rawqueue configuration structure + * @param queue_conf_size + * Size of the memory allocated for the configuration * * @return * Returns 0 on success. @@ -263,7 +269,7 @@ typedef int (*rawdev_queue_release_t)(struct rte_rawdev *dev, * This function helps in getting queue count supported, independently. It * can help in cases where iterator needs to be implemented. * - * @param + * @param dev * Raw device pointer * @return * Number of queues; 0 is assumed to be a valid response. 
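The new rte_power_guest_channel.h above promotes the old private channel_commands.h definitions to a public, RTE_POWER_-prefixed API. As a minimal sketch (not part of this patch) of how a guest application could use it, assuming the lcore's channel has already been connected (e.g. via rte_power_init() in KVM VM mode, as power_kvm_vm.c does above) and that the experimental API is enabled at build time:

    #include <string.h>
    #include <rte_power_guest_channel.h>

    /* Ask the host to raise the frequency of the core backing 'lcore_id'.
     * Mirrors what power_kvm_vm_freq_up() does internally. */
    static int
    request_scale_up(unsigned int lcore_id)
    {
        struct rte_power_channel_packet pkt;

        memset(&pkt, 0, sizeof(pkt));
        pkt.command = RTE_POWER_CPU_POWER;   /* power management request */
        pkt.unit = RTE_POWER_SCALE_UP;       /* requested scale direction */
        pkt.resource_id = lcore_id;          /* core the request targets */

        /* Returns 0 on success, negative if the channel is not connected. */
        return rte_power_guest_channel_send_msg(&pkt, lcore_id);
    }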
@@ -279,7 +285,7 @@ typedef uint16_t (*rawdev_queue_count_t)(struct rte_rawdev *dev); * * @param dev * Raw device pointer - * @param bufs + * @param buffers * array of buffers * @param count * number of buffers passed @@ -303,7 +309,7 @@ typedef int (*rawdev_enqueue_bufs_t)(struct rte_rawdev *dev, * * @param dev * Raw device pointer - * @param bufs + * @param buffers * array of buffers * @param count * Max buffers expected to be dequeued @@ -444,7 +450,7 @@ typedef uint64_t (*rawdev_xstats_get_by_name_t)(const struct rte_rawdev *dev, * * @param dev * Raw device pointer - * @param status + * @param status_info * void block containing device specific status information * @return * 0 for success, @@ -472,8 +478,8 @@ typedef int (*rawdev_firmware_version_get_t)(struct rte_rawdev *dev, * * @param dev * Raw device pointer - * @param firmware_file - * file pointer to firmware area + * @param firmware_buf + * Pointer to firmware image * @return * >0, ~0: for successful load * <0: for failure diff --git a/dpdk/lib/librte_rib/rte_rib.c b/dpdk/lib/librte_rib/rte_rib.c index 2a370d7f84..6c29e1c49a 100644 --- a/dpdk/lib/librte_rib/rte_rib.c +++ b/dpdk/lib/librte_rib/rte_rib.c @@ -301,7 +301,7 @@ rte_rib_insert(struct rte_rib *rib, uint32_t ip, uint8_t depth) /* closest node found, new_node should be inserted in the middle */ common_depth = RTE_MIN(depth, (*tmp)->depth); common_prefix = ip ^ (*tmp)->ip; - d = __builtin_clz(common_prefix); + d = (common_prefix == 0) ? 32 : __builtin_clz(common_prefix); common_depth = RTE_MIN(d, common_depth); common_prefix = ip & rte_rib_depth_to_mask(common_depth); diff --git a/dpdk/lib/librte_rib/rte_rib.h b/dpdk/lib/librte_rib/rte_rib.h index f80752e5bd..ec97079c35 100644 --- a/dpdk/lib/librte_rib/rte_rib.h +++ b/dpdk/lib/librte_rib/rte_rib.h @@ -18,6 +18,9 @@ * Level compressed tree implementation for IPv4 Longest Prefix Match */ +#include +#include + #include #ifdef __cplusplus diff --git a/dpdk/lib/librte_rib/rte_rib6.h b/dpdk/lib/librte_rib/rte_rib6.h index b5e10569b9..dbd52928a2 100644 --- a/dpdk/lib/librte_rib/rte_rib6.h +++ b/dpdk/lib/librte_rib/rte_rib6.h @@ -20,6 +20,7 @@ #include #include +#include #ifdef __cplusplus extern "C" { diff --git a/dpdk/lib/librte_security/rte_security_driver.h b/dpdk/lib/librte_security/rte_security_driver.h index c5abb07990..938373205c 100644 --- a/dpdk/lib/librte_security/rte_security_driver.h +++ b/dpdk/lib/librte_security/rte_security_driver.h @@ -41,7 +41,7 @@ typedef int (*security_session_create_t)(void *device, /** * Free driver private session data. * - * @param dev Crypto/eth device pointer + * @param device Crypto/eth device pointer * @param sess Security session structure */ typedef int (*security_session_destroy_t)(void *device, @@ -95,16 +95,17 @@ int rte_security_dynfield_register(void); /** * Update the mbuf with provided metadata. * + * @param device Crypto/eth device pointer * @param sess Security session structure * @param mb Packet buffer - * @param mt Metadata + * @param params Metadata * * @return * - Returns 0 if metadata updated successfully. * - Returns -ve value for errors. 
*/ typedef int (*security_set_pkt_metadata_t)(void *device, - struct rte_security_session *sess, struct rte_mbuf *m, + struct rte_security_session *sess, struct rte_mbuf *mb, void *params); /** diff --git a/dpdk/lib/librte_table/rte_lru_x86.h b/dpdk/lib/librte_table/rte_lru_x86.h index 0e24906c2c..38476d956e 100644 --- a/dpdk/lib/librte_table/rte_lru_x86.h +++ b/dpdk/lib/librte_table/rte_lru_x86.h @@ -12,6 +12,7 @@ extern "C" { #include #include +#include #ifndef RTE_TABLE_HASH_LRU_STRATEGY #define RTE_TABLE_HASH_LRU_STRATEGY 2 diff --git a/dpdk/lib/librte_table/rte_swx_table.h b/dpdk/lib/librte_table/rte_swx_table.h index dc434b72ef..5a3137ec53 100644 --- a/dpdk/lib/librte_table/rte_swx_table.h +++ b/dpdk/lib/librte_table/rte_swx_table.h @@ -127,12 +127,6 @@ typedef uint64_t * progress and it is passed as a parameter to the lookup operation. This allows * for multiple concurrent lookup operations into the same table. * - * @param[in] params - * Table creation parameters. - * @param[in] entries - * Entries to be added to the table at creation time. - * @param[in] args - * Any additional table create arguments. It may be NULL. * @return * Table memory footprint in bytes, on success, or zero, on error. */ diff --git a/dpdk/lib/librte_table/rte_table.h b/dpdk/lib/librte_table/rte_table.h index cccded1a1c..096ab8a7c8 100644 --- a/dpdk/lib/librte_table/rte_table.h +++ b/dpdk/lib/librte_table/rte_table.h @@ -129,7 +129,7 @@ typedef int (*rte_table_op_entry_delete)( * * @param table * Handle to lookup table instance - * @param key + * @param keys * Array containing lookup keys * @param entries * Array containing data to be associated with each key. Every item in the @@ -166,7 +166,7 @@ typedef int (*rte_table_op_entry_add_bulk)( * * @param table * Handle to lookup table instance - * @param key + * @param keys * Array containing lookup keys * @param n_keys * Number of keys to delete diff --git a/dpdk/lib/librte_telemetry/rte_telemetry.h b/dpdk/lib/librte_telemetry/rte_telemetry.h index 4693275c24..76172222c9 100644 --- a/dpdk/lib/librte_telemetry/rte_telemetry.h +++ b/dpdk/lib/librte_telemetry/rte_telemetry.h @@ -4,7 +4,9 @@ #include #include + #include +#include #ifndef _RTE_TELEMETRY_H_ #define _RTE_TELEMETRY_H_ diff --git a/dpdk/lib/librte_vhost/rte_vdpa.h b/dpdk/lib/librte_vhost/rte_vdpa.h index f074ec0c4a..1437f400bf 100644 --- a/dpdk/lib/librte_vhost/rte_vdpa.h +++ b/dpdk/lib/librte_vhost/rte_vdpa.h @@ -11,6 +11,8 @@ * Device specific vhost lib */ +#include + /** Maximum name length for statistics counters */ #define RTE_VDPA_STATS_NAME_SIZE 64 diff --git a/dpdk/lib/librte_vhost/rte_vdpa_dev.h b/dpdk/lib/librte_vhost/rte_vdpa_dev.h index a60183f780..bfada387b0 100644 --- a/dpdk/lib/librte_vhost/rte_vdpa_dev.h +++ b/dpdk/lib/librte_vhost/rte_vdpa_dev.h @@ -8,6 +8,7 @@ #include #include "rte_vhost.h" +#include "rte_vdpa.h" #define RTE_VHOST_QUEUE_ALL UINT16_MAX diff --git a/dpdk/lib/librte_vhost/rte_vhost_crypto.h b/dpdk/lib/librte_vhost/rte_vhost_crypto.h index c809c46a21..8531757285 100644 --- a/dpdk/lib/librte_vhost/rte_vhost_crypto.h +++ b/dpdk/lib/librte_vhost/rte_vhost_crypto.h @@ -5,6 +5,14 @@ #ifndef _VHOST_CRYPTO_H_ #define _VHOST_CRYPTO_H_ +#include + +#include + +/* pre-declare structs to avoid including full headers */ +struct rte_mempool; +struct rte_crypto_op; + #define VHOST_CRYPTO_MBUF_POOL_SIZE (8192) #define VHOST_CRYPTO_MAX_BURST_SIZE (64) #define VHOST_CRYPTO_MAX_DATA_SIZE (4096) diff --git a/dpdk/lib/librte_vhost/vhost.c b/dpdk/lib/librte_vhost/vhost.c index 
b83cf639eb..4de588d752 100644 --- a/dpdk/lib/librte_vhost/vhost.c +++ b/dpdk/lib/librte_vhost/vhost.c @@ -26,6 +26,7 @@ #include "vhost_user.h" struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; +pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER; /* Called with iotlb_lock read-locked */ uint64_t @@ -645,6 +646,7 @@ vhost_new_device(void) struct virtio_net *dev; int i; + pthread_mutex_lock(&vhost_dev_lock); for (i = 0; i < MAX_VHOST_DEVICE; i++) { if (vhost_devices[i] == NULL) break; @@ -653,6 +655,7 @@ vhost_new_device(void) if (i == MAX_VHOST_DEVICE) { VHOST_LOG_CONFIG(ERR, "Failed to find a free slot for new device.\n"); + pthread_mutex_unlock(&vhost_dev_lock); return -1; } @@ -660,10 +663,13 @@ vhost_new_device(void) if (dev == NULL) { VHOST_LOG_CONFIG(ERR, "Failed to allocate memory for new dev.\n"); + pthread_mutex_unlock(&vhost_dev_lock); return -1; } vhost_devices[i] = dev; + pthread_mutex_unlock(&vhost_dev_lock); + dev->vid = i; dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET; dev->slave_req_fd = -1; diff --git a/dpdk/lib/librte_vhost/virtio_net.c b/dpdk/lib/librte_vhost/virtio_net.c index 6c5128665e..55bfc161b5 100644 --- a/dpdk/lib/librte_vhost/virtio_net.c +++ b/dpdk/lib/librte_vhost/virtio_net.c @@ -2232,7 +2232,6 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev, { bool wrap = vq->avail_wrap_counter; struct vring_packed_desc *descs = vq->desc_packed; - struct virtio_net_hdr *hdr; uint64_t lens[PACKED_BATCH_SIZE]; uint64_t buf_lens[PACKED_BATCH_SIZE]; uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); @@ -2289,13 +2288,6 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev, ids[i] = descs[avail_idx + i].id; } - if (virtio_net_with_host_offload(dev)) { - vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { - hdr = (struct virtio_net_hdr *)(desc_addrs[i]); - vhost_dequeue_offload(hdr, pkts[i]); - } - } - return 0; free_buf: @@ -2313,6 +2305,7 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev, { uint16_t avail_idx = vq->last_avail_idx; uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf); + struct virtio_net_hdr *hdr; uintptr_t desc_addrs[PACKED_BATCH_SIZE]; uint16_t ids[PACKED_BATCH_SIZE]; uint16_t i; @@ -2329,6 +2322,13 @@ virtio_dev_tx_batch_packed(struct virtio_net *dev, (void *)(uintptr_t)(desc_addrs[i] + buf_offset), pkts[i]->pkt_len); + if (virtio_net_with_host_offload(dev)) { + vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) { + hdr = (struct virtio_net_hdr *)(desc_addrs[i]); + vhost_dequeue_offload(hdr, pkts[i]); + } + } + if (virtio_net_is_inorder(dev)) vhost_shadow_dequeue_batch_packed_inorder(vq, ids[PACKED_BATCH_SIZE - 1]); diff --git a/dpdk/license/bsd-2-clause.txt b/dpdk/license/bsd-2-clause.txt new file mode 100644 index 0000000000..dfb3f1adea --- /dev/null +++ b/dpdk/license/bsd-2-clause.txt @@ -0,0 +1,20 @@ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/dpdk/license/isc.txt b/dpdk/license/isc.txt new file mode 100644 index 0000000000..34a6a760d5 --- /dev/null +++ b/dpdk/license/isc.txt @@ -0,0 +1,11 @@ +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD +TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR +CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +OF THIS SOFTWARE. diff --git a/dpdk/license/mit.txt b/dpdk/license/mit.txt new file mode 100644 index 0000000000..c4037a4605 --- /dev/null +++ b/dpdk/license/mit.txt @@ -0,0 +1,18 @@ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
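In the vhost changes earlier in this patch, vhost_new_device() now holds vhost_dev_lock across the free-slot scan and the slot publication, closing a race where two threads could claim the same vid. A hedged sketch of the pattern (hypothetical names, not the OVS/DPDK code):

    #include <pthread.h>
    #include <stdlib.h>

    #define MAX_DEVICES 1024

    static void *devices[MAX_DEVICES];
    static pthread_mutex_t dev_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Returns the claimed slot index, or -1 on a full table or a failed
     * allocation.  The mutex covers both the scan and the publication,
     * so no two threads can pick the same free slot. */
    static int
    new_device(size_t size)
    {
        int i;
        void *dev;

        pthread_mutex_lock(&dev_lock);
        for (i = 0; i < MAX_DEVICES; i++) {
            if (devices[i] == NULL) {
                break;
            }
        }
        if (i == MAX_DEVICES) {
            pthread_mutex_unlock(&dev_lock);
            return -1;
        }
        dev = calloc(1, size);
        if (dev == NULL) {
            pthread_mutex_unlock(&dev_lock);
            return -1;
        }
        devices[i] = dev;
        pthread_mutex_unlock(&dev_lock);
        return i;
    }

Note the unlock on every error path, which is exactly what the new early pthread_mutex_unlock() calls in vhost_new_device() provide.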
diff --git a/dpdk/usertools/dpdk-devbind.py b/dpdk/usertools/dpdk-devbind.py index c2ede3d4df..98bd1b7e4d 100755 --- a/dpdk/usertools/dpdk-devbind.py +++ b/dpdk/usertools/dpdk-devbind.py @@ -7,6 +7,7 @@ import os import subprocess import argparse +import platform from glob import glob from os.path import exists, basename @@ -107,7 +108,17 @@ def module_is_loaded(module): loaded_modules = sysfs_mods - return module in sysfs_mods + # add built-in modules as loaded + release = platform.uname().release + filename = os.path.join("/lib/modules/", release, "modules.builtin") + if os.path.exists(filename): + try: + with open(filename) as f: + loaded_modules += [os.path.splitext(os.path.basename(mod))[0] for mod in f] + except IOError: + print("Warning: cannot read list of built-in kernel modules") + + return module in loaded_modules def check_modules(): diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index b0a5ce8bec..bc51a5767f 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -1,7 +1,7 @@ #ifndef __LINUX_PKT_CLS_WRAPPER_H #define __LINUX_PKT_CLS_WRAPPER_H 1 -#if defined(__KERNEL__) || defined(HAVE_TCA_ACT_FLAGS) +#if defined(__KERNEL__) || defined(HAVE_TCA_FLOWER_KEY_CT_FLAGS_REPLY) #include_next #else @@ -255,6 +255,9 @@ enum { TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED = 1 << 1, /* Part of an existing connection. */ TCA_FLOWER_KEY_CT_FLAGS_RELATED = 1 << 2, /* Related to an established connection. */ TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */ + TCA_FLOWER_KEY_CT_FLAGS_INVALID = 1 << 4, /* Conntrack is invalid. */ + TCA_FLOWER_KEY_CT_FLAGS_REPLY = 1 << 5, /* Packet is in the reply direction. */ + __TCA_FLOWER_KEY_CT_FLAGS_MAX, }; enum { diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in index 64111768b3..668507fd37 100755 --- a/ipsec/ovs-monitor-ipsec.in +++ b/ipsec/ovs-monitor-ipsec.in @@ -14,6 +14,7 @@ # limitations under the License. 
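The pkt_cls.h update above adds TCA_FLOWER_KEY_CT_FLAGS_INVALID and TCA_FLOWER_KEY_CT_FLAGS_REPLY, which lets a TC flower match express OVS ct_state combinations such as "+trk+est-rpl". A small illustrative helper (hypothetical, not OVS code) showing how such a state maps onto a key/mask pair built from these flags:

    #include <stdint.h>
    #include <linux/pkt_cls.h>

    struct ct_flags_match {
        uint32_t key;    /* required value of the selected bits */
        uint32_t mask;   /* which bits participate in the match */
    };

    /* Encode "+trk+est-rpl": tracked and established must be set, and the
     * new REPLY bit must be clear; all other CT flags are don't-care. */
    static struct ct_flags_match
    encode_trk_est_orig(void)
    {
        struct ct_flags_match m = { 0, 0 };

        m.key |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED
                 | TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
        m.mask |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED
                  | TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;

        /* "-rpl": bit is matched (set in mask) with value zero (clear in key). */
        m.mask |= TCA_FLOWER_KEY_CT_FLAGS_REPLY;

        return m;
    }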
import argparse +import ipaddress import re import subprocess import sys @@ -413,6 +414,11 @@ conn prevent_unencrypted_vxlan leftprotoport=udp/4789 mark={0} +""" + + IPV6_CONN = """\ + hostaddrfamily=ipv6 + clientaddrfamily=ipv6 """ auth_tmpl = {"psk": Template("""\ @@ -520,6 +526,9 @@ conn prevent_unencrypted_vxlan else: auth_section = self.auth_tmpl["pki_ca"].substitute(tunnel.conf) + if tunnel.conf["address_family"] == "IPv6": + auth_section = self.IPV6_CONN + auth_section + vals = tunnel.conf.copy() vals["auth_section"] = auth_section vals["version"] = tunnel.version @@ -756,6 +765,7 @@ class IPsecTunnel(object): Tunnel Type: $tunnel_type Local IP: $local_ip Remote IP: $remote_ip + Address Family: $address_family SKB mark: $skb_mark Local cert: $certificate Local name: $local_name @@ -797,6 +807,9 @@ class IPsecTunnel(object): "tunnel_type": row.type, "local_ip": options.get("local_ip", "%defaultroute"), "remote_ip": options.get("remote_ip"), + "address_family": self._get_conn_address_family( + options.get("remote_ip"), + options.get("local_ip")), "skb_mark": monitor.conf["skb_mark"], "certificate": monitor.conf["pki"]["certificate"], "private_key": monitor.conf["pki"]["private_key"], @@ -865,6 +878,17 @@ class IPsecTunnel(object): return header + conf + status + spds + sas + cons + "\n" + def _get_conn_address_family(self, remote_ip, local_ip): + remote = address_family(remote_ip) + local = address_family(local_ip) + + if local is None: + return remote + elif local != remote: + return None + else: + return remote + def _is_valid_tunnel_conf(self): """This function verifies if IPsec tunnel has valid configuration set in 'conf'. If it is valid, then it returns True. Otherwise, @@ -1120,6 +1144,19 @@ class IPsecMonitor(object): return m.group(1) +def address_family(address): + try: + ip = ipaddress.ip_address(address) + ipstr = str(type(ip)) + # ipaddress has inconsistencies with what exceptions are raised: + # https://mail.openvswitch.org/pipermail/ovs-dev/2021-April/381696.html + except (ValueError, ipaddress.AddressValueError): + return None + if ipstr.find('v6') != -1: + return "IPv6" + return "IPv4" + + def unixctl_xfrm_policies(conn, unused_argv, unused_aux): global xfrm policies = xfrm.get_policies() diff --git a/lib/conntrack.c b/lib/conntrack.c index feaaec1c3f..15d1cde79d 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -46,6 +46,7 @@ VLOG_DEFINE_THIS_MODULE(conntrack); COVERAGE_DEFINE(conntrack_full); COVERAGE_DEFINE(conntrack_long_cleanup); COVERAGE_DEFINE(conntrack_l4csum_err); +COVERAGE_DEFINE(conntrack_lookup_natted_miss); struct conn_lookup_ctx { struct conn_key key; @@ -291,6 +292,11 @@ conntrack_init(void) static struct ovsthread_once setup_l4_once = OVSTHREAD_ONCE_INITIALIZER; struct conntrack *ct = xzalloc(sizeof *ct); + /* This value can be used during init (e.g. timeout_policy_init()), + * set it first to ensure it is available. 
+ */ + ct->hash_basis = random_uint32(); + ovs_rwlock_init(&ct->resources_lock); ovs_rwlock_wrlock(&ct->resources_lock); hmap_init(&ct->alg_expectations); @@ -308,7 +314,6 @@ conntrack_init(void) timeout_policy_init(ct); ovs_mutex_unlock(&ct->ct_lock); - ct->hash_basis = random_uint32(); atomic_count_init(&ct->n_conn, 0); atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT); atomic_init(&ct->tcp_seq_chk, true); @@ -1281,6 +1286,34 @@ process_one_fast(uint16_t zone, const uint32_t *setmark, } } +static void +initial_conn_lookup(struct conntrack *ct, struct conn_lookup_ctx *ctx, + long long now, bool natted) +{ + if (natted) { + /* If the packet has been already natted (e.g. a previous + * action took place), retrieve it performing a lookup of its + * reverse key. */ + conn_key_reverse(&ctx->key); + } + + conn_key_lookup(ct, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply); + + if (natted) { + if (OVS_LIKELY(ctx->conn)) { + ctx->reply = !ctx->reply; + ctx->key = ctx->reply ? ctx->conn->rev_key : ctx->conn->key; + ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); + } else { + /* A lookup failure does not necessarily imply that an + * error occurred, it may simply indicate that a conn got + * removed during the recirculation. */ + COVERAGE_INC(conntrack_lookup_natted_miss); + conn_key_reverse(&ctx->key); + } + } +} + static void process_one(struct conntrack *ct, struct dp_packet *pkt, struct conn_lookup_ctx *ctx, uint16_t zone, @@ -1296,7 +1329,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, } bool create_new_conn = false; - conn_key_lookup(ct, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply); + initial_conn_lookup(ct, ctx, now, !!(pkt->md.ct_state & + (CS_SRC_NAT | CS_DST_NAT))); struct conn *conn = ctx->conn; /* Delete found entry if in wrong direction. 'force' implies commit. */ @@ -1669,15 +1703,22 @@ static inline bool checksum_valid(const struct conn_key *key, const void *data, size_t size, const void *l3) { + bool valid; + if (key->dl_type == htons(ETH_TYPE_IP)) { uint32_t csum = packet_csum_pseudoheader(l3); - return csum_finish(csum_continue(csum, data, size)) == 0; + valid = (csum_finish(csum_continue(csum, data, size)) == 0); } else if (key->dl_type == htons(ETH_TYPE_IPV6)) { - return packet_csum_upperlayer6(l3, data, key->nw_proto, size) == 0; + valid = (packet_csum_upperlayer6(l3, data, key->nw_proto, size) == 0); } else { + valid = false; + } + + if (!valid) { COVERAGE_INC(conntrack_l4csum_err); - return false; } + + return valid; } static inline bool @@ -2076,6 +2117,8 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); return true; } + } else { + COVERAGE_INC(conntrack_l4csum_err); } } diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c index 6a5ba052dd..cfc2315e3d 100644 --- a/lib/ct-dpif.c +++ b/lib/ct-dpif.c @@ -889,3 +889,11 @@ ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, dpif, tp_id, dl_type, nw_proto, tp_name, is_generic) : EOPNOTSUPP); } + +int +ct_dpif_get_features(struct dpif *dpif, enum ct_features *features) +{ + return (dpif->dpif_class->ct_get_features + ? dpif->dpif_class->ct_get_features(dpif, features) + : EOPNOTSUPP); +} diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h index 88f4c7e28c..b59cba962a 100644 --- a/lib/ct-dpif.h +++ b/lib/ct-dpif.h @@ -271,6 +271,11 @@ struct ct_dpif_timeout_policy { * timeout attribute values */ }; +/* Conntrack Features. */ +enum ct_features { + CONNTRACK_F_ZERO_SNAT = 1 << 0, /* All-zero SNAT support. 
*/ +}; + int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **, const uint16_t *zone, int *); int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *); @@ -325,5 +330,6 @@ int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state); int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, uint16_t dl_type, uint8_t nw_proto, char **tp_name, bool *is_generic); +int ct_dpif_get_features(struct dpif *dpif, enum ct_features *features); #endif /* CT_DPIF_H */ diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c index ae59ecf2c2..34d45b82a1 100644 --- a/lib/daemon-unix.c +++ b/lib/daemon-unix.c @@ -285,6 +285,7 @@ fork_and_wait_for_startup(int *fdp, pid_t *child_pid) VLOG_ERR("fork child died before signaling startup (%s)", status_msg); ret = -1; + free(status_msg); } } else if (retval < 0) { VLOG_FATAL("waitpid failed (%s)", ovs_strerror(errno)); diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 9e2d06b3dd..4e02425f7c 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -199,6 +199,7 @@ struct dp_packet *dp_packet_clone_data_with_headroom(const void *, size_t, void dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom); static inline void dp_packet_delete(struct dp_packet *); +static inline void dp_packet_swap(struct dp_packet *, struct dp_packet *); static inline void *dp_packet_at(const struct dp_packet *, size_t offset, size_t size); @@ -256,6 +257,18 @@ dp_packet_delete(struct dp_packet *b) } } +/* Swaps content of two packets. */ +static inline void +dp_packet_swap(struct dp_packet *a, struct dp_packet *b) +{ + ovs_assert(a->source == DPBUF_MALLOC || a->source == DPBUF_STUB); + ovs_assert(b->source == DPBUF_MALLOC || b->source == DPBUF_STUB); + struct dp_packet c = *a; + + *a = *b; + *b = c; +} + /* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to * byte 'offset'. Otherwise, returns a null pointer. */ static inline void * @@ -726,7 +739,6 @@ enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */ struct dp_packet_batch { size_t count; bool trunc; /* true if the batch needs truncate. */ - bool do_not_steal; /* Indicate that the packets should not be stolen. 
*/ struct dp_packet *packets[NETDEV_MAX_BURST]; }; @@ -735,7 +747,6 @@ dp_packet_batch_init(struct dp_packet_batch *batch) { batch->count = 0; batch->trunc = false; - batch->do_not_steal = false; } static inline void diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c index b7d577870d..fe24f9abdf 100644 --- a/lib/dpdk-stub.c +++ b/lib/dpdk-stub.c @@ -83,7 +83,7 @@ bool dpdk_get_cpu_has_isa(const char *arch OVS_UNUSED, const char *feature OVS_UNUSED) { - VLOG_ERR_ONCE("DPDK not supported in this version of Open vSwitch, " + VLOG_DBG_ONCE("DPDK not supported in this version of Open vSwitch, " "cannot use CPU flag based optimizations"); return false; } diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 4381c618f1..a07a34b89a 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -279,8 +279,9 @@ static bool dpcls_lookup(struct dpcls *cls, ( 1 << OFPMBT13_DROP ) struct dp_meter_band { - struct ofputil_meter_band up; /* type, prec_level, pad, rate, burst_size */ - uint32_t bucket; /* In 1/1000 packets (for PKTPS), or in bits (for KBPS) */ + uint32_t rate; + uint32_t burst_size; + uint64_t bucket; /* In 1/1000 packets (for PKTPS), or in bits (for KBPS) */ uint64_t packet_count; uint64_t byte_count; }; @@ -1338,19 +1339,21 @@ dpif_netdev_subtable_lookup_get(struct unixctl_conn *conn, int argc OVS_UNUSED, } static void -dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc, +dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { /* This function requires 2 parameters (argv[1] and argv[2]) to execute. * argv[1] is subtable name * argv[2] is priority - * argv[3] is the datapath name (optional if only 1 datapath exists) */ const char *func_name = argv[1]; errno = 0; char *err_char; uint32_t new_prio = strtoul(argv[2], &err_char, 10); + uint32_t lookup_dpcls_changed = 0; + uint32_t lookup_subtable_changed = 0; + struct shash_node *node; if (errno != 0 || new_prio > UINT8_MAX) { unixctl_command_reply_error(conn, "error converting priority, use integer in range 0-255\n"); @@ -1364,58 +1367,43 @@ dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc, return; } - /* argv[3] is optional datapath instance. If no datapath name is provided - * and only one datapath exists, the one existing datapath is reprobed. - */ ovs_mutex_lock(&dp_netdev_mutex); - struct dp_netdev *dp = NULL; - - if (argc == 4) { - dp = shash_find_data(&dp_netdevs, argv[3]); - } else if (shash_count(&dp_netdevs) == 1) { - dp = shash_first(&dp_netdevs)->data; - } - - if (!dp) { - ovs_mutex_unlock(&dp_netdev_mutex); - unixctl_command_reply_error(conn, - "please specify an existing datapath"); - return; - } + SHASH_FOR_EACH (node, &dp_netdevs) { + struct dp_netdev *dp = node->data; - /* Get PMD threads list, required to get DPCLS instances. */ - size_t n; - uint32_t lookup_dpcls_changed = 0; - uint32_t lookup_subtable_changed = 0; - struct dp_netdev_pmd_thread **pmd_list; - sorted_poll_thread_list(dp, &pmd_list, &n); + /* Get PMD threads list, required to get DPCLS instances. */ + size_t n; + struct dp_netdev_pmd_thread **pmd_list; + sorted_poll_thread_list(dp, &pmd_list, &n); - /* take port mutex as HMAP iters over them. */ - ovs_mutex_lock(&dp->port_mutex); + /* take port mutex as HMAP iters over them. 
*/ + ovs_mutex_lock(&dp->port_mutex); - for (size_t i = 0; i < n; i++) { - struct dp_netdev_pmd_thread *pmd = pmd_list[i]; - if (pmd->core_id == NON_PMD_CORE_ID) { - continue; - } - - struct dp_netdev_port *port = NULL; - HMAP_FOR_EACH (port, node, &dp->ports) { - odp_port_t in_port = port->port_no; - struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port); - if (!cls) { + for (size_t i = 0; i < n; i++) { + struct dp_netdev_pmd_thread *pmd = pmd_list[i]; + if (pmd->core_id == NON_PMD_CORE_ID) { continue; } - uint32_t subtbl_changes = dpcls_subtable_lookup_reprobe(cls); - if (subtbl_changes) { - lookup_dpcls_changed++; - lookup_subtable_changed += subtbl_changes; + + struct dp_netdev_port *port = NULL; + HMAP_FOR_EACH (port, node, &dp->ports) { + odp_port_t in_port = port->port_no; + struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port); + if (!cls) { + continue; + } + uint32_t subtbl_changes = dpcls_subtable_lookup_reprobe(cls); + if (subtbl_changes) { + lookup_dpcls_changed++; + lookup_subtable_changed += subtbl_changes; + } } } - } - /* release port mutex before netdev mutex. */ - ovs_mutex_unlock(&dp->port_mutex); + /* release port mutex before netdev mutex. */ + ovs_mutex_unlock(&dp->port_mutex); + free(pmd_list); + } ovs_mutex_unlock(&dp_netdev_mutex); struct ds reply = DS_EMPTY_INITIALIZER; @@ -1644,8 +1632,8 @@ dpif_netdev_init(void) 0, 1, dpif_netdev_bond_show, NULL); unixctl_command_register("dpif-netdev/subtable-lookup-prio-set", - "[lookup_func] [prio] [dp]", - 2, 3, dpif_netdev_subtable_lookup_set, + "[lookup_func] [prio]", + 2, 2, dpif_netdev_subtable_lookup_set, NULL); unixctl_command_register("dpif-netdev/subtable-lookup-prio-get", "", 0, 0, dpif_netdev_subtable_lookup_get, @@ -2569,18 +2557,6 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd, return ret; } -static void -flow_mark_flush(struct dp_netdev_pmd_thread *pmd) -{ - struct dp_netdev_flow *flow; - - CMAP_FOR_EACH (flow, mark_node, &flow_mark.mark_to_flow) { - if (flow->pmd_id == pmd->core_id) { - queue_netdev_flow_del(pmd, flow); - } - } -} - static struct dp_netdev_flow * mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd, const uint32_t mark) @@ -2659,7 +2635,8 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload) struct dp_netdev_flow *flow = offload->flow; odp_port_t in_port = flow->flow.in_port.odp_port; const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type); - bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD; + bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD + && flow->mark != INVALID_FLOW_MARK; struct offload_info info; struct netdev *port; uint32_t mark; @@ -2671,7 +2648,6 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload) if (modification) { mark = flow->mark; - ovs_assert(mark != INVALID_FLOW_MARK); } else { /* * If a mega flow has already been offloaded (from other PMD @@ -2798,10 +2774,9 @@ queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd, static void queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_flow *flow, struct match *match, - const struct nlattr *actions, size_t actions_len) + const struct nlattr *actions, size_t actions_len, int op) { struct dp_flow_offload_item *offload; - int op; if (!netdev_is_flow_api_enabled()) { return; @@ -2814,11 +2789,6 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, ovsthread_once_done(&offload_thread_once); } - if (flow->mark != INVALID_FLOW_MARK) { - op = DP_NETDEV_FLOW_OFFLOAD_OP_MOD; - } else { - op = 
DP_NETDEV_FLOW_OFFLOAD_OP_ADD; - } offload = dp_netdev_alloc_flow_offload(pmd, flow, op); offload->match = *match; offload->actions = xmalloc(actions_len); @@ -3691,7 +3661,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow->node), dp_netdev_flow_hash(&flow->ufid)); - queue_netdev_flow_put(pmd, flow, match, actions, actions_len); + queue_netdev_flow_put(pmd, flow, match, actions, actions_len, + DP_NETDEV_FLOW_OFFLOAD_OP_ADD); if (OVS_UNLIKELY(!VLOG_DROP_DBG((&upcall_rl)))) { struct ds ds = DS_EMPTY_INITIALIZER; @@ -3778,7 +3749,8 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, ovsrcu_set(&netdev_flow->actions, new_actions); queue_netdev_flow_put(pmd, netdev_flow, match, - put->actions, put->actions_len); + put->actions, put->actions_len, + DP_NETDEV_FLOW_OFFLOAD_OP_MOD); if (stats) { get_dpif_flow_status(pmd->dp, netdev_flow, stats, NULL); @@ -3834,6 +3806,15 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) return error; } + if (match.wc.masks.in_port.odp_port != ODPP_NONE) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + + VLOG_ERR_RL(&rl, "failed to put%s flow: in_port is not an exact match", + (put->flags & DPIF_FP_CREATE) ? "[create]" + : (put->flags & DPIF_FP_MODIFY) ? "[modify]" : "[zero]"); + return EINVAL; + } + if (put->ufid) { ufid = *put->ufid; } else { @@ -4158,8 +4139,10 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) flow_hash_5tuple(execute->flow, 0)); } - dp_packet_batch_init_packet(&pp, execute->packet); - pp.do_not_steal = true; + /* Making a copy because the packet might be stolen during the execution + * and the caller might still need it. */ + struct dp_packet *packet_clone = dp_packet_clone(execute->packet); + dp_packet_batch_init_packet(&pp, packet_clone); dp_netdev_execute_actions(pmd, &pp, false, execute->flow, execute->actions, execute->actions_len); dp_netdev_pmd_flush_output_packets(pmd, true); @@ -4169,6 +4152,24 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) dp_netdev_pmd_unref(pmd); } + if (dp_packet_batch_size(&pp) == 1) { + /* Packet wasn't dropped during the execution. Swapping content with + * the original packet, because the caller might expect actions to + * modify it. Using the packet from a batch instead of 'packet_clone' + * because it may be stolen and replaced by another packet, e.g. by + * the fragmentation engine. */ + dp_packet_swap(execute->packet, pp.packets[0]); + dp_packet_delete_batch(&pp, true); + } else if (dp_packet_batch_size(&pp)) { + /* FIXME: We have more packets than expected. Likely, we got IP + * fragments of the reassembled packet. Dropping them here as we have + * no way to get them to the caller. It might be that all the required + * actions with them are already executed, but it also might not be the + * case, e.g. if dpif_netdev_execute() was called to execute a single + * tunnel push. 
*/ + dp_packet_delete_batch(&pp, true); + } + return 0; } @@ -4878,6 +4879,12 @@ struct rr_numa { bool idx_inc; }; +static size_t +rr_numa_list_count(struct rr_numa_list *rr) +{ + return hmap_count(&rr->numas); +} + static struct rr_numa * rr_numa_list_lookup(struct rr_numa_list *rr, int numa_id) { @@ -5149,7 +5156,6 @@ reload_affected_pmds(struct dp_netdev *dp) CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { if (pmd->need_reload) { - flow_mark_flush(pmd); dp_netdev_reload_pmd__(pmd); } } @@ -5590,10 +5596,17 @@ get_dry_run_variance(struct dp_netdev *dp, uint32_t *core_list, for (int i = 0; i < n_rxqs; i++) { int numa_id = netdev_get_numa_id(rxqs[i]->port->netdev); numa = rr_numa_list_lookup(&rr, numa_id); + /* If there is no available pmd on the local numa but there is only one + * numa for cross-numa polling, we can estimate the dry run. */ + if (!numa && rr_numa_list_count(&rr) == 1) { + numa = rr_numa_list_next(&rr, NULL); + } if (!numa) { - /* Abort if cross NUMA polling. */ - VLOG_DBG("PMD auto lb dry run." - " Aborting due to cross-numa polling."); + VLOG_DBG("PMD auto lb dry run: " + "There's no available (non-isolated) PMD thread on NUMA " + "node %d for port '%s' and there are PMD threads on more " + "than one NUMA node available for cross-NUMA polling. " + "Aborting.", numa_id, netdev_rxq_get_name(rxqs[i]->rx)); goto cleanup; } @@ -6203,12 +6216,14 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, /* Update all bands and find the one hit with the highest rate for each * packet (if any). */ for (int m = 0; m < meter->n_bands; ++m) { - band = &meter->bands[m]; + uint64_t max_bucket_size; + band = &meter->bands[m]; + max_bucket_size = band->burst_size * 1000ULL; /* Update band's bucket. */ - band->bucket += delta_t * band->up.rate; - if (band->bucket > band->up.burst_size) { - band->bucket = band->up.burst_size; + band->bucket += (uint64_t) delta_t * band->rate; + if (band->bucket > max_bucket_size) { + band->bucket = max_bucket_size; } /* Drain the bucket for all the packets, if possible. */ @@ -6226,8 +6241,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, * (Only one band will be fired by a packet, and that * can be different for each packet.) */ for (int i = band_exceeded_pkt; i < cnt; i++) { - if (band->up.rate > exceeded_rate[i]) { - exceeded_rate[i] = band->up.rate; + if (band->rate > exceeded_rate[i]) { + exceeded_rate[i] = band->rate; exceeded_band[i] = m; } } @@ -6246,8 +6261,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, /* Update the exceeding band for the exceeding packet. * (Only one band will be fired by a packet, and that * can be different for each packet.) */ - if (band->up.rate > exceeded_rate[i]) { - exceeded_rate[i] = band->up.rate; + if (band->rate > exceeded_rate[i]) { + exceeded_rate[i] = band->rate; exceeded_band[i] = m; } } @@ -6329,16 +6344,14 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, config->bands[i].burst_size = config->bands[i].rate; } - meter->bands[i].up = config->bands[i]; - /* Convert burst size to the bucket units: */ - /* pkts => 1/1000 packets, kilobits => bits. */ - meter->bands[i].up.burst_size *= 1000; - /* Initialize bucket to empty. */ - meter->bands[i].bucket = 0; + meter->bands[i].rate = config->bands[i].rate; + meter->bands[i].burst_size = config->bands[i].burst_size; + /* Start with a full bucket. 
*/ + meter->bands[i].bucket = meter->bands[i].burst_size * 1000ULL; /* Figure out max delta_t that is enough to fill any bucket. */ band_max_delta_t - = meter->bands[i].up.burst_size / meter->bands[i].up.rate; + = meter->bands[i].bucket / meter->bands[i].rate; if (band_max_delta_t > meter->max_delta_t) { meter->max_delta_t = band_max_delta_t; } @@ -8493,6 +8506,7 @@ const struct dpif_class dpif_netdev_class = { NULL, /* ct_timeout_policy_dump_next */ NULL, /* ct_timeout_policy_dump_done */ dpif_netdev_ct_get_timeout_policy_name, + NULL, /* ct_get_features */ dpif_netdev_ipf_set_enabled, dpif_netdev_ipf_set_min_frag, dpif_netdev_ipf_set_max_nfrags, diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index ceb56c6851..e6cb3ca7fa 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -2061,6 +2061,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) uint8_t csum_on = false; int err; + info.tc_modify_flow_deleted = false; if (put->flags & DPIF_FP_PROBE) { return EOPNOTSUPP; } @@ -2105,7 +2106,6 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) info.tunnel_csum_on = csum_on; info.recirc_id_shared_with_tc = (dpif->user_features & OVS_DP_F_TC_RECIRC_SHARING); - info.tc_modify_flow_deleted = false; err = netdev_flow_put(dev, &match, CONST_CAST(struct nlattr *, put->actions), put->actions_len, @@ -2923,8 +2923,6 @@ dpif_netlink_ct_set_limits(struct dpif *dpif OVS_UNUSED, const uint32_t *default_limits, const struct ovs_list *zone_limits) { - struct ovs_zone_limit req_zone_limit; - if (ovs_ct_limit_family < 0) { return EOPNOTSUPP; } @@ -2941,8 +2939,10 @@ dpif_netlink_ct_set_limits(struct dpif *dpif OVS_UNUSED, size_t opt_offset; opt_offset = nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT); if (default_limits) { - req_zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE; - req_zone_limit.limit = *default_limits; + struct ovs_zone_limit req_zone_limit = { + .zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE, + .limit = *default_limits, + }; nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit); } @@ -2950,8 +2950,10 @@ dpif_netlink_ct_set_limits(struct dpif *dpif OVS_UNUSED, struct ct_dpif_zone_limit *zone_limit; LIST_FOR_EACH (zone_limit, node, zone_limits) { - req_zone_limit.zone_id = zone_limit->zone; - req_zone_limit.limit = zone_limit->limit; + struct ovs_zone_limit req_zone_limit = { + .zone_id = zone_limit->zone, + .limit = zone_limit->limit, + }; nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit); } } @@ -3035,8 +3037,9 @@ dpif_netlink_ct_get_limits(struct dpif *dpif OVS_UNUSED, size_t opt_offset = nl_msg_start_nested(request, OVS_CT_LIMIT_ATTR_ZONE_LIMIT); - struct ovs_zone_limit req_zone_limit; - req_zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE; + struct ovs_zone_limit req_zone_limit = { + .zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE, + }; nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit); struct ct_dpif_zone_limit *zone_limit; @@ -3086,8 +3089,9 @@ dpif_netlink_ct_del_limits(struct dpif *dpif OVS_UNUSED, struct ct_dpif_zone_limit *zone_limit; LIST_FOR_EACH (zone_limit, node, zone_limits) { - struct ovs_zone_limit req_zone_limit; - req_zone_limit.zone_id = zone_limit->zone; + struct ovs_zone_limit req_zone_limit = { + .zone_id = zone_limit->zone, + }; nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit); } nl_msg_end_nested(request, opt_offset); @@ -3161,6 +3165,20 @@ dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED, return 0; } +static int +dpif_netlink_ct_get_features(struct dpif *dpif 
OVS_UNUSED, + enum ct_features *features) +{ + if (features != NULL) { +#ifndef _WIN32 + *features = CONNTRACK_F_ZERO_SNAT; +#else + *features = 0; +#endif + } + return 0; +} + #define CT_DPIF_NL_TP_TCP_MAPPINGS \ CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT) \ CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV) \ @@ -4003,6 +4021,7 @@ const struct dpif_class dpif_netlink_class = { dpif_netlink_ct_timeout_policy_dump_next, dpif_netlink_ct_timeout_policy_dump_done, dpif_netlink_ct_get_timeout_policy_name, + dpif_netlink_ct_get_features, NULL, /* ipf_set_enabled */ NULL, /* ipf_set_min_frag */ NULL, /* ipf_set_max_nfrags */ @@ -4662,7 +4681,7 @@ report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx, time_msec() - ch->last_poll); } - VLOG_WARN("%s: lost packet on port channel %u of handler %u", - dpif_name(&dpif->dpif), ch_idx, handler_id); + VLOG_WARN("%s: lost packet on port channel %u of handler %u%s", + dpif_name(&dpif->dpif), ch_idx, handler_id, ds_cstr(&s)); ds_destroy(&s); } diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index b817fceac6..59e0a3a9dd 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -81,6 +81,7 @@ struct ct_dpif_dump_state; struct ct_dpif_entry; struct ct_dpif_tuple; struct ct_dpif_timeout_policy; +enum ct_features; /* 'dpif_ipf_proto_status' and 'dpif_ipf_status' are presently in * sync with 'ipf_proto_status' and 'ipf_status', but more @@ -562,6 +563,10 @@ struct dpif_class { uint16_t dl_type, uint8_t nw_proto, char **tp_name, bool *is_generic); + /* Stores the conntrack features supported by 'dpif' into features. + * The value is a bitmap of CONNTRACK_F_* bits. */ + int (*ct_get_features)(struct dpif *, enum ct_features *features); + /* IP Fragmentation. */ /* Disables or enables conntrack fragment reassembly. The default diff --git a/lib/dpif.c b/lib/dpif.c index 56d0b4a654..26e8bfb7db 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1240,6 +1240,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_, execute.needs_help = false; execute.probe = false; execute.mtu = 0; + execute.hash = 0; aux->error = dpif_execute(aux->dpif, &execute); log_execute_message(aux->dpif, &this_module, &execute, true, aux->error); diff --git a/lib/dpif.h b/lib/dpif.h index ecda896c78..f9728e6739 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -727,7 +727,7 @@ struct dpif_execute { bool probe; /* Suppress error messages. */ unsigned int mtu; /* Maximum transmission unit to fragment. 0 if not a fragmented packet */ - uint64_t hash; + uint64_t hash; /* Packet flow hash. 0 if not specified. */ const struct flow *flow; /* Flow extracted from 'packet'. */ /* Input, but possibly modified as a side effect of execution. */ diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c index 6f7b610a99..fd0127ed17 100644 --- a/lib/dynamic-string.c +++ b/lib/dynamic-string.c @@ -460,6 +460,10 @@ ds_chomp(struct ds *ds, int c) void ds_clone(struct ds *dst, struct ds *source) { + if (!source->allocated) { + ds_init(dst); + return; + } dst->length = source->length; dst->allocated = dst->length; dst->string = xmalloc(dst->allocated + 1); diff --git a/lib/ipf.c b/lib/ipf.c index c20bcc0b33..009f5d1e9b 100644 --- a/lib/ipf.c +++ b/lib/ipf.c @@ -93,7 +93,6 @@ struct ipf_frag { struct dp_packet *pkt; uint16_t start_data_byte; uint16_t end_data_byte; - bool dnsteal; /* 'do not steal': if true, ipf should not free packet. 
*/ }; /* The key for a collection of fragments potentially making up an unfragmented @@ -795,8 +794,7 @@ ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx, static bool ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, struct dp_packet *pkt, uint16_t start_data_byte, - uint16_t end_data_byte, bool ff, bool lf, bool v6, - bool dnsteal) + uint16_t end_data_byte, bool ff, bool lf, bool v6) OVS_REQUIRES(ipf->ipf_lock) { bool duped_frag = ipf_is_frag_duped(ipf_list->frag_list, @@ -811,10 +809,9 @@ ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, * recommend not setting the mempool number of buffers too low * and also clamp the number of fragments. */ struct ipf_frag *frag = &ipf_list->frag_list[last_inuse_idx + 1]; - frag->pkt = pkt; + frag->pkt = dp_packet_clone(pkt); frag->start_data_byte = start_data_byte; frag->end_data_byte = end_data_byte; - frag->dnsteal = dnsteal; ipf_list->last_inuse_idx++; atomic_count_inc(&ipf->nfrag); ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED); @@ -851,8 +848,7 @@ ipf_list_init(struct ipf_list *ipf_list, struct ipf_list_key *key, * to a list of fragments. */ static bool ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, - uint16_t zone, long long now, uint32_t hash_basis, - bool dnsteal) + uint16_t zone, long long now, uint32_t hash_basis) OVS_REQUIRES(ipf->ipf_lock) { struct ipf_list_key key; @@ -921,7 +917,7 @@ ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, } return ipf_process_frag(ipf, ipf_list, pkt, start_data_byte, - end_data_byte, ff, lf, v6, dnsteal); + end_data_byte, ff, lf, v6); } /* Filters out fragments from a batch of fragments and adjusts the batch. */ @@ -942,9 +938,10 @@ ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb, ipf_is_valid_v6_frag(ipf, pkt)))) { ovs_mutex_lock(&ipf->ipf_lock); - if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis, - pb->do_not_steal)) { + if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis)) { dp_packet_batch_refill(pb, pkt, pb_idx); + } else { + dp_packet_delete(pkt); } ovs_mutex_unlock(&ipf->ipf_lock); } else { @@ -1154,52 +1151,56 @@ ipf_post_execute_reass_pkts(struct ipf *ipf, * NETDEV_MAX_BURST. 
*/ DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) { if (rp && pkt == rp->list->reass_execute_ctx) { + const struct ipf_frag *frag_0 = &rp->list->frag_list[0]; + void *l4_frag = dp_packet_l4(frag_0->pkt); + void *l4_reass = dp_packet_l4(pkt); + memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt)); + for (int i = 0; i <= rp->list->last_inuse_idx; i++) { - rp->list->frag_list[i].pkt->md.ct_label = pkt->md.ct_label; - rp->list->frag_list[i].pkt->md.ct_mark = pkt->md.ct_mark; - rp->list->frag_list[i].pkt->md.ct_state = pkt->md.ct_state; - rp->list->frag_list[i].pkt->md.ct_zone = pkt->md.ct_zone; - rp->list->frag_list[i].pkt->md.ct_orig_tuple_ipv6 = + const struct ipf_frag *frag_i = &rp->list->frag_list[i]; + + frag_i->pkt->md.ct_label = pkt->md.ct_label; + frag_i->pkt->md.ct_mark = pkt->md.ct_mark; + frag_i->pkt->md.ct_state = pkt->md.ct_state; + frag_i->pkt->md.ct_zone = pkt->md.ct_zone; + frag_i->pkt->md.ct_orig_tuple_ipv6 = pkt->md.ct_orig_tuple_ipv6; if (pkt->md.ct_orig_tuple_ipv6) { - rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv6 = + frag_i->pkt->md.ct_orig_tuple.ipv6 = pkt->md.ct_orig_tuple.ipv6; } else { - rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv4 = + frag_i->pkt->md.ct_orig_tuple.ipv4 = pkt->md.ct_orig_tuple.ipv4; } - } - - const struct ipf_frag *frag_0 = &rp->list->frag_list[0]; - void *l4_frag = dp_packet_l4(frag_0->pkt); - void *l4_reass = dp_packet_l4(pkt); - memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt)); - - if (v6) { - struct ovs_16aligned_ip6_hdr *l3_frag - = dp_packet_l3(frag_0->pkt); - struct ovs_16aligned_ip6_hdr *l3_reass = dp_packet_l3(pkt); - l3_frag->ip6_src = l3_reass->ip6_src; - l3_frag->ip6_dst = l3_reass->ip6_dst; - } else { - struct ip_header *l3_frag = dp_packet_l3(frag_0->pkt); - struct ip_header *l3_reass = dp_packet_l3(pkt); - if (!dp_packet_hwol_is_ipv4(frag_0->pkt)) { - ovs_be32 reass_ip = - get_16aligned_be32(&l3_reass->ip_src); - ovs_be32 frag_ip = - get_16aligned_be32(&l3_frag->ip_src); - - l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, - frag_ip, reass_ip); - reass_ip = get_16aligned_be32(&l3_reass->ip_dst); - frag_ip = get_16aligned_be32(&l3_frag->ip_dst); - l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, - frag_ip, reass_ip); + if (v6) { + struct ovs_16aligned_ip6_hdr *l3_frag + = dp_packet_l3(frag_i->pkt); + struct ovs_16aligned_ip6_hdr *l3_reass + = dp_packet_l3(pkt); + l3_frag->ip6_src = l3_reass->ip6_src; + l3_frag->ip6_dst = l3_reass->ip6_dst; + } else { + struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt); + struct ip_header *l3_reass = dp_packet_l3(pkt); + if (!dp_packet_hwol_is_ipv4(frag_i->pkt)) { + ovs_be32 reass_ip = + get_16aligned_be32(&l3_reass->ip_src); + ovs_be32 frag_ip = + get_16aligned_be32(&l3_frag->ip_src); + + l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, + frag_ip, + reass_ip); + reass_ip = get_16aligned_be32(&l3_reass->ip_dst); + frag_ip = get_16aligned_be32(&l3_frag->ip_dst); + l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, + frag_ip, + reass_ip); + } + + l3_frag->ip_src = l3_reass->ip_src; + l3_frag->ip_dst = l3_reass->ip_dst; } - - l3_frag->ip_src = l3_reass->ip_src; - l3_frag->ip_dst = l3_reass->ip_dst; } ipf_completed_list_add(&ipf->frag_complete_list, rp->list); @@ -1338,9 +1339,7 @@ ipf_destroy(struct ipf *ipf) while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { struct dp_packet *pkt = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; - if (!ipf_list->frag_list[ipf_list->last_sent_idx + 1].dnsteal) { - dp_packet_delete(pkt); - } + dp_packet_delete(pkt); 
atomic_count_dec(&ipf->nfrag); ipf_list->last_sent_idx++; } diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c index 8c5126ffcb..df0396815a 100644 --- a/lib/jsonrpc.c +++ b/lib/jsonrpc.c @@ -1261,6 +1261,24 @@ jsonrpc_session_force_reconnect(struct jsonrpc_session *s) reconnect_force_reconnect(s->reconnect, time_msec()); } +/* Resets the reconnect backoff for 's' by allowing as many free tries as the + * number of configured remotes. This is to be used by upper layers before + * calling jsonrpc_session_force_reconnect() if backoff is undesirable. + */ +void +jsonrpc_session_reset_backoff(struct jsonrpc_session *s) +{ + unsigned int free_tries = s->remotes.n; + + if (jsonrpc_session_is_connected(s)) { + /* The extra free try will be consumed when the current remote + * is disconnected. + */ + free_tries++; + } + reconnect_set_backoff_free_tries(s->reconnect, free_tries); +} + /* Sets 'max_backoff' as the maximum time, in milliseconds, to wait after a * connection attempt fails before attempting to connect again. */ void diff --git a/lib/jsonrpc.h b/lib/jsonrpc.h index d75d66b863..ba096dd0c8 100644 --- a/lib/jsonrpc.h +++ b/lib/jsonrpc.h @@ -136,6 +136,7 @@ void jsonrpc_session_get_reconnect_stats(const struct jsonrpc_session *, void jsonrpc_session_enable_reconnect(struct jsonrpc_session *); void jsonrpc_session_force_reconnect(struct jsonrpc_session *); +void jsonrpc_session_reset_backoff(struct jsonrpc_session *); void jsonrpc_session_set_max_backoff(struct jsonrpc_session *, int max_backoff); diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 9d8096668e..6699c383e6 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -961,14 +961,6 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) rte_eth_dev_info_get(dev->port_id, &info); - /* As of DPDK 19.11, it is not allowed to set a mq_mode for - * virtio PMD driver. */ - if (!strcmp(info.driver_name, "net_virtio")) { - conf.rxmode.mq_mode = ETH_MQ_RX_NONE; - } else { - conf.rxmode.mq_mode = ETH_MQ_RX_RSS; - } - /* As of DPDK 17.11.1 a few PMDs require to explicitly enable * scatter to support jumbo RX. * Setting scatter for the device is done after checking for @@ -1000,6 +992,11 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) /* Limit configured rss hash functions to only those supported * by the eth device. 
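/* Sketch (not part of the patch) of how an upper layer is expected to pair
 * the jsonrpc_session_reset_backoff() helper added above with a forced
 * reconnect: resetting first grants one free connection attempt per
 * configured remote, so the deliberate reconnect skips exponential
 * backoff. */
static void
reconnect_without_backoff(struct jsonrpc_session *s)
{
    jsonrpc_session_reset_backoff(s);
    jsonrpc_session_force_reconnect(s);
}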
*/ conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads; + if (conf.rx_adv_conf.rss_conf.rss_hf == 0) { + conf.rxmode.mq_mode = ETH_MQ_RX_NONE; + } else { + conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + } /* A device may report more queues than it makes available (this has * been observed for Intel xl710, which reserves some of them for diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 6be23dbeed..97b0d18781 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -625,6 +625,7 @@ netdev_linux_notify_sock(void) if (!error) { size_t i; + nl_sock_listen_all_nsid(sock, true); for (i = 0; i < ARRAY_SIZE(mcgroups); i++) { error = nl_sock_join_mcgroup(sock, mcgroups[i]); if (error) { @@ -634,7 +635,6 @@ netdev_linux_notify_sock(void) } } } - nl_sock_listen_all_nsid(sock, true); ovsthread_once_done(&once); } @@ -672,7 +672,9 @@ netdev_linux_update_lag(struct rtnetlink_change *change) uint32_t block_id; int error = 0; - if_indextoname(change->master_ifindex, master_name); + if (!if_indextoname(change->master_ifindex, master_name)) { + return; + } master_netdev = netdev_from_name(master_name); if (!master_netdev) { return; @@ -1255,21 +1257,21 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu, * aux_buf is allocated so that it can be prepended to TSO buffer. */ std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu; for (i = 0; i < NETDEV_MAX_BURST; i++) { - buffers[i] = dp_packet_new_with_headroom(std_len, DP_NETDEV_HEADROOM); - iovs[i][IOV_PACKET].iov_base = dp_packet_data(buffers[i]); - iovs[i][IOV_PACKET].iov_len = std_len; - if (iovlen == IOV_TSO_SIZE) { - iovs[i][IOV_AUXBUF].iov_base = dp_packet_data(rx->aux_bufs[i]); - iovs[i][IOV_AUXBUF].iov_len = dp_packet_tailroom(rx->aux_bufs[i]); - } + buffers[i] = dp_packet_new_with_headroom(std_len, DP_NETDEV_HEADROOM); + iovs[i][IOV_PACKET].iov_base = dp_packet_data(buffers[i]); + iovs[i][IOV_PACKET].iov_len = std_len; + if (iovlen == IOV_TSO_SIZE) { + iovs[i][IOV_AUXBUF].iov_base = dp_packet_data(rx->aux_bufs[i]); + iovs[i][IOV_AUXBUF].iov_len = dp_packet_tailroom(rx->aux_bufs[i]); + } - mmsgs[i].msg_hdr.msg_name = NULL; - mmsgs[i].msg_hdr.msg_namelen = 0; - mmsgs[i].msg_hdr.msg_iov = iovs[i]; - mmsgs[i].msg_hdr.msg_iovlen = iovlen; - mmsgs[i].msg_hdr.msg_control = &cmsg_buffers[i]; - mmsgs[i].msg_hdr.msg_controllen = sizeof cmsg_buffers[i]; - mmsgs[i].msg_hdr.msg_flags = 0; + mmsgs[i].msg_hdr.msg_name = NULL; + mmsgs[i].msg_hdr.msg_namelen = 0; + mmsgs[i].msg_hdr.msg_iov = iovs[i]; + mmsgs[i].msg_hdr.msg_iovlen = iovlen; + mmsgs[i].msg_hdr.msg_control = &cmsg_buffers[i]; + mmsgs[i].msg_hdr.msg_controllen = sizeof cmsg_buffers[i]; + mmsgs[i].msg_hdr.msg_flags = 0; } do { @@ -1288,14 +1290,28 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu, for (i = 0; i < retval; i++) { struct dp_packet *pkt; - if (mmsgs[i].msg_len < ETH_HEADER_LEN) { + if (mmsgs[i].msg_hdr.msg_flags & MSG_TRUNC + || mmsgs[i].msg_len < ETH_HEADER_LEN) { struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); struct netdev_linux *netdev = netdev_linux_cast(netdev_); + /* The rx->aux_bufs[i] will be re-used next time. */ dp_packet_delete(buffers[i]); netdev->rx_dropped += 1; - VLOG_WARN_RL(&rl, "%s: Dropped packet: less than ether hdr size", - netdev_get_name(netdev_)); + if (mmsgs[i].msg_hdr.msg_flags & MSG_TRUNC) { + /* Data is truncated, so the packet is corrupted, and needs + * to be dropped. This can happen if TSO/GRO is enabled in + * the kernel, but not in userspace, i.e. 
there is no dp + * buffer to store the full packet. */ + VLOG_WARN_RL(&rl, + "%s: Dropped packet: Too big. GRO/TSO enabled?", + netdev_get_name(netdev_)); + } else { + VLOG_WARN_RL(&rl, + "%s: Dropped packet: less than ether hdr size", + netdev_get_name(netdev_)); + } + continue; } @@ -2572,7 +2588,7 @@ exit: static struct tc_police tc_matchall_fill_police(uint32_t kbits_rate, uint32_t kbits_burst) { - unsigned int bsize = MIN(UINT32_MAX / 1024, kbits_burst) * 1024 / 64; + unsigned int bsize = MIN(UINT32_MAX / 1024, kbits_burst) * 1024 / 8; unsigned int bps = ((uint64_t) kbits_rate * 1000) / 8; struct tc_police police; struct tc_ratespec rate; diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index f2413f5be6..0e4119c0a6 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -503,8 +503,11 @@ dump_flow_action(struct ds *s, struct ds *s_extra, ds_put_format(s, "set_ipv6_%s ", dirstr); if (set_ipv6) { + struct in6_addr addr; + ds_put_cstr(s, "ipv6_addr "); - ipv6_format_addr((struct in6_addr *) &set_ipv6->ipv6_addr, s); + memcpy(&addr, set_ipv6->ipv6_addr, sizeof addr); + ipv6_format_addr(&addr, s); ds_put_cstr(s, " "); } ds_put_cstr(s, "/ "); diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 72b7915052..32f52b46f2 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -48,6 +48,7 @@ static struct hmap ufid_to_tc = HMAP_INITIALIZER(&ufid_to_tc); static struct hmap tc_to_ufid = HMAP_INITIALIZER(&tc_to_ufid); static bool multi_mask_per_prio = false; static bool block_support = false; +static uint16_t ct_state_support; struct netlink_field { int offset; @@ -676,6 +677,27 @@ parse_tc_flower_to_match(struct tc_flower *flower, ct_statem |= OVS_CS_F_TRACKED; } + if (mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_REPLY) { + if (key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_REPLY) { + ct_statev |= OVS_CS_F_REPLY_DIR; + } + ct_statem |= OVS_CS_F_REPLY_DIR; + } + + if (mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_INVALID) { + if (key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_INVALID) { + ct_statev |= OVS_CS_F_INVALID; + } + ct_statem |= OVS_CS_F_INVALID; + } + + if (mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_RELATED) { + if (key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_RELATED) { + ct_statev |= OVS_CS_F_RELATED; + } + ct_statem |= OVS_CS_F_RELATED; + } + match_set_ct_state_masked(match, ct_statev, ct_statem); } @@ -820,8 +842,7 @@ parse_tc_flower_to_match(struct tc_flower *flower, action->encap.tp_dst); } if (!action->encap.no_csum) { - nl_msg_put_u8(buf, OVS_TUNNEL_KEY_ATTR_CSUM, - !action->encap.no_csum); + nl_msg_put_flag(buf, OVS_TUNNEL_KEY_ATTR_CSUM); } parse_tc_flower_geneve_opts(action, buf); @@ -1406,6 +1427,90 @@ flower_match_to_tun_opt(struct tc_flower *flower, const struct flow_tnl *tnl, flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len; } +static void +parse_match_ct_state_to_flower(struct tc_flower *flower, struct match *match) +{ + const struct flow *key = &match->flow; + struct flow *mask = &match->wc.masks; + + if (!ct_state_support) { + return; + } + + if ((ct_state_support & mask->ct_state) == mask->ct_state) { + if (mask->ct_state & OVS_CS_F_NEW) { + if (key->ct_state & OVS_CS_F_NEW) { + flower->key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_NEW; + } + flower->mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_NEW; + mask->ct_state &= ~OVS_CS_F_NEW; + } + + if (mask->ct_state & OVS_CS_F_ESTABLISHED) { + if (key->ct_state & OVS_CS_F_ESTABLISHED) { + flower->key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + } + flower->mask.ct_state |= 
TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + mask->ct_state &= ~OVS_CS_F_ESTABLISHED; + } + + if (mask->ct_state & OVS_CS_F_TRACKED) { + if (key->ct_state & OVS_CS_F_TRACKED) { + flower->key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + } + flower->mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + mask->ct_state &= ~OVS_CS_F_TRACKED; + } + + if (mask->ct_state & OVS_CS_F_REPLY_DIR) { + if (key->ct_state & OVS_CS_F_REPLY_DIR) { + flower->key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_REPLY; + } + flower->mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_REPLY; + mask->ct_state &= ~OVS_CS_F_REPLY_DIR; + } + + if (mask->ct_state & OVS_CS_F_INVALID) { + if (key->ct_state & OVS_CS_F_INVALID) { + flower->key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_INVALID; + } + flower->mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_INVALID; + mask->ct_state &= ~OVS_CS_F_INVALID; + } + + if (mask->ct_state & OVS_CS_F_RELATED) { + if (key->ct_state & OVS_CS_F_RELATED) { + flower->key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_RELATED; + } + flower->mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_RELATED; + mask->ct_state &= ~OVS_CS_F_RELATED; + } + + if (flower->key.ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) { + flower->key.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); + flower->mask.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); + } + } + + if (mask->ct_zone) { + flower->key.ct_zone = key->ct_zone; + flower->mask.ct_zone = mask->ct_zone; + mask->ct_zone = 0; + } + + if (mask->ct_mark) { + flower->key.ct_mark = key->ct_mark; + flower->mask.ct_mark = mask->ct_mark; + mask->ct_mark = 0; + } + + if (!ovs_u128_is_zero(mask->ct_label)) { + flower->key.ct_label = key->ct_label; + flower->mask.ct_label = mask->ct_label; + mask->ct_label = OVS_U128_ZERO; + } +} + static int netdev_tc_flow_put(struct netdev *netdev, struct match *match, struct nlattr *actions, size_t actions_len, @@ -1650,54 +1755,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, } } - if (mask->ct_state) { - if (mask->ct_state & OVS_CS_F_NEW) { - if (key->ct_state & OVS_CS_F_NEW) { - flower.key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_NEW; - } - flower.mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_NEW; - mask->ct_state &= ~OVS_CS_F_NEW; - } - - if (mask->ct_state & OVS_CS_F_ESTABLISHED) { - if (key->ct_state & OVS_CS_F_ESTABLISHED) { - flower.key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; - } - flower.mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; - mask->ct_state &= ~OVS_CS_F_ESTABLISHED; - } - - if (mask->ct_state & OVS_CS_F_TRACKED) { - if (key->ct_state & OVS_CS_F_TRACKED) { - flower.key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; - } - flower.mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; - mask->ct_state &= ~OVS_CS_F_TRACKED; - } - - if (flower.key.ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) { - flower.key.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); - flower.mask.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); - } - } - - if (mask->ct_zone) { - flower.key.ct_zone = key->ct_zone; - flower.mask.ct_zone = mask->ct_zone; - mask->ct_zone = 0; - } - - if (mask->ct_mark) { - flower.key.ct_mark = key->ct_mark; - flower.mask.ct_mark = mask->ct_mark; - mask->ct_mark = 0; - } - - if (!ovs_u128_is_zero(mask->ct_label)) { - flower.key.ct_label = key->ct_label; - flower.mask.ct_label = mask->ct_label; - mask->ct_label = OVS_U128_ZERO; - } + parse_match_ct_state_to_flower(&flower, match); /* ignore exact match on skb_mark of 0. 
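/* Worked example (illustrative values) for the tc_matchall_fill_police()
 * burst fix above: 'kbits_burst' is in kilobits, and tc expects bytes, so
 * the conversion divides by 8 bits per byte; the old divisor of 64 made
 * bursts eight times too small. */
static unsigned int
police_burst_bytes(uint32_t kbits_burst)
{
    uint32_t kbits = kbits_burst > UINT32_MAX / 1024
                     ? UINT32_MAX / 1024 : kbits_burst;

    return kbits * 1024 / 8;    /* e.g. 1000 kbit -> 128000 bytes;
                                 * dividing by 64 would give 16000. */
}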
*/ if (mask->pkt_mark == UINT32_MAX && !key->pkt_mark) { @@ -1779,6 +1837,10 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, const struct nlattr *ct = nl_attr_get(nla); const size_t ct_len = nl_attr_get_size(nla); + if (!ct_state_support) { + return -EOPNOTSUPP; + } + err = parse_put_flow_ct_action(&flower, action, ct, ct_len); if (err) { return err; @@ -1971,6 +2033,96 @@ out: tc_add_del_qdisc(ifindex, false, block_id, TC_INGRESS); } + +static int +probe_insert_ct_state_rule(int ifindex, uint16_t ct_state, struct tcf_id *id) +{ + int prio = TC_RESERVED_PRIORITY_MAX + 1; + struct tc_flower flower; + + memset(&flower, 0, sizeof flower); + flower.key.ct_state = ct_state; + flower.mask.ct_state = ct_state; + flower.tc_policy = TC_POLICY_SKIP_HW; + flower.key.eth_type = htons(ETH_P_IP); + flower.mask.eth_type = OVS_BE16_MAX; + + *id = tc_make_tcf_id(ifindex, 0, prio, TC_INGRESS); + return tc_replace_flower(id, &flower); +} + +static void +probe_ct_state_support(int ifindex) +{ + struct tc_flower flower; + uint16_t ct_state; + struct tcf_id id; + int error; + + error = tc_add_del_qdisc(ifindex, true, 0, TC_INGRESS); + if (error) { + return; + } + + /* Test for base ct_state match support */ + ct_state = TCA_FLOWER_KEY_CT_FLAGS_NEW | TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + error = probe_insert_ct_state_rule(ifindex, ct_state, &id); + if (error) { + goto out; + } + + error = tc_get_flower(&id, &flower); + if (error || flower.mask.ct_state != ct_state) { + goto out_del; + } + + tc_del_filter(&id); + ct_state_support = OVS_CS_F_NEW | + OVS_CS_F_ESTABLISHED | + OVS_CS_F_TRACKED | + OVS_CS_F_RELATED; + + /* Test for reject, ct_state >= MAX */ + ct_state = ~0; + error = probe_insert_ct_state_rule(ifindex, ct_state, &id); + if (!error) { + /* No reject, can't continue probing other flags */ + goto out_del; + } + + tc_del_filter(&id); + + /* Test for ct_state INVALID support */ + memset(&flower, 0, sizeof flower); + ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED | + TCA_FLOWER_KEY_CT_FLAGS_INVALID; + error = probe_insert_ct_state_rule(ifindex, ct_state, &id); + if (error) { + goto out; + } + + tc_del_filter(&id); + ct_state_support |= OVS_CS_F_INVALID; + + /* Test for ct_state REPLY support */ + memset(&flower, 0, sizeof flower); + ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED | + TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | + TCA_FLOWER_KEY_CT_FLAGS_REPLY; + error = probe_insert_ct_state_rule(ifindex, ct_state, &id); + if (error) { + goto out; + } + + ct_state_support |= OVS_CS_F_REPLY_DIR; + +out_del: + tc_del_filter(&id); +out: + tc_add_del_qdisc(ifindex, false, 0, TC_INGRESS); + VLOG_INFO("probe tc: supported ovs ct_state bits: 0x%x", ct_state_support); +} + static void probe_tc_block_support(int ifindex) { @@ -2038,6 +2190,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) block_id = get_block_id_from_netdev(netdev); probe_multi_mask_per_prio(ifindex); + probe_ct_state_support(ifindex); ovsthread_once_done(&once); } diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index 47077e9478..5cb96fa6ec 100644 --- a/lib/netlink-socket.c +++ b/lib/netlink-socket.c @@ -917,7 +917,7 @@ nl_sock_transact_multiple__(struct nl_sock *sock, } if (txn->error) { VLOG_DBG_RL(&rl, "received NAK error=%d (%s)", - error, ovs_strerror(txn->error)); + txn->error, ovs_strerror(txn->error)); } } else { txn->error = 0; diff --git a/lib/odp-util.c b/lib/odp-util.c index a8598d52af..ec25976d67 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2937,7 +2937,7 @@ odp_nsh_key_from_attr__(const struct nlattr *attr, bool 
is_mask, const struct ovs_nsh_key_md1 *md1 = nl_attr_get(a); has_md1 = true; memcpy(nsh->context, md1->context, sizeof md1->context); - if (len == 2 * sizeof(*md1)) { + if (nsh_mask && (len == 2 * sizeof *md1)) { const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; memcpy(nsh_mask->context, md1_mask->context, sizeof(*md1_mask)); @@ -3189,17 +3189,17 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key, if ((!tnl_type || !strcmp(tnl_type, "erspan") || !strcmp(tnl_type, "ip6erspan")) && (tun_key->erspan_ver == 1 || tun_key->erspan_ver == 2)) { - struct erspan_metadata opts; + struct erspan_metadata *opts; - opts.version = tun_key->erspan_ver; - if (opts.version == 1) { - opts.u.index = htonl(tun_key->erspan_idx); + opts = nl_msg_put_unspec_zero(a, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, + sizeof *opts); + opts->version = tun_key->erspan_ver; + if (opts->version == 1) { + opts->u.index = htonl(tun_key->erspan_idx); } else { - opts.u.md2.dir = tun_key->erspan_dir; - set_hwid(&opts.u.md2, tun_key->erspan_hwid); + opts->u.md2.dir = tun_key->erspan_dir; + set_hwid(&opts->u.md2, tun_key->erspan_hwid); } - nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, - &opts, sizeof(opts)); } if ((!tnl_type || !strcmp(tnl_type, "gtpu")) && @@ -4614,7 +4614,7 @@ odp_flow_format(const struct nlattr *key, size_t key_len, } ds_put_char(ds, ')'); } - if (!has_ethtype_key) { + if (!has_ethtype_key && mask) { const struct nlattr *ma = nl_attr_find__(mask, mask_len, OVS_KEY_ATTR_ETHERTYPE); if (ma) { diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index e2e829772a..6fb3da5074 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -3777,11 +3777,22 @@ parse_SET_MPLS_LABEL(char *arg, const struct ofpact_parse_params *pp) { struct ofpact_mpls_label *mpls_label = ofpact_put_SET_MPLS_LABEL(pp->ofpacts); + uint32_t label; + char *error; + if (*arg == '\0') { return xstrdup("set_mpls_label: expected label."); } - mpls_label->label = htonl(atoi(arg)); + error = str_to_u32(arg, &label); + if (error) { + return error; + } + + if (label & ~0xfffff) { + return xasprintf("%s: not a valid MPLS label", arg); + } + mpls_label->label = htonl(label); return NULL; } @@ -3837,12 +3848,22 @@ static char * OVS_WARN_UNUSED_RESULT parse_SET_MPLS_TC(char *arg, const struct ofpact_parse_params *pp) { struct ofpact_mpls_tc *mpls_tc = ofpact_put_SET_MPLS_TC(pp->ofpacts); + uint8_t tc; + char *error; if (*arg == '\0') { return xstrdup("set_mpls_tc: expected tc."); } - mpls_tc->tc = atoi(arg); + error = str_to_u8(arg, "MPLS TC", &tc); + if (error) { + return error; + } + + if (tc & ~7) { + return xasprintf("%s: not a valid MPLS TC", arg); + } + mpls_tc->tc = tc; return NULL; } @@ -3850,7 +3871,7 @@ static void format_SET_MPLS_TC(const struct ofpact_mpls_tc *a, const struct ofpact_format_params *fp) { - ds_put_format(fp->s, "%sset_mpls_ttl(%s%"PRIu8"%s)%s", + ds_put_format(fp->s, "%sset_mpls_tc(%s%"PRIu8"%s)%s", colors.paren, colors.end, a->tc, colors.paren, colors.end); } @@ -3889,12 +3910,18 @@ static char * OVS_WARN_UNUSED_RESULT parse_SET_MPLS_TTL(char *arg, const struct ofpact_parse_params *pp) { struct ofpact_mpls_ttl *mpls_ttl = ofpact_put_SET_MPLS_TTL(pp->ofpacts); + uint8_t ttl; + char *error; if (*arg == '\0') { return xstrdup("set_mpls_ttl: expected ttl."); } - mpls_ttl->ttl = atoi(arg); + error = str_to_u8(arg, "MPLS TTL", &ttl); + if (error) { + return error; + } + mpls_ttl->ttl = ttl; return NULL; } @@ -4431,6 +4458,7 @@ decode_NXAST_RAW_ENCAP(const struct nx_action_encap *nae, { struct ofpact_encap *encap; const struct 
ofp_ed_prop_header *ofp_prop; + const size_t encap_ofs = out->size; size_t props_len; uint16_t n_props = 0; int err; @@ -4458,6 +4486,7 @@ decode_NXAST_RAW_ENCAP(const struct nx_action_encap *nae, } n_props++; } + encap = ofpbuf_at_assert(out, encap_ofs, sizeof *encap); encap->n_props = n_props; out->header = &encap->ofpact; ofpact_finish_ENCAP(out, &encap); diff --git a/lib/ofp-group.c b/lib/ofp-group.c index bf0f8af544..737f48047b 100644 --- a/lib/ofp-group.c +++ b/lib/ofp-group.c @@ -64,7 +64,7 @@ ofputil_group_from_string(const char *s, uint32_t *group_idp) void ofputil_format_group(uint32_t group_id, struct ds *s) { - char name[MAX_GROUP_NAME_LEN]; + char name[MAX_GROUP_NAME_LEN + 1]; ofputil_group_to_string(group_id, name, sizeof name); ds_put_cstr(s, name); diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml index a2778de4bc..3894cb3c33 100644 --- a/lib/ovs-actions.xml +++ b/lib/ovs-actions.xml @@ -1666,7 +1666,7 @@ for i in [1,n_members]:
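/* Sketch (hypothetical struct; ofpbuf_put_zeros() and ofpbuf_at_assert()
 * are the real OVS helpers) of the pattern behind the
 * decode_NXAST_RAW_ENCAP() fix above: pointers into an ofpbuf can be
 * invalidated when later puts grow the buffer, so remember the offset and
 * re-derive the pointer before writing through it. */
struct example_act {                /* Hypothetical action header. */
    uint16_t n_props;
};

static struct example_act *
put_act_then_props(struct ofpbuf *out)
{
    size_t act_ofs = out->size;     /* Keep the offset, not the pointer. */

    ofpbuf_put_zeros(out, sizeof(struct example_act));
    /* ... property puts here may reallocate the ofpbuf's data ... */
    return ofpbuf_at_assert(out, act_ofs, sizeof(struct example_act));
}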

The ct action

- ct(argument]...) + ct([argument]...) ct(commit[, argument]...)

@@ -1833,6 +1833,16 @@ for i in [1,n_members]: connection, will behave the same as a bare nat.

+

+ For SNAT, there is a special case when the src IP + address is configured as all 0's, i.e., + nat(src=0.0.0.0). In this case, when a source port + collision is detected during the commit, the source port will be + translated to an ephemeral port. If there is no collision, no SNAT + is performed. Note that this is currently only implemented in the + Linux kernel datapath. +

+
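<!--
  Hedged illustration (not part of the patch; the match is made up): with
  the special case above, a flow such as

    ip,tcp actions=ct(commit,nat(src=0.0.0.0))

  commits the connection without rewriting the source address, and only a
  detected source port collision causes the source port to be rebound to an
  ephemeral one.
-->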

Open vSwitch 2.6 introduced nat. Linux 4.6 was the earliest upstream kernel that implemented ct support for diff --git a/lib/ovsdb-cs.c b/lib/ovsdb-cs.c index ff8adaefb5..7c78056956 100644 --- a/lib/ovsdb-cs.c +++ b/lib/ovsdb-cs.c @@ -712,6 +712,16 @@ void ovsdb_cs_force_reconnect(struct ovsdb_cs *cs) { if (cs->session) { + if (cs->state == CS_S_MONITORING) { + /* The ovsdb-cs was in MONITORING state, so we either had data + * inconsistency on this server, or it stopped being the cluster + * leader, or the user requested to re-connect. Avoiding backoff + * in these cases, as we need to re-connect as soon as possible. + * Connections that are not in MONITORING state should have their + * backoff to avoid constant flood of re-connection attempts in + * case there is no suitable database server. */ + jsonrpc_session_reset_backoff(cs->session); + } jsonrpc_session_force_reconnect(cs->session); } } @@ -903,8 +913,27 @@ ovsdb_cs_db_set_condition(struct ovsdb_cs_db *db, const char *table, } /* Conditions will be up to date when we receive replies for already - * requested and new conditions, if any. */ - return db->cond_seqno + (t->new_cond ? 1 : 0) + (t->req_cond ? 1 : 0); + * requested and new conditions, if any. This includes condition change + * requests for other tables too. + */ + if (t->new_cond) { + /* New condition will be sent out after all already requested ones + * are acked. + */ + bool any_req_cond = false; + HMAP_FOR_EACH (t, hmap_node, &db->tables) { + if (t->req_cond) { + any_req_cond = true; + break; + } + } + return db->cond_seqno + any_req_cond + 1; + } else { + /* Already requested conditions should be up to date at + * db->cond_seqno + 1 while acked conditions are already up to date. + */ + return db->cond_seqno + !!t->req_cond; + } } /* Sets the replication condition for 'tc' in 'cs' to 'condition' and arranges @@ -1367,7 +1396,7 @@ ovsdb_cs_send_transaction(struct ovsdb_cs *cs, struct json *operations) sizeof *cs->txns); } cs->txns[cs->n_txns++] = request_id; - return request_id; + return json_clone(request_id); } /* Makes 'cs' drop its record of transaction 'request_id'. If a reply arrives @@ -1380,6 +1409,7 @@ ovsdb_cs_forget_transaction(struct ovsdb_cs *cs, const struct json *request_id) { for (size_t i = 0; i < cs->n_txns; i++) { if (json_equal(request_id, cs->txns[i])) { + json_destroy(cs->txns[i]); cs->txns[i] = cs->txns[--cs->n_txns]; return true; } diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 2c8a0c9cfe..2198c69c60 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -92,6 +92,9 @@ struct ovsdb_idl { struct ovsdb_idl_txn *txn; struct hmap outstanding_txns; bool verify_write_only; + struct ovs_list deleted_untracked_rows; /* Stores rows deleted in the + * current run, that are not yet + * added to the track_list. 
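/* Worked example (illustrative numbers) for the ovsdb_cs_db_set_condition()
 * return values above, assuming db->cond_seqno == 4:
 *   - t->new_cond set while some table still has a req_cond in flight:
 *     the new condition is sent only after that ack, so 4 + 1 + 1 = 6;
 *   - t->new_cond set with nothing in flight: 4 + 0 + 1 = 5;
 *   - only t->req_cond in flight for this table: 4 + 1 = 5;
 *   - neither: the condition is already up to date at 4. */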
*/ }; static struct ovsdb_cs_ops ovsdb_idl_cs_ops; @@ -144,6 +147,7 @@ static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct shash *values, bool xor); static void ovsdb_idl_parse_update(struct ovsdb_idl *, const struct ovsdb_cs_update_event *); +static void ovsdb_idl_reparse_deleted(struct ovsdb_idl *); static void ovsdb_idl_txn_process_reply(struct ovsdb_idl *, const struct jsonrpc_msg *); @@ -163,6 +167,10 @@ static void ovsdb_idl_row_unparse(struct ovsdb_idl_row *); static void ovsdb_idl_row_clear_old(struct ovsdb_idl_row *); static void ovsdb_idl_row_clear_new(struct ovsdb_idl_row *); static void ovsdb_idl_row_clear_arcs(struct ovsdb_idl_row *, bool destroy_dsts); +static void ovsdb_idl_row_reparse_backrefs(struct ovsdb_idl_row *); +static void ovsdb_idl_row_track_change(struct ovsdb_idl_row *, + enum ovsdb_idl_change); +static void ovsdb_idl_row_untrack_change(struct ovsdb_idl_row *); static void ovsdb_idl_txn_abort_all(struct ovsdb_idl *); static bool ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *, @@ -182,7 +190,6 @@ ovsdb_idl_table_from_class(const struct ovsdb_idl *, static struct ovsdb_idl_table * ovsdb_idl_table_from_class(const struct ovsdb_idl *, const struct ovsdb_idl_table_class *); -static bool ovsdb_idl_track_is_set(struct ovsdb_idl_table *table); static void ovsdb_idl_track_clear__(struct ovsdb_idl *, bool flush_all); static void ovsdb_idl_destroy_indexes(struct ovsdb_idl_table *); @@ -191,6 +198,8 @@ static void ovsdb_idl_remove_from_indexes(const struct ovsdb_idl_row *); static int ovsdb_idl_try_commit_loop_txn(struct ovsdb_idl_loop *loop, bool *may_need_wakeup); +static void add_tracked_change_for_references(struct ovsdb_idl_row *); + /* Creates and returns a connection to database 'remote', which should be in a * form acceptable to jsonrpc_session_open(). The connection will maintain an * in-memory replica of the remote database whose schema is described by @@ -249,6 +258,8 @@ ovsdb_idl_create_unconnected(const struct ovsdb_idl_class *class, .txn = NULL, .outstanding_txns = HMAP_INITIALIZER(&idl->outstanding_txns), .verify_write_only = false, + .deleted_untracked_rows + = OVS_LIST_INITIALIZER(&idl->deleted_untracked_rows), }; uint8_t default_mode = (monitor_everything_by_default @@ -352,6 +363,14 @@ ovsdb_idl_set_leader_only(struct ovsdb_idl *idl, bool leader_only) static void ovsdb_idl_clear(struct ovsdb_idl *db) { + /* Process deleted rows, removing them from the 'deleted_untracked_rows' + * list and reparsing their backrefs. + */ + ovsdb_idl_reparse_deleted(db); + + /* Cleanup all rows; each row gets added to its own table's + * 'track_list'. + */ for (size_t i = 0; i < db->class_->n_tables; i++) { struct ovsdb_idl_table *table = &db->tables[i]; struct ovsdb_idl_row *row, *next_row; @@ -368,17 +387,26 @@ ovsdb_idl_clear(struct ovsdb_idl *db) ovsdb_idl_row_unparse(row); } LIST_FOR_EACH_SAFE (arc, next_arc, src_node, &row->src_arcs) { + ovs_list_remove(&arc->src_node); + ovs_list_remove(&arc->dst_node); + free(arc); + } + LIST_FOR_EACH_SAFE (arc, next_arc, dst_node, &row->dst_arcs) { + ovs_list_remove(&arc->src_node); + ovs_list_remove(&arc->dst_node); free(arc); } - /* No need to do anything with dst_arcs: some node has those arcs - * as forward arcs and will destroy them itself. */ ovsdb_idl_row_destroy(row); } } + + /* Free rows deleted from tables with change tracking disabled. */ ovsdb_idl_row_destroy_postprocess(db); + /* Free rows deleted from tables with change tracking enabled. 
*/ ovsdb_idl_track_clear__(db, true); + ovs_assert(ovs_list_is_empty(&db->deleted_untracked_rows)); db->change_seqno++; } @@ -401,9 +429,15 @@ ovsdb_idl_run(struct ovsdb_idl *idl) break; case OVSDB_CS_EVENT_TYPE_LOCKED: - /* If the client couldn't run a transaction because it didn't have - * the lock, this will encourage it to try again. */ - idl->change_seqno++; + if (ovsdb_cs_may_send_transaction(idl->cs)) { + /* If the client couldn't run a transaction because it didn't + * have the lock, this will encourage it to try again. */ + idl->change_seqno++; + } else { + /* We're setting up a session, so don't signal that the + * database changed. Finalizing the session will increment + * change_seqno anyhow. */ + } break; case OVSDB_CS_EVENT_TYPE_UPDATE: @@ -416,7 +450,7 @@ ovsdb_idl_run(struct ovsdb_idl *idl) } ovsdb_cs_event_destroy(event); } - + ovsdb_idl_reparse_deleted(idl); ovsdb_idl_row_destroy_postprocess(idl); } @@ -1140,7 +1174,7 @@ ovsdb_idl_track_add_all(struct ovsdb_idl *idl) } /* Returns true if 'table' has any tracked column. */ -static bool +bool ovsdb_idl_track_is_set(struct ovsdb_idl_table *table) { size_t i; @@ -1227,13 +1261,8 @@ ovsdb_idl_track_clear__(struct ovsdb_idl *idl, bool flush_all) free(row->updated); row->updated = NULL; } + ovsdb_idl_row_untrack_change(row); - row->change_seqno[OVSDB_IDL_CHANGE_INSERT] = - row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] = - row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0; - - ovs_list_remove(&row->track_node); - ovs_list_init(&row->track_node); if (ovsdb_idl_row_is_orphan(row)) { ovsdb_idl_row_unparse(row); if (row->tracked_old_datum) { @@ -1351,6 +1380,33 @@ ovsdb_idl_parse_update(struct ovsdb_idl *idl, } } +/* Reparses references to rows that have been deleted in the current IDL run. + * + * To ensure that reference sources that are deleted are not reparsed, + * this function must be called after all updates have been processed in + * the current IDL run, i.e., after all calls to ovsdb_idl_parse_update(). + */ +static void +ovsdb_idl_reparse_deleted(struct ovsdb_idl *db) +{ + struct ovsdb_idl_row *row, *next; + + LIST_FOR_EACH_SAFE (row, next, track_node, &db->deleted_untracked_rows) { + ovsdb_idl_row_untrack_change(row); + add_tracked_change_for_references(row); + ovsdb_idl_row_reparse_backrefs(row); + + /* Orphan rows that are still unreferenced or are part of tables that + * have change tracking enabled should be added to their table's + * 'track_list'. 
+ */ + if (ovs_list_is_empty(&row->dst_arcs) + || ovsdb_idl_track_is_set(row->table)) { + ovsdb_idl_row_track_change(row, OVSDB_IDL_CHANGE_DELETE); + } + } +} + static struct ovsdb_idl_row * ovsdb_idl_get_row(struct ovsdb_idl_table *table, const struct uuid *uuid) { @@ -1404,6 +1460,7 @@ ovsdb_idl_process_update(struct ovsdb_idl_table *table, ovsdb_idl_insert_row(ovsdb_idl_row_create(table, uuid), ru->columns); } else if (ovsdb_idl_row_is_orphan(row)) { + ovsdb_idl_row_untrack_change(row); ovsdb_idl_insert_row(row, ru->columns); } else { VLOG_ERR_RL(&semantic_rl, "cannot add existing row "UUID_FMT" to " @@ -1451,13 +1508,8 @@ add_tracked_change_for_references(struct ovsdb_idl_row *row) if (ovs_list_is_empty(&ref->track_node) && ovsdb_idl_track_is_set(ref->table)) { - ovs_list_push_back(&ref->table->track_list, - &ref->track_node); - - ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY] - = ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY] - = ref->table->idl->change_seqno + 1; + ovsdb_idl_row_track_change(ref, OVSDB_IDL_CHANGE_MODIFY); add_tracked_change_for_references(ref); } } @@ -2023,6 +2075,32 @@ ovsdb_idl_row_reparse_backrefs(struct ovsdb_idl_row *row) } } +static void +ovsdb_idl_row_track_change(struct ovsdb_idl_row *row, + enum ovsdb_idl_change change) +{ + row->change_seqno[change] + = row->table->change_seqno[change] + = row->table->idl->change_seqno + 1; + if (ovs_list_is_empty(&row->track_node)) { + ovs_list_push_back(&row->table->track_list, &row->track_node); + } +} + +static void +ovsdb_idl_row_untrack_change(struct ovsdb_idl_row *row) +{ + if (ovs_list_is_empty(&row->track_node)) { + return; + } + + row->change_seqno[OVSDB_IDL_CHANGE_INSERT] = + row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] = + row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0; + ovs_list_remove(&row->track_node); + ovs_list_init(&row->track_node); +} + static struct ovsdb_idl_row * ovsdb_idl_row_create__(const struct ovsdb_idl_table_class *class) { @@ -2049,22 +2127,26 @@ ovsdb_idl_row_create(struct ovsdb_idl_table *table, const struct uuid *uuid) return row; } +/* If 'row' is not referenced anymore, removes 'row' from the table hmap, + * clears the old datum and adds 'row' to the table's track_list. + * + * If 'row' is still referenced, i.e., became "orphan", queues 'row' for + * reparsing after all updates have been processed by adding it to the + * 'deleted_untracked_rows' list. 
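/* Illustrative walkthrough (not part of the patch) of the deferred-delete
 * path above: suppose row A references row B and an update deletes B.
 *   1. ovsdb_idl_row_destroy(B) finds B still has dst_arcs (from A), so B
 *      is only queued on idl->deleted_untracked_rows.
 *   2. After every update in the run is applied,
 *      ovsdb_idl_reparse_deleted() reparses A's references to B and tracks
 *      B as a DELETE change only if B is now unreferenced or its table has
 *      change tracking enabled. */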
+ */ static void ovsdb_idl_row_destroy(struct ovsdb_idl_row *row) { - if (row) { - ovsdb_idl_row_clear_old(row); + ovsdb_idl_row_clear_old(row); + if (ovs_list_is_empty(&row->dst_arcs)) { hmap_remove(&row->table->rows, &row->hmap_node); ovsdb_idl_destroy_all_map_op_lists(row); ovsdb_idl_destroy_all_set_op_lists(row); - if (ovsdb_idl_track_is_set(row->table)) { - row->change_seqno[OVSDB_IDL_CHANGE_DELETE] - = row->table->change_seqno[OVSDB_IDL_CHANGE_DELETE] - = row->table->idl->change_seqno + 1; - } - if (ovs_list_is_empty(&row->track_node)) { - ovs_list_push_back(&row->table->track_list, &row->track_node); - } + ovsdb_idl_row_track_change(row, OVSDB_IDL_CHANGE_DELETE); + } else { + ovsdb_idl_row_untrack_change(row); + ovs_list_push_back(&row->table->idl->deleted_untracked_rows, + &row->track_node); } } @@ -2154,12 +2236,7 @@ ovsdb_idl_delete_row(struct ovsdb_idl_row *row) { ovsdb_idl_remove_from_indexes(row); ovsdb_idl_row_clear_arcs(row, true); - ovsdb_idl_row_clear_old(row); - if (ovs_list_is_empty(&row->dst_arcs)) { - ovsdb_idl_row_destroy(row); - } else { - ovsdb_idl_row_reparse_backrefs(row); - } + ovsdb_idl_row_destroy(row); } /* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h index 05bb48d66c..d93483245e 100644 --- a/lib/ovsdb-idl.h +++ b/lib/ovsdb-idl.h @@ -53,6 +53,7 @@ struct ovsdb_datum; struct ovsdb_idl_class; struct ovsdb_idl_row; struct ovsdb_idl_column; +struct ovsdb_idl_table; struct ovsdb_idl_table_class; struct uuid; @@ -217,6 +218,7 @@ unsigned int ovsdb_idl_row_get_seqno( void ovsdb_idl_track_add_column(struct ovsdb_idl *idl, const struct ovsdb_idl_column *column); void ovsdb_idl_track_add_all(struct ovsdb_idl *idl); +bool ovsdb_idl_track_is_set(struct ovsdb_idl_table *table); const struct ovsdb_idl_row *ovsdb_idl_track_get_first( const struct ovsdb_idl *, const struct ovsdb_idl_table_class *); const struct ovsdb_idl_row *ovsdb_idl_track_get_next(const struct ovsdb_idl_row *); diff --git a/lib/pcap-file.c b/lib/pcap-file.c index f0cac8e0fa..7f5561f827 100644 --- a/lib/pcap-file.c +++ b/lib/pcap-file.c @@ -89,6 +89,7 @@ ovs_pcap_open(const char *file_name, const char *mode) : mode[0] == 'w' ? 
"writing" : "appending"), ovs_strerror(errno)); + free(p_file); return NULL; } diff --git a/lib/tc.c b/lib/tc.c index 3192207984..2eb271d621 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -2557,6 +2557,7 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) if (!released && flower->tunnel) { act_offset = nl_msg_start_nested(request, act_index++); nl_msg_put_act_tunnel_key_release(request); + nl_msg_put_act_flags(request); nl_msg_end_nested(request, act_offset); released = true; } diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c index c0b0ae0448..af0bcbde8d 100644 --- a/lib/tun-metadata.c +++ b/lib/tun-metadata.c @@ -828,7 +828,7 @@ tun_metadata_to_geneve_nlattr(const struct flow_tnl *tun, } else { tun_metadata_to_geneve_nlattr_mask(key, tun, flow, b); } - } else if (flow->metadata.present.len || is_mask) { + } else { nl_msg_put_unspec(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, tun->metadata.opts.gnv, flow->metadata.present.len); diff --git a/ofproto/bond.c b/ofproto/bond.c index 35b9caac01..a4116588f4 100644 --- a/ofproto/bond.c +++ b/ofproto/bond.c @@ -1173,49 +1173,72 @@ bond_shift_load(struct bond_entry *hash, struct bond_member *to) bond->bond_revalidate = true; } -/* Picks and returns a bond_entry to migrate from 'from' (the most heavily +/* Picks and returns 'bond_entry's to migrate from 'from' (the most heavily * loaded bond member) to a bond member that has 'to_tx_bytes' bytes of load, * given that doing so must decrease the ratio of the load on the two members - * by at least 0.1. Returns NULL if there is no appropriate entry. + * by at least 0.1. Returns number of entries filled in 'to_migrate'. * - * The list of entries isn't sorted. I don't know of a reason to prefer to - * shift away small hashes or large hashes. */ -static struct bond_entry * -choose_entry_to_migrate(const struct bond_member *from, uint64_t to_tx_bytes) + * The list of entries is sorted in descending order of load. This allows us + * to collect subset of entries with accumulated load close to ideal. */ +static size_t +choose_entries_to_migrate(const struct bond_member *from, uint64_t to_tx_bytes, + struct bond_entry **to_migrate) OVS_REQ_WRLOCK(rwlock) { struct bond_entry *e; + /* Note, the ideal traffic is the mid point between 'from' and 'to'. + * This value does not change by rebalancing. */ + uint64_t ideal_tx_bytes = (from->tx_bytes + to_tx_bytes) / 2; + uint64_t ideal_delta = ideal_tx_bytes - to_tx_bytes; + uint64_t delta = 0; /* The amount to rebalance. */ + uint64_t new_low; /* The lower bandwidth between 'to' and 'from' + * after rebalancing. */ + uint64_t migration_threshold = ideal_delta / 10; /* 10% */ + size_t cnt = 0; if (ovs_list_is_short(&from->entries)) { /* 'from' carries no more than one MAC hash, so shifting load away from * it would be pointless. */ - return NULL; + return 0; } LIST_FOR_EACH (e, list_node, &from->entries) { - uint64_t delta = e->tx_bytes; /* The amount to rebalance. */ - uint64_t ideal_tx_bytes = (from->tx_bytes + to_tx_bytes)/2; - /* Note, the ideal traffic is the mid point - * between 'from' and 'to'. This value does - * not change by rebalancing. */ - uint64_t new_low; /* The lower bandwidth between 'to' and 'from' - after rebalancing. 
*/ - - new_low = MIN(from->tx_bytes - delta, to_tx_bytes + delta); - - if ((new_low > to_tx_bytes) && - (new_low - to_tx_bytes >= (ideal_tx_bytes - to_tx_bytes) / 10)) { - /* Only rebalance if the new 'low' is closer to to the mid point, - * and the improvement exceeds 10% of current traffic - * deviation from the ideal split. - * - * The improvement on the 'high' side is always the same as the - * 'low' side. Thus consider 'low' side is sufficient. */ - return e; + if (delta + e->tx_bytes <= ideal_delta) { + /* Take the next entry if the amount to rebalance won't exceed ideal. */ + to_migrate[cnt++] = e; + delta += e->tx_bytes; + } + if (ideal_delta - delta < migration_threshold) { + /* Stop collecting hashes if we're close enough to the ideal value + * to avoid frequent moving of light ones. */ + break; + } } - return NULL; + if (!cnt) { + /* There is no entry with load less than or equal to 'ideal_delta'. + * Let's try the closest one, i.e. the last one in the sorted list. */ + struct bond_entry *closest; + + ASSIGN_CONTAINER(closest, ovs_list_back(&from->entries), list_node); + + delta = closest->tx_bytes; + to_migrate[cnt++] = closest; + } + + new_low = MIN(from->tx_bytes - delta, to_tx_bytes + delta); + if ((new_low > to_tx_bytes) && + (new_low - to_tx_bytes >= migration_threshold)) { + /* Only rebalance if the new 'low' is closer to the mid point and the + * improvement of traffic deviation from the ideal split exceeds 10% + * (migration threshold). + * + * The improvement on the 'high' side is always the same as the 'low' + * side. Thus considering the 'low' side is sufficient. */ + return cnt; + } + + return 0; } /* Inserts 'member' into 'bals' so that descending order of 'tx_bytes' is @@ -1242,6 +1265,22 @@ reinsert_bal(struct ovs_list *bals, struct bond_member *member) insert_bal(bals, member); } +static int +compare_bond_entries(const void *a_, const void *b_) + OVS_REQ_RDLOCK(rwlock) +{ + const struct bond_entry *const *ap = a_; + const struct bond_entry *const *bp = b_; + const struct bond_entry *a = *ap; + const struct bond_entry *b = *bp; + + if (a->tx_bytes != b->tx_bytes) { + return a->tx_bytes > b->tx_bytes ? -1 : 1; + } else { + return 0; + } +} + /* If 'bond' needs rebalancing, does so. * * The caller should have called bond_account() for each active flow, or in case @@ -1251,8 +1290,8 @@ reinsert_bal(struct ovs_list *bals, struct bond_member *member) void bond_rebalance(struct bond *bond) { + struct bond_entry *e, *hashes[BOND_BUCKETS]; struct bond_member *member; - struct bond_entry *e; struct ovs_list bals; bool rebalanced = false; bool use_recirc; @@ -1276,7 +1315,15 @@ bond_rebalance(struct bond *bond) member->tx_bytes = 0; ovs_list_init(&member->entries); } - for (e = &bond->hash[0]; e <= &bond->hash[BOND_MASK]; e++) { + + for (int i = 0; i < BOND_BUCKETS; i++) { + hashes[i] = &bond->hash[i]; + } + qsort(hashes, BOND_BUCKETS, sizeof *hashes, compare_bond_entries); + + /* Iteration over sorted bond hashes will give us sorted 'entries'. */ + for (int i = 0; i < BOND_BUCKETS; i++) { + e = hashes[i]; if (e->member && e->tx_bytes) { e->member->tx_bytes += e->tx_bytes; ovs_list_push_back(&e->member->entries, &e->list_node); @@ -1311,15 +1358,23 @@ bond_rebalance(struct bond *bond) break; } - /* 'from' is carrying significantly more load than 'to'. Pick a hash + /* 'from' is carrying significantly more load than 'to'. Pick hashes * to move from 'from' to 'to'.
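/* Numeric illustration (made-up loads) of choose_entries_to_migrate()
 * above: with from->tx_bytes = 1400 and to_tx_bytes = 600,
 *   ideal_tx_bytes      = (1400 + 600) / 2 = 1000,
 *   ideal_delta         = 1000 - 600       = 400,
 *   migration_threshold = 400 / 10         = 40.
 * For entries sorted as {300, 120, 61, 20}: 300 is taken (delta = 300),
 * 120 is skipped (300 + 120 > 400), 61 is taken (delta = 361), and the
 * loop breaks because 400 - 361 < 40.  Then new_low = MIN(1400 - 361,
 * 600 + 361) = 961 > 600 and 961 - 600 >= 40, so both hashes migrate. */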
*/ - e = choose_entry_to_migrate(from, to->tx_bytes); - if (e) { + size_t cnt = choose_entries_to_migrate(from, to->tx_bytes, hashes); + if (!cnt) { + /* Can't usefully migrate anything away from 'from'. + * Don't reconsider it. */ + ovs_list_remove(&from->bal_node); + continue; + } + + for (size_t i = 0; i < cnt; i++) { + e = hashes[i]; bond_shift_load(e, to); /* Delete element from from->entries. * - * We don't add the element to to->hashes. That would only allow + * We don't add the element to to->entries. That would only allow * 'e' to be migrated to another member in this rebalancing run, and * there is no point in doing that. */ ovs_list_remove(&e->list_node); @@ -1327,12 +1382,8 @@ bond_rebalance(struct bond *bond) /* Re-sort 'bals'. */ reinsert_bal(&bals, from); reinsert_bal(&bals, to); - rebalanced = true; - } else { - /* Can't usefully migrate anything away from 'from'. - * Don't reconsider it. */ - ovs_list_remove(&from->bal_node); } + rebalanced = true; } /* Implement exponentially weighted moving average. A weight of 1/2 causes diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index 9c5c633b41..fa8f6cd0e8 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -2140,7 +2140,7 @@ ofmonitor_report(struct connmgr *mgr, struct rule *rule, const struct rule_actions *old_actions) OVS_REQUIRES(ofproto_mutex) { - if (rule_is_hidden(rule)) { + if (!mgr || rule_is_hidden(rule)) { return; } @@ -2244,6 +2244,10 @@ ofmonitor_flush(struct connmgr *mgr) { struct ofconn *ofconn; + if (!mgr) { + return; + } + LIST_FOR_EACH (ofconn, connmgr_node, &mgr->conns) { struct rconn_packet_counter *counter = ofconn->monitor_counter; diff --git a/ofproto/ipfix-gen-entities b/ofproto/ipfix-gen-entities index d5abe9c2ed..dcecdab212 100755 --- a/ofproto/ipfix-gen-entities +++ b/ofproto/ipfix-gen-entities @@ -7,8 +7,6 @@ # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. -from __future__ import print_function - import getopt import re import sys diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index fdcb9eabbf..864c136b5d 100644 --- a/ofproto/ofproto-dpif-sflow.c +++ b/ofproto/ofproto-dpif-sflow.c @@ -1292,10 +1292,10 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet, ovs_be16 vlan_tci; ovs_mutex_lock(&mutex); - sampler = ds->sflow_agent->samplers; - if (!sampler) { + if (!ds->sflow_agent || !ds->sflow_agent->samplers) { goto out; } + sampler = ds->sflow_agent->samplers; /* Build a flow sample. 
*/ memset(&fs, 0, sizeof fs); diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 5fae46adfc..ccf97266c0 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -491,6 +491,11 @@ udpif_destroy(struct udpif *udpif) dpif_register_upcall_cb(udpif->dpif, NULL, udpif); for (int i = 0; i < N_UMAPS; i++) { + struct udpif_key *ukey; + + CMAP_FOR_EACH (ukey, cmap_node, &udpif->ukeys[i].cmap) { + ukey_delete__(ukey); + } cmap_destroy(&udpif->ukeys[i].cmap); ovs_mutex_destroy(&udpif->ukeys[i].mutex); } diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 7108c8a301..479e459fcb 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -7127,7 +7127,9 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, break; case OFPACT_CT_CLEAR: - compose_ct_clear_action(ctx); + if (ctx->conntracked) { + compose_ct_clear_action(ctx); + } break; case OFPACT_NAT: diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index fd0b2fdea0..5ce56adfae 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1389,6 +1389,24 @@ check_ct_timeout_policy(struct dpif_backer *backer) return !error; } +/* Tests whether 'backer''s datapath supports the all-zero SNAT case. */ +static bool +dpif_supports_ct_zero_snat(struct dpif_backer *backer) +{ + enum ct_features features; + bool supported = false; + + if (!ct_dpif_get_features(backer->dpif, &features)) { + if (features & CONNTRACK_F_ZERO_SNAT) { + supported = true; + } + } + VLOG_INFO("%s: Datapath %s ct_zero_snat", + dpif_name(backer->dpif), (supported) ? "supports" + : "does not support"); + return supported; +} + /* Tests whether 'backer''s datapath supports the * OVS_ACTION_ATTR_CHECK_PKT_LEN action. */ static bool @@ -1588,8 +1606,9 @@ check_support(struct dpif_backer *backer) backer->rt_support.ct_timeout = check_ct_timeout_policy(backer); backer->rt_support.explicit_drop_action = dpif_supports_explicit_drop_action(backer->dpif); - backer->rt_support.lb_output_action= + backer->rt_support.lb_output_action = dpif_supports_lb_output_action(backer->dpif); + backer->rt_support.ct_zero_snat = dpif_supports_ct_zero_snat(backer); /* Flow fields. */ backer->rt_support.odp.ct_state = check_ct_state(backer); @@ -5413,6 +5432,8 @@ ct_add_timeout_policy_to_dpif(struct dpif *dpif, struct ct_dpif_timeout_policy cdtp; struct simap_node *node; + memset(&cdtp, 0, sizeof cdtp); + cdtp.id = ct_tp->tp_id; SIMAP_FOR_EACH (node, &ct_tp->tp) { ct_dpif_set_timeout_policy_attr_by_name(&cdtp, node->name, node->data); @@ -5603,6 +5624,7 @@ get_datapath_cap(const char *datapath_type, struct smap *cap) smap_add(cap, "explicit_drop_action", s.explicit_drop_action ? "true" :"false"); smap_add(cap, "lb_output_action", s.lb_output_action ? "true" : "false"); + smap_add(cap, "ct_zero_snat", s.ct_zero_snat ? "true" : "false"); } /* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index b41c3d82ad..191cfcb0df 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -204,7 +204,10 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *, DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action") \ \ /* True if the datapath supports balance_tcp optimization */ \ - DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode") + DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode")\ + \ + /* True if the datapath supports all-zero IP SNAT. 
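/* Sketch (not part of the patch) of how a consumer of the capability smap
 * filled in by get_datapath_cap() above could check the new key;
 * smap_get_bool() is the existing lib/smap.h helper. */
static bool
datapath_supports_zero_snat(const struct smap *cap)
{
    return smap_get_bool(cap, "ct_zero_snat", false);  /* Absent => false. */
}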
*/ \ + DPIF_SUPPORT_FIELD(bool, ct_zero_snat, "Conntrack all-zero IP SNAT") /* Stores the various features which the corresponding backer supports. */ diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index b91517cd25..80ec2d9ac9 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -968,7 +968,7 @@ ofproto_get_datapath_cap(const char *datapath_type, struct smap *dp_cap) datapath_type = ofproto_normalize_type(datapath_type); const struct ofproto_class *class = ofproto_class_find__(datapath_type); - if (class->get_datapath_cap) { + if (class && class->get_datapath_cap) { class->get_datapath_cap(datapath_type, dp_cap); } } @@ -981,7 +981,7 @@ ofproto_ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id, datapath_type = ofproto_normalize_type(datapath_type); const struct ofproto_class *class = ofproto_class_find__(datapath_type); - if (class->ct_set_zone_timeout_policy) { + if (class && class->ct_set_zone_timeout_policy) { class->ct_set_zone_timeout_policy(datapath_type, zone_id, timeout_policy); } @@ -993,7 +993,7 @@ ofproto_ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone_id) datapath_type = ofproto_normalize_type(datapath_type); const struct ofproto_class *class = ofproto_class_find__(datapath_type); - if (class->ct_del_zone_timeout_policy) { + if (class && class->ct_del_zone_timeout_policy) { class->ct_del_zone_timeout_policy(datapath_type, zone_id); } diff --git a/ovsdb/ovsdb-client.c b/ovsdb/ovsdb-client.c index 72756eb1f2..ba28e36d78 100644 --- a/ovsdb/ovsdb-client.c +++ b/ovsdb/ovsdb-client.c @@ -1664,14 +1664,15 @@ static void do_needs_conversion(struct jsonrpc *rpc, const char *database_ OVS_UNUSED, int argc OVS_UNUSED, char *argv[]) { + const char *schema_file_name = argv[argc - 1]; struct ovsdb_schema *schema1; - check_ovsdb_error(ovsdb_schema_from_file(argv[0], &schema1)); + check_ovsdb_error(ovsdb_schema_from_file(schema_file_name, &schema1)); char *database = schema1->name; open_rpc(1, NEED_DATABASE, argc, argv, &rpc, &database); if (is_database_clustered(rpc, database)) { - ovsdb_schema_persist_ephemeral_columns(schema1, argv[0]); + ovsdb_schema_persist_ephemeral_columns(schema1, schema_file_name); } struct ovsdb_schema *schema2 = fetch_schema(rpc, schema1->name); diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in index 5914e08789..61cded16d3 100755 --- a/ovsdb/ovsdb-idlc.in +++ b/ovsdb/ovsdb-idlc.in @@ -1,6 +1,5 @@ #! @PYTHON3@ -from __future__ import print_function import getopt import os import re diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index 29a2bace84..ce6aee3008 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -655,8 +655,6 @@ add_db(struct server_config *config, struct db *db) static struct ovsdb_error * OVS_WARN_UNUSED_RESULT open_db(struct server_config *config, const char *filename) { - struct db *db; - /* If we know that the file is already open, return a good error message. * Otherwise, if the file is open, we'll fail later on with a harder to * interpret file locking error. 
*/ @@ -671,9 +669,6 @@ open_db(struct server_config *config, const char *filename) return error; } - db = xzalloc(sizeof *db); - db->filename = xstrdup(filename); - struct ovsdb_schema *schema; if (ovsdb_storage_is_clustered(storage)) { schema = NULL; @@ -686,6 +681,9 @@ open_db(struct server_config *config, const char *filename) } ovs_assert(schema && !txn_json); } + + struct db *db = xzalloc(sizeof *db); + db->filename = xstrdup(filename); db->db = ovsdb_create(schema, storage); ovsdb_jsonrpc_server_add_db(config->jsonrpc, db->db); diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c index 9042658fa8..e019631e9a 100644 --- a/ovsdb/ovsdb.c +++ b/ovsdb/ovsdb.c @@ -31,6 +31,7 @@ #include "simap.h" #include "storage.h" #include "table.h" +#include "timeval.h" #include "transaction.h" #include "trigger.h" @@ -525,6 +526,7 @@ ovsdb_snapshot(struct ovsdb *db, bool trim_memory OVS_UNUSED) return NULL; } + uint64_t elapsed, start_time = time_msec(); struct json *schema = ovsdb_schema_to_json(db->schema); struct json *data = ovsdb_to_txn_json(db, "compacting database online"); struct ovsdb_error *error = ovsdb_storage_store_snapshot(db->storage, @@ -537,6 +539,12 @@ ovsdb_snapshot(struct ovsdb *db, bool trim_memory OVS_UNUSED) malloc_trim(0); } #endif + + elapsed = time_msec() - start_time; + if (elapsed > 1000) { + VLOG_INFO("%s: Database compaction took %"PRIu64"ms", + db->name, elapsed); + } return error; } diff --git a/ovsdb/raft.c b/ovsdb/raft.c index ea91d1fdba..8fa872494e 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -940,6 +940,34 @@ raft_reset_ping_timer(struct raft *raft) raft->ping_timeout = time_msec() + raft->election_timer / 3; } +static void +raft_conn_update_probe_interval(struct raft *raft, struct raft_conn *r_conn) +{ + /* Inactivity probe will be sent if connection will remain idle for the + * time of an election timeout. Connection will be dropped if inactivity + * will last twice that time. + * + * It's not enough to just have heartbeats if connection is still + * established, but no packets received from the other side. Without + * inactivity probe follower will just try to initiate election + * indefinitely staying in 'candidate' role. And the leader will continue + * to send heartbeats to the dead connection thinking that remote server + * is still part of the cluster. 
*/ + int probe_interval = raft->election_timer + ELECTION_RANGE_MSEC; + + jsonrpc_session_set_probe_interval(r_conn->js, probe_interval); +} + +static void +raft_update_probe_intervals(struct raft *raft) +{ + struct raft_conn *r_conn; + + LIST_FOR_EACH (r_conn, list_node, &raft->conns) { + raft_conn_update_probe_interval(raft, r_conn); + } +} + static void raft_add_conn(struct raft *raft, struct jsonrpc_session *js, const struct uuid *sid, bool incoming) @@ -954,7 +982,7 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js, &conn->sid); conn->incoming = incoming; conn->js_seqno = jsonrpc_session_get_seqno(conn->js); - jsonrpc_session_set_probe_interval(js, 0); + raft_conn_update_probe_interval(raft, conn); jsonrpc_session_set_backlog_threshold(js, raft->conn_backlog_max_n_msgs, raft->conn_backlog_max_n_bytes); } @@ -2804,6 +2832,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) raft->election_timer, e->election_timer); raft->election_timer = e->election_timer; raft->election_timer_new = 0; + raft_update_probe_intervals(raft); } if (e->servers) { /* raft_run_reconfigure() can write a new Raft entry, which can @@ -2820,6 +2849,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64, raft->election_timer, e->election_timer); raft->election_timer = e->election_timer; + raft_update_probe_intervals(raft); } } /* Check if any pending command can be completed, and complete it. @@ -4122,9 +4152,24 @@ raft_may_snapshot(const struct raft *raft) && !raft->leaving && !raft->left && !raft->failed + && raft->role != RAFT_LEADER && raft->last_applied >= raft->log_start); } +/* Prepares 'raft' for an upcoming snapshot. */ +void +raft_notify_snapshot_recommended(struct raft *raft) +{ + if (raft->role == RAFT_LEADER) { + /* The leader is about to write a database snapshot to disk, which + * might take a significant amount of time. Step back from the + * leadership to keep the cluster functional during this process. */ + VLOG_INFO("Transferring leadership to write a snapshot."); + raft_transfer_leadership(raft, "preparing to write snapshot"); + raft_become_follower(raft); + } +} + /* Replaces the log for 'raft', up to the last log entry read, by * 'new_snapshot_data'. Returns NULL if successful, otherwise an error that * the caller must eventually free. @@ -4468,6 +4513,8 @@ raft_unixctl_status(struct unixctl_conn *conn, : raft->leaving ? "leaving cluster" : raft->left ? "left cluster" : raft->failed ? "failed" + : raft->candidate_retrying + ? "disconnected from the cluster (election timeout)" : "cluster member"); if (raft->joining) { ds_put_format(&s, "Remotes for joining:"); diff --git a/ovsdb/raft.h b/ovsdb/raft.h index 99d5307e54..59902fe825 100644 --- a/ovsdb/raft.h +++ b/ovsdb/raft.h @@ -174,6 +174,7 @@ void raft_command_wait(const struct raft_command *); bool raft_grew_lots(const struct raft *); uint64_t raft_get_log_length(const struct raft *); bool raft_may_snapshot(const struct raft *); +void raft_notify_snapshot_recommended(struct raft *); struct ovsdb_error *raft_store_snapshot(struct raft *, const struct json *new_snapshot) OVS_WARN_UNUSED_RESULT; diff --git a/ovsdb/storage.c b/ovsdb/storage.c index f662e90566..40415fcf62 100644 --- a/ovsdb/storage.c +++ b/ovsdb/storage.c @@ -519,14 +519,11 @@ ovsdb_storage_should_snapshot(const struct ovsdb_storage *storage) return false; } - /* If we can't snapshot right now, don't.
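/* Illustration (the 1000 ms figure is an example; ELECTION_RANGE_MSEC is
 * the existing raft.c constant) of the probe policy configured above: with
 * a 1000 ms election timer the probe interval is 1000 + ELECTION_RANGE_MSEC,
 * so an idle connection is probed after roughly one election timeout of
 * silence and, as the comment above notes, dropped if it stays idle for
 * twice that long. */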
*/ - if (storage->raft && !raft_may_snapshot(storage->raft)) { - return false; - } - uint64_t log_len = (storage->raft ? raft_get_log_length(storage->raft) : storage->n_read + storage->n_written); + bool snapshot_recommended = false; + if (now < storage->next_snapshot_max) { /* Maximum snapshot time not yet reached. Take a snapshot if there * have been at least 100 log entries and the log file size has @@ -534,12 +531,25 @@ bool grew_lots = (storage->raft ? raft_grew_lots(storage->raft) : ovsdb_log_grew_lots(storage->log)); - return log_len >= 100 && grew_lots; + snapshot_recommended = (log_len >= 100 && grew_lots); } else { /* We have reached the maximum snapshot time. Take a snapshot if * there have been any log entries at all. */ - return log_len > 0; + snapshot_recommended = (log_len > 0); } + + if (!snapshot_recommended) { + return false; + } + + /* If we can't snapshot right now, don't. */ + if (storage->raft && !raft_may_snapshot(storage->raft)) { + /* Let raft know that a snapshot is recommended, so it can prepare. */ + raft_notify_snapshot_recommended(storage->raft); + return false; + } + + return true; } return false; diff --git a/python/ovs/compat/sortedcontainers/sortedlist.py b/python/ovs/compat/sortedcontainers/sortedlist.py index 8aec6bbac1..ba55566926 100644 --- a/python/ovs/compat/sortedcontainers/sortedlist.py +++ b/python/ovs/compat/sortedcontainers/sortedlist.py @@ -3,8 +3,6 @@ """ # pylint: disable=redefined-builtin, ungrouped-imports -from __future__ import print_function - from bisect import bisect_left, bisect_right, insort from collections import Sequence, MutableSequence from functools import wraps diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py index 5850ac7abf..3ca47f96bb 100644 --- a/python/ovs/db/idl.py +++ b/python/ovs/db/idl.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections import functools import uuid @@ -39,6 +40,10 @@ OVSDB_UPDATE2 = 1 CLUSTERED = "clustered" +Notice = collections.namedtuple('Notice', ('event', 'row', 'updates')) +Notice.__new__.__defaults__ = (None,) # default updates=None + + class Idl(object): """Open vSwitch Database Interface Definition Language (OVSDB IDL).
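# Illustration (not part of the patch) of the Notice tuple introduced above:
# the __defaults__ assignment makes the trailing 'updates' member optional,
# so both forms below are valid, and buffering notices lets __parse_update()
# apply the whole batch before any notify() callback can observe it:
#
#     Notice(ROW_DELETE, row)             # 'updates' defaults to None
#     Notice(ROW_UPDATE, row, old_row)    # explicit old row
#
#     for notice in notices:
#         self.notify(*notice)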
@@ -96,6 +101,7 @@ class Idl(object):
     IDL_S_SERVER_MONITOR_REQUESTED = 2
     IDL_S_DATA_MONITOR_REQUESTED = 3
     IDL_S_DATA_MONITOR_COND_REQUESTED = 4
+    IDL_S_MONITORING = 5
 
     def __init__(self, remote, schema_helper, probe_interval=None,
                  leader_only=True):
@@ -241,6 +247,7 @@ class Idl(object):
         i = 0
         while i < 50:
             i += 1
+            previous_change_seqno = self.change_seqno
             if not self._session.is_connected():
                 break
 
@@ -269,7 +276,7 @@
                     if msg.params[0] == str(self.server_monitor_uuid):
                         self.__parse_update(msg.params[1], OVSDB_UPDATE,
                                             tables=self.server_tables)
-                        self.change_seqno = initial_change_seqno
+                        self.change_seqno = previous_change_seqno
                         if not self.__check_server_db():
                             self.force_reconnect()
                             break
@@ -288,6 +295,7 @@
                     else:
                         assert self.state == self.IDL_S_DATA_MONITOR_REQUESTED
                         self.__parse_update(msg.result, OVSDB_UPDATE)
+                    self.state = self.IDL_S_MONITORING
 
                 except error.Error as e:
                     vlog.err("%s: parse error in received schema: %s"
@@ -312,7 +320,7 @@
                         self.__error()
                         break
                     else:
-                        self.change_seqno = initial_change_seqno
+                        self.change_seqno = previous_change_seqno
                         self.__send_monitor_request()
             elif (msg.type == ovs.jsonrpc.Message.T_REPLY
                   and self._server_monitor_request_id is not None
@@ -322,7 +330,7 @@
                     self._server_monitor_request_id = None
                     self.__parse_update(msg.result, OVSDB_UPDATE,
                                         tables=self.server_tables)
-                    self.change_seqno = initial_change_seqno
+                    self.change_seqno = previous_change_seqno
                     if self.__check_server_db():
                         self.__send_monitor_request()
                         self.__send_db_change_aware()
@@ -336,7 +344,7 @@
                        self.__error()
                        break
                    else:
-                        self.change_seqno = initial_change_seqno
+                        self.change_seqno = previous_change_seqno
                         self.__send_monitor_request()
             elif (msg.type == ovs.jsonrpc.Message.T_REPLY
                   and self._db_change_aware_request_id is not None
@@ -372,7 +380,7 @@
                         self.force_reconnect()
                         break
                     else:
-                        self.change_seqno = initial_change_seqno
+                        self.change_seqno = previous_change_seqno
                         self.__send_monitor_request()
             elif (msg.type in (ovs.jsonrpc.Message.T_ERROR,
                                ovs.jsonrpc.Message.T_REPLY)
@@ -435,6 +443,15 @@ class Idl(object):
     def force_reconnect(self):
         """Forces the IDL to drop its connection to the database and
         reconnect.  In the meantime, the contents of the IDL will not
         change."""
+        if self.state == self.IDL_S_MONITORING:
+            # The IDL was in MONITORING state, so either there was a data
+            # inconsistency on this server, the server stopped being the
+            # cluster leader, or the user requested a re-connect.  Avoid
+            # backoff in these cases, as we need to re-connect as soon as
+            # possible.  Connections that are not in MONITORING state should
+            # keep their backoff, to avoid a constant flood of re-connection
+            # attempts when there is no suitable database server.
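+            # (reset_backoff() grants one backoff-free connection attempt
+            # per configured remote, plus one for the currently active
+            # connection, so a clustered client can try every server once
+            # immediately before exponential backoff resumes.)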
+ self._session.reset_backoff() self._session.force_reconnect() def session_name(self): @@ -614,6 +631,7 @@ class Idl(object): raise error.Error(" is not an object", table_updates) + notices = [] for table_name, table_update in table_updates.items(): table = tables.get(table_name) if not table: @@ -639,7 +657,9 @@ class Idl(object): % (table_name, uuid_string)) if version == OVSDB_UPDATE2: - if self.__process_update2(table, uuid, row_update): + changes = self.__process_update2(table, uuid, row_update) + if changes: + notices.append(changes) self.change_seqno += 1 continue @@ -652,17 +672,20 @@ class Idl(object): raise error.Error(' missing "old" and ' '"new" members', row_update) - if self.__process_update(table, uuid, old, new): + changes = self.__process_update(table, uuid, old, new) + if changes: + notices.append(changes) self.change_seqno += 1 + for notice in notices: + self.notify(*notice) def __process_update2(self, table, uuid, row_update): + """Returns Notice if a column changed, False otherwise.""" row = table.rows.get(uuid) - changed = False if "delete" in row_update: if row: del table.rows[uuid] - self.notify(ROW_DELETE, row) - changed = True + return Notice(ROW_DELETE, row) else: # XXX rate-limit vlog.warn("cannot delete missing row %s from table" @@ -681,29 +704,27 @@ class Idl(object): changed = self.__row_update(table, row, row_update) table.rows[uuid] = row if changed: - self.notify(ROW_CREATE, row) + return Notice(ROW_CREATE, row) elif "modify" in row_update: if not row: raise error.Error('Modify non-existing row') old_row = self.__apply_diff(table, row, row_update['modify']) - self.notify(ROW_UPDATE, row, Row(self, table, uuid, old_row)) - changed = True + return Notice(ROW_UPDATE, row, Row(self, table, uuid, old_row)) else: raise error.Error(' unknown operation', row_update) - return changed + return False def __process_update(self, table, uuid, old, new): - """Returns True if a column changed, False otherwise.""" + """Returns Notice if a column changed, False otherwise.""" row = table.rows.get(uuid) changed = False if not new: # Delete row. if row: del table.rows[uuid] - changed = True - self.notify(ROW_DELETE, row) + return Notice(ROW_DELETE, row) else: # XXX rate-limit vlog.warn("cannot delete missing row %s from table %s" @@ -723,7 +744,7 @@ class Idl(object): if op == ROW_CREATE: table.rows[uuid] = row if changed: - self.notify(ROW_CREATE, row) + return Notice(ROW_CREATE, row) else: op = ROW_UPDATE if not row: @@ -737,8 +758,8 @@ class Idl(object): if op == ROW_CREATE: table.rows[uuid] = row if changed: - self.notify(op, row, Row.from_json(self, table, uuid, old)) - return changed + return Notice(op, row, Row.from_json(self, table, uuid, old)) + return False def __check_server_db(self): """Returns True if this is a valid server database, False otherwise.""" @@ -1458,6 +1479,11 @@ class Transaction(object): if self != self.idl.txn: return self._status + if self.idl.state != Idl.IDL_S_MONITORING: + self._status = Transaction.TRY_AGAIN + self.__disassemble() + return self._status + # If we need a lock but don't have it, give up quickly. 
if self.idl.lock_name and not self.idl.has_lock: self._status = Transaction.NOT_LOCKED diff --git a/python/ovs/jsonrpc.py b/python/ovs/jsonrpc.py index bf32f8c87c..d5127268aa 100644 --- a/python/ovs/jsonrpc.py +++ b/python/ovs/jsonrpc.py @@ -612,5 +612,18 @@ class Session(object): def force_reconnect(self): self.reconnect.force_reconnect(ovs.timeval.msec()) + def reset_backoff(self): + """ Resets the reconnect backoff by allowing as many free tries as the + number of configured remotes. This is to be used by upper layers + before calling force_reconnect() if backoff is undesirable.""" + free_tries = len(self.remotes) + + if self.is_connected(): + # The extra free try will be consumed when the current remote + # is disconnected. + free_tries += 1 + + self.reconnect.set_backoff_free_tries(free_tries) + def get_num_of_remotes(self): return len(self.remotes) diff --git a/python/ovstest/rpcserver.py b/python/ovstest/rpcserver.py index c4aab70207..05b6b1be20 100644 --- a/python/ovstest/rpcserver.py +++ b/python/ovstest/rpcserver.py @@ -18,22 +18,14 @@ rpcserver is an XML RPC server that allows RPC client to initiate tests import sys -import exceptions - import xmlrpc.client -import tcp - from twisted.internet import reactor from twisted.internet.error import CannotListenError from twisted.web import server from twisted.web import xmlrpc -import udp - -import util - -import vswitch +from . import tcp, udp, util, vswitch class TestArena(xmlrpc.XMLRPC): @@ -210,7 +202,7 @@ class TestArena(xmlrpc.XMLRPC): (_, port) = self.__get_handle_resources(handle) port.loseConnection() self.__delete_handle(handle) - except exceptions.KeyError: + except KeyError: return -1 return 0 @@ -222,7 +214,7 @@ class TestArena(xmlrpc.XMLRPC): (_, connector) = self.__get_handle_resources(handle) connector.disconnect() self.__delete_handle(handle) - except exceptions.KeyError: + except KeyError: return -1 return 0 diff --git a/python/ovstest/tcp.py b/python/ovstest/tcp.py index c495717f2f..098c6cba3e 100644 --- a/python/ovstest/tcp.py +++ b/python/ovstest/tcp.py @@ -21,7 +21,7 @@ import time from twisted.internet import interfaces from twisted.internet.protocol import ClientFactory, Factory, Protocol -from zope.interface import implements +from zope.interface.declarations import implementer class TcpListenerConnection(Protocol): @@ -55,8 +55,8 @@ class TcpListenerFactory(Factory): return str(self.stats) +@implementer(interfaces.IPushProducer) class Producer(object): - implements(interfaces.IPushProducer) """ This producer class generates infinite byte stream for a specified time duration diff --git a/python/ovstest/tests.py b/python/ovstest/tests.py index 6de3cc3af4..f959f945ef 100644 --- a/python/ovstest/tests.py +++ b/python/ovstest/tests.py @@ -10,8 +10,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from __future__ import print_function - import math import time diff --git a/python/ovstest/util.py b/python/ovstest/util.py index 72457158f2..270d6a0376 100644 --- a/python/ovstest/util.py +++ b/python/ovstest/util.py @@ -26,8 +26,6 @@ import socket import struct import subprocess -import exceptions - import xmlrpc.client @@ -88,7 +86,7 @@ def start_process(args): stderr=subprocess.PIPE) out, err = p.communicate() return (p.returncode, out, err) - except exceptions.OSError: + except OSError: return (-1, None, None) diff --git a/python/ovstest/vswitch.py b/python/ovstest/vswitch.py index 9d5b5cffd0..45c9587eeb 100644 --- a/python/ovstest/vswitch.py +++ b/python/ovstest/vswitch.py @@ -15,7 +15,7 @@ """ vswitch module allows its callers to interact with OVS DB. """ -import util +from . import util def ovs_vsctl_add_bridge(bridge): diff --git a/python/setup.py b/python/setup.py index d385d83722..cfe01763f3 100644 --- a/python/setup.py +++ b/python/setup.py @@ -10,8 +10,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function - import sys from distutils.command.build_ext import build_ext @@ -82,8 +80,6 @@ setup_args = dict( 'Topic :: Software Development :: Libraries :: Python Modules', 'Topic :: System :: Networking', 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', diff --git a/tests/atlocal.in b/tests/atlocal.in index 02e2dc57f2..cfca7e1926 100644 --- a/tests/atlocal.in +++ b/tests/atlocal.in @@ -175,6 +175,9 @@ find_command() # Set HAVE_NC find_command nc +# Set HAVE_TC +find_command tc + # Determine correct netcat option to quit on stdin EOF if nc --version 2>&1 | grep -q nmap.org; then # Nmap netcat diff --git a/tests/automake.mk b/tests/automake.mk index 677b99a6b4..fc80e027df 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -134,7 +134,8 @@ FUZZ_REGRESSION_TESTS = \ tests/fuzz-regression/ofp_print_fuzzer-5722747668791296 \ tests/fuzz-regression/ofp_print_fuzzer-6285128790704128 \ tests/fuzz-regression/ofp_print_fuzzer-6470117922701312 \ - tests/fuzz-regression/ofp_print_fuzzer-6502620041576448 + tests/fuzz-regression/ofp_print_fuzzer-6502620041576448 \ + tests/fuzz-regression/ofp_print_fuzzer-6540965472632832 $(srcdir)/tests/fuzz-regression-list.at: tests/automake.mk $(AM_V_GEN)for name in $(FUZZ_REGRESSION_TESTS); do \ basename=`echo $$name | sed 's,^.*/,,'`; \ diff --git a/tests/daemon.at b/tests/daemon.at index a7982de381..39d9aa391e 100644 --- a/tests/daemon.at +++ b/tests/daemon.at @@ -218,11 +218,11 @@ OVS_WAIT_UNTIL([test -s ovsdb-server.pid]) OVS_WAIT_UNTIL([sc query ovsdb-server | grep STATE | grep RUNNING > /dev/null 2>&1]) AT_CHECK([kill -0 `cat ovsdb-server.pid`], [0], [ignore]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-dbs], [0], -[Open_vSwitch +[_Server ]) AT_CHECK([sc stop ovsdb-server], [0], [ignore]) OVS_WAIT_UNTIL([test ! 
-s ovsdb-server.pid]) -AT_CHECK([sc query ovsdb-server | grep STATE | grep STOPPED], [0], [ignore]) +OVS_WAIT_UNTIL([sc query ovsdb-server | grep STATE | grep STOPPED > /dev/null 2>&1]) AT_CHECK([sc delete ovsdb-server], [0], [[[SC]] DeleteService SUCCESS ]) AT_CLEANUP diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index 2862a3c9b9..16402ebae2 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -299,60 +299,87 @@ type=drop rate=1 burst_size=2 ]) ovs-appctl time/warp 5000 -AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) +for i in `seq 1 7`; do + AT_CHECK( + [ovs-appctl netdev-dummy/receive p7 \ + 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) +done + +for i in `seq 1 5`; do + AT_CHECK( + [ovs-appctl netdev-dummy/receive p8 \ + 'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) +done + sleep 1 # wait for forwarders process packets # Meter 1 is measuring packets, allowing one 
packet per second with -# bursts of one packet, so 4 out of 5 packets should hit the drop -# band. +# bursts of one packet, so 4 out of 5 packets should hit the drop band. # Meter 2 is measuring kbps, with burst size 2 (== 2000 bits). 4 packets -# (240 bytes == 1920 bits) pass, but the last packet should hit the drop band. +# (240 bytes == 1920 bits) pass, but the last three packets should hit the +# drop band. There should be 80 bits remaining for the next packets. AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | strip_timers], [0], [dnl OFPST_METER reply (OF1.3) (xid=0x2): meter:1 flow_count:1 packet_in_count:5 byte_in_count:300 duration:0.0s bands: 0: packet_count:4 byte_count:240 -meter:2 flow_count:1 packet_in_count:5 byte_in_count:300 duration:0.0s bands: -0: packet_count:1 byte_count:60 +meter:2 flow_count:1 packet_in_count:7 byte_in_count:420 duration:0.0s bands: +0: packet_count:3 byte_count:180 ]) -# Advance time by 1/2 second -ovs-appctl time/warp 500 +# Advance time by 870 ms +ovs-appctl time/warp 870 + +for i in `seq 1 5`; do + AT_CHECK( + [ovs-appctl netdev-dummy/receive p7 \ + 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) + + AT_CHECK( + [ovs-appctl netdev-dummy/receive p8 \ + 'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) +done -AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p8 
'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) -AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) sleep 1 # wait for forwarders process packets # Meter 1 is measuring packets, allowing one packet per second with # bursts of one packet, so all 5 of the new packets should hit the drop # band. -# Meter 2 is measuring kbps, with burst size 2 (== 2000 bits). After 500ms -# there should be space for 80 + 500 bits, so one new 60 byte (480 bit) packet -# should pass, remaining 4 should hit the drop band. +# Meter 2 is measuring kbps, with burst size 2 (== 2000 bits). After 870ms +# there should be space for 80 + 870 = 950 bits, so one new 60 byte (480 bit) +# packet should pass, remaining 4 should hit the drop band. There should be +# 470 bits left. AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | strip_timers], [0], [dnl OFPST_METER reply (OF1.3) (xid=0x2): meter:1 flow_count:1 packet_in_count:10 byte_in_count:600 duration:0.0s bands: 0: packet_count:9 byte_count:540 -meter:2 flow_count:1 packet_in_count:10 byte_in_count:600 duration:0.0s bands: -0: packet_count:5 byte_count:300 +meter:2 flow_count:1 packet_in_count:12 byte_in_count:720 duration:0.0s bands: +0: packet_count:7 byte_count:420 +]) + +# Advance time by 10 ms +ovs-appctl time/warp 10 + +for i in `seq 1 5`; do + AT_CHECK( + [ovs-appctl netdev-dummy/receive p7 \ + 'in_port(7),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' --len 60]) +done + +sleep 1 # wait for forwarders process packets + +# Meter 1 should remain the same as we didn't send anything that should hit it. +# Meter 2 is measuring kbps, with burst size 2 (== 2000 bits). After 10ms +# there should be space for 470 + 10 = 480 bits, so one new 60 byte (480 bit) +# packet should pass, remaining 4 should hit the drop band. 
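+# (Recap of meter 2's token bucket, which refills at 1 bit/ms from a
+# 2000-bit burst: 2000 - 4*480 = 80 bits left; +870 ms = 950 bits;
+# 950 - 480 = 470 bits left; +10 ms = 480 bits; 480 - 480 = 0, so the
+# bucket is empty again after this last batch.)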
+AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | strip_timers], [0], [dnl +OFPST_METER reply (OF1.3) (xid=0x2): +meter:1 flow_count:1 packet_in_count:10 byte_in_count:600 duration:0.0s bands: +0: packet_count:9 byte_count:540 + +meter:2 flow_count:1 packet_in_count:17 byte_in_count:1020 duration:0.0s bands: +0: packet_count:11 byte_count:660 ]) ovs-appctl time/warp 5000 @@ -360,7 +387,7 @@ ovs-appctl time/warp 5000 AT_CHECK([ ovs-appctl coverage/read-counter datapath_drop_meter ], [0], [dnl -14 +20 ]) AT_CHECK([cat ovs-vswitchd.log | filter_flow_install | strip_xout_keep_actions], [0], [dnl @@ -370,6 +397,8 @@ recirc_id(0),in_port(7),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), a recirc_id(0),in_port(8),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), actions:2 ]) +AT_CHECK([ovs-ofctl -O OpenFlow13 del-meters br0]) + OVS_VSWITCHD_STOP AT_CLEANUP @@ -589,3 +618,20 @@ arp,in_port=ANY,dl_vlan=11,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09: DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS_VID_ARP([dummy]) DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS_VID_ARP([dummy-pmd]) + +AT_SETUP([dpif-netdev - check dpctl/add-flow in_port exact match]) +OVS_VSWITCHD_START( + [add-port br0 p1 \ + -- set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p0.sock \ + -- set bridge br0 datapath-type=dummy \ + other-config:datapath-id=1234 fail-mode=secure]) + +AT_CHECK([ovs-appctl dpctl/add-flow "eth(),eth_type(0x0800),ipv4()" "3"], [2], +[], [dnl +ovs-vswitchd: updating flow table (Invalid argument) +ovs-appctl: ovs-vswitchd: server returned an error +]) +OVS_WAIT_UNTIL([grep "flow: in_port is not an exact match" ovs-vswitchd.log]) +OVS_VSWITCHD_STOP(["/flow: in_port is not an exact match/d +/failed to put/d"]) +AT_CLEANUP diff --git a/tests/fuzz-regression-list.at b/tests/fuzz-regression-list.at index e3173fb88f..2347c690ef 100644 --- a/tests/fuzz-regression-list.at +++ b/tests/fuzz-regression-list.at @@ -21,3 +21,4 @@ TEST_FUZZ_REGRESSION([ofp_print_fuzzer-5722747668791296]) TEST_FUZZ_REGRESSION([ofp_print_fuzzer-6285128790704128]) TEST_FUZZ_REGRESSION([ofp_print_fuzzer-6470117922701312]) TEST_FUZZ_REGRESSION([ofp_print_fuzzer-6502620041576448]) +TEST_FUZZ_REGRESSION([ofp_print_fuzzer-6540965472632832]) diff --git a/tests/fuzz-regression/ofp_print_fuzzer-6540965472632832 b/tests/fuzz-regression/ofp_print_fuzzer-6540965472632832 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/ofp-actions.at b/tests/ofp-actions.at index 199db8ed0f..59093c03c9 100644 --- a/tests/ofp-actions.at +++ b/tests/ofp-actions.at @@ -1007,12 +1007,21 @@ bad_action 'dec_ttl(,)' 'dec_ttl_cnt_ids: expected at least one controller id.' # set_mpls_label bad_action 'set_mpls_label' 'set_mpls_label: expected label.' +# set_mpls_label oversized +bad_action 'set_mpls_label(0x100000)' '0x100000: not a valid MPLS label' + # set_mpls_tc bad_action 'set_mpls_tc' 'set_mpls_tc: expected tc.' +# set_mpls_tc oversized +bad_action 'set_mpls_tc(8)' '8: not a valid MPLS TC' + # set_mpls_ttl bad_action 'set_mpls_ttl' 'set_mpls_ttl: expected ttl.' 
+# set_mpls_ttl oversized
+bad_action 'set_mpls_ttl(256)' 'invalid MPLS TTL "256"'
+
 # fin_timeout
 bad_action 'fin_timeout(foo=bar)' "invalid key 'foo' in 'fin_timeout' argument"
 
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 31064ed95e..f99a60444f 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -342,6 +342,22 @@ AT_CHECK([test `egrep 'in_port\(6\)' br1_flows.txt |wc -l` -gt 3])
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
+# SEND_TCP_BOND_PKTS([p_name], [p_ofport], [packet_len])
+#
+# Sends 256 packets to port 'p_name' with different TCP destination ports.
+m4_define([SEND_TCP_BOND_PKTS],
+    [
+     len_cmd=""
+     if test -n "$3"; then
+         len_cmd=" --len $3"
+     fi
+     for i in `seq 0 255`; do
+         pkt="in_port($2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(ack)"
+         ovs-appctl netdev-dummy/receive $1 $pkt$len_cmd
+     done
+    ]
+)
+
 AT_SETUP([ofproto-dpif - balance-tcp bonding])
 # Create br0 with members bond0(p1, p2, p3) and p7,
 # and br1 with members bond1(p4, p5, p6) and p8.
@@ -377,13 +393,7 @@ ovs-appctl lacp/show > lacp.txt
 ovs-appctl bond/show > bond.txt
 # Check that lb_output is not enabled by default.
 AT_CHECK([grep -q '^lb_output action: disabled' bond.txt])
-(
-for i in `seq 0 255` ;
-    do
-    pkt="in_port(7),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(ack)"
-    AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt])
-    done
-)
+AT_CHECK([SEND_TCP_BOND_PKTS([p7], [7])])
 ovs-appctl time/warp 300 100
 AT_CHECK([ovs-appctl dpif/dump-flows br0 |grep tcp > br0_flows.txt])
 AT_CHECK([ovs-appctl dpif/dump-flows br1 |grep tcp > br1_flows.txt])
@@ -400,13 +410,7 @@ OVS_WAIT_UNTIL([ovs-appctl bond/show | grep -q '^lb_output action: enabled'])
 ovs-appctl time/warp 10000 500
 ovs-appctl revalidator/wait
 OVS_WAIT_WHILE([ovs-appctl dpif/dump-flows br1 | grep -q tcp])
-(
-for i in $(seq 256) ;
-    do
-    pkt="in_port(7),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(ack)"
-    AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt])
-    done
-)
+AT_CHECK([SEND_TCP_BOND_PKTS([p7], [7])])
 ovs-appctl time/warp 300 100
 AT_CHECK([ovs-appctl dpif/dump-flows br0 | grep tcp > br0_flows.txt])
 AT_CHECK([ovs-appctl dpif/dump-flows br1 | grep tcp > br1_flows.txt])
@@ -423,6 +427,78 @@ OVS_WAIT_UNTIL([test -z "$(ovs-appctl dpif-netdev/bond-show)"])
 OVS_VSWITCHD_STOP()
 AT_CLEANUP
 
+# Make sure that rebalancing works after link state changes.
+AT_SETUP([ofproto-dpif - balance-tcp bonding rebalance after link state changes])
+# Create br0 with interfaces bond0(p1, p2) and p5,
+# and br1 with interfaces bond1(p3, p4) and p6.
+#    bond0 <-> bond1
+# Send some traffic, set link state down and up for p2,
+# send a large amount of traffic to trigger rebalancing, and
+# make sure that some hashes are rebalanced.
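+# (bond/show prints, for each member, the list of hashes it currently
+# carries; the sed/grep checks below extract one member's section and
+# look for "hash" lines to tell whether any hashes are assigned to it.)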
+OVS_VSWITCHD_START(
+  [add-bond br0 bond0 p1 p2 bond_mode=balance-tcp lacp=active \
+   other-config:lacp-time=fast other-config:bond-rebalance-interval=1000 --\
+   set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 mtu_request=65535 -- \
+   set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 mtu_request=65535 -- \
+   add-port br0 p5 -- set interface p5 ofport_request=5 type=dummy mtu_request=65535 -- \
+   add-br br1 -- \
+   set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
+   set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
+                  fail-mode=secure -- \
+   add-bond br1 bond1 p3 p4 bond_mode=balance-tcp lacp=active \
+   other-config:lacp-time=fast other-config:bond-rebalance-interval=1000 --\
+   set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=3 mtu_request=65535 -- \
+   set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=4 mtu_request=65535 -- \
+   add-port br1 p6 -- set interface p6 ofport_request=6 type=dummy mtu_request=65535 --])
+AT_CHECK([ovs-appctl vlog/set bond:dbg])
+AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK
+])
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+AT_CHECK([ovs-ofctl add-flow br1 action=normal])
+AT_CHECK([ovs-appctl upcall/disable-megaflows], [0], [megaflows disabled
+], [])
+OVS_WAIT_WHILE([ovs-appctl bond/show | grep "may_enable: false"])
+
+ovs-appctl time/stop
+ovs-appctl time/warp 2000 200
+
+# Send some traffic to distribute all the hashes between ports.
+AT_CHECK([SEND_TCP_BOND_PKTS([p5], [5], [65500])])
+
+# Wait for rebalancing so that per-hash stats are accounted.
+ovs-appctl time/warp 1000 100
+
+# Check that p2 handles some hashes.
+ovs-appctl bond/show > bond1.txt
+AT_CHECK([sed -n '/member p2/,/^$/p' bond1.txt | grep 'hash'], [0], [ignore])
+
+# Move p2 down to force all hashes to move to p1.
+AT_CHECK([ovs-appctl netdev-dummy/set-admin-state p2 down], 0, [OK
+])
+
+ovs-appctl time/warp 200 100
+# Check that all hashes moved from p2.
+ovs-appctl bond/show > bond2.txt
+AT_CHECK([sed -n '/member p2/,/^$/p' bond2.txt | grep 'hash'], [1], [ignore])
+
+# Move p2 up.
+AT_CHECK([ovs-appctl netdev-dummy/set-admin-state p2 up], 0, [OK
+])
+
+# Send some packets to trigger rebalancing.
+AT_CHECK([SEND_TCP_BOND_PKTS([p5], [5], [65500])])
+
+# Wait for rebalancing.
+ovs-appctl time/warp 1000 100
+
+# Check that some hashes were shifted to p2.
+ovs-appctl bond/show > bond3.txt
+AT_CHECK([sed -n '/member p2/,/^$/p' bond3.txt | grep 'hash'], [0], [ignore])
+
+OVS_VSWITCHD_STOP()
+AT_CLEANUP
+
+
 # Makes sure recirculation does not change the way packet is handled.
AT_SETUP([ofproto-dpif - balance-tcp bonding, different recirc flow ]) OVS_VSWITCHD_START( @@ -9520,6 +9596,26 @@ OFPST_TABLE reply (OF1.3) (xid=0x2): OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ofproto-dpif packet-out table meter drop]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 + +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps bands=type=drop rate=1']) +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 'in_port=1 action=meter:1,output:2']) + +ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)" +ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)" + +# Check that vswitchd hasn't crashed by dumping the meter added above +AT_CHECK([ovs-ofctl -O OpenFlow13 dump-meters br0 | ofctl_strip], [0], [dnl +OFPST_METER_CONFIG reply (OF1.3): +meter=1 pktps bands= +type=drop rate=1 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([ofproto-dpif - ICMPv6]) OVS_VSWITCHD_START add_of_ports br0 1 @@ -10842,6 +10938,31 @@ dnl NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=106 in_port=2 (via action) data_len=106 (unbuffered) udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=2,tp_dst=1 udp_csum:553 ]) + +dnl The next test verifies that ct_clear at the datapath only gets executed +dnl if conntrack information is present. +AT_DATA([flows.txt], [dnl +table=0 in_port=1 actions=ct_clear,ct_clear,ct_clear,p2 +]) +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=p1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2'], [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: 2 +]) +AT_DATA([flows.txt], [dnl +table=0 in_port=1 ip actions=ct_clear,ct(table=1) +table=1 in_port=1 actions=ct_clear,ct_clear,goto_table:2 +table=2 in_port=1 actions=ct_clear,p2 +]) +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=p1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2'], [0], [stdout]) +AT_CHECK([grep Datapath stdout | sed 's/recirc(.*)/recirc(X)/'], [0], + [Datapath actions: ct,recirc(X) +Datapath actions: ct_clear,2 +]) + OVS_VSWITCHD_STOP AT_CLEANUP diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at index 5ddca67e71..604f15c2d1 100644 --- a/tests/ovs-ofctl.at +++ b/tests/ovs-ofctl.at @@ -449,6 +449,16 @@ actions=output(max_len=100,port=123) actions=output(port=100,max_len=123) actions=output(port=LOCAL,max_len=123) actions=output(port=IN_PORT,max_len=123) +mpls,mpls_label=1,actions=set_mpls_label(0) +mpls,mpls_label=1,actions=set_mpls_label(10) +mpls,mpls_label=1,actions=set_mpls_label(0x10) +mpls,mpls_label=1,actions=set_mpls_label(0xfffff) +mpls,mpls_tc=1,actions=set_mpls_tc(0) +mpls,mpls_tc=1,actions=set_mpls_tc(3) +mpls,mpls_tc=1,actions=set_mpls_tc(7) +mpls,mpls_ttl=1,actions=set_mpls_ttl(0) +mpls,mpls_ttl=1,actions=set_mpls_ttl(200) +mpls,mpls_ttl=1,actions=set_mpls_ttl(255) ]]) AT_CHECK([ovs-ofctl parse-flows flows.txt @@ -506,6 +516,16 @@ NXT_FLOW_MOD: ADD table:255 actions=output(port=123,max_len=100) NXT_FLOW_MOD: ADD table:255 actions=output(port=100,max_len=123) NXT_FLOW_MOD: ADD table:255 
actions=output(port=LOCAL,max_len=123) NXT_FLOW_MOD: ADD table:255 actions=output(port=IN_PORT,max_len=123) +NXT_FLOW_MOD: ADD table:255 mpls,mpls_label=1 actions=set_mpls_label(0) +NXT_FLOW_MOD: ADD table:255 mpls,mpls_label=1 actions=set_mpls_label(10) +NXT_FLOW_MOD: ADD table:255 mpls,mpls_label=1 actions=set_mpls_label(16) +NXT_FLOW_MOD: ADD table:255 mpls,mpls_label=1 actions=set_mpls_label(1048575) +NXT_FLOW_MOD: ADD table:255 mpls,mpls_tc=1 actions=set_mpls_tc(0) +NXT_FLOW_MOD: ADD table:255 mpls,mpls_tc=1 actions=set_mpls_tc(3) +NXT_FLOW_MOD: ADD table:255 mpls,mpls_tc=1 actions=set_mpls_tc(7) +NXT_FLOW_MOD: ADD table:255 mpls,mpls_ttl=1 actions=set_mpls_ttl(0) +NXT_FLOW_MOD: ADD table:255 mpls,mpls_ttl=1 actions=set_mpls_ttl(200) +NXT_FLOW_MOD: ADD table:255 mpls,mpls_ttl=1 actions=set_mpls_ttl(255) ]]) AT_CLEANUP diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at index c8babe3612..1f1fc3c79a 100644 --- a/tests/ovs-vsctl.at +++ b/tests/ovs-vsctl.at @@ -1639,3 +1639,26 @@ AT_CHECK([grep "server name" ovsdb-server.log], [0], OVS_VSCTL_CLEANUP AT_CLEANUP + +dnl ---------------------------------------------------------------------- +AT_BANNER([set ingress policing test]) + +AT_SETUP([set ingress_policing_rate and ingress_policing_burst]) +AT_KEYWORDS([ingress_policing]) +OVS_VSCTL_SETUP +AT_CHECK([RUN_OVS_VSCTL_TOGETHER( + [add-br a], + [add-port a a1], + [set interface a1 ingress_policing_rate=100], + [set interface a1 ingress_policing_burst=10], + [--columns=ingress_policing_burst,ingress_policing_rate list interface a1])], + [0], + [ + + + +ingress_policing_burst: 10 +ingress_policing_rate: 100 +]) +OVS_VSCTL_CLEANUP +AT_CLEANUP diff --git a/tests/ovsdb-client.at b/tests/ovsdb-client.at index 8d777a0275..5e3b26aea8 100644 --- a/tests/ovsdb-client.at +++ b/tests/ovsdb-client.at @@ -12,6 +12,30 @@ AT_CHECK([ovsdb-client get-schema-cksum unix:socket ordinals], [0], [12345678 9 OVSDB_SERVER_SHUTDOWN AT_CLEANUP +AT_SETUP([ovsdb-client needs-conversion (no conversion needed)]) +AT_KEYWORDS([ovsdb client file positive]) +ordinal_schema > schema +touch .db.~lock~ +AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) +AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore]) +AT_CHECK([ovsdb-client needs-conversion unix:socket schema], [0], [no +]) +OVSDB_SERVER_SHUTDOWN +AT_CLEANUP + +AT_SETUP([ovsdb-client needs-conversion (conversion needed)]) +AT_KEYWORDS([ovsdb client file positive]) +ordinal_schema > schema +touch .db.~lock~ +AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) +AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore]) +sed 's/5\.1\.3/5.1.4/' < schema > schema2 +AT_CHECK([diff schema schema2], [1], [ignore]) +AT_CHECK([ovsdb-client needs-conversion unix:socket schema2], [0], [yes +]) +OVSDB_SERVER_SHUTDOWN +AT_CLEANUP + AT_SETUP([ovsdb-client backup and restore]) AT_KEYWORDS([ovsdb client positive]) diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at index 92aa427093..cf43e9cf86 100644 --- a/tests/ovsdb-cluster.at +++ b/tests/ovsdb-cluster.at @@ -128,7 +128,7 @@ ovsdb_test_cluster_disconnect () { "rows": [{"i": 1}]}]]' > test-ovsdb.log 2>&1 & echo $! > test-ovsdb.pid - OVS_WAIT_UNTIL([grep "000: i=1" test-ovsdb.log]) + OVS_WAIT_UNTIL([grep "000: table simple: i=1" test-ovsdb.log]) # Start collecting raft_is_connected logs for $target before shutting down # any servers. 
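The set_mpls_label/set_mpls_tc/set_mpls_ttl cases added to tests/ofp-actions.at
and tests/ovs-ofctl.at above pin down the MPLS field widths: a 20-bit label
(maximum 0xfffff), a 3-bit TC (maximum 7), and an 8-bit TTL (maximum 255).  A
small Python sketch of an equivalent range check; this is illustrative only,
not the actual ofp-actions code:

    # MPLS header field widths: label 20 bits, TC 3 bits, TTL 8 bits.
    MPLS_LIMITS = {"label": 0xfffff, "tc": 7, "ttl": 255}

    def validate_mpls(field, value):
        if not 0 <= value <= MPLS_LIMITS[field]:
            raise ValueError("%s: not a valid MPLS %s" % (value, field))
        return value

    validate_mpls("label", 0xfffff)        # accepted, as in parse-flows
    for field, bad in [("label", 0x100000), ("tc", 8), ("ttl", 256)]:
        try:
            validate_mpls(field, bad)
        except ValueError as e:
            print(e)                       # mirrors the bad_action errors

Note that parse-flows also normalizes accepted values to decimal (0x10 becomes
16, 0xfffff becomes 1048575), which the expected NXT_FLOW_MOD output above
relies on.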
diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index 4b4791a7da..d5cdf7e8b0 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -141,7 +141,7 @@ m4_define([OVSDB_CHECK_IDL_REGISTER_COLUMNS_PY], AT_CHECK([ovsdb_start_idltest]) m4_if([$2], [], [], [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore], [ignore])]) - AT_CHECK([$PYTHON3 $srcdir/test-ovsdb.py -t10 idl $srcdir/idltest.ovsschema unix:socket ?simple:b,ba,i,ia,r,ra,s,sa,u,ua?link1:i,k,ka,l2?link2:i,l1?singleton:name $3], + AT_CHECK([$PYTHON3 $srcdir/test-ovsdb.py -t10 idl $srcdir/idltest.ovsschema unix:socket ?simple:b,ba,i,ia,r,ra,s,sa,u,ua?simple3:name,uset,uref?simple4:name?simple6:name,weak_ref?link1:i,k,ka,l2?link2:i,l1?singleton:name $3], [0], [stdout], [ignore]) AT_CHECK([sort stdout | uuidfilt]m4_if([$6],,, [[| $6]]), [0], [$4]) @@ -355,28 +355,28 @@ OVSDB_CHECK_IDL([simple idl, initially empty, various ops], 'reconnect']], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}]} -002: i=0 r=0 b=false s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -002: i=1 r=2 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +002: table simple: i=0 r=0 b=false s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +002: table simple: i=1 r=2 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 003: {"error":null,"result":[{"count":2}]} -004: i=0 r=0 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -004: i=1 r=2 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +004: table simple: i=0 r=0 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +004: table simple: i=1 r=2 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 005: {"error":null,"result":[{"count":2}]} -006: i=0 r=123.5 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -006: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +006: table simple: i=0 r=123.5 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +006: table simple: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 007: {"error":null,"result":[{"uuid":["uuid","<6>"]}]} -008: i=-1 r=125 b=false s= u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> -008: i=0 r=123.5 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -008: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +008: table simple: i=-1 r=125 b=false s= u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +008: table simple: i=0 r=123.5 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +008: table simple: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 009: {"error":null,"result":[{"count":2}]} -010: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> -010: i=0 r=123.5 b=true s=newstring u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -010: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +010: table simple: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +010: table simple: i=0 r=123.5 b=true s=newstring u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +010: table simple: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 011: 
{"error":null,"result":[{"count":1}]} -012: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> -012: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +012: table simple: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +012: table simple: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 013: reconnect -014: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> -014: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +014: table simple: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +014: table simple: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 015: done ]]) @@ -403,11 +403,11 @@ OVSDB_CHECK_IDL([simple idl, initially populated], "table": "simple", "where": [], "row": {"b": true}}]']], - [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -000: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> + [[000: table simple: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +000: table simple: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> 001: {"error":null,"result":[{"count":2}]} -002: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -002: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> +002: table simple: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +002: table simple: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> 003: done ]]) @@ -431,14 +431,14 @@ OVSDB_CHECK_IDL([simple idl, writing via IDL], "row": {}}]']], [['verify 0 b, verify 1 r, set 0 b 1, set 1 r 3.5' \ 'insert 2, verify 2 i, verify 1 b, delete 1']], - [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -000: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> + [[000: table simple: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +000: table simple: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> 001: commit, status=success -002: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -002: i=1 r=3.5 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> +002: table simple: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +002: table simple: i=1 r=3.5 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> 003: commit, status=success -004: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -004: i=2 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<6> +004: table simple: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +004: table simple: i=2 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<6> 005: done ]]) @@ -448,10 +448,10 @@ OVSDB_CHECK_IDL([simple idl, writing via IDL with unicode], "table": "simple", "row": {"s": "(╯°□°)╯︵ ┻━┻"}}]']], [['set 0 b 1, insert 1, set 1 s "¯\_(ツ)_/¯"']], - [[000: i=0 r=0 b=false s=(╯°□°)╯︵ ┻━┻ u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> + [[000: table simple: i=0 r=0 b=false s=(╯°□°)╯︵ ┻━┻ u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 
001: commit, status=success -002: i=0 r=0 b=true s=(╯°□°)╯︵ ┻━┻ u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -002: i=1 r=0 b=false s="¯\_(ツ)_/¯" u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +002: table simple: i=0 r=0 b=true s=(╯°□°)╯︵ ┻━┻ u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +002: table simple: i=1 r=0 b=false s="¯\_(ツ)_/¯" u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 003: done ]]) @@ -475,10 +475,10 @@ OVSDB_CHECK_IDL_PY_WITH_EXPOUT([simple idl, writing large data via IDL with unic "table": "simple", "row": {"s": "'$(printf "测试超过四千零九十六个字节的中文字符串以使解码出现问题。%.0s" {1..50})'"}}]']], [['set 0 b 1, insert 1, set 1 s '$(printf "测试超过四千零九十六个字节的中文字符串以使解码出现问题。%.0s" {1..100})'']], - [[000: i=0 r=0 b=false s=$(printf "测试超过四千零九十六个字节的中文字符串以使解码出现问题。%.0s" {1..50}) u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> + [[000: table simple: i=0 r=0 b=false s=$(printf "测试超过四千零九十六个字节的中文字符串以使解码出现问题。%.0s" {1..50}) u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 001: commit, status=success -002: i=0 r=0 b=true s=$(printf "测试超过四千零九十六个字节的中文字符串以使解码出现问题。%.0s" {1..50}) u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -002: i=1 r=0 b=false s=$(printf "测试超过四千零九十六个字节的中文字符串以使解码出现问题。%.0s" {1..100}) u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +002: table simple: i=0 r=0 b=true s=$(printf "测试超过四千零九十六个字节的中文字符串以使解码出现问题。%.0s" {1..50}) u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +002: table simple: i=1 r=0 b=false s=$(printf "测试超过四千零九十六个字节的中文字符串以使解码出现问题。%.0s" {1..100}) u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 003: done]]) OVSDB_CHECK_IDL([simple idl, handling verification failure], @@ -499,16 +499,16 @@ OVSDB_CHECK_IDL([simple idl, handling verification failure], '+verify 1 r, set 1 r 3' \ 'verify 1 r, set 1 r 3' \ ]], - [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -000: i=1 r=2 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> + [[000: table simple: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +000: table simple: i=1 r=2 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 001: commit, status=success 002: {"error":null,"result":[{"count":1}]} 003: commit, status=try again -004: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -004: i=1 r=5 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +004: table simple: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +004: table simple: i=1 r=5 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 005: commit, status=success -006: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -006: i=1 r=3 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +006: table simple: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +006: table simple: i=1 r=3 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 007: done ]]) @@ -518,9 +518,9 @@ OVSDB_CHECK_IDL([simple idl, increment operation], "table": "simple", "row": {}}]']], [['set 0 r 2.0, increment 0']], - [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> + [[000: table simple: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 001: commit, status=success, increment=1 -002: i=1 r=2 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +002: table simple: i=1 r=2 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 003: done ]]) @@ -531,10 +531,10 @@ OVSDB_CHECK_IDL([simple idl, aborting], "row": {}}]']], [['set 0 r 2.0, abort' \ '+set 0 b 1']], - [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> + [[000: table simple: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] 
uuid=<1> 001: commit, status=aborted 002: commit, status=success -003: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: done ]]) @@ -545,10 +545,10 @@ OVSDB_CHECK_IDL([simple idl, destroy without commit or abort], "row": {}}]']], [['set 0 r 2.0, destroy' \ '+set 0 b 1']], - [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> + [[000: table simple: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 001: destroy 002: commit, status=success -003: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: done ]]) @@ -564,7 +564,7 @@ OVSDB_CHECK_IDL([simple idl, conditional, false condition], [[000: change conditions 001: empty 002: change conditions -003: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: done ]]) @@ -580,7 +580,7 @@ OVSDB_CHECK_IDL([simple idl, conditional, true condition], [[000: change conditions 001: empty 002: change conditions -003: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: done ]]) @@ -601,8 +601,8 @@ OVSDB_CHECK_IDL([simple idl, conditional, multiple clauses in condition], [[000: change conditions 001: empty 002: change conditions -003: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -003: i=2 r=3 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +003: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: i=2 r=3 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 004: done ]]) @@ -618,7 +618,7 @@ OVSDB_CHECK_IDL([simple idl, conditional, modify as insert due to condition], [[000: change conditions 001: empty 002: change conditions -003: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: done ]]) @@ -641,11 +641,11 @@ OVSDB_CHECK_IDL([simple idl, conditional, modify as delete due to condition], [[000: change conditions 001: empty 002: change conditions -003: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: change conditions 005: empty 006: {"error":null,"result":[{"uuid":["uuid","<2>"]}]} -007: i=2 r=3 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +007: table simple: i=2 r=3 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 008: done ]]) @@ -676,15 +676,15 @@ OVSDB_CHECK_IDL([simple idl, conditional, multiple tables], [[000: change conditions 001: empty 002: change conditions -003: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: change conditions -005: i=0 k=0 ka=[] l2= uuid=<2> -005: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +005: table link1: i=0 k=0 ka=[] l2= uuid=<2> +005: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 006: change conditions 007: {"error":null,"result":[{"uuid":["uuid","<3>"]}]} -008: i=0 k=0 ka=[] l2= uuid=<2> -008: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -008: i=3 l1= uuid=<3> +008: table link1: i=0 k=0 ka=[] l2= uuid=<2> +008: table link2: i=3 l1= uuid=<3> +008: table 
simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 009: done ]]) @@ -716,19 +716,19 @@ OVSDB_CHECK_IDL([self-linking idl, consistent ops], "row": {"k": ["uuid", "#0#"]}}]']], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]}]} -002: i=0 k=0 ka=[] l2= uuid=<0> +002: table link1: i=0 k=0 ka=[] l2= uuid=<0> 003: {"error":null,"result":[{"uuid":["uuid","<1>"]},{"uuid":["uuid","<2>"]}]} -004: i=0 k=0 ka=[] l2= uuid=<0> -004: i=1 k=2 ka=[] l2= uuid=<1> -004: i=2 k=1 ka=[] l2= uuid=<2> +004: table link1: i=0 k=0 ka=[] l2= uuid=<0> +004: table link1: i=1 k=2 ka=[] l2= uuid=<1> +004: table link1: i=2 k=1 ka=[] l2= uuid=<2> 005: {"error":null,"result":[{"count":1}]} -006: i=0 k=0 ka=[] l2= uuid=<0> -006: i=1 k=1 ka=[] l2= uuid=<1> -006: i=2 k=1 ka=[] l2= uuid=<2> +006: table link1: i=0 k=0 ka=[] l2= uuid=<0> +006: table link1: i=1 k=1 ka=[] l2= uuid=<1> +006: table link1: i=2 k=1 ka=[] l2= uuid=<2> 007: {"error":null,"result":[{"count":3}]} -008: i=0 k=0 ka=[] l2= uuid=<0> -008: i=1 k=0 ka=[] l2= uuid=<1> -008: i=2 k=0 ka=[] l2= uuid=<2> +008: table link1: i=0 k=0 ka=[] l2= uuid=<0> +008: table link1: i=1 k=0 ka=[] l2= uuid=<1> +008: table link1: i=2 k=0 ka=[] l2= uuid=<2> 009: done ]]) @@ -767,12 +767,12 @@ OVSDB_CHECK_IDL([self-linking idl, inconsistent ops], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"details":"Table link1 column k row <0> references nonexistent row <1> in table link1.","error":"referential integrity violation"}]} 002: {"error":null,"result":[{"uuid":["uuid","<2>"]},{"uuid":["uuid","<3>"]}]} -003: i=1 k=1 ka=[] l2= uuid=<2> -003: i=2 k=1 ka=[] l2= uuid=<3> +003: table link1: i=1 k=1 ka=[] l2= uuid=<2> +003: table link1: i=2 k=1 ka=[] l2= uuid=<3> 004: {"error":null,"result":[{"count":2},{"details":"Table link1 column k row references nonexistent row <4> in table link1.","error":"referential integrity violation"}]} 005: {"error":null,"result":[{"count":1},{"details":"cannot delete link1 row <2> because of 1 remaining reference(s)","error":"referential integrity violation"}]} 006: {"error":null,"result":[{"count":1}]} -007: i=1 k=1 ka=[] l2= uuid=<2> +007: table link1: i=1 k=1 ka=[] l2= uuid=<2> 008: {"error":null,"result":[{"count":1}]} 009: empty 010: done @@ -815,15 +815,15 @@ OVSDB_CHECK_IDL([self-linking idl, sets], "where": []}]']], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"uuid":["uuid","<2>"]},{"uuid":["uuid","<3>"]}]} -002: i=0 k=0 ka=[0] l2= uuid=<0> -002: i=1 k=0 ka=[1] l2= uuid=<1> -002: i=2 k=0 ka=[2] l2= uuid=<2> -002: i=3 k=0 ka=[3] l2= uuid=<3> +002: table link1: i=0 k=0 ka=[0] l2= uuid=<0> +002: table link1: i=1 k=0 ka=[1] l2= uuid=<1> +002: table link1: i=2 k=0 ka=[2] l2= uuid=<2> +002: table link1: i=3 k=0 ka=[3] l2= uuid=<3> 003: {"error":null,"result":[{"count":4}]} -004: i=0 k=0 ka=[0 1 2 3] l2= uuid=<0> -004: i=1 k=0 ka=[0 1 2 3] l2= uuid=<1> -004: i=2 k=0 ka=[0 1 2 3] l2= uuid=<2> -004: i=3 k=0 ka=[0 1 2 3] l2= uuid=<3> +004: table link1: i=0 k=0 ka=[0 1 2 3] l2= uuid=<0> +004: table link1: i=1 k=0 ka=[0 1 2 3] l2= uuid=<1> +004: table link1: i=2 k=0 ka=[0 1 2 3] l2= uuid=<2> +004: table link1: i=3 k=0 ka=[0 1 2 3] l2= uuid=<3> 005: {"error":null,"result":[{"count":1},{"details":"Table link1 column ka row <2> references nonexistent row <4> in table link1.","error":"referential integrity violation"}]} 006: {"error":null,"result":[{"count":4}]} 007: empty @@ -843,8 +843,8 @@ OVSDB_CHECK_IDL([external-linking idl, consistent ops], "uuid-name": 
"row1"}]']], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}]} -002: i=0 l1= uuid=<0> -002: i=1 k=1 ka=[] l2=0 uuid=<1> +002: table link1: i=1 k=1 ka=[] l2=0 uuid=<1> +002: table link2: i=0 l1= uuid=<0> 003: done ]]) @@ -867,20 +867,49 @@ OVSDB_CHECK_IDL([singleton idl, constraints], "row": {"name": "bar"}}]']], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]}]} -002: name=foo uuid=<0> +002: table singleton: name=foo uuid=<0> 003: {"error":null,"result":[{"uuid":["uuid","<1>"]},{"details":"transaction causes \"singleton\" table to contain 2 rows, greater than the schema-defined limit of 1 row(s)","error":"constraint violation"}]} 004: {"error":null,"result":[{"count":1},{"uuid":["uuid","<2>"]}]} -005: name=bar uuid=<2> +005: table singleton: name=bar uuid=<2> 006: done ]]) +dnl This test creates a database with references and checks that deleting both +dnl source and destination rows of a reference in a single update doesn't leak +dnl rows that got orphaned when processing the update. +OVSDB_CHECK_IDL([simple idl, references, multiple deletes], + [['["idltest", + {"op": "insert", + "table": "simple", + "row": {"s": "row0_s"}, + "uuid-name": "weak_row0"}, + {"op": "insert", + "table": "simple6", + "row": {"name": "first_row", + "weak_ref": ["set", + [["named-uuid", "weak_row0"]] + ]}}]']], + [['["idltest", + {"op": "delete", + "table": "simple", + "where": [["s", "==", "row0_s"]]}, + {"op": "delete", + "table": "simple6", + "where": [["name", "==", "first_row"]]}]']], + [[000: table simple6: name=first_row weak_ref=[<0>] uuid=<1> +000: table simple: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<0> +001: {"error":null,"result":[{"count":1},{"count":1}]} +002: empty +003: done +]]) + OVSDB_CHECK_IDL_PY([external-linking idl, insert ops], [], [['linktest']], [[000: empty 001: commit, status=success -002: i=1 k=1 ka=[1] l2= uuid=<0> -002: i=2 k=1 ka=[1 2] l2= uuid=<1> +002: table link1: i=1 k=1 ka=[1] l2= uuid=<0> +002: table link1: i=2 k=1 ka=[1 2] l2= uuid=<1> 003: done ]]) @@ -889,7 +918,7 @@ OVSDB_CHECK_IDL_PY([getattr idl, insert ops], [['getattrtest']], [[000: empty 001: commit, status=success -002: i=2 k=2 ka=[] l2= uuid=<0> +002: table link1: i=2 k=2 ka=[] l2= uuid=<0> 003: done ]]) @@ -902,11 +931,11 @@ OVSDB_CHECK_IDL_PY([row-from-json idl, whats this], "table": "simple", "row": {}}]']], [['notifytest insert 2, notifytest set 1 b 1, notifytest delete 0']], - [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -000: i=1 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> + [[000: table simple: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +000: table simple: i=1 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 001: commit, status=success, events=create|2|None, delete|0|None, update|1|b -002: i=1 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> -002: i=2 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +002: table simple: i=1 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +002: table simple: i=2 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> 003: done ]]) @@ -946,19 +975,19 @@ AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 idl uni AT_CHECK([sort stdout | uuidfilt], [0], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]}]} -002: i=0 k=0 ka=[] l2= uuid=<0> +002: table link1: i=0 k=0 ka=[] l2= uuid=<0> 003: 
{"error":null,"result":[{"uuid":["uuid","<1>"]},{"uuid":["uuid","<2>"]}]} -004: i=0 k=0 ka=[] l2= uuid=<0> -004: i=1 k=2 ka=[] l2= uuid=<1> -004: i=2 k=1 ka=[] l2= uuid=<2> +004: table link1: i=0 k=0 ka=[] l2= uuid=<0> +004: table link1: i=1 k=2 ka=[] l2= uuid=<1> +004: table link1: i=2 k=1 ka=[] l2= uuid=<2> 005: {"error":null,"result":[{"count":1}]} -006: i=0 k=0 ka=[] l2= uuid=<0> -006: i=1 k=1 ka=[] l2= uuid=<1> -006: i=2 k=1 ka=[] l2= uuid=<2> +006: table link1: i=0 k=0 ka=[] l2= uuid=<0> +006: table link1: i=1 k=1 ka=[] l2= uuid=<1> +006: table link1: i=2 k=1 ka=[] l2= uuid=<2> 007: {"error":null,"result":[{"count":3}]} -008: i=0 k=0 ka=[] l2= uuid=<0> -008: i=1 k=0 ka=[] l2= uuid=<1> -008: i=2 k=0 ka=[] l2= uuid=<2> +008: table link1: i=0 k=0 ka=[] l2= uuid=<0> +008: table link1: i=1 k=0 ka=[] l2= uuid=<1> +008: table link1: i=2 k=0 ka=[] l2= uuid=<2> 009: done ]]) @@ -1022,11 +1051,11 @@ OVSDB_CHECK_IDL_FETCH_COLUMNS([simple idl, initially populated], "row": {}}]']], [?simple:i,r!], ['fetch 0 r'], - [[000: i=0 uuid=<0> -000: i=1 uuid=<1> + [[000: table simple: i=0 uuid=<0> +000: table simple: i=1 uuid=<1> 001: commit, status=success -002: i=0 r=0 uuid=<0> -002: i=1 uuid=<1> +002: table simple: i=0 r=0 uuid=<0> +002: table simple: i=1 uuid=<1> 003: done ]]) @@ -1098,28 +1127,28 @@ OVSDB_CHECK_IDL_WO_MONITOR_COND([simple idl disable monitor-cond], 'reconnect']], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}]} -002: i=0 r=0 b=false s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -002: i=1 r=2 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +002: table simple: i=0 r=0 b=false s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +002: table simple: i=1 r=2 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 003: {"error":null,"result":[{"count":2}]} -004: i=0 r=0 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -004: i=1 r=2 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +004: table simple: i=0 r=0 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +004: table simple: i=1 r=2 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 005: {"error":null,"result":[{"count":2}]} -006: i=0 r=123.5 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -006: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +006: table simple: i=0 r=123.5 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +006: table simple: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 007: {"error":null,"result":[{"uuid":["uuid","<6>"]}]} -008: i=-1 r=125 b=false s= u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> -008: i=0 r=123.5 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -008: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +008: table simple: i=-1 r=125 b=false s= u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +008: table simple: i=0 r=123.5 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +008: table simple: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 009: {"error":null,"result":[{"count":2}]} -010: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> -010: i=0 r=123.5 b=true s=newstring u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -010: 
i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +010: table simple: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +010: table simple: i=0 r=123.5 b=true s=newstring u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +010: table simple: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 011: {"error":null,"result":[{"count":1}]} -012: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> -012: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +012: table simple: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +012: table simple: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 013: reconnect -014: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> -014: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> +014: table simple: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +014: table simple: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 015: done ]]) @@ -1162,13 +1191,12 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated], "table": "simple", "where": [], "row": {"b": true}}]']], - [[000: i=1 r=2 b=true s=mystring u=<0> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<1> <2>] uuid=<3> -000: inserted row: uuid=<3> -000: updated columns: b ba i ia r ra s sa u ua + [[000: table simple: inserted row: i=1 r=2 b=true s=mystring u=<0> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<1> <2>] uuid=<3> +000: table simple: updated columns: b ba i ia r ra s sa u ua 001: {"error":null,"result":[{"count":2}]} -002: i=0 r=0 b=true s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<5> -002: i=1 r=2 b=true s=mystring u=<0> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<1> <2>] uuid=<3> -002: updated columns: b +002: table simple: i=0 r=0 b=true s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<5> +002: table simple: i=1 r=2 b=true s=mystring u=<0> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<1> <2>] uuid=<3> +002: table simple: updated columns: b 003: done ]]) @@ -1209,19 +1237,17 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, orphan weak refer "table": "simple6", "where": []}]']], [[000: change conditions -001: inserted row: uuid=<0> -001: name=first_row weak_ref=[] uuid=<0> -001: updated columns: name weak_ref +001: table simple6: inserted row: name=first_row weak_ref=[] uuid=<0> +001: table simple6: updated columns: name weak_ref 002: change conditions -003: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> -003: inserted row: uuid=<2> -003: name=first_row weak_ref=[<2>] uuid=<0> -003: updated columns: s +003: table simple6: name=first_row weak_ref=[<1>] uuid=<0> +003: table simple: inserted row: i=0 r=0 b=false s=row1_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: updated columns: s 004: {"error":null,"result":[{"count":1}]} -005: name=new_name weak_ref=[<2>] uuid=<0> -005: updated columns: name +005: table simple6: name=new_name weak_ref=[<1>] uuid=<0> +005: table simple6: updated columns: name 006: {"error":null,"result":[{"count":1}]} -007: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +007: table simple: i=0 r=0 b=false s=row1_s 
u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 008: done ]]) @@ -1253,30 +1279,266 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, orphan rows, cond "table": "simple6", "where": []}]']], [[000: change conditions -001: inserted row: uuid=<0> -001: name=first_row weak_ref=[] uuid=<0> -001: updated columns: name weak_ref +001: table simple6: inserted row: name=first_row weak_ref=[] uuid=<0> +001: table simple6: updated columns: name weak_ref 002: change conditions -003: i=0 r=0 b=false s=row0_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> -003: inserted row: uuid=<2> -003: name=first_row weak_ref=[<2>] uuid=<0> -003: updated columns: s +003: table simple6: name=first_row weak_ref=[<1>] uuid=<0> +003: table simple: inserted row: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: updated columns: s 004: change conditions -005: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> -005: inserted row: uuid=<3> -005: updated columns: s +005: table simple6: name=first_row weak_ref=[] uuid=<0> +005: table simple: deleted row: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +005: table simple: inserted row: i=0 r=0 b=false s=row1_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +005: table simple: updated columns: s 006: change conditions -007: deleted row: uuid=<3> -007: i=0 r=0 b=false s=row0_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> -007: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> -007: inserted row: uuid=<2> -007: name=first_row weak_ref=[<2>] uuid=<0> -007: updated columns: s +007: table simple6: name=first_row weak_ref=[<1>] uuid=<0> +007: table simple: deleted row: i=0 r=0 b=false s=row1_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +007: table simple: inserted row: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +007: table simple: updated columns: s +008: {"error":null,"result":[{"count":1}]} +009: table simple: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +010: done +]]) + +dnl This test checks that deleting the destination of a weak reference +dnl without deleting the source, through monitor condition change, updates +dnl the source tracked record. 
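+dnl As a sketch of the tracked output this expects (uuids illustrative): when +dnl the condition moves from row0_s to row1_s, the IDL reports a deleted row +dnl for the old destination, an inserted row for the new one, and re-reports +dnl the simple6 source row with weak_ref rewritten to the new uuid.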
+OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, references, conditional delete], + [['["idltest", + {"op": "insert", + "table": "simple", + "row": {"s": "row0_s", "i": 0}, + "uuid-name": "weak_row0"}, + {"op": "insert", + "table": "simple", + "row": {"s": "row1_s", "i": 1}, + "uuid-name": "weak_row1"}, + {"op": "insert", + "table": "simple6", + "row": {"name": "first_row", + "weak_ref": ["set", + [["named-uuid", "weak_row0"], + ["named-uuid", "weak_row1"]] + ]}}]']], + [['condition simple []' \ + 'condition simple [["s","==","row0_s"]]' \ + 'condition simple [["s","==","row1_s"]]' \ + '["idltest", + {"op": "update", + "table": "simple6", + "where": [], + "row": {"name": "new_name"}}]' \ + '["idltest", + {"op": "delete", + "table": "simple6", + "where": []}]']], + [[000: change conditions +001: table simple6: inserted row: name=first_row weak_ref=[] uuid=<0> +001: table simple6: updated columns: name weak_ref +002: change conditions +003: table simple6: name=first_row weak_ref=[<1>] uuid=<0> +003: table simple: inserted row: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: updated columns: s +004: change conditions +005: table simple6: name=first_row weak_ref=[<3>] uuid=<0> +005: table simple: deleted row: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +005: table simple: inserted row: i=1 r=0 b=false s=row1_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +005: table simple: updated columns: i s +006: {"error":null,"result":[{"count":1}]} +007: table simple6: name=new_name weak_ref=[<3>] uuid=<0> +007: table simple6: updated columns: name 008: {"error":null,"result":[{"count":1}]} -009: i=0 r=0 b=false s=row0_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +009: table simple: i=1 r=0 b=false s=row1_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> 010: done ]]) +dnl This test checks that deleting the destination of a reference updates the +dnl source tracked record. +OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, references, single delete], + [['["idltest", + {"op": "insert", + "table": "simple", + "row": {"s": "row0_s"}, + "uuid-name": "uuid_row0_s"}, + {"op": "insert", + "table": "simple6", + "row": {"name": "row0_s6", + "weak_ref": ["set", + [["named-uuid", "uuid_row0_s"]] + ]}}]']], + [['condition simple [true];simple6 [true]' \ + '["idltest", + {"op": "delete", + "table": "simple", + "where": []}]' \ + '["idltest", + {"op": "insert", + "table": "simple", + "row": {"s": "row0_s"}}]']], + [[000: change conditions +001: table simple6: inserted row: name=row0_s6 weak_ref=[<0>] uuid=<1> +001: table simple6: updated columns: name weak_ref +001: table simple: inserted row: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<0> +001: table simple: updated columns: s +002: {"error":null,"result":[{"count":1}]} +003: table simple6: name=row0_s6 weak_ref=[] uuid=<1> +003: table simple6: updated columns: weak_ref +003: table simple: deleted row: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<0> +004: {"error":null,"result":[{"uuid":["uuid","<3>"]}]} +005: table simple6: name=row0_s6 weak_ref=[] uuid=<1> +005: table simple: inserted row: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +005: table simple: updated columns: s +006: done +]]) + +dnl This test checks that deleting both the destination and source of the +dnl reference doesn't remove the reference in the source tracked record. 
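+dnl Concretely, the tracked deletion below is still expected to report +dnl weak_ref=[<0>] on the deleted simple6 row even though row <0> is removed +dnl by the same update, so clients walking tracked deletions can still see +dnl which rows were referenced.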
+OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, weak references, multiple deletes], + [['["idltest", + {"op": "insert", + "table": "simple", + "row": {"s": "row0_s"}, + "uuid-name": "uuid_row0_s"}, + {"op": "insert", + "table": "simple6", + "row": {"name": "row0_s6", + "weak_ref": ["set", + [["named-uuid", "uuid_row0_s"]] + ]}}]']], + [['condition simple [true];simple6 [true]' \ + '["idltest", + {"op": "delete", + "table": "simple", + "where": []}, + {"op": "delete", + "table": "simple6", + "where": []}]' \ + '["idltest", + {"op": "insert", + "table": "simple", + "row": {"s": "row0_s"}}]']], + [[000: change conditions +001: table simple6: inserted row: name=row0_s6 weak_ref=[<0>] uuid=<1> +001: table simple6: updated columns: name weak_ref +001: table simple: inserted row: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<0> +001: table simple: updated columns: s +002: {"error":null,"result":[{"count":1},{"count":1}]} +003: table simple6: deleted row: name=row0_s6 weak_ref=[<0>] uuid=<1> +003: table simple: deleted row: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<0> +004: {"error":null,"result":[{"uuid":["uuid","<3>"]}]} +005: table simple: inserted row: i=0 r=0 b=false s=row0_s u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +005: table simple: updated columns: s +006: done +]]) + +dnl This test checks that deleting both the destination and source of the +dnl reference doesn't remove the reference in the source tracked record. +OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, strong references, multiple deletes], + [['["idltest", + {"op": "insert", + "table": "simple4", + "row": {"name": "row0_s4"}, + "uuid-name": "uuid_row0_s4"}, + {"op": "insert", + "table": "simple3", + "row": {"name": "row0_s3", + "uref": ["set", + [["named-uuid", "uuid_row0_s4"]] + ]}}]']], + [['condition simple [true];simple3 [true];simple4 [true]' \ + '["idltest", + {"op": "delete", + "table": "simple3", + "where": []}, + {"op": "delete", + "table": "simple4", + "where": []}]' \ + '["idltest", + {"op": "insert", + "table": "simple", + "row": {"s": "row0_s"}}]']], + [[000: change conditions +001: table simple3: inserted row: name=row0_s3 uset=[] uref=[<0>] uuid=<1> +001: table simple3: updated columns: name uref +001: table simple4: inserted row: name=row0_s4 uuid=<0> +001: table simple4: updated columns: name +002: {"error":null,"result":[{"count":1},{"count":1}]} +003: table simple3: deleted row: name=row0_s3 uset=[] uref=[<0>] uuid=<1> +003: table simple4: deleted row: name=row0_s4 uuid=<0> +004: {"error":null,"result":[{"uuid":["uuid","<2>"]}]} +005: table simple: inserted row: i=0 r=0 b=false s=row0_s u=<3> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +005: table simple: updated columns: s +006: done +]]) + +dnl This test checks that changing conditions to not include the target of +dnl a strong reference also updates the source row when change tracking is +dnl enabled. 
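+dnl Note on the condition syntax in these tests: "[true]" matches every row +dnl of a table, while the empty condition "[]" matches none, so the +dnl "condition simple4 []" step below is what evicts the referenced simple4 +dnl row from the client's view.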
+OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, strong references, conditional], + [['["idltest", + {"op": "insert", + "table": "simple4", + "row": {"name": "row0_s4"}, + "uuid-name": "uuid_row0_s4"}, + {"op": "insert", + "table": "simple3", + "row": {"name": "row0_s3", + "uref": ["set", + [["named-uuid", "uuid_row0_s4"]] + ]}}]']], + [['condition simple [true];simple3 [true];simple4 [true]' \ + 'condition simple4 []' \ + '["idltest", + {"op": "insert", + "table": "simple", + "row": {"s": "row0_s"}}]']], + [[000: change conditions +001: table simple3: inserted row: name=row0_s3 uset=[] uref=[<0>] uuid=<1> +001: table simple3: updated columns: name uref +001: table simple4: inserted row: name=row0_s4 uuid=<0> +001: table simple4: updated columns: name +002: change conditions +003: table simple3: name=row0_s3 uset=[] uref=[] uuid=<1> +003: table simple4: deleted row: name=row0_s4 uuid=<0> +004: {"error":null,"result":[{"uuid":["uuid","<2>"]}]} +005: table simple3: name=row0_s3 uset=[] uref=[] uuid=<1> +005: table simple: inserted row: i=0 r=0 b=false s=row0_s u=<3> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +005: table simple: updated columns: s +006: done +]]) + +dnl This test checks that changing conditions to not include the target of +dnl a strong reference also updates the source row when change tracking is +dnl disabled. +OVSDB_CHECK_IDL([simple idl, initially populated, strong references, conditional], + [['["idltest", + {"op": "insert", + "table": "simple4", + "row": {"name": "row0_s4"}, + "uuid-name": "uuid_row0_s4"}, + {"op": "insert", + "table": "simple3", + "row": {"name": "row0_s3", + "uref": ["set", + [["named-uuid", "uuid_row0_s4"]] + ]}}]']], + [['condition simple [true];simple3 [true];simple4 [true]' \ + 'condition simple4 []' \ + '["idltest", + {"op": "insert", + "table": "simple", + "row": {"s": "row0_s"}}]']], + [[000: change conditions +001: table simple3: name=row0_s3 uset=[] uref=[<0>] uuid=<1> +001: table simple4: name=row0_s4 uuid=<0> +002: change conditions +003: table simple3: name=row0_s3 uset=[] uref=[] uuid=<1> +004: {"error":null,"result":[{"uuid":["uuid","<2>"]}]} +005: table simple3: name=row0_s3 uset=[] uref=[] uuid=<1> +005: table simple: i=0 r=0 b=false s=row0_s u=<3> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +006: done +]]) + OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops], [], [['["idltest", @@ -1330,34 +1592,31 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops], 'reconnect']], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}]} -002: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<0> -002: inserted row: uuid=<0> -002: updated columns: b ba i ia r ra s sa u ua +002: table simple: inserted row: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<0> +002: table simple: updated columns: b ba i ia r ra s sa u ua 003: {"error":null,"result":[{"count":2}]} -004: i=0 r=0 b=true s= u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -004: updated columns: b +004: table simple: i=0 r=0 b=true s= u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +004: table simple: updated columns: b 005: {"error":null,"result":[{"count":2}]} -006: i=0 r=123.5 b=true s= u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -006: i=1 r=123.5 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<0> -006: updated columns: r -006: updated columns: r +006: table simple: i=0 r=123.5 b=true 
s= u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +006: table simple: i=1 r=123.5 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<0> +006: table simple: updated columns: r +006: table simple: updated columns: r 007: {"error":null,"result":[{"uuid":["uuid","<6>"]}]} -008: i=-1 r=125 b=false s= u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> -008: inserted row: uuid=<6> -008: updated columns: ba i ia r ra +008: table simple: inserted row: i=-1 r=125 b=false s= u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +008: table simple: updated columns: ba i ia r ra 009: {"error":null,"result":[{"count":2}]} -010: i=-1 r=125 b=false s=newstring u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> -010: i=0 r=123.5 b=true s=newstring u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -010: updated columns: s -010: updated columns: s +010: table simple: i=-1 r=125 b=false s=newstring u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +010: table simple: i=0 r=123.5 b=true s=newstring u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +010: table simple: updated columns: s +010: table simple: updated columns: s 011: {"error":null,"result":[{"count":1}]} -012: deleted row: uuid=<1> -012: i=0 r=123.5 b=true s=newstring u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +012: table simple: deleted row: i=0 r=123.5 b=true s=newstring u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 013: reconnect -014: i=-1 r=125 b=false s=newstring u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> -014: i=1 r=123.5 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<0> -014: updated columns: b ba i ia r ra s sa u ua -014: updated columns: ba i ia r ra s +014: table simple: inserted row: i=-1 r=125 b=false s=newstring u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +014: table simple: inserted row: i=1 r=123.5 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<0> +014: table simple: updated columns: b ba i ia r ra s sa u ua +014: table simple: updated columns: ba i ia r ra s 015: done ]]) @@ -1397,16 +1656,16 @@ OVSDB_CHECK_IDL_PY([partial-map idl], "row":{"name":"myString1","smap":["map",[["key1","value1"],["key2","value2"]]]} }]'] ], [?simple2:name,smap,imap 'partialmapinsertelement' 'partialmapinsertmultipleelements' 'partialmapdelelements' 'partialmapmutatenew'], -[[000: name=myString1 smap=[(key1 value1) (key2 value2)] imap=[] +[[000: table simple2: name=myString1 smap=[(key1 value1) (key2 value2)] imap=[] uuid=<0> 001: commit, status=success -002: name=String2 smap=[(key1 myList1) (key2 value2)] imap=[(3 myids2)] +002: table simple2: name=String2 smap=[(key1 myList1) (key2 value2)] imap=[(3 myids2)] uuid=<0> 003: commit, status=success -004: name=String2 smap=[(key1 myList1) (key2 myList2) (key3 myList3) (key4 myList4)] imap=[(3 myids2)] +004: table simple2: name=String2 smap=[(key1 myList1) (key2 myList2) (key3 myList3) (key4 myList4)] imap=[(3 myids2)] uuid=<0> 005: commit, status=success -006: name=String2 smap=[(key2 myList2)] imap=[(3 myids2)] +006: table simple2: name=String2 smap=[(key2 myList2)] imap=[(3 myids2)] uuid=<0> 007: commit, status=success -008: name=String2 smap=[(key2 myList2)] imap=[(3 myids2)] -008: name=String2New smap=[(key1 newList1) (key2 newList2)] imap=[] +008: table simple2: name=String2 smap=[(key2 myList2)] imap=[(3 myids2)] uuid=<0> +008: table simple2: name=String2New smap=[(key1 newList1) (key2 newList2)] imap=[] uuid=<1> 009: done ]]) @@ -1414,11 +1673,11 @@ 
OVSDB_CHECK_IDL_PY([partial-map update set refmap idl], [['["idltest", {"op":"insert", "table":"simple3", "row":{"name":"myString1"}}, {"op":"insert", "table":"simple5", "row":{"name":"myString2"}}]']], ['partialmapmutateirefmap'], -[[000: name=myString1 uset=[] -000: name=myString2 irefmap=[] +[[000: table simple3: name=myString1 uset=[] uref=[] uuid=<0> +000: table simple5: name=myString2 irefmap=[] uuid=<1> 001: commit, status=success -002: name=myString1 uset=[] -002: name=myString2 irefmap=[(1 <0>)] +002: table simple3: name=myString1 uset=[] uref=[] uuid=<0> +002: table simple5: name=myString2 irefmap=[(1 <0>)] uuid=<1> 003: done ]]) @@ -1441,17 +1700,17 @@ OVSDB_CHECK_IDL_PARTIAL_UPDATE_SET_COLUMN([set, simple3 idl-partial-update-set-c ], [], [[000: Getting records -001: name=mySet1 uset=[[<0>],[<1>]] uref=[] +001: table simple3: name=mySet1 uset=[<0>,<1>] uref=[] uuid=<2> 002: After rename+add new value -003: name=String2 uset=[[<0>],[<1>],[<2>]] uref=[] +003: table simple3: name=String2 uset=[<0>,<1>,<3>] uref=[] uuid=<2> 004: After add new value -005: name=String2 uset=[[<0>],[<1>],[<2>],[<3>]] uref=[] +005: table simple3: name=String2 uset=[<0>,<1>,<3>,<4>] uref=[] uuid=<2> 006: After delete value -007: name=String2 uset=[[<0>],[<1>],[<3>]] uref=[] +007: table simple3: name=String2 uset=[<0>,<1>,<4>] uref=[] uuid=<2> 008: After trying to delete a deleted value -009: name=String2 uset=[[<0>],[<1>],[<3>]] uref=[] +009: table simple3: name=String2 uset=[<0>,<1>,<4>] uref=[] uuid=<2> 010: After add to other table + set of strong ref -011: name=String2 uset=[[<0>],[<1>],[<3>]] uref=[[<4>]] +011: table simple3: name=String2 uset=[<0>,<1>,<4>] uref=[<5>] uuid=<2> 012: End test ]]) @@ -1463,22 +1722,26 @@ OVSDB_CHECK_IDL_PY([partial-set idl], "mutations": [["uset", "insert", ["set", [["uuid", "000d2f6a-76af-412f-b59d-e7bcd3e84eff"]]]]]}]'] ], ['partialrenamesetadd' 'partialduplicateadd' 'partialsetdel' 'partialsetref' 'partialsetoverrideops' 'partialsetadddelete' 'partialsetmutatenew'], -[[000: name=mySet1 uset=[<0> <1>] +[[000: table simple3: name=mySet1 uset=[<0> <1>] uref=[] uuid=<2> 001: commit, status=success -002: name=String2 uset=[<0> <1> <2>] +002: table simple3: name=String2 uset=[<0> <1> <3>] uref=[] uuid=<2> 003: commit, status=success -004: name=String2 uset=[<0> <1> <2> <3>] +004: table simple3: name=String2 uset=[<0> <1> <3> <4>] uref=[] uuid=<2> 005: commit, status=success -006: name=String2 uset=[<0> <1> <3>] +006: table simple3: name=String2 uset=[<0> <1> <4>] uref=[] uuid=<2> 007: commit, status=success -008: name=String2 uset=[<0> <1> <3>] +008: table simple3: name=String2 uset=[<0> <1> <4>] uref=[<5>] uuid=<2> +008: table simple4: name=test uuid=<5> 009: commit, status=success -010: name=String2 uset=[<3>] +010: table simple3: name=String2 uset=[<4>] uref=[<5>] uuid=<2> +010: table simple4: name=test uuid=<5> 011: commit, status=success -012: name=String2 uset=[<4> <5>] +012: table simple3: name=String2 uset=[<6> <7>] uref=[<5>] uuid=<2> +012: table simple4: name=test uuid=<5> 013: commit, status=success -014: name=String2 uset=[<4> <5>] -014: name=String3 uset=[<6>] +014: table simple3: name=String2 uset=[<6> <7>] uref=[<5>] uuid=<2> +014: table simple3: name=String3 uset=[<8>] uref=[] uuid=<9> +014: table simple4: name=test uuid=<5> 015: done ]]) @@ -1486,6 +1749,28 @@ m4_define([OVSDB_CHECK_IDL_NOTIFY], [OVSDB_CHECK_IDL_PY([$1], [], [$2], [$3], [notify $4], [$5]) OVSDB_CHECK_IDL_SSL_PY([$1], [], [$2], [$3], [notify $4], [$5])]) +OVSDB_CHECK_IDL_NOTIFY([simple 
link idl verify notify], + [['track-notify' \ + '["idltest", + {"op": "insert", + "table": "link1", + "row": {"i": 1, "k": ["named-uuid", "l1row"], "l2": ["set", [["named-uuid", "l2row"]]]}, + "uuid-name": "l1row"}, + {"op": "insert", + "table": "link2", + "uuid-name": "l2row", + "row": {"i": 2, "l1": ["set", [["named-uuid", "l1row"]]]}}]']], +[[000: empty +000: event:create, row={}, uuid=<0>, updates=None +000: event:create, row={}, uuid=<1>, updates=None +001: {"error":null,"result":[{"uuid":["uuid","<2>"]},{"uuid":["uuid","<3>"]}]} +002: event:create, row={i=1 l2=[<3>]}, uuid=<2>, updates=None +002: event:create, row={i=2 l1=[<2>]}, uuid=<3>, updates=None +002: table link1: i=1 k=1 ka=[] l2=2 uuid=<2> +002: table link2: i=2 l1=1 uuid=<3> +003: done +]]) + OVSDB_CHECK_IDL_NOTIFY([simple idl verify notify], [['track-notify' \ '["idltest", @@ -1538,44 +1823,44 @@ OVSDB_CHECK_IDL_NOTIFY([simple idl verify notify], "where": [["i", "==", 0]]}]' \ 'reconnect']], [[000: empty -000: event:create, row={uuid=<0>}, updates=None -000: event:create, row={uuid=<1>}, updates=None +000: event:create, row={}, uuid=<0>, updates=None +000: event:create, row={}, uuid=<1>, updates=None 001: {"error":null,"result":[{"uuid":["uuid","<2>"]},{"uuid":["uuid","<3>"]}]} -002: event:create, row={i=0 r=0 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3>}, updates=None -002: event:create, row={i=1 r=2 b=true s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2>}, updates=None -002: i=0 r=0 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> -002: i=1 r=2 b=true s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> +002: event:create, row={i=0 r=0 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[]}, uuid=<3>, updates=None +002: event:create, row={i=1 r=2 b=true s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>]}, uuid=<2>, updates=None +002: table simple: i=0 r=0 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +002: table simple: i=1 r=2 b=true s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> 003: {"error":null,"result":[{"count":2}]} -004: event:update, row={i=1 r=2 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2>}, updates={b=true uuid=<2>} -004: i=0 r=0 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> -004: i=1 r=2 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> +004: event:update, row={i=1 r=2 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>]}, uuid=<2>, updates={b=true} +004: table simple: i=0 r=0 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +004: table simple: i=1 r=2 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> 005: {"error":null,"result":[{"count":2}]} -006: event:update, row={i=0 r=123.5 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3>}, updates={r=0 uuid=<3>} -006: event:update, row={i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2>}, updates={r=2 uuid=<2>} -006: i=0 r=123.5 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> -006: i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> +006: event:update, row={i=0 r=123.5 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[]}, uuid=<3>, updates={r=0} +006: event:update, row={i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc 
def] ua=[<6> <7>]}, uuid=<2>, updates={r=2} +006: table simple: i=0 r=123.5 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +006: table simple: i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> 007: {"error":null,"result":[{"uuid":["uuid","<8>"]}]} -008: event:create, row={i=-1 r=125 b=false s= u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<8>}, updates=None -008: i=-1 r=125 b=false s= u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<8> -008: i=0 r=123.5 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> -008: i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> +008: event:create, row={i=-1 r=125 b=false s= u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[]}, uuid=<8>, updates=None +008: table simple: i=-1 r=125 b=false s= u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<8> +008: table simple: i=0 r=123.5 b=false s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +008: table simple: i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> 009: {"error":null,"result":[{"count":2}]} -010: event:update, row={i=-1 r=125 b=false s=newstring u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<8>}, updates={s= uuid=<8>} -010: event:update, row={i=0 r=123.5 b=false s=newstring u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3>}, updates={s= uuid=<3>} -010: i=-1 r=125 b=false s=newstring u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<8> -010: i=0 r=123.5 b=false s=newstring u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> -010: i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> +010: event:update, row={i=-1 r=125 b=false s=newstring u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[]}, uuid=<8>, updates={s=} +010: event:update, row={i=0 r=123.5 b=false s=newstring u=<4> ia=[] ra=[] ba=[] sa=[] ua=[]}, uuid=<3>, updates={s=} +010: table simple: i=-1 r=125 b=false s=newstring u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<8> +010: table simple: i=0 r=123.5 b=false s=newstring u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +010: table simple: i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> 011: {"error":null,"result":[{"count":1}]} -012: event:delete, row={i=0 r=123.5 b=false s=newstring u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3>}, updates=None -012: i=-1 r=125 b=false s=newstring u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<8> -012: i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> +012: event:delete, row={i=0 r=123.5 b=false s=newstring u=<4> ia=[] ra=[] ba=[] sa=[] ua=[]}, uuid=<3>, updates=None +012: table simple: i=-1 r=125 b=false s=newstring u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<8> +012: table simple: i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> 013: reconnect -014: event:create, row={i=-1 r=125 b=false s=newstring u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<8>}, updates=None -014: event:create, row={i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2>}, updates=None -014: event:create, row={uuid=<0>}, updates=None -014: event:create, row={uuid=<1>}, updates=None -014: i=-1 r=125 b=false s=newstring u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<8> -014: i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] 
uuid=<2> +014: event:create, row={i=-1 r=125 b=false s=newstring u=<4> ia=[1] ra=[1.5] ba=[false] sa=[]}, uuid=<8>, updates=None +014: event:create, row={i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>]}, uuid=<2>, updates=None +014: event:create, row={}, uuid=<0>, updates=None +014: event:create, row={}, uuid=<1>, updates=None +014: table simple: i=-1 r=125 b=false s=newstring u=<4> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<8> +014: table simple: i=1 r=123.5 b=false s=mystring u=<5> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<6> <7>] uuid=<2> 015: done ]]) @@ -1888,10 +2173,10 @@ OVSDB_CHECK_IDL_COMPOUND_INDEX_WITH_REF([set, simple3 idl-compound-index-with-re [], [], [[000: After add to other table + set of strong ref -001: name= uset=[] uref=[[<0>]] +001: table simple3: name= uset=[] uref=[<0>] uuid=<1> 002: check simple4: not empty 003: Query using index with reference -004: name= uset=[] uref=[[<0>]] +004: table simple3: name= uset=[] uref=[<0>] uuid=<1> 005: After delete 007: check simple4: empty 008: End test @@ -1942,11 +2227,29 @@ m4_define([OVSDB_CHECK_IDL_LEADER_ONLY_PY], OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL connects to leader], 3, ['remote']) OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL reconnects to leader], 3, ['remote' '+remotestop' 'remote']) -# same as OVSDB_CHECK_IDL but uses C IDL implementation with tcp -# with multiple remotes. +# OVSDB_CHECK_CLUSTER_IDL_C(TITLE, N_SERVERS, [PRE-IDL-TXN], TRANSACTIONS, +# OUTPUT, [KEYWORDS], [FILTER], [LOG_FILTER]) +# +# Creates a clustered database with a schema derived from idltest.ovsidl, runs +# each PRE-IDL-TXN (if any), starts N_SERVERS ovsdb-server instances in RAFT +# mode on that database, and runs "test-ovsdb idl" passing each of the +# TRANSACTIONS along. +# +# Checks that the overall output is OUTPUT. Before comparison, the +# output is sorted (using "sort") and UUIDs in the output are replaced +# by markers of the form <N> where N is a number. The first unique +# UUID is replaced by <0>, the next by <1>, and so on. If a given +# UUID appears more than once it is always replaced by the same +# marker. If FILTER is supplied then the output is also filtered +# through the specified program. +# +# TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. +# +# If LOG_FILTER is provided, checks that grep finds no match for +# LOG_FILTER in the test-ovsdb logs. m4_define([OVSDB_CHECK_CLUSTER_IDL_C], [AT_SETUP([$1 - C - tcp]) - AT_KEYWORDS([ovsdb server idl positive tcp socket $5]) + AT_KEYWORDS([ovsdb server idl tcp $6]) m4_define([LPBK],[127.0.0.1]) OVSDB_CLUSTER_START_IDLTEST([$2], ["ptcp:0:"LPBK]) PARSE_LISTENING_PORT([s1.log], [TCP_PORT_1]) @@ -1957,11 +2260,36 @@ m4_define([OVSDB_CHECK_CLUSTER_IDL_C], m4_if([$3], [], [], [AT_CHECK([ovsdb-client transact $remotes $3], [0], [ignore], [ignore])]) AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 idl tcp:LPBK:$TCP_PORT_1 $4], - [0], [stdout], [ignore]) + [0], [stdout], [stderr]) + AT_CHECK([sort stdout | uuidfilt]m4_if([$7],,, [[| $7]]), + [0], [$5]) + m4_ifval([$8], [AT_CHECK([grep '$8' stderr], [1])], [], []) + AT_CLEANUP]) + +# Same as OVSDB_CHECK_CLUSTER_IDL_C but uses the Python IDL implementation.
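+# It takes the same arguments and must produce the same OUTPUT, which is +# what allows OVSDB_CHECK_CLUSTER_IDL below to instantiate both variants +# from a single definition.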
+m4_define([OVSDB_CHECK_CLUSTER_IDL_PY], + [AT_SETUP([$1 - Python3 - tcp]) + AT_KEYWORDS([ovsdb server idl tcp $6]) + m4_define([LPBK],[127.0.0.1]) + OVSDB_CLUSTER_START_IDLTEST([$2], ["ptcp:0:"LPBK]) + PARSE_LISTENING_PORT([s1.log], [TCP_PORT_1]) + PARSE_LISTENING_PORT([s2.log], [TCP_PORT_2]) + PARSE_LISTENING_PORT([s3.log], [TCP_PORT_3]) + remotes=tcp:LPBK:$TCP_PORT_1,tcp:LPBK:$TCP_PORT_2,tcp:LPBK:$TCP_PORT_3 + + m4_if([$3], [], [], + [AT_CHECK([ovsdb-client transact $remotes $3], [0], [ignore], [ignore])]) + AT_CHECK([$PYTHON3 $srcdir/test-ovsdb.py -t10 idl $srcdir/idltest.ovsschema tcp:LPBK:$TCP_PORT_1 $4], + [0], [stdout], [stderr]) AT_CHECK([sort stdout | uuidfilt]m4_if([$7],,, [[| $7]]), [0], [$5]) + m4_ifval([$8], [AT_CHECK([grep '$8' stderr], [1])], [], []) AT_CLEANUP]) +m4_define([OVSDB_CHECK_CLUSTER_IDL], + [OVSDB_CHECK_CLUSTER_IDL_C($@) + OVSDB_CHECK_CLUSTER_IDL_PY($@)]) + # Checks that monitor_cond_since works fine when disconnects happen # with cond_change requests in flight (i.e., IDL is properly updated). OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect], @@ -1989,11 +2317,34 @@ OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect], [[000: change conditions 001: empty 002: change conditions -003: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +003: table simple: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: change conditions 005: reconnect -006: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +006: table simple: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 007: {"error":null,"result":[{"count":1}]} -008: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +008: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 009: done ]]) + +dnl This test checks that forceful reconnects triggered by the IDL +dnl happen immediately (they should not use backoff). +OVSDB_CHECK_CLUSTER_IDL([simple idl, initially empty, force reconnect], + 3, + [], + [['+reconnect' \ 'reconnect' \ 'reconnect' \ 'reconnect']], + [[000: reconnect +001: empty +002: reconnect +003: empty +004: reconnect +005: empty +006: reconnect +007: empty +008: done +]], +[], +[], +reconnect.*waiting .* seconds before reconnect) diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at index 15628a7c6f..86d633ac4f 100644 --- a/tests/system-kmod-macros.at +++ b/tests/system-kmod-macros.at @@ -99,6 +99,17 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP], # m4_define([CHECK_CONNTRACK_NAT]) +# CHECK_CONNTRACK_ZEROIP_SNAT() +# +# Perform requirements checks for running conntrack all-zero IP SNAT tests. +# The kernel always supports all-zero IP SNAT, so no check is needed. +# However, the Windows datapath, which uses the same netlink interface, does not. +# +m4_define([CHECK_CONNTRACK_ZEROIP_SNAT], +[ + AT_SKIP_IF([test "$IS_WIN32" = "yes"]) +]) + # CHECK_CONNTRACK_TIMEOUT() # # Perform requirements checks for running conntrack customized timeout tests.
diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at index 4f601ef939..c8e4c68fae 100644 --- a/tests/system-offloads-traffic.at +++ b/tests/system-offloads-traffic.at @@ -70,3 +70,53 @@ AT_CHECK([ovs-appctl upcall/show | grep -E "offloaded flows : [[1-9]]"], [0], [i OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([offloads - set ingress_policing_rate and ingress_policing_burst - offloads disabled]) +AT_KEYWORDS([ingress_policing]) +AT_SKIP_IF([test $HAVE_TC = "no"]) +OVS_TRAFFIC_VSWITCHD_START() +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-offload=false]) +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) +ADD_NAMESPACES(at_ns0) +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +AT_CHECK([ovs-vsctl set interface ovs-p0 ingress_policing_rate=100]) +AT_CHECK([ovs-vsctl set interface ovs-p0 ingress_policing_burst=10]) +AT_CHECK([ovs-vsctl --columns=other_config list open], [0], [dnl +other_config : {hw-offload="false"} +]) +AT_CHECK([tc -o -s -d filter show dev ovs-p0 ingress | + sed -n 's/.*\(rate [[0-9]]*[[a-zA-Z]]* burst [[0-9]]*[[a-zA-Z]]*\).*/\1/; T; p; q'], + [0],[dnl +rate 100Kbit burst 1280b +]) +AT_CHECK([tc -s -d filter show dev ovs-p0 ingress | grep basic | + sed -n 's/.*\(basic\).*/\1/; T; p; q'], [0], [dnl +basic +]) +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([offloads - set ingress_policing_rate and ingress_policing_burst - offloads enabled]) +AT_KEYWORDS([ingress_policing]) +AT_SKIP_IF([test $HAVE_TC = "no"]) +OVS_TRAFFIC_VSWITCHD_START() +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-offload=true]) +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) +ADD_NAMESPACES(at_ns0) +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +AT_CHECK([ovs-vsctl set interface ovs-p0 ingress_policing_rate=100]) +AT_CHECK([ovs-vsctl set interface ovs-p0 ingress_policing_burst=10]) +AT_CHECK([ovs-vsctl --columns=other_config list open], [0], [dnl +other_config : {hw-offload="true"} +]) +AT_CHECK([tc -o -s -d filter show dev ovs-p0 ingress | + sed -n 's/.*\(rate [[0-9]]*[[a-zA-Z]]* burst [[0-9]]*[[a-zA-Z]]*\).*/\1/; T; p; q'], + [0],[dnl +rate 100Kbit burst 1280b +]) +AT_CHECK([tc -o -s -d filter show dev ovs-p0 ingress | grep matchall | + sed -n 's/.*\(matchall\).*/\1/; T; p; q'], [0], [dnl +matchall +]) +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP diff --git a/tests/system-traffic.at b/tests/system-traffic.at index fb5b9a36d2..bc203c1cce 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -574,6 +574,60 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([datapath - ping over geneve tunnel, delete flow regression]) +OVS_CHECK_GENEVE() + +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-underlay]) + +AT_DATA([flows.txt], [dnl +priority=100,icmp actions=resubmit(,10) +priority=0 actions=NORMAL +table=10, priority=100, ip, actions=ct(table=20,zone=65520) +table=20, priority=200, ip, ct_state=-new+trk, actions=resubmit(,30) +table=20, priority=100, ip, ct_state=+new, actions=resubmit(,30) +table=20, priority=50, ip, actions=DROP +table=30, priority=100, ip, actions=ct(commit,table=40,zone=65520) +table=40, actions=normal +]) + +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) +AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"]) + +ADD_NAMESPACES(at_ns0) + +dnl Set up underlay link from host into the namespace using veth pair. 
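+dnl (Here 172.31.1.0/24 is the underlay network that carries the +dnl encapsulated packets, while the tunnel endpoints configured below use +dnl 10.1.1.0/24 as the overlay network.)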
+ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24") +AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) +AT_CHECK([ip link set dev br-underlay up]) + +dnl Set up tunnel endpoints on OVS outside the namespace and with a native +dnl linux device inside the namespace. +ADD_OVS_TUNNEL([geneve], [br0], [at_gnv0], [172.31.1.1], [10.1.1.100/24]) +ADD_NATIVE_TUNNEL([geneve], [ns_gnv0], [at_ns0], [172.31.1.100], [10.1.1.1/24], + [vni 0]) + +dnl First, check the underlay +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +dnl ping over tunnel should work +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-ofctl del-flows br0 "ct_state=+new"]) + +dnl ping should not go through after removal of the flow +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl +7 packets transmitted, 0 received, 100% packet loss, time 0ms +]) + +OVS_TRAFFIC_VSWITCHD_STOP(["/|ERR|/d +/|WARN|/d"]) +AT_CLEANUP + AT_SETUP([datapath - flow resume with geneve tun_metadata]) OVS_CHECK_GENEVE() @@ -3251,6 +3305,46 @@ NS_CHECK_EXEC([at_ns0], [ping6 -s 3200 -q -c 3 -i 0.3 -w 2 fc00::2 | FORMAT_PING OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - IPv4 Fragmentation + NAT]) +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) +CHECK_CONNTRACK() + +OVS_TRAFFIC_VSWITCHD_START( + [set-fail-mode br0 secure -- ]) + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.2.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.2.1.2/24") + +dnl Create a dummy route for NAT +NS_CHECK_EXEC([at_ns1], [ip addr add 10.1.1.2/32 dev lo]) +NS_CHECK_EXEC([at_ns0], [ip route add 10.1.1.0/24 via 10.2.1.2]) +NS_CHECK_EXEC([at_ns1], [ip route add 10.1.1.0/24 via 10.2.1.1]) + +dnl Solely for debugging when things go wrong +NS_EXEC([at_ns0], [tcpdump -l -n -xx -U -i p0 -w p0.pcap >tcpdump.out 2>/dev/null &]) +NS_EXEC([at_ns1], [tcpdump -l -n -xx -U -i p1 -w p1.pcap >tcpdump.out 2>/dev/null &]) + +AT_DATA([flows.txt], [dnl +table=0,arp,actions=normal +table=0,ct_state=-trk,ip,in_port=ovs-p0, actions=ct(table=1, nat) +table=0,ct_state=-trk,ip,in_port=ovs-p1, actions=ct(table=1, nat) +table=1,ct_state=+trk+new,ip,in_port=ovs-p0, actions=ct(commit, nat(src=10.1.1.1)),ovs-p1 +table=1,ct_state=+trk+est,ip,in_port=ovs-p0, actions=ovs-p1 +table=1,ct_state=+trk+est,ip,in_port=ovs-p1, actions=ovs-p0 +]) + +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +dnl Check connectivity +NS_CHECK_EXEC([at_ns0], [ping -c 1 10.1.1.2 -M dont -s 4500 | FORMAT_PING], [0], [dnl +1 packets transmitted, 1 received, 0% packet loss, time 0ms +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([conntrack - resubmit to ct multiple times]) CHECK_CONNTRACK() @@ -4433,6 +4527,52 @@ tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src= OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([conntrack - all-zero IP SNAT]) +AT_SKIP_IF([test $HAVE_NC = no]) +CHECK_CONNTRACK() +CHECK_CONNTRACK_ZEROIP_SNAT() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") +NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2]) + +OVS_START_L7([at_ns1], [http]) + +AT_DATA([flows.txt], [dnl +table=0,priority=30,ct_state=-trk,ip,action=ct(table=0) 
+table=0,priority=20,ct_state=-rpl,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10) +table=0,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24,actions=resubmit(,10) +table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2),table=10) +table=0,priority=10,arp,action=normal +table=0,priority=1,action=drop +table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24 actions=ct(table=20,nat) +table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20) +table=20,priority=10,ip,nw_dst=10.1.1.1,action=1 +table=20,priority=10,ip,nw_dst=10.1.1.2,action=2 +]) +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +dnl - Test to make sure src nat is NOT done when not needed +NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 > nc-1.log]) +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [0], [dnl +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WAIT) +]) + +dnl - Test to make sure src nat is done when needed +NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 > nc-2.log]) +NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 > nc-3.log]) +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep "orig=.src=10\.1\.1\.1," | sed -e 's/port=30001/port=/g' -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=/g' | sort], [0], [dnl +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=),protoinfo=(state=TIME_WAIT) +tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=),protoinfo=(state=TIME_WAIT) +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + + AT_SETUP([conntrack - simple DNAT]) CHECK_CONNTRACK() CHECK_CONNTRACK_NAT() @@ -4488,6 +4628,41 @@ tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src= OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - DNAT with additional SNAT]) +CHECK_CONNTRACK() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") +NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2]) + +OVS_START_L7([at_ns1], [http]) + +AT_DATA([flows.txt], [dnl +table=0,priority=30,in_port=1,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2:80),table=1) +table=0,priority=20,in_port=2,ip,actions=ct(nat),1 +table=0,priority=10,arp,actions=NORMAL +table=0,priority=1,actions=drop +dnl Be sure all ct() actions but src nat are executed +table=1,ip,actions=ct(commit,nat(src=10.1.1.240),exec(set_field:0xac->ct_mark,set_field:0xac->ct_label),table=2) +table=2,in_port=1,ip,ct_mark=0xac,ct_label=0xac,actions=2 +]) +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +NS_CHECK_EXEC([at_ns0], [wget http://172.1.1.2:8080 -t 5 -T 1 --retry-connrefused -v -o wget0.log]) + +dnl - make sure only dst nat has been performed +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.240)], [0], [dnl +]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.1)], [0], [dnl +tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=,dport=),reply=(src=10.1.1.2,dst=10.1.1.1,sport=,dport=),mark=172,labels=0xac,protoinfo=(state=) +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([conntrack - more complex DNAT]) CHECK_CONNTRACK() CHECK_CONNTRACK_NAT() diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index 34f82cee3d..9f0d38dfb3 100644 --- a/tests/system-userspace-macros.at +++ 
b/tests/system-userspace-macros.at @@ -96,6 +96,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP]) # m4_define([CHECK_CONNTRACK_NAT]) +# CHECK_CONNTRACK_ZEROIP_SNAT() +# +# Perform requirements checks for running conntrack all-zero IP SNAT tests. +# The userspace datapath does not support all-zero IP SNAT. +# +m4_define([CHECK_CONNTRACK_ZEROIP_SNAT], +[ + AT_SKIP_IF([:]) +]) + # CHECK_CONNTRACK_TIMEOUT() # # Perform requirements checks for running conntrack customized timeout tests. diff --git a/tests/test-jsonrpc.py b/tests/test-jsonrpc.py index 3eabcd78d5..1df5afa221 100644 --- a/tests/test-jsonrpc.py +++ b/tests/test-jsonrpc.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function - import argparse import errno import os diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c index 15433e3472..a886f971e7 100644 --- a/tests/test-ovsdb.c +++ b/tests/test-ovsdb.c @@ -1861,6 +1861,23 @@ print_and_log(const char *format, ...) free(message); } +static char * +format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents) +{ + const char *change_str = + !ovsdb_idl_track_is_set(row->table) + ? "" + : ovsdb_idl_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0 + ? "inserted row: " + : ovsdb_idl_row_get_seqno(row, OVSDB_IDL_CHANGE_DELETE) > 0 + ? "deleted row: " + : ""; + + return xasprintf("%03d: table %s: %s%s uuid=" UUID_FMT, + step, row->table->class_->name, change_str, contents, + UUID_ARGS(&row->uuid)); +} + static void print_idl_row_updated_simple(const struct idltest_simple *s, int step) { @@ -1871,7 +1888,9 @@ print_idl_row_updated_simple(const struct idltest_simple *s, int step) } } if (updates.length) { - print_and_log("%03d: updated columns:%s", step, ds_cstr(&updates)); + print_and_log("%03d: table %s: updated columns:%s", + step, s->header_.table->class_->name, + ds_cstr(&updates)); ds_destroy(&updates); } } @@ -1886,7 +1905,9 @@ print_idl_row_updated_link1(const struct idltest_link1 *l1, int step) } } if (updates.length) { - print_and_log("%03d: updated columns:%s", step, ds_cstr(&updates)); + print_and_log("%03d: table %s: updated columns:%s", + step, l1->header_.table->class_->name, + ds_cstr(&updates)); ds_destroy(&updates); } } @@ -1901,7 +1922,43 @@ print_idl_row_updated_link2(const struct idltest_link2 *l2, int step) } } if (updates.length) { - print_and_log("%03d: updated columns:%s", step, ds_cstr(&updates)); + print_and_log("%03d: table %s: updated columns:%s", + step, l2->header_.table->class_->name, + ds_cstr(&updates)); + ds_destroy(&updates); + } +} + +static void +print_idl_row_updated_simple3(const struct idltest_simple3 *s3, int step) +{ + struct ds updates = DS_EMPTY_INITIALIZER; + for (size_t i = 0; i < IDLTEST_SIMPLE3_N_COLUMNS; i++) { + if (idltest_simple3_is_updated(s3, i)) { + ds_put_format(&updates, " %s", idltest_simple3_columns[i].name); + } + } + if (updates.length) { + print_and_log("%03d: table %s: updated columns:%s", + step, s3->header_.table->class_->name, + ds_cstr(&updates)); + ds_destroy(&updates); + } +} + +static void +print_idl_row_updated_simple4(const struct idltest_simple4 *s4, int step) +{ + struct ds updates = DS_EMPTY_INITIALIZER; + for (size_t i = 0; i < IDLTEST_SIMPLE4_N_COLUMNS; i++) { + if (idltest_simple4_is_updated(s4, i)) { + ds_put_format(&updates, " %s", idltest_simple4_columns[i].name); + } + } + if (updates.length) { + print_and_log("%03d: table %s: updated columns:%s", + step, s4->header_.table->class_->name, + 
ds_cstr(&updates)); ds_destroy(&updates); } } @@ -1916,7 +1973,9 @@ print_idl_row_updated_simple6(const struct idltest_simple6 *s6, int step) } } if (updates.length) { - print_and_log("%03d: updated columns:%s", step, ds_cstr(&updates)); + print_and_log("%03d: table %s: updated columns:%s", + step, s6->header_.table->class_->name, + ds_cstr(&updates)); ds_destroy(&updates); } } @@ -1931,7 +1990,9 @@ print_idl_row_updated_singleton(const struct idltest_singleton *sng, int step) } } if (updates.length) { - print_and_log("%03d: updated columns:%s", step, ds_cstr(&updates)); + print_and_log("%03d: table %s: updated columns:%s", + step, sng->header_.table->class_->name, + ds_cstr(&updates)); ds_destroy(&updates); } } @@ -1940,8 +2001,8 @@ static void print_idl_row_simple(const struct idltest_simple *s, int step) { struct ds msg = DS_EMPTY_INITIALIZER; - ds_put_format(&msg, "%03d: i=%"PRId64" r=%g b=%s s=%s u="UUID_FMT" ia=[", - step, s->i, s->r, s->b ? "true" : "false", + ds_put_format(&msg, "i=%"PRId64" r=%g b=%s s=%s u="UUID_FMT" ia=[", + s->i, s->r, s->b ? "true" : "false", s->s, UUID_ARGS(&s->u)); for (size_t i = 0; i < s->n_ia; i++) { ds_put_format(&msg, "%s%"PRId64, i ? " " : "", s->ia[i]); @@ -1962,9 +2023,12 @@ print_idl_row_simple(const struct idltest_simple *s, int step) for (size_t i = 0; i < s->n_ua; i++) { ds_put_format(&msg, "%s"UUID_FMT, i ? " " : "", UUID_ARGS(&s->ua[i])); } - ds_put_format(&msg, "] uuid="UUID_FMT, UUID_ARGS(&s->header_.uuid)); - print_and_log("%s", ds_cstr(&msg)); + ds_put_cstr(&msg, "]"); + + char *row_msg = format_idl_row(&s->header_, step, ds_cstr(&msg)); + print_and_log("%s", row_msg); ds_destroy(&msg); + free(row_msg); print_idl_row_updated_simple(s, step); } @@ -1973,7 +2037,7 @@ static void print_idl_row_link1(const struct idltest_link1 *l1, int step) { struct ds msg = DS_EMPTY_INITIALIZER; - ds_put_format(&msg, "%03d: i=%"PRId64" k=", step, l1->i); + ds_put_format(&msg, "i=%"PRId64" k=", l1->i); if (l1->k) { ds_put_format(&msg, "%"PRId64, l1->k->i); } @@ -1988,9 +2052,11 @@ print_idl_row_link1(const struct idltest_link1 *l1, int step) if (l1->l2) { ds_put_format(&msg, "%"PRId64, l1->l2->i); } - ds_put_format(&msg, " uuid="UUID_FMT, UUID_ARGS(&l1->header_.uuid)); - print_and_log("%s", ds_cstr(&msg)); + + char *row_msg = format_idl_row(&l1->header_, step, ds_cstr(&msg)); + print_and_log("%s", row_msg); ds_destroy(&msg); + free(row_msg); print_idl_row_updated_link1(l1, step); } @@ -1999,30 +2065,77 @@ static void print_idl_row_link2(const struct idltest_link2 *l2, int step) { struct ds msg = DS_EMPTY_INITIALIZER; - ds_put_format(&msg, "%03d: i=%"PRId64" l1=", step, l2->i); + ds_put_format(&msg, "i=%"PRId64" l1=", l2->i); if (l2->l1) { ds_put_format(&msg, "%"PRId64, l2->l1->i); } - ds_put_format(&msg, " uuid="UUID_FMT, UUID_ARGS(&l2->header_.uuid)); - print_and_log("%s", ds_cstr(&msg)); + + char *row_msg = format_idl_row(&l2->header_, step, ds_cstr(&msg)); + print_and_log("%s", row_msg); ds_destroy(&msg); + free(row_msg); print_idl_row_updated_link2(l2, step); } +static void +print_idl_row_simple3(const struct idltest_simple3 *s3, int step) +{ + struct ds msg = DS_EMPTY_INITIALIZER; + size_t i; + + ds_put_format(&msg, "name=%s uset=[", s3->name); + for (i = 0; i < s3->n_uset; i++) { + ds_put_format(&msg, UUID_FMT"%s", + UUID_ARGS(&s3->uset[i]), + i < s3->n_uset - 1 ? "," : ""); + } + ds_put_cstr(&msg, "] uref=["); + for (i = 0; i < s3->n_uref; i++) { + ds_put_format(&msg, UUID_FMT"%s", + UUID_ARGS(&s3->uref[i]->header_.uuid), + i < s3->n_uref -1 ? 
"," : ""); + } + ds_put_cstr(&msg, "]"); + + char *row_msg = format_idl_row(&s3->header_, step, ds_cstr(&msg)); + print_and_log("%s", row_msg); + ds_destroy(&msg); + free(row_msg); + + print_idl_row_updated_simple3(s3, step); +} + +static void +print_idl_row_simple4(const struct idltest_simple4 *s4, int step) +{ + struct ds msg = DS_EMPTY_INITIALIZER; + ds_put_format(&msg, "name=%s", s4->name); + + char *row_msg = format_idl_row(&s4->header_, step, ds_cstr(&msg)); + print_and_log("%s", row_msg); + ds_destroy(&msg); + free(row_msg); + + print_idl_row_updated_simple4(s4, step); +} + static void print_idl_row_simple6(const struct idltest_simple6 *s6, int step) { struct ds msg = DS_EMPTY_INITIALIZER; - ds_put_format(&msg, "%03d: name=%s ", step, s6->name); + ds_put_format(&msg, "name=%s ", s6->name); ds_put_cstr(&msg, "weak_ref=["); for (size_t i = 0; i < s6->n_weak_ref; i++) { ds_put_format(&msg, "%s"UUID_FMT, i ? " " : "", UUID_ARGS(&s6->weak_ref[i]->header_.uuid)); } - ds_put_format(&msg, "] uuid="UUID_FMT, UUID_ARGS(&s6->header_.uuid)); - print_and_log("%s", ds_cstr(&msg)); + ds_put_cstr(&msg, "]"); + + char *row_msg = format_idl_row(&s6->header_, step, ds_cstr(&msg)); + print_and_log("%s", row_msg); ds_destroy(&msg); + free(row_msg); print_idl_row_updated_simple6(s6, step); } @@ -2030,14 +2143,23 @@ print_idl_row_simple6(const struct idltest_simple6 *s6, int step) static void print_idl_row_singleton(const struct idltest_singleton *sng, int step) { - print_and_log("%03d: name=%s uuid="UUID_FMT, step, sng->name, - UUID_ARGS(&sng->header_.uuid)); + struct ds msg = DS_EMPTY_INITIALIZER; + ds_put_format(&msg, "name=%s", sng->name); + + char *row_msg = format_idl_row(&sng->header_, step, ds_cstr(&msg)); + print_and_log("%s", row_msg); + ds_destroy(&msg); + free(row_msg); + print_idl_row_updated_singleton(sng, step); } static void print_idl(struct ovsdb_idl *idl, int step) { + const struct idltest_simple3 *s3; + const struct idltest_simple4 *s4; + const struct idltest_simple6 *s6; const struct idltest_simple *s; const struct idltest_link1 *l1; const struct idltest_link2 *l2; @@ -2056,6 +2178,18 @@ print_idl(struct ovsdb_idl *idl, int step) print_idl_row_link2(l2, step); n++; } + IDLTEST_SIMPLE3_FOR_EACH (s3, idl) { + print_idl_row_simple3(s3, step); + n++; + } + IDLTEST_SIMPLE4_FOR_EACH (s4, idl) { + print_idl_row_simple4(s4, step); + n++; + } + IDLTEST_SIMPLE6_FOR_EACH (s6, idl) { + print_idl_row_simple6(s6, step); + n++; + } IDLTEST_SINGLETON_FOR_EACH (sng, idl) { print_idl_row_singleton(sng, step); n++; @@ -2068,6 +2202,8 @@ print_idl(struct ovsdb_idl *idl, int step) static void print_idl_track(struct ovsdb_idl *idl, int step) { + const struct idltest_simple3 *s3; + const struct idltest_simple4 *s4; const struct idltest_simple6 *s6; const struct idltest_simple *s; const struct idltest_link1 *l1; @@ -2076,51 +2212,26 @@ print_idl_track(struct ovsdb_idl *idl, int step) IDLTEST_SIMPLE_FOR_EACH_TRACKED (s, idl) { print_idl_row_simple(s, step); - if (idltest_simple_is_deleted(s)) { - print_and_log("%03d: deleted row: uuid="UUID_FMT, step, - UUID_ARGS(&s->header_.uuid)); - } else if (idltest_simple_is_new(s)) { - print_and_log("%03d: inserted row: uuid="UUID_FMT, step, - UUID_ARGS(&s->header_.uuid)); - } n++; } IDLTEST_LINK1_FOR_EACH_TRACKED (l1, idl) { - if (idltest_link1_is_deleted(l1)) { - print_and_log("%03d: deleted row: uuid="UUID_FMT, step, - UUID_ARGS(&l1->header_.uuid)); - } else { - print_idl_row_link1(l1, step); - if (idltest_link1_is_new(l1)) { - print_and_log("%03d: inserted row: 
uuid="UUID_FMT, step, - UUID_ARGS(&l1->header_.uuid)); - } - } + print_idl_row_link1(l1, step); n++; } IDLTEST_LINK2_FOR_EACH_TRACKED (l2, idl) { - if (idltest_link2_is_deleted(l2)) { - print_and_log("%03d: deleted row: uuid="UUID_FMT, step, - UUID_ARGS(&l2->header_.uuid)); - } else { - print_idl_row_link2(l2, step); - if (idltest_link2_is_new(l2)) { - print_and_log("%03d: inserted row: uuid="UUID_FMT, step, - UUID_ARGS(&l2->header_.uuid)); - } - - } + print_idl_row_link2(l2, step); + n++; + } + IDLTEST_SIMPLE3_FOR_EACH_TRACKED (s3, idl) { + print_idl_row_simple3(s3, step); + n++; + } + IDLTEST_SIMPLE4_FOR_EACH_TRACKED (s4, idl) { + print_idl_row_simple4(s4, step); n++; } IDLTEST_SIMPLE6_FOR_EACH_TRACKED (s6, idl) { print_idl_row_simple6(s6, step); - if (idltest_simple6_is_deleted(s6)) { - print_and_log("%03d: deleted row: uuid="UUID_FMT, step, - UUID_ARGS(&s6->header_.uuid)); - } else if (idltest_simple6_is_new(s6)) { - print_and_log("%03d: inserted row: uuid="UUID_FMT, step, - UUID_ARGS(&s6->header_.uuid)); - } n++; } @@ -2349,6 +2460,10 @@ find_table_class(const char *name) return &idltest_table_link1; } else if (!strcmp(name, "link2")) { return &idltest_table_link2; + } else if (!strcmp(name, "simple3")) { + return &idltest_table_simple3; + } else if (!strcmp(name, "simple4")) { + return &idltest_table_simple4; } else if (!strcmp(name, "simple6")) { return &idltest_table_simple6; } @@ -2702,27 +2817,6 @@ do_idl_partial_update_map_column(struct ovs_cmdl_context *ctx) printf("%03d: End test\n", step); } -static void -print_idl_row_simple3(const struct idltest_simple3 *s, int step) -{ - size_t i; - const struct ovsdb_datum *uset; - const struct ovsdb_datum *uref; - - uset = idltest_simple3_get_uset(s, OVSDB_TYPE_UUID); - printf("%03d: name=%s uset=[", - step, s->name); - for (i = 0; i < uset->n; i++) { - printf("["UUID_FMT"]%s", UUID_ARGS(&(uset->keys[i].uuid)), i < uset->n-1? ",": ""); - } - uref = idltest_simple3_get_uref(s, OVSDB_TYPE_UUID); - printf("] uref=["); - for (i = 0; i < uref->n; i++) { - printf("["UUID_FMT"]%s", UUID_ARGS(&(uref->keys[i].uuid)), i < uref->n-1? 
",": ""); - } - printf("]\n"); -} - static void dump_simple3(struct ovsdb_idl *idl, const struct idltest_simple3 *myRow, diff --git a/tests/test-ovsdb.py b/tests/test-ovsdb.py index a196802743..72a319123e 100644 --- a/tests/test-ovsdb.py +++ b/tests/test-ovsdb.py @@ -162,6 +162,10 @@ def get_simple_printable_row_string(row, columns): if isinstance(value, dict): value = sorted((row_to_uuid(k), row_to_uuid(v)) for k, v in value.items()) + if isinstance(value, (list, tuple)): + value = sorted((row_to_uuid(v) for v in value)) + elif isinstance(value, list): + value = sorted(row_to_uuid(v) for v in value) s += "%s=%s " % (column, value) s = s.strip() s = re.sub('""|,|u?\'', "", s) @@ -172,9 +176,10 @@ def get_simple_printable_row_string(row, columns): return s -def get_simple_table_printable_row(row): +def get_simple_table_printable_row(row, *additional_columns): simple_columns = ["i", "r", "b", "s", "u", "ia", - "ra", "ba", "sa", "ua", "uuid"] + "ra", "ba", "sa", "ua"] + simple_columns.extend(additional_columns) return get_simple_printable_row_string(row, simple_columns) @@ -184,81 +189,118 @@ def get_simple2_table_printable_row(row): def get_simple3_table_printable_row(row): - simple3_columns = ["name", "uset"] + simple3_columns = ["name", "uset", "uref"] return get_simple_printable_row_string(row, simple3_columns) +def get_simple4_table_printable_row(row): + simple4_columns = ["name"] + return get_simple_printable_row_string(row, simple4_columns) + + +def get_simple5_table_printable_row(row): + simple5_columns = ["name", "irefmap"] + return get_simple_printable_row_string(row, simple5_columns) + + +def get_simple6_table_printable_row(row): + simple6_columns = ["name", "weak_ref"] + return get_simple_printable_row_string(row, simple6_columns) + + +def get_link1_table_printable_row(row): + s = ["i=%s k=" % row.i] + if hasattr(row, "k") and row.k: + s.append(str(row.k.i)) + if hasattr(row, "ka"): + s.append(" ka=[") + s.append(' '.join(sorted(str(ka.i) for ka in row.ka))) + s.append("] l2=") + if hasattr(row, "l2") and row.l2: + s.append(str(row.l2[0].i)) + return ''.join(s) + + +def get_link2_table_printable_row(row): + s = "i=%s l1=" % row.i + if hasattr(row, "l1") and row.l1: + s += str(row.l1[0].i) + return s + + +def get_singleton_table_printable_row(row): + return "name=%s" % row.name + + +def print_row(table, row, step, contents): + s = "%03d: table %s: %s " % (step, table, contents) + s += get_simple_printable_row_string(row, ["uuid"]) + print(s) + + def print_idl(idl, step): n = 0 if "simple" in idl.tables: simple = idl.tables["simple"].rows for row in simple.values(): - s = "%03d: " % step - s += get_simple_table_printable_row(row) - print(s) + print_row("simple", row, step, + get_simple_table_printable_row(row)) n += 1 if "simple2" in idl.tables: simple2 = idl.tables["simple2"].rows for row in simple2.values(): - s = "%03d: " % step - s += get_simple2_table_printable_row(row) - print(s) + print_row("simple2", row, step, + get_simple2_table_printable_row(row)) n += 1 if "simple3" in idl.tables: simple3 = idl.tables["simple3"].rows for row in simple3.values(): - s = "%03d: " % step - s += get_simple3_table_printable_row(row) - print(s) + print_row("simple3", row, step, + get_simple3_table_printable_row(row)) + n += 1 + + if "simple4" in idl.tables: + simple4 = idl.tables["simple4"].rows + for row in simple4.values(): + print_row("simple4", row, step, + get_simple4_table_printable_row(row)) n += 1 if "simple5" in idl.tables: simple5 = idl.tables["simple5"].rows for row in 
simple5.values(): - s = "%03d: " % step - s += get_simple_printable_row_string(row, ["name", "irefmap"]) - print(s) + print_row("simple5", row, step, + get_simple5_table_printable_row(row)) + n += 1 + + if "simple6" in idl.tables: + simple6 = idl.tables["simple6"].rows + for row in simple6.values(): + print_row("simple6", row, step, + get_simple6_table_printable_row(row)) n += 1 if "link1" in idl.tables: l1 = idl.tables["link1"].rows for row in l1.values(): - s = ["%03d: i=%s k=" % (step, row.i)] - if hasattr(row, "k") and row.k: - s.append(str(row.k.i)) - if hasattr(row, "ka"): - s.append(" ka=[") - s.append(' '.join(sorted(str(ka.i) for ka in row.ka))) - s.append("] l2=") - if hasattr(row, "l2") and row.l2: - s.append(str(row.l2[0].i)) - if hasattr(row, "uuid"): - s.append(" uuid=%s" % row.uuid) - print(''.join(s)) + print_row("link1", row, step, + get_link1_table_printable_row(row)) n += 1 if "link2" in idl.tables: l2 = idl.tables["link2"].rows for row in l2.values(): - s = ["%03d:" % step] - s.append(" i=%s l1=" % row.i) - if hasattr(row, "l1") and row.l1: - s.append(str(row.l1[0].i)) - if hasattr(row, "uuid"): - s.append(" uuid=%s" % row.uuid) - print(''.join(s)) + print_row("link2", row, step, + get_link2_table_printable_row(row)) n += 1 if "singleton" in idl.tables: sng = idl.tables["singleton"].rows for row in sng.values(): - s = ["%03d:" % step] - s.append(" name=%s" % row.name) - if hasattr(row, "uuid"): - s.append(" uuid=%s" % row.uuid) - print(''.join(s)) + print_row("singleton", row, step, + get_singleton_table_printable_row(row)) n += 1 if not n: @@ -637,7 +679,8 @@ def do_idl(schema_file, remote, *commands): def mock_notify(event, row, updates=None): output = "%03d: " % step output += "event:" + str(event) + ", row={" - output += get_simple_table_printable_row(row) + "}, updates=" + output += get_simple_table_printable_row(row, 'l2', 'l1') + "}, " + output += get_simple_printable_row_string(row, ["uuid"]) + ", updates=" if updates is None: output += "None" else: diff --git a/tests/test-reconnect.py b/tests/test-reconnect.py index f0ad9f9793..cea48eb527 100644 --- a/tests/test-reconnect.py +++ b/tests/test-reconnect.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function - import errno import sys diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at index 48c5de9d19..6364653975 100644 --- a/tests/tunnel-push-pop.at +++ b/tests/tunnel-push-pop.at @@ -595,6 +595,64 @@ OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep 50540000000a5054000000091235 | wc OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([tunnel_push_pop - packet_out debug_slow]) + +OVS_VSWITCHD_START( + [add-port br0 p0 dnl + -- set Interface p0 type=dummy ofport_request=1 dnl + other-config:hwaddr=aa:55:aa:55:00:00]) +AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg]) +AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy]) +AT_CHECK([ovs-vsctl add-port int-br t2 dnl + -- set Interface t2 type=geneve options:remote_ip=1.1.2.92 dnl + options:key=123 ofport_request=2]) + +dnl First setup dummy interface IP address, then add the route +dnl so that tnl-port table can get valid IP address for the device. +AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 1.1.2.88/24], [0], [OK +]) +AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 br0], [0], [OK +]) +AT_CHECK([ovs-ofctl add-flow br0 action=normal]) + +dnl This ARP reply from p0 has two effects: +dnl 1. 
The ARP cache will learn that 1.1.2.92 is at f8:bc:12:44:34:b6. +dnl 2. The br0 mac learning will learn that f8:bc:12:44:34:b6 is on p0. +AT_CHECK([ + ovs-appctl netdev-dummy/receive p0 dnl + 'recirc_id(0),in_port(2),dnl + eth(src=f8:bc:12:44:34:b6,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),dnl + arp(sip=1.1.2.92,tip=1.1.2.88,op=2,sha=f8:bc:12:44:34:b6,tha=00:00:00:00:00:00)' +]) + +AT_CHECK([ovs-vsctl -- set Interface p0 options:tx_pcap=p0.pcap]) + +packet=50540000000a505400000009123 +dnl Source port is based on a packet hash, so it may differ depending on the +dnl compiler flags and CPU type. Masked with '....'. +encap=f8bc124434b6aa55aa5500000800450000320000400040113406010102580101025c....17c1001e00000000655800007b00 + +dnl Output to tunnel from a int-br internal port. +dnl Checking that the packet arrived and it was correctly encapsulated. +AT_CHECK([ovs-ofctl add-flow int-br "in_port=LOCAL,actions=debug_slow,output:2"]) +AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"]) +OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}4" | wc -l` -ge 1]) +dnl Sending again to exercise the non-miss upcall path. +AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"]) +OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}4" | wc -l` -ge 2]) + +dnl Output to tunnel from the controller. +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out int-br CONTROLLER "debug_slow,output:2" "${packet}5"]) +OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}5" | wc -l` -ge 1]) + +dnl Datapath actions should not have tunnel push action. +AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q tnl_push], [1]) +dnl There should be slow_path action instead. +AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q 'slow_path(action)'], [0]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([tunnel_push_pop - underlay bridge match]) OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00]) diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py index bc6bfae15a..ac14da29b1 100755 --- a/utilities/checkpatch.py +++ b/utilities/checkpatch.py @@ -13,7 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function import email import getopt diff --git a/utilities/gdb/ovs_gdb.py b/utilities/gdb/ovs_gdb.py index 1111f3100d..0b2ecb81be 100644 --- a/utilities/gdb/ovs_gdb.py +++ b/utilities/gdb/ovs_gdb.py @@ -55,7 +55,6 @@ # ... # ... # -from __future__ import print_function import gdb import sys import uuid diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in index d71c34e691..4156da20ef 100644 --- a/utilities/ovs-ctl.in +++ b/utilities/ovs-ctl.in @@ -226,7 +226,9 @@ start_forwarding () { if test X"$OVS_VSWITCHD" = Xyes; then do_start_forwarding || return 1 fi - set_hostname & + if test X"$RECORD_HOSTNAME" = Xyes; then + set_hostname & + fi return 0 } @@ -317,6 +319,7 @@ set_defaults () { SYSTEM_ID= FULL_HOSTNAME=yes + RECORD_HOSTNAME=yes DELETE_BRIDGES=no DELETE_TRANSIENT_PORTS=no @@ -378,19 +381,24 @@ This program is intended to be invoked internally by Open vSwitch startup scripts. System administrators should not normally invoke it directly. 
Commands: - start start Open vSwitch daemons - stop stop Open vSwitch daemons - restart stop and start Open vSwitch daemons - status check whether Open vSwitch daemons are running - version print versions of Open vSwitch daemons - load-kmod insert modules if not already present - force-reload-kmod save OVS network device state, stop OVS, unload kernel - module, reload kernel module, start OVS, restore state - enable-protocol enable protocol specified in options with iptables - delete-transient-ports delete transient (other_config:transient=true) ports - start-ovs-ipsec start Open vSwitch ipsec daemon - stop-ovs-ipsec stop Open vSwitch ipsec daemon - help display this help message + start start Open vSwitch daemons + stop stop Open vSwitch daemons + restart stop and start Open vSwitch daemons + status check whether Open vSwitch daemons are running + version print versions of Open vSwitch daemons + load-kmod insert modules if not already present + force-reload-kmod save OVS network device state, stop OVS, unload + kernel module, reload kernel module, start OVS, + restore state + enable-protocol enable protocol specified in options with + iptables + delete-transient-ports delete transient (other_config:transient=true) + ports + start-ovs-ipsec start Open vSwitch ipsec daemon + stop-ovs-ipsec stop Open vSwitch ipsec daemon + record-hostname-if-not-set determine the system hostname and record it in + the Open vSwitch database if not already set + help display this help message One of the following options is required for "start", "restart" and "force-reload-kmod": --system-id=UUID set specific ID to uniquely identify this system @@ -411,6 +419,8 @@ Less important options for "start", "restart" and "force-reload-kmod": --ovsdb-server-priority=NICE set ovsdb-server's niceness (default: $OVSDB_SERVER_PRIORITY) --ovs-vswitchd-priority=NICE set ovs-vswitchd's niceness (default: $OVS_VSWITCHD_PRIORITY) --no-full-hostname set short hostname instead of full hostname + --no-record-hostname do not attempt to determine/record system + hostname as part of start command Debugging options for "start", "restart" and "force-reload-kmod": --ovsdb-server-wrapper=WRAPPER @@ -569,6 +579,9 @@ case $command in stop-ovs-ipsec) stop_ovs_ipsec ;; + record-hostname-if-not-set) + set_hostname + ;; help) usage ;; diff --git a/utilities/ovs-l3ping.in b/utilities/ovs-l3ping.in index 92d32acb3f..1ece06457c 100644 --- a/utilities/ovs-l3ping.in +++ b/utilities/ovs-l3ping.in @@ -19,7 +19,7 @@ achieved by tunneling the control connection inside the tunnel itself. 
""" import socket -import xmlrpclib +import xmlrpc.client import ovstest.args as args import ovstest.tests as tests @@ -64,13 +64,13 @@ if __name__ == '__main__': ps = get_packet_sizes(me, he, args.client[0]) tests.do_direct_tests(me, he, bandwidth, interval, ps) except KeyboardInterrupt: - print "Terminating" - except xmlrpclib.Fault: - print "Couldn't contact peer" + print("Terminating") + except xmlrpc.client.Fault: + print("Couldn't contact peer") except socket.error: - print "Couldn't contact peer" - except xmlrpclib.ProtocolError: - print "XMLRPC control channel was abruptly terminated" + print("Couldn't contact peer") + except xmlrpc.client.ProtocolError: + print("XMLRPC control channel was abruptly terminated") finally: if local_server is not None: local_server.terminate() diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 3601890f40..ede7f1e61a 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -4020,6 +4020,7 @@ ofctl_meter_mod__(const char *bridge, const char *str, int command) enum ofputil_protocol usable_protocols; enum ofp_version version; + memset(&mm, 0, sizeof mm); if (str) { char *error; error = parse_ofp_meter_mod_str(&mm, str, command, &usable_protocols); @@ -4030,7 +4031,6 @@ ofctl_meter_mod__(const char *bridge, const char *str, int command) usable_protocols = OFPUTIL_P_OF13_UP; mm.command = command; mm.meter.meter_id = OFPM13_ALL; - mm.meter.bands = NULL; } protocol = open_vconn_for_flow_mod(bridge, &vconn, usable_protocols); @@ -4050,6 +4050,7 @@ ofctl_meter_request__(const char *bridge, const char *str, enum ofputil_protocol protocol; enum ofp_version version; + memset(&mm, 0, sizeof mm); if (str) { char *error; error = parse_ofp_meter_mod_str(&mm, str, -1, &usable_protocols); @@ -4059,7 +4060,6 @@ ofctl_meter_request__(const char *bridge, const char *str, } else { usable_protocols = OFPUTIL_P_OF13_UP; mm.meter.meter_id = OFPM13_ALL; - mm.meter.bands = NULL; } protocol = open_vconn_for_flow_mod(bridge, &vconn, usable_protocols); @@ -5051,7 +5051,7 @@ static const struct ovs_cmdl_command all_commands[] = { { "add-group", "switch group", 1, 2, ofctl_add_group, OVS_RW }, { "add-groups", "switch file", - 1, 2, ofctl_add_groups, OVS_RW }, + 2, 2, ofctl_add_groups, OVS_RW }, { "mod-group", "switch group", 1, 2, ofctl_mod_group, OVS_RW }, { "del-groups", "switch [group]", diff --git a/utilities/ovs-parse-backtrace.in b/utilities/ovs-parse-backtrace.in index d5506769a8..f44f05cd1e 100755 --- a/utilities/ovs-parse-backtrace.in +++ b/utilities/ovs-parse-backtrace.in @@ -70,7 +70,7 @@ result. Expected usage is for ovs-appctl backtrace to be piped in.""") if os.path.exists(debug): binary = debug - print "Binary: %s\n" % binary + print("Binary: %s\n" % binary) stdin = sys.stdin.read() @@ -88,15 +88,15 @@ result. 
Expected usage is for ovs-appctl backtrace to be piped in.""") for lines, count in traces: longest = max(len(l) for l in lines) - print "Backtrace Count: %d" % count + print("Backtrace Count: %d" % count) for line in lines: match = re.search(r'\[(0x.*)]', line) if match: - print "%s %s" % (line.ljust(longest), - addr2line(binary, match.group(1))) + print("%s %s" % (line.ljust(longest), + addr2line(binary, match.group(1)))) else: - print line - print + print(line) + print() if __name__ == "__main__": diff --git a/utilities/ovs-pcap.in b/utilities/ovs-pcap.in index dddbee4dfb..6b5f63399e 100755 --- a/utilities/ovs-pcap.in +++ b/utilities/ovs-pcap.in @@ -14,8 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function - import binascii import getopt import struct @@ -79,7 +77,7 @@ if __name__ == "__main__": try: options, args = getopt.gnu_getopt(sys.argv[1:], 'hV', ['help', 'version']) - except getopt.GetoptException as geo: + except getopt.GetoptError as geo: sys.stderr.write("%s: %s\n" % (argv0, geo.msg)) sys.exit(1) diff --git a/utilities/ovs-vlan-test.in b/utilities/ovs-vlan-test.in index 154573a9b5..de3ae16862 100755 --- a/utilities/ovs-vlan-test.in +++ b/utilities/ovs-vlan-test.in @@ -14,9 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import BaseHTTPServer import getopt -import httplib +import http.client +import http.server import os import threading import time @@ -84,7 +84,7 @@ class UDPReceiver: try: sock.bind((self.vlan_ip, self.vlan_port)) - except socket.error, e: + except socket.error as e: print_safe('Failed to bind to %s:%d with error: %s' % (self.vlan_ip, self.vlan_port, e)) os._exit(1) #sys.exit only exits the current thread. @@ -95,7 +95,7 @@ class UDPReceiver: data, _ = sock.recvfrom(4096) except socket.timeout: continue - except socket.error, e: + except socket.error as e: print_safe('Failed to receive from %s:%d with error: %s' % (self.vlan_ip, self.vlan_port, e)) os._exit(1) @@ -180,7 +180,7 @@ class VlanServer: for _ in range(send_time * 2): try: send_packet(test_id, size, ip, port) - except socket.error, e: + except socket.error as e: self.set_result(test_id, 'Failure: ' + str(e)) return time.sleep(.5) @@ -194,15 +194,15 @@ class VlanServer: def run(self): self.udp_recv.start() try: - BaseHTTPServer.HTTPServer((self.server_ip, self.server_port), + http.server.HTTPServer((self.server_ip, self.server_port), VlanServerHandler).serve_forever() - except socket.error, e: + except socket.error as e: print_safe('Failed to start control server: %s' % e) self.udp_recv.stop() return 1 -class VlanServerHandler(BaseHTTPServer.BaseHTTPRequestHandler): +class VlanServerHandler(http.server.BaseHTTPRequestHandler): def do_GET(self): #Guarantee three arguments. 
@@ -244,7 +244,7 @@ class VlanClient: self.udp_recv = UDPReceiver(vlan_ip, vlan_port) def request(self, resource): - conn = httplib.HTTPConnection(self.server_ip_port) + conn = http.client.HTTPConnection(self.server_ip_port) conn.request('GET', resource) return conn @@ -256,7 +256,7 @@ try: conn = self.request('/start/recv') data = conn.getresponse().read() - except (socket.error, httplib.HTTPException), e: + except (socket.error, http.client.HTTPException) as e: error_msg(e) return False @@ -277,7 +277,7 @@ send_packet(test_id, size, ip, port) resp = self.request('/result/%d' % test_id).getresponse() data = resp.read() - except (socket.error, httplib.HTTPException), e: + except (socket.error, http.client.HTTPException) as e: error_msg(e) return False @@ -302,7 +302,7 @@ try: conn = self.request(resource) test_id = conn.getresponse().read() - except (socket.error, httplib.HTTPException), e: + except (socket.error, http.client.HTTPException) as e: error_msg(e) return False @@ -335,7 +335,7 @@ try: resp = self.request('/ping').getresponse() data = resp.read() - except (socket.error, httplib.HTTPException), e: + except (socket.error, http.client.HTTPException) as e: error_msg(e) return False @@ -383,7 +383,7 @@ def main(): try: options, args = getopt.gnu_getopt(sys.argv[1:], 'hVs', ['help', 'version', 'server']) - except getopt.GetoptError, geo: + except getopt.GetoptError as geo: print_safe('%s: %s\n' % (sys.argv[0], geo.msg)) return 1 diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 5ed7e82343..ea0630e112 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -3019,9 +3019,9 @@ ofp12_controller_role_to_str(enum ofp12_controller_role role) case OFPCR12_ROLE_EQUAL: return "other"; case OFPCR12_ROLE_PRIMARY: - return "primary"; + return "master"; case OFPCR12_ROLE_SECONDARY: - return "secondary"; + return "slave"; case OFPCR12_ROLE_NOCHANGE: default: return NULL; diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index a2ad84edef..d8ea287d5d 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -4660,7 +4660,8 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ packets per second the CIR would be set to 46000000. This value can be broken into '1,000,000 x 46', where 1,000,000 is the policing rate for the number of packets per second and 46 represents the size - of the packet data for a 64 byte ip packet. + of the packet data for a 64-byte IP packet, not counting the 14-byte + Ethernet header and 4-byte FCS. The Committed Burst Size (CBS) is measured in bytes and represents a @@ -4681,7 +4682,8 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ packets per second the EIR would be set to 46000000. This value can be broken into '1,000,000 x 46', where 1,000,000 is the policing rate for the number of packets per second and 46 represents the size - of the packet data for a 64 byte ip packet. + of the packet data for a 64-byte IP packet, not counting the 14-byte + Ethernet header and 4-byte FCS. The Excess Burst Size (EBS) is measured in bytes and represents a @@ -6124,6 +6126,15 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ True if the datapath supports OVS_ACTION_ATTR_DROP. If false, explicit drop action will not be sent to the datapath. + + True if the datapath supports all-zero SNAT. This is a special case + where the source IP address is configured as all zeros, i.e., + nat(src=0.0.0.0). 
In this case, when a source port + collision is detected during the commit, the source port will be + translated to an ephemeral port. If there is no collision, no SNAT + is performed. +
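
For reference, the two vswitch.xml passages above can be made concrete with a couple of small sketches. First, the CIR/EIR arithmetic: a minimal illustration in Python, using only the numbers named in the text above (64-byte frame, 14-byte Ethernet header, 4-byte FCS); this is not OVS code::

    ETH_HEADER = 14  # bytes, per the vswitch.xml text above
    FCS = 4          # bytes

    def pps_to_byte_rate(pps, frame_size=64):
        # CIR/EIR count IP packet data only: the frame size minus the
        # Ethernet header and the FCS.
        return pps * (frame_size - ETH_HEADER - FCS)

    # 1,000,000 packets per second of 64-byte packets -> '1,000,000 x 46'.
    assert pps_to_byte_rate(1000000) == 46000000

Second, a toy model of the all-zero SNAT commit behaviour described above. It assumes a connection is a plain 5-tuple and picks an ephemeral port range purely for illustration; the real logic lives in the datapath conntrack code, not here::

    def commit_zero_snat(conn, in_use):
        """conn is a (src_ip, src_port, dst_ip, dst_port, proto) tuple."""
        if conn not in in_use:
            in_use.add(conn)  # No collision: no SNAT is performed.
            return conn
        # Collision: translate only the source port, never the address.
        src_ip, _, dst_ip, dst_port, proto = conn
        for port in range(32768, 61000):  # assumed ephemeral range
            cand = (src_ip, port, dst_ip, dst_port, proto)
            if cand not in in_use:
                in_use.add(cand)
                return cand
        raise RuntimeError("no free ephemeral source port")

Committing the same 5-tuple twice against an empty set returns it unchanged the first time and with a rewritten source port the second time, which matches the behaviour that the documentation text and the CHECK_CONNTRACK_ZEROIP_SNAT macro above describe.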