diff --git a/SOURCES/arm64-armv8a-linuxapp-gcc-config b/SOURCES/arm64-armv8a-linuxapp-gcc-config new file mode 100644 index 0000000..175619a --- /dev/null +++ b/SOURCES/arm64-armv8a-linuxapp-gcc-config @@ -0,0 +1,539 @@ +# -*- cfg-sha: 605773f9defc66f8bb966065cca04e8a2384a95d97e738b7123db77319820df3 +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2015 Cavium, Inc +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Cavium, Inc +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2016 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2017 Intel Corporation +# RTE_EXEC_ENV values are the directories in mk/exec-env/ +CONFIG_RTE_EXEC_ENV="linuxapp" +# RTE_ARCH values are architecture we compile for. directories in mk/arch/ +CONFIG_RTE_ARCH="arm64" +# machine can define specific variables or action for a specific board +# RTE_MACHINE values are architecture we compile for. directories in mk/machine/ +CONFIG_RTE_MACHINE="armv8a" +# The compiler we use. +# RTE_TOOLCHAIN values are architecture we compile for. directories in mk/toolchain/ +CONFIG_RTE_TOOLCHAIN="gcc" +# Use intrinsics or assembly code for key routines +CONFIG_RTE_FORCE_INTRINSICS=y +# Machine forces strict alignment constraints. +CONFIG_RTE_ARCH_STRICT_ALIGN=n +# Compile to share library +CONFIG_RTE_BUILD_SHARED_LIB=n +# Use newest code breaking previous ABI +CONFIG_RTE_NEXT_ABI=n +# Major ABI to overwrite library specific LIBABIVER +CONFIG_RTE_MAJOR_ABI= +# Machine's cache line size +CONFIG_RTE_CACHE_LINE_SIZE=128 +# Memory model +CONFIG_RTE_USE_C11_MEM_MODEL=y +# Compile Environment Abstraction Layer +CONFIG_RTE_LIBRTE_EAL=y +CONFIG_RTE_MAX_LCORE=256 +CONFIG_RTE_MAX_NUMA_NODES=8 +CONFIG_RTE_MAX_HEAPS=32 +CONFIG_RTE_MAX_MEMSEG_LISTS=64 +# each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages +# or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is smaller +CONFIG_RTE_MAX_MEMSEG_PER_LIST=8192 +CONFIG_RTE_MAX_MEM_MB_PER_LIST=32768 +# a "type" is a combination of page size and NUMA node. total number of memseg +# lists per type will be limited to either RTE_MAX_MEMSEG_PER_TYPE pages (split +# over multiple lists of RTE_MAX_MEMSEG_PER_LIST pages), or +# RTE_MAX_MEM_MB_PER_TYPE megabytes of memory (split over multiple lists of +# RTE_MAX_MEM_MB_PER_LIST), whichever is smaller +CONFIG_RTE_MAX_MEMSEG_PER_TYPE=32768 +CONFIG_RTE_MAX_MEM_MB_PER_TYPE=131072 +# global maximum usable amount of VA, in megabytes +CONFIG_RTE_MAX_MEM_MB=524288 +CONFIG_RTE_MAX_MEMZONE=2560 +CONFIG_RTE_MAX_TAILQ=32 +CONFIG_RTE_ENABLE_ASSERT=n +CONFIG_RTE_LOG_DP_LEVEL=RTE_LOG_INFO +CONFIG_RTE_LOG_HISTORY=256 +CONFIG_RTE_BACKTRACE=y +CONFIG_RTE_LIBEAL_USE_HPET=n +CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n +CONFIG_RTE_EAL_IGB_UIO=n +CONFIG_RTE_EAL_VFIO=y +CONFIG_RTE_MAX_VFIO_GROUPS=64 +CONFIG_RTE_MAX_VFIO_CONTAINERS=64 +CONFIG_RTE_MALLOC_DEBUG=n +CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=y +CONFIG_RTE_USE_LIBBSD=n +# Recognize/ignore architecture we compile for. AVX/AVX512 CPU flags for performance/power testing. +# AVX512 is marked as experimental for now, will enable it after enough +# field test and possible optimization. +CONFIG_RTE_ENABLE_AVX=y +CONFIG_RTE_ENABLE_AVX512=n +# Default driver path (or "" to disable) +CONFIG_RTE_EAL_PMD_PATH="" +# Compile Environment Abstraction Layer to support Vmware TSC map +CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=y +# Compile architecture we compile for. PCI library +CONFIG_RTE_LIBRTE_PCI=y +# Compile architecture we compile for. 
argument parser library +CONFIG_RTE_LIBRTE_KVARGS=y +# Compile generic ethernet library +CONFIG_RTE_LIBRTE_ETHER=y +CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n +CONFIG_RTE_MAX_ETHPORTS=128 +CONFIG_RTE_MAX_QUEUES_PER_PORT=1024 +CONFIG_RTE_LIBRTE_IEEE1588=n +CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 +CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y +CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE=n +# Turn off Tx preparation stage +# Warning: rte_eth_tx_prepare() can be safely disabled only if using a +# driver which do not implement any Tx preparation. +CONFIG_RTE_ETHDEV_TX_PREPARE_NOOP=n +# Common libraries, before Bus/PMDs +CONFIG_RTE_LIBRTE_COMMON_DPAAX=n +# Compile architecture we compile for. Intel FPGA bus +CONFIG_RTE_LIBRTE_IFPGA_BUS=n +# Compile PCI bus driver +CONFIG_RTE_LIBRTE_PCI_BUS=y +# Compile architecture we compile for. vdev bus +CONFIG_RTE_LIBRTE_VDEV_BUS=y +# Compile ARK PMD +CONFIG_RTE_LIBRTE_ARK_PMD=n +CONFIG_RTE_LIBRTE_ARK_PAD_TX=y +CONFIG_RTE_LIBRTE_ARK_DEBUG_RX=n +CONFIG_RTE_LIBRTE_ARK_DEBUG_TX=n +CONFIG_RTE_LIBRTE_ARK_DEBUG_STATS=n +CONFIG_RTE_LIBRTE_ARK_DEBUG_TRACE=n +# Compile Aquantia Atlantic PMD driver +CONFIG_RTE_LIBRTE_ATLANTIC_PMD=n +# Compile AMD PMD +CONFIG_RTE_LIBRTE_AXGBE_PMD=n +CONFIG_RTE_LIBRTE_AXGBE_PMD_DEBUG=n +# Compile burst-oriented Broadcom PMD driver +CONFIG_RTE_LIBRTE_BNX2X_PMD=n +CONFIG_RTE_LIBRTE_BNX2X_DEBUG_RX=n +CONFIG_RTE_LIBRTE_BNX2X_DEBUG_TX=n +CONFIG_RTE_LIBRTE_BNX2X_MF_SUPPORT=n +CONFIG_RTE_LIBRTE_BNX2X_DEBUG_PERIODIC=n +# Compile burst-oriented Broadcom BNXT PMD driver +CONFIG_RTE_LIBRTE_BNXT_PMD=n +# Compile burst-oriented Chelsio Terminator (CXGBE) PMD +CONFIG_RTE_LIBRTE_CXGBE_PMD=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_REG=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_MBOX=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_TX=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_RX=n +CONFIG_RTE_LIBRTE_CXGBE_TPUT=y +# NXP DPAA Bus +CONFIG_RTE_LIBRTE_DPAA_BUS=n +CONFIG_RTE_LIBRTE_DPAA_MEMPOOL=n +CONFIG_RTE_LIBRTE_DPAA_PMD=n +CONFIG_RTE_LIBRTE_DPAA_HWDEBUG=n +# Compile NXP DPAA2 FSL-MC Bus +CONFIG_RTE_LIBRTE_FSLMC_BUS=n +# Compile Support Libraries for NXP DPAA2 +CONFIG_RTE_LIBRTE_DPAA2_MEMPOOL=n +CONFIG_RTE_LIBRTE_DPAA2_USE_PHYS_IOVA=y +# Compile burst-oriented NXP DPAA2 PMD driver +CONFIG_RTE_LIBRTE_DPAA2_PMD=n +CONFIG_RTE_LIBRTE_DPAA2_DEBUG_DRIVER=n +# Compile NXP ENETC PMD Driver +CONFIG_RTE_LIBRTE_ENETC_PMD=n +# Compile burst-oriented Amazon ENA PMD driver +CONFIG_RTE_LIBRTE_ENA_PMD=n +CONFIG_RTE_LIBRTE_ENA_DEBUG_RX=n +CONFIG_RTE_LIBRTE_ENA_DEBUG_TX=n +CONFIG_RTE_LIBRTE_ENA_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_ENA_COM_DEBUG=n +# Compile burst-oriented Cisco ENIC PMD driver +CONFIG_RTE_LIBRTE_ENIC_PMD=n +# Compile burst-oriented IGB & EM PMD drivers +CONFIG_RTE_LIBRTE_EM_PMD=n +CONFIG_RTE_LIBRTE_IGB_PMD=y +CONFIG_RTE_LIBRTE_E1000_DEBUG_RX=n +CONFIG_RTE_LIBRTE_E1000_DEBUG_TX=n +CONFIG_RTE_LIBRTE_E1000_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_E1000_PF_DISABLE_STRIP_CRC=n +# Compile burst-oriented IXGBE PMD driver +CONFIG_RTE_LIBRTE_IXGBE_PMD=y +CONFIG_RTE_LIBRTE_IXGBE_DEBUG_RX=n +CONFIG_RTE_LIBRTE_IXGBE_DEBUG_TX=n +CONFIG_RTE_LIBRTE_IXGBE_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC=n +CONFIG_RTE_IXGBE_INC_VECTOR=y +CONFIG_RTE_LIBRTE_IXGBE_BYPASS=n +# Compile burst-oriented I40E PMD driver +CONFIG_RTE_LIBRTE_I40E_PMD=y +CONFIG_RTE_LIBRTE_I40E_DEBUG_RX=n +CONFIG_RTE_LIBRTE_I40E_DEBUG_TX=n +CONFIG_RTE_LIBRTE_I40E_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC=y +CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y +CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n 
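+# Editorial note (not part of the upstream file): a worked example of the
+# memseg limits defined above, assuming the two common hugepage sizes.
+# With 2M hugepages the page-count limit binds first, since
+#   8192 pages * 2M = 16384M < RTE_MAX_MEM_MB_PER_LIST (32768M),
+# while with 1G hugepages the megabyte cap binds first, at
+#   32768M / 1024M = 32 pages per list.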
+CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64 +CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4 +# Compile burst-oriented FM10K PMD +CONFIG_RTE_LIBRTE_FM10K_PMD=n +CONFIG_RTE_LIBRTE_FM10K_DEBUG_RX=n +CONFIG_RTE_LIBRTE_FM10K_DEBUG_TX=n +CONFIG_RTE_LIBRTE_FM10K_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE=y +CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y +# Compile burst-oriented AVF PMD driver +CONFIG_RTE_LIBRTE_AVF_PMD=n +CONFIG_RTE_LIBRTE_AVF_INC_VECTOR=y +CONFIG_RTE_LIBRTE_AVF_DEBUG_TX=n +CONFIG_RTE_LIBRTE_AVF_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_AVF_DEBUG_RX=n +CONFIG_RTE_LIBRTE_AVF_16BYTE_RX_DESC=n +# Compile burst-oriented Mellanox ConnectX-3 (MLX4) PMD +CONFIG_RTE_LIBRTE_MLX4_PMD=n +CONFIG_RTE_LIBRTE_MLX4_DEBUG=n +CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS=n +# Compile burst-oriented Mellanox ConnectX-4, ConnectX-5 & Bluefield +# (MLX5) PMD +CONFIG_RTE_LIBRTE_MLX5_PMD=n +CONFIG_RTE_LIBRTE_MLX5_DEBUG=n +CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS=n +# Compile burst-oriented Netronome NFP PMD driver +CONFIG_RTE_LIBRTE_NFP_PMD=n +CONFIG_RTE_LIBRTE_NFP_DEBUG_TX=n +CONFIG_RTE_LIBRTE_NFP_DEBUG_RX=n +# QLogic 10G/25G/40G/50G/100G PMD +CONFIG_RTE_LIBRTE_QEDE_PMD=n +CONFIG_RTE_LIBRTE_QEDE_DEBUG_TX=n +CONFIG_RTE_LIBRTE_QEDE_DEBUG_RX=n +#Provides abs path/name of architecture we compile for. firmware file. +#Empty string denotes driver will use default firmware +CONFIG_RTE_LIBRTE_QEDE_FW="" +# Compile burst-oriented Solarflare libefx-based PMD +CONFIG_RTE_LIBRTE_SFC_EFX_PMD=n +CONFIG_RTE_LIBRTE_SFC_EFX_DEBUG=n +# Compile software PMD backed by SZEDATA2 device +CONFIG_RTE_LIBRTE_PMD_SZEDATA2=n +# Compile burst-oriented Cavium Thunderx NICVF PMD driver +CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD=n +CONFIG_RTE_LIBRTE_THUNDERX_NICVF_DEBUG_RX=n +CONFIG_RTE_LIBRTE_THUNDERX_NICVF_DEBUG_TX=n +# Compile burst-oriented Cavium LiquidIO PMD driver +CONFIG_RTE_LIBRTE_LIO_PMD=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_RX=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_TX=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_MBOX=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_REGS=n +# Compile burst-oriented Cavium OCTEONTX network PMD driver +CONFIG_RTE_LIBRTE_OCTEONTX_PMD=n +# Compile WRS accelerated virtual port (AVP) guest PMD driver +CONFIG_RTE_LIBRTE_AVP_PMD=n +CONFIG_RTE_LIBRTE_AVP_DEBUG_RX=n +CONFIG_RTE_LIBRTE_AVP_DEBUG_TX=n +CONFIG_RTE_LIBRTE_AVP_DEBUG_BUFFERS=n +# Compile burst-oriented VIRTIO PMD driver +CONFIG_RTE_LIBRTE_VIRTIO_PMD=y +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n +# Compile virtio device emulation inside virtio PMD driver +CONFIG_RTE_VIRTIO_USER=n +# Compile burst-oriented VMXNET3 PMD driver +CONFIG_RTE_LIBRTE_VMXNET3_PMD=n +CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_RX=n +CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX=n +CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX_FREE=n +# Compile software PMD backed by AF_PACKET sockets (Linux only) +CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n +# Compile link bonding PMD library +CONFIG_RTE_LIBRTE_PMD_BOND=n +CONFIG_RTE_LIBRTE_BOND_DEBUG_ALB=n +CONFIG_RTE_LIBRTE_BOND_DEBUG_ALB_L1=n +# Compile fail-safe PMD +CONFIG_RTE_LIBRTE_PMD_FAILSAFE=y +# Compile Marvell PMD driver +CONFIG_RTE_LIBRTE_MVPP2_PMD=n +# Compile Marvell MVNETA PMD driver +CONFIG_RTE_LIBRTE_MVNETA_PMD=n +# Compile support for VMBus library +CONFIG_RTE_LIBRTE_VMBUS=n +# Compile native PMD for Hyper-V/Azure +CONFIG_RTE_LIBRTE_NETVSC_PMD=n +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_RX=n +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_TX=n +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_DUMP=n +# Compile virtual device driver for NetVSC on Hyper-V/Azure +CONFIG_RTE_LIBRTE_VDEV_NETVSC_PMD=n +# 
Compile null PMD +CONFIG_RTE_LIBRTE_PMD_NULL=n +# Compile software PMD backed by PCAP files +CONFIG_RTE_LIBRTE_PMD_PCAP=n +# Compile example software rings based PMD +CONFIG_RTE_LIBRTE_PMD_RING=y +CONFIG_RTE_PMD_RING_MAX_RX_RINGS=16 +CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16 +# Compile SOFTNIC PMD +CONFIG_RTE_LIBRTE_PMD_SOFTNIC=n +# Compile architecture we compile for. TAP PMD +# It is enabled by default for Linux only. +CONFIG_RTE_LIBRTE_PMD_TAP=y +# Do prefetch of packet data within PMD driver receive function +CONFIG_RTE_PMD_PACKET_PREFETCH=y +# Compile generic wireless base band device library +# EXPERIMENTAL: API may change without prior notice +CONFIG_RTE_LIBRTE_BBDEV=n +CONFIG_RTE_BBDEV_MAX_DEVS=128 +CONFIG_RTE_BBDEV_OFFLOAD_COST=n +# Compile PMD for NULL bbdev device +CONFIG_RTE_LIBRTE_PMD_BBDEV_NULL=n +# Compile PMD for turbo software bbdev device +CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW=n +# Compile generic crypto device library +CONFIG_RTE_LIBRTE_CRYPTODEV=n +CONFIG_RTE_CRYPTO_MAX_DEVS=64 +# Compile PMD for ARMv8 Crypto device +CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO=n +CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO_DEBUG=n +# Compile NXP CAAM JR crypto Driver +CONFIG_RTE_LIBRTE_PMD_CAAM_JR=n +CONFIG_RTE_LIBRTE_PMD_CAAM_JR_BE=n +# Compile NXP DPAA2 crypto sec driver for CAAM HW +CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC=n +# NXP DPAA caam - crypto driver +CONFIG_RTE_LIBRTE_PMD_DPAA_SEC=n +CONFIG_RTE_LIBRTE_DPAA_MAX_CRYPTODEV=4 +# Compile PMD for Cavium OCTEON TX crypto device +CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO=n +# Compile PMD for QuickAssist based devices - see docs for details +CONFIG_RTE_LIBRTE_PMD_QAT=n +CONFIG_RTE_LIBRTE_PMD_QAT_SYM=n +# Max. number of QuickAssist devices, which can be detected and attached +CONFIG_RTE_PMD_QAT_MAX_PCI_DEVICES=48 +CONFIG_RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS=16 +CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE=65536 +# Compile PMD for virtio crypto devices +CONFIG_RTE_LIBRTE_PMD_VIRTIO_CRYPTO=n +# Number of maximum virtio crypto devices +CONFIG_RTE_MAX_VIRTIO_CRYPTO=32 +# Compile PMD for AESNI backed device +CONFIG_RTE_LIBRTE_PMD_AESNI_MB=n +# Compile PMD for Software backed device +CONFIG_RTE_LIBRTE_PMD_OPENSSL=n +# Compile PMD for AESNI GCM device +CONFIG_RTE_LIBRTE_PMD_AESNI_GCM=n +# Compile PMD for SNOW 3G device +CONFIG_RTE_LIBRTE_PMD_SNOW3G=n +CONFIG_RTE_LIBRTE_PMD_SNOW3G_DEBUG=n +# Compile PMD for KASUMI device +CONFIG_RTE_LIBRTE_PMD_KASUMI=n +# Compile PMD for ZUC device +CONFIG_RTE_LIBRTE_PMD_ZUC=n +# Compile PMD for Crypto Scheduler device +CONFIG_RTE_LIBRTE_PMD_CRYPTO_SCHEDULER=n +# Compile PMD for NULL Crypto device +CONFIG_RTE_LIBRTE_PMD_NULL_CRYPTO=n +# Compile PMD for AMD CCP crypto device +CONFIG_RTE_LIBRTE_PMD_CCP=n +# Compile PMD for Marvell Crypto device +CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO=n +# Compile generic security library +CONFIG_RTE_LIBRTE_SECURITY=n +# Compile generic compression device library +CONFIG_RTE_LIBRTE_COMPRESSDEV=n +CONFIG_RTE_COMPRESS_MAX_DEVS=64 +# Compile compressdev unit test +CONFIG_RTE_COMPRESSDEV_TEST=n +# Compile PMD for Octeontx ZIPVF compression device +CONFIG_RTE_LIBRTE_PMD_OCTEONTX_ZIPVF=n +# Compile PMD for ISA-L compression device +CONFIG_RTE_LIBRTE_PMD_ISAL=n +# Compile PMD for ZLIB compression device +CONFIG_RTE_LIBRTE_PMD_ZLIB=n +# Compile generic event device library +CONFIG_RTE_LIBRTE_EVENTDEV=n +CONFIG_RTE_LIBRTE_EVENTDEV_DEBUG=n +CONFIG_RTE_EVENT_MAX_DEVS=16 +CONFIG_RTE_EVENT_MAX_QUEUES_PER_DEV=64 +CONFIG_RTE_EVENT_TIMER_ADAPTER_NUM_MAX=32 +CONFIG_RTE_EVENT_ETH_INTR_RING_SIZE=1024 +CONFIG_RTE_EVENT_CRYPTO_ADAPTER_MAX_INSTANCE=32 
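+# Editorial note (not part of the upstream file): these CONFIG_RTE_* switches
+# can be flipped at package build time with set_conf from SOURCES/configlib.sh,
+# which is added later in this patch. For example, to enable the PCAP PMD in a
+# DPDK build tree (the path shown is hypothetical):
+#   set_conf /builddir/build/dpdk-18.11 CONFIG_RTE_LIBRTE_PMD_PCAP y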
+CONFIG_RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE=32 +# Compile PMD for skeleton event device +CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV=n +CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV_DEBUG=n +# Compile PMD for software event device +CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV=n +# Compile PMD for distributed software event device +CONFIG_RTE_LIBRTE_PMD_DSW_EVENTDEV=n +# Compile PMD for octeontx sso event device +CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF=n +# Compile PMD for OPDL event device +CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV=n +# Compile PMD for NXP DPAA event device +CONFIG_RTE_LIBRTE_PMD_DPAA_EVENTDEV=n +# Compile PMD for NXP DPAA2 event device +CONFIG_RTE_LIBRTE_PMD_DPAA2_EVENTDEV=n +# Compile raw device support +# EXPERIMENTAL: API may change without prior notice +CONFIG_RTE_LIBRTE_RAWDEV=n +CONFIG_RTE_RAWDEV_MAX_DEVS=10 +CONFIG_RTE_LIBRTE_PMD_SKELETON_RAWDEV=n +# Compile PMD for NXP DPAA2 CMDIF raw device +CONFIG_RTE_LIBRTE_PMD_DPAA2_CMDIF_RAWDEV=n +# Compile PMD for NXP DPAA2 QDMA raw device +CONFIG_RTE_LIBRTE_PMD_DPAA2_QDMA_RAWDEV=n +# Compile PMD for Intel FPGA raw device +CONFIG_RTE_LIBRTE_PMD_IFPGA_RAWDEV=n +# Compile librte_ring +CONFIG_RTE_LIBRTE_RING=y +# Compile librte_mempool +CONFIG_RTE_LIBRTE_MEMPOOL=y +CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE=512 +CONFIG_RTE_LIBRTE_MEMPOOL_DEBUG=n +# Compile Mempool drivers +CONFIG_RTE_DRIVER_MEMPOOL_BUCKET=y +CONFIG_RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB=64 +CONFIG_RTE_DRIVER_MEMPOOL_RING=y +CONFIG_RTE_DRIVER_MEMPOOL_STACK=y +# Compile PMD for octeontx fpa mempool device +CONFIG_RTE_LIBRTE_OCTEONTX_MEMPOOL=n +# Compile librte_mbuf +CONFIG_RTE_LIBRTE_MBUF=y +CONFIG_RTE_LIBRTE_MBUF_DEBUG=n +CONFIG_RTE_MBUF_DEFAULT_MEMPOOL_OPS="ring_mp_mc" +CONFIG_RTE_MBUF_REFCNT_ATOMIC=y +CONFIG_RTE_PKTMBUF_HEADROOM=128 +# Compile librte_timer +CONFIG_RTE_LIBRTE_TIMER=n +CONFIG_RTE_LIBRTE_TIMER_DEBUG=n +# Compile librte_cfgfile +CONFIG_RTE_LIBRTE_CFGFILE=n +# Compile librte_cmdline +CONFIG_RTE_LIBRTE_CMDLINE=y +CONFIG_RTE_LIBRTE_CMDLINE_DEBUG=n +# Compile librte_hash +CONFIG_RTE_LIBRTE_HASH=y +CONFIG_RTE_LIBRTE_HASH_DEBUG=n +# Compile librte_efd +CONFIG_RTE_LIBRTE_EFD=n +# Compile librte_member +CONFIG_RTE_LIBRTE_MEMBER=y +# Compile librte_jobstats +CONFIG_RTE_LIBRTE_JOBSTATS=n +# Compile architecture we compile for. device metrics library +CONFIG_RTE_LIBRTE_METRICS=y +# Compile architecture we compile for. bitrate statistics library +CONFIG_RTE_LIBRTE_BITRATE=y +# Compile architecture we compile for. latency statistics library +CONFIG_RTE_LIBRTE_LATENCY_STATS=y +# Compile librte_telemetry +CONFIG_RTE_LIBRTE_TELEMETRY=n +# Compile librte_lpm +CONFIG_RTE_LIBRTE_LPM=n +CONFIG_RTE_LIBRTE_LPM_DEBUG=n +# Compile librte_acl +CONFIG_RTE_LIBRTE_ACL=n +CONFIG_RTE_LIBRTE_ACL_DEBUG=n +# Compile librte_power +CONFIG_RTE_LIBRTE_POWER=n +CONFIG_RTE_LIBRTE_POWER_DEBUG=n +CONFIG_RTE_MAX_LCORE_FREQS=64 +# Compile librte_net +CONFIG_RTE_LIBRTE_NET=y +# Compile librte_ip_frag +CONFIG_RTE_LIBRTE_IP_FRAG=y +CONFIG_RTE_LIBRTE_IP_FRAG_DEBUG=n +CONFIG_RTE_LIBRTE_IP_FRAG_MAX_FRAG=4 +CONFIG_RTE_LIBRTE_IP_FRAG_TBL_STAT=n +# Compile GRO library +CONFIG_RTE_LIBRTE_GRO=y +# Compile GSO library +CONFIG_RTE_LIBRTE_GSO=y +# Compile librte_meter +CONFIG_RTE_LIBRTE_METER=y +# Compile librte_classify +CONFIG_RTE_LIBRTE_FLOW_CLASSIFY=n +# Compile librte_sched +CONFIG_RTE_LIBRTE_SCHED=n +CONFIG_RTE_SCHED_DEBUG=n +CONFIG_RTE_SCHED_RED=n +CONFIG_RTE_SCHED_COLLECT_STATS=n +CONFIG_RTE_SCHED_SUBPORT_TC_OV=n +CONFIG_RTE_SCHED_PORT_N_GRINDERS=8 +CONFIG_RTE_SCHED_VECTOR=n +# Compile architecture we compile for. 
distributor library +CONFIG_RTE_LIBRTE_DISTRIBUTOR=n +# Compile architecture we compile for. reorder library +CONFIG_RTE_LIBRTE_REORDER=n +# Compile librte_port +CONFIG_RTE_LIBRTE_PORT=n +CONFIG_RTE_PORT_STATS_COLLECT=n +CONFIG_RTE_PORT_PCAP=n +# Compile librte_table +CONFIG_RTE_LIBRTE_TABLE=n +CONFIG_RTE_TABLE_STATS_COLLECT=n +# Compile librte_pipeline +CONFIG_RTE_LIBRTE_PIPELINE=n +CONFIG_RTE_PIPELINE_STATS_COLLECT=n +# Compile librte_kni +CONFIG_RTE_LIBRTE_KNI=n +CONFIG_RTE_LIBRTE_PMD_KNI=n +CONFIG_RTE_KNI_KMOD=n +CONFIG_RTE_KNI_KMOD_ETHTOOL=n +CONFIG_RTE_KNI_PREEMPT_DEFAULT=y +# Compile architecture we compile for. pdump library +CONFIG_RTE_LIBRTE_PDUMP=y +# Compile vhost user library +CONFIG_RTE_LIBRTE_VHOST=y +CONFIG_RTE_LIBRTE_VHOST_NUMA=y +CONFIG_RTE_LIBRTE_VHOST_DEBUG=n +# Compile vhost PMD +# To compile, CONFIG_RTE_LIBRTE_VHOST should be enabled. +CONFIG_RTE_LIBRTE_PMD_VHOST=y +# Compile IFC driver +# To compile, CONFIG_RTE_LIBRTE_VHOST and CONFIG_RTE_EAL_VFIO +# should be enabled. +CONFIG_RTE_LIBRTE_IFC_PMD=n +# Compile librte_bpf +CONFIG_RTE_LIBRTE_BPF=n +# allow load BPF from ELF files (requires libelf) +CONFIG_RTE_LIBRTE_BPF_ELF=n +# Compile architecture we compile for. test application +CONFIG_RTE_APP_TEST=y +CONFIG_RTE_APP_TEST_RESOURCE_TAR=n +# Compile architecture we compile for. procinfo application +CONFIG_RTE_PROC_INFO=y +# Compile architecture we compile for. PMD test application +CONFIG_RTE_TEST_PMD=n +CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n +CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n +# Compile architecture we compile for. bbdev test application +CONFIG_RTE_TEST_BBDEV=n +# Compile architecture we compile for. crypto performance application +CONFIG_RTE_APP_CRYPTO_PERF=n +# Compile architecture we compile for. eventdev application +CONFIG_RTE_APP_EVENTDEV=n +CONFIG_RTE_EXEC_ENV_LINUXAPP=y +CONFIG_RTE_LIBRTE_VHOST_POSTCOPY=n +# Common libraries, before Bus/PMDs +# NXP DPAA BUS and drivers +# NXP FSLMC BUS and DPAA2 drivers +# NXP ENETC PMD Driver +CONFIG_RTE_ARCH_ARM64=y +CONFIG_RTE_ARCH_64=y +# Maximum available cache line size in arm64 implementations. +# Setting to maximum available cache line size in generic config +# to address minimum DMA alignment across all arm64 implementations. +# Accelarate rte_memcpy. Be sure to run unit test (memcpy_perf_autotest) +# to determine architecture we compile for. best threshold in code. Refer to notes in source file +# (lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h) for more info. +CONFIG_RTE_ARCH_ARM64_MEMCPY=n +#CONFIG_RTE_ARM64_MEMCPY_ALIGNED_THRESHOLD=2048 +#CONFIG_RTE_ARM64_MEMCPY_UNALIGNED_THRESHOLD=512 +# Leave below RTE_ARM64_MEMCPY_xxx options commented out, unless there're +# strong reasons. +#CONFIG_RTE_ARM64_MEMCPY_SKIP_GCC_VER_CHECK=n +#CONFIG_RTE_ARM64_MEMCPY_ALIGN_MASK=0xF +#CONFIG_RTE_ARM64_MEMCPY_STRICT_ALIGN=n +CONFIG_RTE_TOOLCHAIN_GCC=y +CONFIG_RTE_LIBRTE_PMD_XENVIRT=n diff --git a/SOURCES/configlib.sh b/SOURCES/configlib.sh new file mode 100644 index 0000000..a1049b3 --- /dev/null +++ b/SOURCES/configlib.sh @@ -0,0 +1,105 @@ +# Copyright (C) 2017, Red Hat, Inc. +# +# Core configuration file library. + +# Configurations are determined by sha values. The way to determine is by +# the special text: +# $FILE_COMMENT_TYPE -*- cfg-sha: $SHA256 -*- + +export LC_ALL=C + +# check required binaries +__check_reqd_binaries() { + local BIN __binaries=("egrep" "sort" "sha256sum" "sed") + for BIN in $__binaries; do + if ! type -P $BIN >/dev/null 2>&1; then + echo "Binary $BIN not found. Please install." 
+ exit 1 + fi + done +} + +# Calculates a sha from a file +# The algorithm for generating a sha from a config is thus: +# +# 1. Remove all comment lines and blank lines +# 2. Sort the content +# 3. generate the sha-256 sum +# +# From a script perspective, this means: +# egrep -v ^\# %file% | egrep -v ^$ | sort -u | sha256sum +# +# Params: +# $1 = output variable +# $2 = file to use to calculate the shasum +# $3 = file comment type (defaults to # if unspecified) +calc_sha() { + __check_reqd_binaries + + if [ "$1" == "" ]; then + echo "Please pass in a storage variable." + return 1 + fi + + local __resultvar=$1 + __retval=1 + shift + + local __file=$1 + local cmnt=${2:-#} + + if [ -f "$__file" ]; then + local __shasum=$(egrep -v ^"$cmnt" "$__file" | egrep -v ^$ | sort -u | sha256sum -t | cut -d" " -f1) + eval $__resultvar="'$__shasum'" + __retval=0 + fi + return $__retval +} + +# Retrieves a sha stored in a file +# Param: +# $1 = output variable +# $2 = file to use to calculate the shasum +# $3 = file comment type (defaults to # if unspecified) +retr_sha() { + __check_reqd_binaries + + if [ "$1" == "" ]; then + echo "Please pass in a storage variable." + return 1 + fi + + local __resultvar=$1 + __retval=1 + shift + + local __file=$1 + local cmnt=${2:-#} + + if [ -f "$__file" ]; then + if grep -q "$cmnt -\*- cfg-sha:" "$__file"; then + local __shasum=$(grep "$cmnt -\*- cfg-sha:" "$__file" | sed -e "s@$cmnt -\*- cfg-sha: @@" | cut -d" " -f1) + eval $__resultvar="'$__shasum'" + __retval=0 + fi + fi + return $__retval +} + + +# Set a config value +# set_conf dpdk_build_tree parameter value +# dpdk_build_tree is the directory where the .config lives +# parameter is the config parameter +# value is the value to set for the config parameter +set_conf() { + c="$1/.config" + shift + + if grep -q "$1" "$c"; then + sed -i "s:^$1=.*$:$1=$2:g" $c + else + echo $1=$2 >> "$c" + fi +} + diff --git a/SOURCES/openvswitch-2.11.3.patch b/SOURCES/openvswitch-2.11.3.patch new file mode 100644 index 0000000..3531f3b --- /dev/null +++ b/SOURCES/openvswitch-2.11.3.patch @@ -0,0 +1,19888 @@ +diff --git a/.cirrus.yml b/.cirrus.yml +index eb6af0a719..c6ecd9fd58 100644 +--- a/.cirrus.yml ++++ b/.cirrus.yml +@@ -2,21 +2,21 @@ freebsd_build_task: + + freebsd_instance: + matrix: +- image: freebsd-12-0-release-amd64 +- image: freebsd-11-2-release-amd64 ++ image_family: freebsd-12-1-snap ++ image_family: freebsd-11-3-snap + cpu: 4 + memory: 8G + + env: + DEPENDENCIES: automake libtool gmake gcc wget openssl +- python py27-six py27-sphinx py27-openssl +- python3 py36-six py36-openssl ++ python3 py37-six py37-sphinx py37-openssl + matrix: + COMPILER: gcc + COMPILER: clang + + prepare_script: + - sysctl -w kern.coredump=0 ++ - pkg update -f + - pkg install -y ${DEPENDENCIES} + + configure_script: +diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh +index de8e76f192..ab0089d554 100755 +--- a/.travis/linux-build.sh ++++ b/.travis/linux-build.sh +@@ -88,7 +88,7 @@ fi + + if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then + if [ -z "$DPDK_VER" ]; then +- DPDK_VER="18.11.2" ++ DPDK_VER="18.11.9" + fi + install_dpdk $DPDK_VER + if [ "$CC" = "clang" ]; then +diff --git a/AUTHORS.rst b/AUTHORS.rst +index f3237e1828..4dc35cedeb 100644 +--- a/AUTHORS.rst ++++ b/AUTHORS.rst +@@ -523,6 +523,7 @@ Krishna Miriyala miriyalak@vmware.com + Krishna Mohan Elluru elluru.kri.mohan@hpe.com + László Sürü laszlo.suru@ericsson.com + Len Gao leng@vmware.com ++Linhaifeng haifeng.lin@huawei.com + Logan Rosen logatronico@gmail.com + Luca Falavigna 
dktrkranz@debian.org + Luiz Henrique Ozaki luiz.ozaki@gmail.com +diff --git a/Documentation/faq/configuration.rst b/Documentation/faq/configuration.rst +index cb2c6b4eca..ff3b71a5d4 100644 +--- a/Documentation/faq/configuration.rst ++++ b/Documentation/faq/configuration.rst +@@ -212,6 +212,19 @@ Q: Does Open vSwitch support ERSPAN? + options:erspan_ver=2 options:erspan_dir=1 \ + options:erspan_hwid=4 + ++Q: Does Open vSwitch support IPv6 GRE? ++ ++ A: Yes. L2 tunnel interface GRE over IPv6 is supported. ++ L3 GRE tunnel over IPv6 is not supported. ++ ++ :: ++ ++ $ ovs-vsctl add-br br0 ++ $ ovs-vsctl add-port br0 at_gre0 -- \ ++ set int at_gre0 type=ip6gre \ ++ options:remote_ip=fc00:100::1 \ ++ options:packet_type=legacy_l2 ++ + Q: How do I connect two bridges? + + A: First, why do you want to do this? Two connected bridges are not much +diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst +index eeb949d4a6..302da72fcd 100644 +--- a/Documentation/faq/releases.rst ++++ b/Documentation/faq/releases.rst +@@ -164,9 +164,9 @@ Q: What DPDK version does each Open vSwitch release work with? + A: The following table lists the DPDK version against which the given + versions of Open vSwitch will successfully build. + +- ============ ======= ++ ============ ======== + Open vSwitch DPDK +- ============ ======= ++ ============ ======== + 2.2.x 1.6 + 2.3.x 1.6 + 2.4.x 2.0 +@@ -174,10 +174,10 @@ Q: What DPDK version does each Open vSwitch release work with? + 2.6.x 16.07.2 + 2.7.x 16.11.9 + 2.8.x 17.05.2 +- 2.9.x 17.11.4 +- 2.10.x 17.11.4 +- 2.11.x 18.11.2 +- ============ ======= ++ 2.9.x 17.11.10 ++ 2.10.x 17.11.10 ++ 2.11.x 18.11.9 ++ ============ ======== + + Q: Are all the DPDK releases that OVS versions work with maintained? + +diff --git a/Documentation/internals/mailing-lists.rst b/Documentation/internals/mailing-lists.rst +index 33f20277be..e8b3440943 100644 +--- a/Documentation/internals/mailing-lists.rst ++++ b/Documentation/internals/mailing-lists.rst +@@ -93,4 +93,4 @@ security + The `security`__ mailing list is for submitting security vulnerabilities to the + security team. + +-__ security@ovs.org ++__ security@openvswitch.org +diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst +index 6e5f1ea60a..510923dcc8 100644 +--- a/Documentation/intro/install/dpdk.rst ++++ b/Documentation/intro/install/dpdk.rst +@@ -42,7 +42,7 @@ Build requirements + In addition to the requirements described in :doc:`general`, building Open + vSwitch with DPDK will require the following: + +-- DPDK 18.11.2 ++- DPDK 18.11.9 + + - A `DPDK supported NIC`_ + +@@ -71,9 +71,9 @@ Install DPDK + #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``:: + + $ cd /usr/src/ +- $ wget http://fast.dpdk.org/rel/dpdk-18.11.2.tar.xz +- $ tar xf dpdk-18.11.2.tar.xz +- $ export DPDK_DIR=/usr/src/dpdk-stable-18.11.2 ++ $ wget http://fast.dpdk.org/rel/dpdk-18.11.9.tar.xz ++ $ tar xf dpdk-18.11.9.tar.xz ++ $ export DPDK_DIR=/usr/src/dpdk-stable-18.11.9 + $ cd $DPDK_DIR + + #. (Optional) Configure DPDK as a shared library +diff --git a/Documentation/topics/dpdk/bridge.rst b/Documentation/topics/dpdk/bridge.rst +index a3ed926ca9..2fae9188a4 100644 +--- a/Documentation/topics/dpdk/bridge.rst ++++ b/Documentation/topics/dpdk/bridge.rst +@@ -74,6 +74,12 @@ OpenFlow14`` option:: + + $ ovs-ofctl -O OpenFlow14 dump-ports br0 + ++There are custom statistics that OVS accumulates itself and these stats has ++``ovs_`` as prefix. 
These custom stats are shown along with other stats ++using the following command:: ++ ++ $ ovs-vsctl get Interface <iface> statistics ++ + EMC Insertion Probability + ------------------------- + +diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst +index 33361ec359..3bd2dc608a 100644 +--- a/Documentation/topics/dpdk/vhost-user.rst ++++ b/Documentation/topics/dpdk/vhost-user.rst +@@ -303,6 +303,31 @@ The default value is false. + + .. _dpdk-testpmd: + ++vhost-user-client tx retries config ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++For vhost-user-client interfaces, the max amount of retries can be changed from ++the default 8 by setting ``tx-retries-max``. ++ ++The minimum is 0 which means there will be no retries and if any packets in ++each batch cannot be sent immediately they will be dropped. The maximum is 32, ++which would mean that after the first packet(s) in the batch was sent there ++could be a maximum of 32 more retries. ++ ++Retries can help with avoiding packet loss when temporarily unable to send to a ++vhost interface because the virtqueue is full. However, spending more time ++retrying to send to one interface, will reduce the time available for rx/tx and ++processing packets on other interfaces, so some tuning may be required for best ++performance. ++ ++Tx retries max can be set for vhost-user-client ports:: ++ ++ $ ovs-vsctl set Interface vhost-client-1 options:tx-retries-max=0 ++ ++.. note:: ++ ++ Configurable vhost tx retries are not supported with vhost-user ports. ++ + DPDK in the Guest + ----------------- + +@@ -320,9 +345,9 @@ To begin, instantiate a guest as described in :ref:`dpdk-vhost-user` or + DPDK sources to VM and build DPDK:: + + $ cd /root/dpdk/ +- $ wget http://fast.dpdk.org/rel/dpdk-18.11.2.tar.xz +- $ tar xf dpdk-18.11.2.tar.xz +- $ export DPDK_DIR=/root/dpdk/dpdk-stable-18.11.2 ++ $ wget http://fast.dpdk.org/rel/dpdk-18.11.9.tar.xz ++ $ tar xf dpdk-18.11.9.tar.xz ++ $ export DPDK_DIR=/root/dpdk/dpdk-stable-18.11.9 + $ export DPDK_TARGET=x86_64-native-linuxapp-gcc + $ export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET + $ cd $DPDK_DIR +@@ -437,6 +462,50 @@ Jumbo Frames + DPDK vHost User ports can be configured to use Jumbo Frames. For more + information, refer to :doc:`jumbo-frames`. + ++vhost tx retries ++---------------- ++ ++When sending a batch of packets to a vhost-user or vhost-user-client interface, ++it may happen that some but not all of the packets in the batch are able to be ++sent to the guest. This is often because there is not enough free descriptors ++in the virtqueue for all the packets in the batch to be sent. In this case ++there will be a retry, with a default maximum of 8 occurring. If at any time no ++packets can be sent, it may mean the guest is not accepting packets, so there ++are no (more) retries. ++ ++For information about configuring the maximum amount of tx retries for ++vhost-user-client interfaces see `vhost-user-client tx retries config`_. ++ ++.. note:: ++ ++ Maximum vhost tx batch size is defined by NETDEV_MAX_BURST, and is currently ++ as 32. 
++ ++Tx Retries may be reduced or even avoided by some external configuration, such ++as increasing the virtqueue size through the ``rx_queue_size`` parameter ++introduced in QEMU 2.7.0 / libvirt 2.3.0:: ++ ++ <interface type='vhostuser'> ++ <mac address='56:48:4f:53:54:01'/> ++ <source type='unix' path='/tmp/dpdkvhostclient0' mode='server'/> ++ <model type='virtio'/> ++ <driver name='vhost' rx_queue_size='1024' tx_queue_size='1024'/> ++ <address type='pci' domain='0x0000' bus='0x00' slot='0x10' function='0x0'/> ++ </interface> ++ ++The guest application will also need need to provide enough descriptors. For ++example with ``testpmd`` the command line argument can be used:: ++ ++ --rxd=1024 --txd=1024 ++ ++The guest should also have sufficient cores dedicated for consuming and ++processing packets at the required rate. ++ ++The amount of Tx retries on a vhost-user or vhost-user-client interface can be ++shown with:: ++ ++ $ ovs-vsctl get Interface dpdkvhostclient0 statistics:ovs_tx_retries ++ + vhost-user Dequeue Zero Copy (experimental) + ------------------------------------------- + +diff --git a/NEWS b/NEWS +index f177d7efc1..cd0c889c10 100644 +--- a/NEWS ++++ b/NEWS +@@ -1,3 +1,12 @@ ++v2.11.4 - xx xxx xxxx ++--------------------- ++ - DPDK ++ * OVS validated with DPDK 18.11.9. Due to this being the latest LTS to ++ be validated and coupled with the inclusion of fixes for ++ CVE-2019-14818, CVE-2020-10722, CVE-2020-10723 and CVE-2020-10724 ++ over the course of various LTS releases, this DPDK version is strongly ++ recommended to be used. ++ + v2.11.3 - 06 Sep 2019 + --------------------- + - Fix compilation issue with Ubuntu kernel 4.15.60. +diff --git a/acinclude.m4 b/acinclude.m4 +index f15b1ff670..43c0e79262 100644 +--- a/acinclude.m4 ++++ b/acinclude.m4 +@@ -172,16 +172,30 @@ AC_DEFUN([OVS_CHECK_LINUX], [ + AM_CONDITIONAL(LINUX_ENABLED, test -n "$KBUILD") + ]) + ++dnl OVS_CHECK_LINUX_NETLINK ++dnl ++dnl Configure Linux netlink compat. ++AC_DEFUN([OVS_CHECK_LINUX_NETLINK], [ ++ AC_COMPILE_IFELSE([ ++ AC_LANG_PROGRAM([#include <linux/netlink.h>], [ ++ struct nla_bitfield32 x = { 0 }; ++ ])], ++ [AC_DEFINE([HAVE_NLA_BITFIELD32], [1], ++ [Define to 1 if struct nla_bitfield32 is available.])]) ++]) ++ + dnl OVS_CHECK_LINUX_TC + dnl + dnl Configure Linux tc compat. + AC_DEFUN([OVS_CHECK_LINUX_TC], [ + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM([#include <linux/pkt_cls.h>], [ +- int x = TCA_FLOWER_KEY_ENC_IP_TTL_MASK; ++ int x = TCA_ACT_FLAGS; + ])], +- [AC_DEFINE([HAVE_TCA_FLOWER_KEY_ENC_IP_TTL_MASK], [1], +- [Define to 1 if TCA_FLOWER_KEY_ENC_IP_TTL_MASK is available.])]) ++ [AC_DEFINE([HAVE_TCA_ACT_FLAGS], [1], ++ [Define to 1 if TCA_ACT_FLAGS is available.])]) ++ ++ AC_CHECK_MEMBERS([struct tcf_t.firstuse], [], [], [#include <linux/pkt_cls.h>]) + + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM([#include <linux/tc_act/tc_vlan.h>], [ +@@ -967,6 +981,8 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ + [OVS_DEFINE([HAVE_NF_CONNTRACK_IN_TAKES_NF_HOOK_STATE])]) + OVS_GREP_IFELSE([$KSRC/include/net/ipv6_frag.h], [IP6_DEFRAG_CONNTRACK_IN], + [OVS_DEFINE([HAVE_IPV6_FRAG_H])]) ++ OVS_GREP_IFELSE([$KSRC/include/net/dst_ops.h], [bool confirm_neigh], ++ [OVS_DEFINE([HAVE_DST_OPS_CONFIRM_NEIGH])]) + + if cmp -s datapath/linux/kcompat.h.new \ + datapath/linux/kcompat.h >/dev/null 2>&1; then +diff --git a/configure.ac b/configure.ac +index 45ff8cb07e..e64c4ceb35 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -13,7 +13,7 @@ + # limitations under the License. 
+ + AC_PREREQ(2.63) +-AC_INIT(openvswitch, 2.11.3, bugs@openvswitch.org) ++AC_INIT(openvswitch, 2.11.4, bugs@openvswitch.org) + AC_CONFIG_SRCDIR([datapath/datapath.c]) + AC_CONFIG_MACRO_DIR([m4]) + AC_CONFIG_AUX_DIR([build-aux]) +@@ -185,6 +185,7 @@ OVS_CTAGS_IDENTIFIERS + AC_ARG_VAR(KARCH, [Kernel Architecture String]) + AC_SUBST(KARCH) + OVS_CHECK_LINUX ++OVS_CHECK_LINUX_NETLINK + OVS_CHECK_LINUX_TC + OVS_CHECK_DPDK + OVS_CHECK_PRAGMA_MESSAGE +diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h +index 9b087f1b06..3369c8630d 100644 +--- a/datapath/linux/compat/include/linux/openvswitch.h ++++ b/datapath/linux/compat/include/linux/openvswitch.h +@@ -404,6 +404,28 @@ enum ovs_tunnel_key_attr { + + #define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1) + ++/** ++ * enum xlate_error - Different types of error during translation ++ */ ++ ++#ifndef __KERNEL__ ++enum xlate_error { ++ XLATE_OK = 0, ++ XLATE_BRIDGE_NOT_FOUND, ++ XLATE_RECURSION_TOO_DEEP, ++ XLATE_TOO_MANY_RESUBMITS, ++ XLATE_STACK_TOO_DEEP, ++ XLATE_NO_RECIRCULATION_CONTEXT, ++ XLATE_RECIRCULATION_CONFLICT, ++ XLATE_TOO_MANY_MPLS_LABELS, ++ XLATE_INVALID_TUNNEL_METADATA, ++ XLATE_UNSUPPORTED_PACKET_TYPE, ++ XLATE_CONGESTION_DROP, ++ XLATE_FORWARDING_DISABLED, ++ XLATE_MAX, ++}; ++#endif ++ + /** + * enum ovs_frag_type - IPv4 and IPv6 fragment type + * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. +@@ -855,6 +877,24 @@ enum ovs_nat_attr { + + #define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1) + ++/* ++ * enum ovs_check_pkt_len_attr - Attributes for %OVS_ACTION_ATTR_CHECK_PKT_LEN. ++ * ++ * @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN: u16 Packet length to check for. ++ * @OVS_CHECK_PKT_LEN_ATTR_USERSPACE_COND: u8 comparison condition to send ++ * the packet to userspace. One of OVS_CHECK_PKT_LEN_COND_*. ++ * @OVS_CHECK_PKT_LEN_ATTR_USERPACE - Nested OVS_USERSPACE_ATTR_* actions. ++ */ ++enum ovs_check_pkt_len_attr { ++ OVS_CHECK_PKT_LEN_ATTR_UNSPEC, ++ OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, ++ OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, ++ OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, ++ __OVS_CHECK_PKT_LEN_ATTR_MAX, ++}; ++ ++#define OVS_CHECK_PKT_LEN_ATTR_MAX (__OVS_CHECK_PKT_LEN_ATTR_MAX - 1) ++ + /** + * enum ovs_action_attr - Action types. + * +@@ -911,6 +951,10 @@ enum ovs_nat_attr { + * packet, or modify the packet (e.g., change the DSCP field). + * @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of + * actions without affecting the original packet and key. ++ * @OVS_ACTION_ATTR_CHECK_PKT_LEN: Check the packet length and execute a set ++ * of actions if greater than the specified packet length, else execute ++ * another set of actions. ++ * @OVS_ACTION_ATTR_DROP: Explicit drop action. + */ + + enum ovs_action_attr { +@@ -938,10 +982,12 @@ enum ovs_action_attr { + OVS_ACTION_ATTR_POP_NSH, /* No argument. */ + OVS_ACTION_ATTR_METER, /* u32 meter number. */ + OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */ ++ OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ + + #ifndef __KERNEL__ + OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/ + OVS_ACTION_ATTR_TUNNEL_POP, /* u32 port number. */ ++ OVS_ACTION_ATTR_DROP, /* u32 xlate_error. */ + #endif + __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted + * from userspace. 
*/ +diff --git a/datapath/linux/compat/ip6_gre.c b/datapath/linux/compat/ip6_gre.c +index 2ffdda5e1d..aa6cf3e7fc 100644 +--- a/datapath/linux/compat/ip6_gre.c ++++ b/datapath/linux/compat/ip6_gre.c +@@ -1196,7 +1196,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, + + /* TooBig packet may have updated dst->dev's mtu */ + if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) ++#ifndef HAVE_DST_OPS_CONFIRM_NEIGH + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); ++#else ++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false); ++#endif + + err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, + NEXTHDR_GRE); +@@ -2550,7 +2554,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { + }; + + static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { +- .kind = "ip6gre", ++ .kind = "ip6gretap", + .maxtype = RPL_IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), +diff --git a/datapath/linux/compat/ip_tunnel.c b/datapath/linux/compat/ip_tunnel.c +index d16e60fbfe..f1c8ba7cdc 100644 +--- a/datapath/linux/compat/ip_tunnel.c ++++ b/datapath/linux/compat/ip_tunnel.c +@@ -266,7 +266,12 @@ static int rpl_tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) ++#ifndef HAVE_DST_OPS_CONFIRM_NEIGH + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); ++#else ++ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), ++ NULL, skb, mtu, false); ++#endif + + if (skb->protocol == htons(ETH_P_IP)) { + if (!skb_is_gso(skb) && +diff --git a/debian/changelog b/debian/changelog +index d08cfbfd91..56471d417a 100644 +--- a/debian/changelog ++++ b/debian/changelog +@@ -1,3 +1,9 @@ ++openvswitch (2.11.4-1) unstable; urgency=low ++ [ Open vSwitch team ] ++ * New upstream version ++ ++ -- Open vSwitch team <dev@openvswitch.org> Fri, 06 Sep 2019 14:31:36 -0700 ++ + openvswitch (2.11.3-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version +diff --git a/dpdk/app/test-crypto-perf/main.c b/dpdk/app/test-crypto-perf/main.c +index 921394799a..8affc5dc95 100644 +--- a/dpdk/app/test-crypto-perf/main.c ++++ b/dpdk/app/test-crypto-perf/main.c +@@ -544,7 +544,8 @@ main(int argc, char **argv) + goto err; + } + +- if (!opts.silent) ++ if (!opts.silent && opts.test != CPERF_TEST_TYPE_THROUGHPUT && ++ opts.test != CPERF_TEST_TYPE_LATENCY) + show_test_vector(t_vec); + + total_nb_qps = nb_cryptodevs * opts.nb_qps; +diff --git a/dpdk/app/test-eventdev/test_pipeline_common.c b/dpdk/app/test-eventdev/test_pipeline_common.c +index c988da28c9..b586804090 100644 +--- a/dpdk/app/test-eventdev/test_pipeline_common.c ++++ b/dpdk/app/test-eventdev/test_pipeline_common.c +@@ -366,12 +366,16 @@ pipeline_event_tx_adapter_setup(struct evt_options *opt, + if (!(cap & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT)) { + uint32_t service_id; + +- rte_event_eth_tx_adapter_service_id_get(consm, +- &service_id); ++ ret = rte_event_eth_tx_adapter_service_id_get(consm, ++ &service_id); ++ if (ret != -ESRCH && ret != 0) { ++ evt_err("Failed to get Tx adptr service ID"); ++ return ret; ++ } + ret = evt_service_setup(service_id); + if (ret) { + evt_err("Failed to setup service core" +- " for Tx adapter\n"); ++ " for Tx adapter"); + return ret; + } + } +diff --git a/dpdk/app/test-pmd/Makefile b/dpdk/app/test-pmd/Makefile +index d5258eae4a..f36137fd44 100644 +--- a/dpdk/app/test-pmd/Makefile ++++ b/dpdk/app/test-pmd/Makefile +@@ -15,6 +15,12 @@ CFLAGS += -O3 + CFLAGS += 
$(WERROR_FLAGS) + CFLAGS += -Wno-deprecated-declarations + ++ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) ++ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1) ++CFLAGS += -fcommon ++endif ++endif ++ + # + # all source are stored in SRCS-y + # +diff --git a/dpdk/app/test-pmd/cmdline.c b/dpdk/app/test-pmd/cmdline.c +index 26295130c5..abf803418c 100644 +--- a/dpdk/app/test-pmd/cmdline.c ++++ b/dpdk/app/test-pmd/cmdline.c +@@ -101,7 +101,7 @@ static void cmd_help_brief_parsed(__attribute__((unused)) void *parsed_result, + " help ports : Configuring ports.\n" + " help registers : Reading and setting port registers.\n" + " help filters : Filters configuration help.\n" +- " help traffic_management : Traffic Management commmands.\n" ++ " help traffic_management : Traffic Management commands.\n" + " help all : All of the above sections.\n\n" + ); + +@@ -4932,7 +4932,7 @@ cmd_gso_size_parsed(void *parsed_result, + + if (test_done == 0) { + printf("Before setting GSO segsz, please first" +- " stop fowarding\n"); ++ " stop forwarding\n"); + return; + } + +@@ -16459,8 +16459,10 @@ cmd_ddp_get_list_parsed( + #ifdef RTE_LIBRTE_I40E_PMD + size = PROFILE_INFO_SIZE * MAX_PROFILE_NUM + 4; + p_list = (struct rte_pmd_i40e_profile_list *)malloc(size); +- if (!p_list) ++ if (!p_list) { + printf("%s: Failed to malloc buffer\n", __func__); ++ return; ++ } + + if (ret == -ENOTSUP) + ret = rte_pmd_i40e_get_ddp_list(res->port_id, +diff --git a/dpdk/app/test-pmd/config.c b/dpdk/app/test-pmd/config.c +index 6e9a2042c2..0e5d77159d 100644 +--- a/dpdk/app/test-pmd/config.c ++++ b/dpdk/app/test-pmd/config.c +@@ -204,11 +204,26 @@ nic_stats_display(portid_t port_id) + void + nic_stats_clear(portid_t port_id) + { ++ int ret; ++ + if (port_id_is_invalid(port_id, ENABLED_WARN)) { + print_valid_ports(); + return; + } +- rte_eth_stats_reset(port_id); ++ ++ ret = rte_eth_stats_reset(port_id); ++ if (ret != 0) { ++ printf("%s: Error: failed to reset stats (port %u): %s", ++ __func__, port_id, strerror(ret)); ++ return; ++ } ++ ++ ret = rte_eth_stats_get(port_id, &ports[port_id].stats); ++ if (ret != 0) { ++ printf("%s: Error: failed to get stats (port %u): %s", ++ __func__, port_id, strerror(ret)); ++ return; ++ } + printf("\n NIC statistics for port %d cleared\n", port_id); + } + +@@ -278,11 +293,20 @@ nic_xstats_display(portid_t port_id) + void + nic_xstats_clear(portid_t port_id) + { ++ int ret; ++ + if (port_id_is_invalid(port_id, ENABLED_WARN)) { + print_valid_ports(); + return; + } + rte_eth_xstats_reset(port_id); ++ ++ ret = rte_eth_stats_get(port_id, &ports[port_id].stats); ++ if (ret != 0) { ++ printf("%s: Error: failed to get stats (port %u): %s", ++ __func__, port_id, strerror(ret)); ++ return; ++ } + } + + void +diff --git a/dpdk/app/test-pmd/csumonly.c b/dpdk/app/test-pmd/csumonly.c +index 46eb52d5b8..b0d528d98c 100644 +--- a/dpdk/app/test-pmd/csumonly.c ++++ b/dpdk/app/test-pmd/csumonly.c +@@ -138,21 +138,23 @@ parse_ipv6(struct ipv6_hdr *ipv6_hdr, struct testpmd_offload_info *info) + + /* + * Parse an ethernet header to fill the ethertype, l2_len, l3_len and +- * ipproto. This function is able to recognize IPv4/IPv6 with one optional vlan +- * header. The l4_len argument is only set in case of TCP (useful for TSO). ++ * ipproto. This function is able to recognize IPv4/IPv6 with optional VLAN ++ * headers. The l4_len argument is only set in case of TCP (useful for TSO). 
+ */ + static void + parse_ethernet(struct ether_hdr *eth_hdr, struct testpmd_offload_info *info) + { + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; ++ struct vlan_hdr *vlan_hdr; + + info->l2_len = sizeof(struct ether_hdr); + info->ethertype = eth_hdr->ether_type; + +- if (info->ethertype == _htons(ETHER_TYPE_VLAN)) { +- struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); +- ++ while (info->ethertype == _htons(ETHER_TYPE_VLAN) || ++ info->ethertype == _htons(ETHER_TYPE_QINQ)) { ++ vlan_hdr = (struct vlan_hdr *) ++ ((char *)eth_hdr + info->l2_len); + info->l2_len += sizeof(struct vlan_hdr); + info->ethertype = vlan_hdr->eth_proto; + } +diff --git a/dpdk/app/test-pmd/meson.build b/dpdk/app/test-pmd/meson.build +index 6006c60f99..adeeeeedba 100644 +--- a/dpdk/app/test-pmd/meson.build ++++ b/dpdk/app/test-pmd/meson.build +@@ -5,6 +5,11 @@ + name = 'testpmd' + allow_experimental_apis = true + cflags += '-Wno-deprecated-declarations' ++ ++if (toolchain == 'gcc' and cc.version().version_compare('>=10.0.0')) ++ cflags += '-fcommon' ++endif ++ + sources = files('cmdline.c', + 'cmdline_flow.c', + 'cmdline_mtr.c', +diff --git a/dpdk/app/test-pmd/parameters.c b/dpdk/app/test-pmd/parameters.c +index 4d5e28970c..57028c313f 100644 +--- a/dpdk/app/test-pmd/parameters.c ++++ b/dpdk/app/test-pmd/parameters.c +@@ -52,7 +52,7 @@ + static void + usage(char* progname) + { +- printf("usage: %s " ++ printf("usage: %s [EAL options] -- " + #ifdef RTE_LIBRTE_CMDLINE + "[--interactive|-i] " + "[--cmdline-file=FILENAME] " +diff --git a/dpdk/app/test-pmd/testpmd.c b/dpdk/app/test-pmd/testpmd.c +index a910c06dc2..a32eae750c 100644 +--- a/dpdk/app/test-pmd/testpmd.c ++++ b/dpdk/app/test-pmd/testpmd.c +@@ -2500,7 +2500,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. speed %u Mbps- %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +@@ -2934,6 +2934,8 @@ get_eth_dcb_conf(portid_t pid, struct rte_eth_conf *eth_conf, + struct rte_eth_dcb_tx_conf *tx_conf = + ð_conf->tx_adv_conf.dcb_tx_conf; + ++ memset(&rss_conf, 0, sizeof(struct rte_eth_rss_conf)); ++ + rc = rte_eth_dev_rss_hash_conf_get(pid, &rss_conf); + if (rc != 0) + return rc; +diff --git a/dpdk/config/meson.build b/dpdk/config/meson.build +index 616af97466..88742ce9db 100644 +--- a/dpdk/config/meson.build ++++ b/dpdk/config/meson.build +@@ -111,6 +111,10 @@ warning_flags = [ + '-Wcast-qual', + '-Wno-address-of-packed-member' + ] ++if cc.get_id() == 'gcc' and cc.version().version_compare('>=10.0') ++# FIXME: Bugzilla 396 ++ warning_flags += '-Wno-zero-length-bounds' ++endif + if not dpdk_conf.get('RTE_ARCH_64') + # for 32-bit, don't warn about casting a 32-bit pointer to 64-bit int - it's fine!! 
+ warning_flags += '-Wno-pointer-to-int-cast' +diff --git a/dpdk/devtools/check-symbol-change.sh b/dpdk/devtools/check-symbol-change.sh +index f6f79a883b..2d4f05fdd2 100755 +--- a/dpdk/devtools/check-symbol-change.sh ++++ b/dpdk/devtools/check-symbol-change.sh +@@ -17,13 +17,11 @@ build_map_changes() + # map files are altered, and all section/symbol names + # appearing between a triggering of this rule and the + # next trigger of this rule are associated with this file +- /[-+] a\/.*\.map/ {map=$2; in_map=1} ++ /[-+] [ab]\/.*\.map/ {map=$2; in_map=1; next} + +- # Same pattern as above, only it matches on anything that +- # does not end in 'map', indicating we have left the map chunk. +- # When we hit this, turn off the in_map variable, which +- # supresses the subordonate rules below +- /[-+] a\/.*\.[^map]/ {in_map=0} ++ # The previous rule catches all .map files, anything else ++ # indicates we left the map chunk. ++ /[-+] [ab]\// {in_map=0} + + # Triggering this rule, which starts a line and ends it + # with a { identifies a versioned section. The section name is +diff --git a/dpdk/devtools/checkpatches.sh b/dpdk/devtools/checkpatches.sh +index c471731d45..acff1843af 100755 +--- a/dpdk/devtools/checkpatches.sh ++++ b/dpdk/devtools/checkpatches.sh +@@ -56,6 +56,14 @@ check_forbidden_additions() { # <patch> + -f $(dirname $(readlink -e $0))/check-forbidden-tokens.awk \ + "$1" || res=1 + ++ # links must prefer https over http ++ awk -v FOLDERS='doc' \ ++ -v EXPRESSIONS='http://.*dpdk.org' \ ++ -v RET_ON_FAIL=1 \ ++ -v MESSAGE='Using non https link to dpdk.org' \ ++ -f $(dirname $(readlink -f $0))/check-forbidden-tokens.awk \ ++ "$1" || res=1 ++ + return $res + } + +diff --git a/dpdk/doc/api/doxy-api-index.md b/dpdk/doc/api/doxy-api-index.md +index e27874c5ae..a380f44fbd 100644 +--- a/dpdk/doc/api/doxy-api-index.md ++++ b/dpdk/doc/api/doxy-api-index.md +@@ -1,4 +1,4 @@ +-API {#index} ++API + === + + <!-- +diff --git a/dpdk/doc/api/doxy-api.conf.in b/dpdk/doc/api/doxy-api.conf.in +index 77ba327a88..f0da8bd9e5 100644 +--- a/dpdk/doc/api/doxy-api.conf.in ++++ b/dpdk/doc/api/doxy-api.conf.in +@@ -3,6 +3,7 @@ + + PROJECT_NAME = DPDK + PROJECT_NUMBER = @VERSION@ ++USE_MDFILE_AS_MAINPAGE = @TOPDIR@/doc/api/doxy-api-index.md + INPUT = @TOPDIR@/doc/api/doxy-api-index.md \ + @TOPDIR@/drivers/crypto/scheduler \ + @TOPDIR@/drivers/mempool/dpaa2 \ +diff --git a/dpdk/doc/guides/conf.py b/dpdk/doc/guides/conf.py +index 2ebbf50c60..5fc597cbe7 100644 +--- a/dpdk/doc/guides/conf.py ++++ b/dpdk/doc/guides/conf.py +@@ -318,16 +318,22 @@ def print_table_css(outfile, table_id): + cursor: default; + overflow: hidden; + } ++ table#idx p { ++ margin: 0; ++ line-height: inherit; ++ } + table#idx th, table#idx td { + text-align: center; ++ border: solid 1px #ddd; + } + table#idx th { +- font-size: 72%; ++ padding: 0.5em 0; ++ } ++ table#idx th, table#idx th p { ++ font-size: 11px; + white-space: pre-wrap; + vertical-align: top; +- padding: 0.5em 0; + min-width: 0.9em; +- width: 2em; + } + table#idx col:first-child { + width: 0; +@@ -336,9 +342,11 @@ def print_table_css(outfile, table_id): + vertical-align: bottom; + } + table#idx td { +- font-size: 70%; + padding: 1px; + } ++ table#idx td, table#idx td p { ++ font-size: 11px; ++ } + table#idx td:first-child { + padding-left: 1em; + text-align: left; +@@ -414,4 +422,8 @@ def setup(app): + # Process the numref references once the doctree has been created. 
+ app.connect('doctree-resolved', process_numref) + +- app.add_stylesheet('css/custom.css') ++ try: ++ # New function in sphinx 1.8 ++ app.add_css_file('css/custom.css') ++ except: ++ app.add_stylesheet('css/custom.css') +diff --git a/dpdk/doc/guides/contributing/documentation.rst b/dpdk/doc/guides/contributing/documentation.rst +index a45b62bad3..8c782531d9 100644 +--- a/dpdk/doc/guides/contributing/documentation.rst ++++ b/dpdk/doc/guides/contributing/documentation.rst +@@ -83,7 +83,7 @@ added to by the developer. + * **API documentation** + + The API documentation explains how to use the public DPDK functions. +- The `API index page <http://doc.dpdk.org/api/>`_ shows the generated API documentation with related groups of functions. ++ The `API index page <https://doc.dpdk.org/api/>`_ shows the generated API documentation with related groups of functions. + + The API documentation should be updated via Doxygen comments when new functions are added. + +@@ -562,14 +562,14 @@ Hyperlinks + ~~~~~~~~~~ + + * Links to external websites can be plain URLs. +- The following is rendered as http://dpdk.org:: ++ The following is rendered as https://dpdk.org:: + +- http://dpdk.org ++ https://dpdk.org + + * They can contain alternative text. +- The following is rendered as `Check out DPDK <http://dpdk.org>`_:: ++ The following is rendered as `Check out DPDK <https://dpdk.org>`_:: + +- `Check out DPDK <http://dpdk.org>`_ ++ `Check out DPDK <https://dpdk.org>`_ + + * An internal link can be generated by placing labels in the document with the format ``.. _label_name``. + +@@ -667,7 +667,7 @@ The following are some guidelines for use of Doxygen in the DPDK API documentati + */ + + In the API documentation the functions will be rendered as links, see the +- `online section of the rte_ethdev.h docs <http://doc.dpdk.org/api/rte__ethdev_8h.html>`_ that contains the above text. ++ `online section of the rte_ethdev.h docs <https://doc.dpdk.org/api/rte__ethdev_8h.html>`_ that contains the above text. + + * The ``@see`` keyword can be used to create a *see also* link to another file or library. + This directive should be placed on one line at the bottom of the documentation section. +diff --git a/dpdk/doc/guides/contributing/patches.rst b/dpdk/doc/guides/contributing/patches.rst +index 02cd0555a4..28d4b67d66 100644 +--- a/dpdk/doc/guides/contributing/patches.rst ++++ b/dpdk/doc/guides/contributing/patches.rst +@@ -28,9 +28,9 @@ The DPDK development process has the following features: + * All sub-repositories are merged into main repository for ``-rc1`` and ``-rc2`` versions of the release. + * After the ``-rc2`` release all patches should target the main repository. + +-The mailing list for DPDK development is `dev@dpdk.org <http://mails.dpdk.org/archives/dev/>`_. +-Contributors will need to `register for the mailing list <http://mails.dpdk.org/listinfo/dev>`_ in order to submit patches. +-It is also worth registering for the DPDK `Patchwork <http://patches.dpdk.org/project/dpdk/list/>`_ ++The mailing list for DPDK development is `dev@dpdk.org <https://mails.dpdk.org/archives/dev/>`_. ++Contributors will need to `register for the mailing list <https://mails.dpdk.org/listinfo/dev>`_ in order to submit patches. ++It is also worth registering for the DPDK `Patchwork <https://patches.dpdk.org/project/dpdk/list/>`_ + + If you are using the GitHub service, you can link your repository to + the ``travis-ci.org`` build service. 
When you push patches to your GitHub +@@ -132,12 +132,12 @@ The source code can be cloned using either of the following: + main repository:: + + git clone git://dpdk.org/dpdk +- git clone http://dpdk.org/git/dpdk ++ git clone https://dpdk.org/git/dpdk + +-sub-repositories (`list <http://git.dpdk.org/next>`_):: ++sub-repositories (`list <https://git.dpdk.org/next>`_):: + + git clone git://dpdk.org/next/dpdk-next-* +- git clone http://dpdk.org/git/next/dpdk-next-* ++ git clone https://dpdk.org/git/next/dpdk-next-* + + Make your Changes + ----------------- +@@ -174,7 +174,7 @@ A good way of thinking about whether a patch should be split is to consider whet + applied without dependencies as a backport. + + It is better to keep the related documentation changes in the same patch +-file as the code, rather than one big documentation patch at then end of a ++file as the code, rather than one big documentation patch at the end of a + patchset. This makes it easier for future maintenance and development of the + code. + +@@ -312,7 +312,7 @@ Patch for Stable Releases + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + All fix patches to the master branch that are candidates for backporting +-should also be CCed to the `stable@dpdk.org <http://mails.dpdk.org/listinfo/stable>`_ ++should also be CCed to the `stable@dpdk.org <https://mails.dpdk.org/listinfo/stable>`_ + mailing list. + In the commit message body the Cc: stable@dpdk.org should be inserted as follows:: + +@@ -406,7 +406,7 @@ files, in order of preference:: + ~/.config/dpdk/devel.config + /etc/dpdk/devel.config. + +-Once the environment variable the script can be run as follows:: ++Once the environment variable is set, the script can be run as follows:: + + devtools/checkpatches.sh ~/patch/ + +@@ -513,7 +513,7 @@ If the patch is in relation to a previous email thread you can add it to the sam + git send-email --to dev@dpdk.org --in-reply-to <1234-foo@bar.com> 000*.patch + + The Message ID can be found in the raw text of emails or at the top of each Patchwork patch, +-`for example <http://patches.dpdk.org/patch/7646/>`_. ++`for example <https://patches.dpdk.org/patch/7646/>`_. + Shallow threading (``--thread --no-chain-reply-to``) is preferred for a patch series. + + Once submitted your patches will appear on the mailing list and in Patchwork. +diff --git a/dpdk/doc/guides/contributing/stable.rst b/dpdk/doc/guides/contributing/stable.rst +index 2ac4f0a88b..88b46aee24 100644 +--- a/dpdk/doc/guides/contributing/stable.rst ++++ b/dpdk/doc/guides/contributing/stable.rst +@@ -96,7 +96,7 @@ The Stable and LTS release are coordinated on the stable@dpdk.org mailing + list. + + All fix patches to the master branch that are candidates for backporting +-should also be CCed to the `stable@dpdk.org <http://mails.dpdk.org/listinfo/stable>`_ ++should also be CCed to the `stable@dpdk.org <https://mails.dpdk.org/listinfo/stable>`_ + mailing list. + + +@@ -107,10 +107,10 @@ A Stable Release will be released by: + + * Tagging the release with YY.MM.n (year, month, number). + * Uploading a tarball of the release to dpdk.org. +-* Sending an announcement to the `announce@dpdk.org <http://mails.dpdk.org/listinfo/announce>`_ ++* Sending an announcement to the `announce@dpdk.org <https://mails.dpdk.org/listinfo/announce>`_ + list. + +-Stable releases are available on the `dpdk.org download page <http://core.dpdk.org/download/>`_. ++Stable releases are available on the `dpdk.org download page <https://core.dpdk.org/download/>`_. 
+
+
+ ABI
+diff --git a/dpdk/doc/guides/cryptodevs/aesni_gcm.rst b/dpdk/doc/guides/cryptodevs/aesni_gcm.rst
+index fb74179b84..77f45b8a26 100644
+--- a/dpdk/doc/guides/cryptodevs/aesni_gcm.rst
++++ b/dpdk/doc/guides/cryptodevs/aesni_gcm.rst
+@@ -45,6 +45,20 @@ can be downloaded in `<https://github.com/01org/intel-ipsec-mb/archive/v0.50.zip
+     make
+     make install
+
++NASM is required to build the library. Depending on the library version, it might
++require a minimum NASM version (e.g. v0.54 requires at least NASM 2.14).
++
++NASM is packaged for most operating systems. However, on some of them the packaged version is too old,
++so a manual installation is required. In that case, NASM can be downloaded from the
++`NASM website <https://www.nasm.us/pub/nasm/releasebuilds/?C=M;O=D>`_.
++Once it is downloaded, extract it and follow these steps:
++
++.. code-block:: console
++
++    ./configure
++    make
++    make install
++
+ As a reference, the following table shows a mapping between the past DPDK versions
+ and the external crypto libraries supported by them:
+
+diff --git a/dpdk/doc/guides/cryptodevs/aesni_mb.rst b/dpdk/doc/guides/cryptodevs/aesni_mb.rst
+index 2c8ccc3d45..012726b3c7 100644
+--- a/dpdk/doc/guides/cryptodevs/aesni_mb.rst
++++ b/dpdk/doc/guides/cryptodevs/aesni_mb.rst
+@@ -69,6 +69,20 @@ can be downloaded from `<https://github.com/01org/intel-ipsec-mb/archive/v0.51.z
+     make
+     make install
+
++NASM is required to build the library. Depending on the library version, it might
++require a minimum NASM version (e.g. v0.54 requires at least NASM 2.14).
++
++NASM is packaged for most operating systems. However, on some of them the packaged version is too old,
++so a manual installation is required. In that case, NASM can be downloaded from the
++`NASM website <https://www.nasm.us/pub/nasm/releasebuilds/?C=M;O=D>`_.
++Once it is downloaded, extract it and follow these steps:
++
++.. code-block:: console
++
++    ./configure
++    make
++    make install
++
+ As a reference, the following table shows a mapping between the past DPDK versions
+ and the Multi-Buffer library version supported by them:
+
+diff --git a/dpdk/doc/guides/eventdevs/index.rst b/dpdk/doc/guides/eventdevs/index.rst
+index f7382dc8a3..ce8b73cb86 100644
+--- a/dpdk/doc/guides/eventdevs/index.rst
++++ b/dpdk/doc/guides/eventdevs/index.rst
+@@ -5,7 +5,7 @@ Event Device Drivers
+ ====================
+
+ The following are a list of event device PMDs, which can be used from an
+-application trough the eventdev API.
++application through the eventdev API.
+
+ .. toctree::
+     :maxdepth: 2
+
+diff --git a/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst b/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst
+index 253328eb10..2ac27f115c 100644
+--- a/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst
++++ b/dpdk/doc/guides/freebsd_gsg/install_from_ports.rst
+@@ -62,7 +62,7 @@ environmental variables should be set as below:
+ ..
note:: + + To install a copy of the DPDK compiled using gcc, please download the +- official DPDK package from http://core.dpdk.org/download/ and install manually using ++ official DPDK package from https://core.dpdk.org/download/ and install manually using + the instructions given in the next chapter, :ref:`building_from_source` + + An example application can therefore be copied to a user's home directory and +diff --git a/dpdk/doc/guides/linux_gsg/eal_args.include.rst b/dpdk/doc/guides/linux_gsg/eal_args.include.rst +index cf421a56eb..7251fe9c2f 100644 +--- a/dpdk/doc/guides/linux_gsg/eal_args.include.rst ++++ b/dpdk/doc/guides/linux_gsg/eal_args.include.rst +@@ -126,7 +126,7 @@ Debugging options + + Specify log level for a specific component. For example:: + +- --log-level eal:8 ++ --log-level lib.eal:debug + + Can be specified multiple times. + +diff --git a/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst b/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst +index 4e9afbdbc1..5fa6e3899d 100644 +--- a/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst ++++ b/dpdk/doc/guides/linux_gsg/nic_perf_intel_platform.rst +@@ -64,7 +64,7 @@ This aligns with the previous output which showed that each channel has one memo + Network Interface Card Requirements + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-Use a `DPDK supported <http://core.dpdk.org/supported/>`_ high end NIC such as the Intel XL710 40GbE. ++Use a `DPDK supported <https://core.dpdk.org/supported/>`_ high end NIC such as the Intel XL710 40GbE. + + Make sure each NIC has been flashed the latest version of NVM/firmware. + +diff --git a/dpdk/doc/guides/nics/enic.rst b/dpdk/doc/guides/nics/enic.rst +index 773f13b0fc..c06363fc27 100644 +--- a/dpdk/doc/guides/nics/enic.rst ++++ b/dpdk/doc/guides/nics/enic.rst +@@ -14,7 +14,7 @@ How to obtain ENIC PMD integrated DPDK + -------------------------------------- + + ENIC PMD support is integrated into the DPDK suite. dpdk-<version>.tar.gz +-should be downloaded from http://core.dpdk.org/download/ ++should be downloaded from https://core.dpdk.org/download/ + + + Configuration information +diff --git a/dpdk/doc/guides/nics/fail_safe.rst b/dpdk/doc/guides/nics/fail_safe.rst +index 6c02d7ef6d..60bbf40f7f 100644 +--- a/dpdk/doc/guides/nics/fail_safe.rst ++++ b/dpdk/doc/guides/nics/fail_safe.rst +@@ -49,7 +49,7 @@ The Fail-safe PMD can be used like most other DPDK virtual devices, by passing a + ``--vdev`` parameter to the EAL when starting the application. The device name + must start with the *net_failsafe* prefix, followed by numbers or letters. This + name must be unique for each device. Each fail-safe instance must have at least one +-sub-device, up to ``RTE_MAX_ETHPORTS-1``. ++sub-device, and at most two. + + A sub-device can be any legal DPDK device, including possibly another fail-safe + instance. 
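For illustration only (the PCI address and vdev names below are invented for this example, not taken from the patch), a fail-safe port with its maximum of two sub-devices could be declared when launching testpmd like this:

.. code-block:: console

    testpmd -l 0-1 --vdev 'net_failsafe0,dev(0000:84:00.0),dev(net_ring0)' -- -i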
+diff --git a/dpdk/doc/guides/nics/features/avf.ini b/dpdk/doc/guides/nics/features/avf.ini
+index 35ceada24f..28d072f37e 100644
+--- a/dpdk/doc/guides/nics/features/avf.ini
++++ b/dpdk/doc/guides/nics/features/avf.ini
+@@ -15,7 +15,6 @@ TSO = Y
+ Promiscuous mode = Y
+ Allmulticast mode = Y
+ Unicast MAC filter = Y
+-Multicast MAC filter = Y
+ RSS hash = Y
+ RSS key update = Y
+ RSS reta update = Y
+diff --git a/dpdk/doc/guides/nics/features/avf_vec.ini b/dpdk/doc/guides/nics/features/avf_vec.ini
+index 3050bc4a60..3b051f0d6f 100644
+--- a/dpdk/doc/guides/nics/features/avf_vec.ini
++++ b/dpdk/doc/guides/nics/features/avf_vec.ini
+@@ -15,7 +15,6 @@ TSO = Y
+ Promiscuous mode = Y
+ Allmulticast mode = Y
+ Unicast MAC filter = Y
+-Multicast MAC filter = Y
+ RSS hash = Y
+ RSS key update = Y
+ RSS reta update = Y
+diff --git a/dpdk/doc/guides/nics/features/i40e.ini b/dpdk/doc/guides/nics/features/i40e.ini
+index 16eab7f43b..b0a9c20e6a 100644
+--- a/dpdk/doc/guides/nics/features/i40e.ini
++++ b/dpdk/doc/guides/nics/features/i40e.ini
+@@ -17,7 +17,6 @@ TSO = Y
+ Promiscuous mode = Y
+ Allmulticast mode = Y
+ Unicast MAC filter = Y
+-Multicast MAC filter = Y
+ RSS hash = Y
+ RSS key update = Y
+ RSS reta update = Y
+diff --git a/dpdk/doc/guides/nics/features/igb.ini b/dpdk/doc/guides/nics/features/igb.ini
+index c53fd0757e..b66aa1de19 100644
+--- a/dpdk/doc/guides/nics/features/igb.ini
++++ b/dpdk/doc/guides/nics/features/igb.ini
+@@ -15,6 +15,7 @@ TSO = Y
+ Promiscuous mode = Y
+ Allmulticast mode = Y
+ Unicast MAC filter = Y
++Multicast MAC filter = Y
+ RSS hash = Y
+ RSS key update = Y
+ RSS reta update = Y
+diff --git a/dpdk/doc/guides/nics/features/ixgbe.ini b/dpdk/doc/guides/nics/features/ixgbe.ini
+index 4143111769..2ec3552599 100644
+--- a/dpdk/doc/guides/nics/features/ixgbe.ini
++++ b/dpdk/doc/guides/nics/features/ixgbe.ini
+@@ -17,6 +17,7 @@ TSO = Y
+ Promiscuous mode = Y
+ Allmulticast mode = Y
+ Unicast MAC filter = Y
++Multicast MAC filter = Y
+ RSS hash = Y
+ RSS key update = Y
+ RSS reta update = Y
+diff --git a/dpdk/doc/guides/nics/i40e.rst b/dpdk/doc/guides/nics/i40e.rst
+index 62e90d9fd2..01fa817a6d 100644
+--- a/dpdk/doc/guides/nics/i40e.rst
++++ b/dpdk/doc/guides/nics/i40e.rst
+@@ -642,6 +642,15 @@ Use 16 Bytes RX Descriptor Size
+ As i40e PMD supports both 16 and 32 bytes RX descriptor sizes, and 16 bytes size can provide helps to high performance of small packets.
+ Configuration of ``CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC`` in config files can be changed to use 16 bytes size RX descriptors.
+
++Input set requirement of each pctype for FDIR
++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++
++Each PCTYPE can only have one specific FDIR input set at one time.
++For example, if you create two rte_flow rules with different input sets for one PCTYPE,
++the second rule will fail with the message "Conflict with the first rule's input set",
++which means the current rule's input set conflicts with the first rule's.
++Remove the first rule if you want to change the input set of the PCTYPE, as illustrated below.
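As a sketch of the conflict described above (the port number and match fields are hypothetical), the second command below tries to switch the input set of the same IPv4-UDP PCTYPE from source to destination address, so it is expected to be rejected with the "Conflict with the first rule's input set" message:

.. code-block:: console

    testpmd> flow create 0 ingress pattern eth / ipv4 src is 192.168.0.1 / udp / end actions queue index 1 / end
    testpmd> flow create 0 ingress pattern eth / ipv4 dst is 192.168.0.2 / udp / end actions queue index 2 / end

Destroying the first rule before creating the second one avoids the conflict.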
++
+ Example of getting best performance with l3fwd example
+ ------------------------------------------------------
+
+diff --git a/dpdk/doc/guides/prog_guide/cryptodev_lib.rst b/dpdk/doc/guides/prog_guide/cryptodev_lib.rst
+index 7a95053ad5..53d797bcc1 100644
+--- a/dpdk/doc/guides/prog_guide/cryptodev_lib.rst
++++ b/dpdk/doc/guides/prog_guide/cryptodev_lib.rst
+@@ -1043,4 +1043,4 @@ Asymmetric Crypto Device API
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ The cryptodev Library API is described in the
+-`DPDK API Reference <http://doc.dpdk.org/api/>`_
++`DPDK API Reference <https://doc.dpdk.org/api/>`_
+diff --git a/dpdk/doc/guides/prog_guide/env_abstraction_layer.rst b/dpdk/doc/guides/prog_guide/env_abstraction_layer.rst
+index a7536a80e1..67e65491a3 100644
+--- a/dpdk/doc/guides/prog_guide/env_abstraction_layer.rst
++++ b/dpdk/doc/guides/prog_guide/env_abstraction_layer.rst
+@@ -344,6 +344,37 @@ Misc Functions
+
+ Locks and atomic operations are per-architecture (i686 and x86_64).
+
++IOVA Mode Detection
++~~~~~~~~~~~~~~~~~~~
++
++IOVA Mode is selected by considering what the currently usable devices on the
++system require and/or support.
++
++Below is the two-step heuristic for this choice.
++
++For the first step, EAL asks each bus its requirement in terms of IOVA mode
++and decides on a preferred IOVA mode.
++
++- if all buses report RTE_IOVA_PA, then the preferred IOVA mode is RTE_IOVA_PA,
++- if all buses report RTE_IOVA_VA, then the preferred IOVA mode is RTE_IOVA_VA,
++- if all buses report RTE_IOVA_DC, i.e. no bus expressed a preference, then the
++  preferred mode is RTE_IOVA_DC,
++- if the buses disagree (at least one wants RTE_IOVA_PA and at least one wants
++  RTE_IOVA_VA), then the preferred IOVA mode is RTE_IOVA_DC (see the check on
++  Physical Addresses availability below),
++
++The second step checks if the preferred mode complies with the Physical
++Addresses availability, since those are only available to the root user in recent
++kernels.
++
++- if the preferred mode is RTE_IOVA_PA but there is no access to Physical
++  Addresses, then EAL init fails early, since later probing of the devices
++  would fail anyway,
++- if the preferred mode is RTE_IOVA_DC then, based on the Physical Addresses
++  availability, the preferred mode is adjusted to RTE_IOVA_PA or RTE_IOVA_VA.
++  If the buses disagreed on the IOVA Mode at the first step,
++  some of the buses won't work because of this decision.
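The mode settled on by this heuristic can be checked from application code. Below is a minimal sketch (assuming a standard DPDK build; error handling trimmed) using rte_eal_iova_mode(), which returns the mode EAL selected at init:

.. code-block:: c

    #include <stdio.h>
    #include <rte_eal.h>
    #include <rte_debug.h>

    int
    main(int argc, char **argv)
    {
        /* EAL runs the bus/IOVA heuristic described above during init. */
        if (rte_eal_init(argc, argv) < 0)
            rte_panic("Cannot init EAL\n");

        /* Report the IOVA mode that was finally selected. */
        printf("IOVA mode: %s\n",
               rte_eal_iova_mode() == RTE_IOVA_VA ? "VA" : "PA");

        return 0;
    }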
++ + IOVA Mode Configuration + ~~~~~~~~~~~~~~~~~~~~~~~ + +diff --git a/dpdk/doc/guides/rel_notes/release_18_11.rst b/dpdk/doc/guides/rel_notes/release_18_11.rst +index 9e3686bed1..b2d3851c1c 100644 +--- a/dpdk/doc/guides/rel_notes/release_18_11.rst ++++ b/dpdk/doc/guides/rel_notes/release_18_11.rst +@@ -2964,3 +2964,457 @@ Fixes skipped and status unresolved + * 207b1c813 test: fix build without ring PMD + * 819d0d1d5 net/ixgbe: fix blocking system events + * 050bfe033 net/mlx5: fix tunnel flow priority ++ ++18.11.8 Release Notes ++--------------------- ++ ++18.11.8 Fixes ++~~~~~~~~~~~~~ ++ ++* vhost: check log mmap offset and size overflow ++* vhost/crypto: validate keys lengths ++* vhost: fix vring index check ++ ++18.11.8 Validation ++~~~~~~~~~~~~~~~~~~ ++ ++* Red Hat(R) Testing ++ ++ * RHEL 8 ++ * Functionality ++ ++ * PF assignment ++ * VF assignment ++ * vhost single/multi queues and cross-NUMA ++ * vhostclient reconnect ++ * vhost live migration with single/multi queues and cross-NUMA ++ * OVS PVP ++ ++ * Tested NICs ++ ++ * X540-AT2 NIC(ixgbe, 10G) ++ ++* Intel(R) Testing ++ ++ * Virtio features ++ ++ * vhost/virtio loopback test with virtio user as server mode ++ * loopback multi queues ++ * loopback multi paths port restart ++ * vhost/virtio pvp multi-paths performance ++ * pvp multi-queues and port restart ++ * vhost dequeue zero copy ++ * pvp share lib ++ * pvp vhost user reconnect ++ * pvp test with 4k pages ++ * pvp test with 2M hugepages ++ * pvp virtio bonding ++ * pvp test with diff qemu version ++ * vhost enqueue interrupt ++ * vhost event idx interrupt ++ * vhost virtio pmd interrupt ++ * vhost virtio user interrupt ++ * virtio event idx interrupt ++ * virtio user for container networking ++ * virtio user as exceptional path ++ * vhost xstats ++ * virtio-pmd multi-process ++ * vm2vm virtio pmd ++ * vm2vm virtio-net iperf ++ * vm2vm virtio-user ++ * vhost user live migration ++ ++18.11.8 Known Issues ++~~~~~~~~~~~~~~~~~~~~ ++ ++* DPDK 18.11.8 contains fixes up to DPDK v20.02 and fixes for CVE-2020-10722, CVE-2020-10723 and CVE-2020-10724 ++* Issues identified/fixed in DPDK master branch after DPDK v20.02 may be present in DPDK 18.11.8 ++ ++18.11.8 Fixes skipped and status unresolved ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++* dcfbc594f net/iavf: fix queue interrupt for ice ++* b149a7064 eal/freebsd: add config reattach in secondary process ++* a135e050a examples/ipsec-secgw: fix packet length ++* 9d10f53e4 test/metrics: fix second run ++* ea81c1b81 net/mlx5: fix NVGRE matching ++* 721c95301 net/mlx5: fix Rx scatter mode validation ++* be048a1aa net/virtio: fix descriptor addressed in Tx ++* 6080796f6 mem: make base address hint OS specific ++* 6d3f9917f eal: fix memory config allocation for multi-process ++* 1526dd053 net/virtio: fix Tx checksum offloads ++* f0617163b mempool/dpaa2: report error on endless loop in mbuf release ++* 05817057f net/ena: fix indication of bad L4 Rx checksums ++* 9e0d81c1a net/mlx5: fix selection between encap and decap ++* 7392ad06f app/testpmd: use better randomness for Tx split ++* dcd05da0a app/testpmd: fix GENEVE flow item ++* 2e02a2aff ethdev: fix VLAN offloads set if no driver callback ++* ec8615607 crypto/dpaa_sec: fix IOVA conversions ++* 06387be8e net/mlx5: fix encap/decap validation ++* 7593cf1d3 net/mlx5: fix legacy multi-packet write session ++* e21492a51 net/mlx: fix overlinking with meson and glue dlopen ++* 150c9ac2d app/testpmd: update Rx offload after setting MTU ++* 207b1c813 test: fix build without ring PMD ++* 
819d0d1d5 net/ixgbe: fix blocking system events ++* 050bfe033 net/mlx5: fix tunnel flow priority ++ ++18.11.9 Release Notes ++--------------------- ++ ++18.11.9 Fixes ++~~~~~~~~~~~~~ ++ ++* app/crypto-perf: fix display of sample test vector ++* app/eventdev: check Tx adapter service ID ++* app: fix usage help of options separated by dashes ++* app/pipeline: fix build with gcc 10 ++* app: remove extra new line after link duplex ++* app/testpmd: add parsing for QinQ VLAN headers ++* app/testpmd: disable gcc 10 -fno-common build errors ++* app/testpmd: fix DCB set ++* app/testpmd: fix memory failure handling for i40e DDP ++* app/testpmd: fix statistics after reset ++* bbdev: fix doxygen comments ++* build: disable gcc 10 zero-length-bounds warning ++* bus/fslmc: fix dereferencing null pointer ++* bus/fslmc: fix size of qman fq descriptor ++* bus/pci: fix devargs on probing again ++* bus/pci: fix UIO resource access from secondary process ++* bus/pci: fix VF memory access ++* bus/vmbus: fix comment spelling ++* contigmem: cleanup properly when load fails ++* crypto/caam_jr: fix check of file descriptors ++* crypto/caam_jr: fix IRQ functions return type ++* crypto/ccp: fix fd leak on probe failure ++* cryptodev: fix SHA-1 digest enum comment ++* crypto/kasumi: fix extern declaration ++* crypto/octeontx: fix build with gcc 10 ++* crypto/octeontx: fix gcc 10 -fno-common build errors ++* crypto/openssl: fix out-of-place encryption ++* crypto/qat: fix cipher descriptor for ZUC and SNOW ++* devtools: fix symbol map change check ++* doc: add i40e limitation for flow director ++* doc: add NASM installation steps ++* doc: fix API index ++* doc: fix build with doxygen 1.8.18 ++* doc: fix log level example in Linux guide ++* doc: fix matrix CSS for recent sphinx ++* doc: fix multicast filter feature announcement ++* doc: fix number of failsafe sub-devices ++* doc: fix sphinx compatibility ++* doc: fix typo in contributors guide ++* doc: fix typo in contributors guide ++* doc: prefer https when pointing to dpdk.org ++* drivers: add crypto as dependency for event drivers ++* drivers/crypto: disable gcc 10 fno-common errors ++* drivers/crypto: fix log type variables for -fno-common ++* drivers: fix log type variables for -fno-common ++* eal/arm64: fix precise TSC ++* eal: fix C++17 compilation ++* eal: fix comments spelling ++* eal: fix log message print for regex ++* eal: fix typo in endian conversion macros ++* eal/ppc: fix build with gcc 9.3 ++* eal/x86: ignore gcc 10 stringop-overflow warnings ++* ethdev: fix build when vtune profiling is on ++* ethdev: fix spelling ++* eventdev: fix probe and remove for secondary process ++* event/dsw: avoid reusing previously recorded events ++* event/dsw: fix enqueue burst return value ++* event/dsw: remove redundant control ring poll ++* event/dsw: remove unnecessary read barrier ++* examples/eventdev: fix build with gcc 10 ++* examples/eventdev: fix crash on exit ++* examples/ip_pipeline: remove check of null response ++* examples/kni: fix crash during MTU set ++* examples/kni: fix MTU change to setup Tx queue ++* examples/l2fwd-keepalive: fix mbuf pool size ++* examples: remove extra new line after link duplex ++* examples/vmdq: fix output of pools/queues ++* examples/vmdq: fix RSS configuration ++* examples/vm_power: drop Unix path limit redefinition ++* examples/vm_power: fix build because of missing include ++* examples/vm_power: fix build with -fno-common ++* fix same typo in multiple places ++* fix various typos found by Lintian ++* kni: fix ethtool build 
with kernel 5.6 ++* kni: fix ethtool dev_open build error ++* kni: fix ethtool maybe-uninitialized warnings ++* kni: fix ethtool pointer type build error ++* kni: fix gcc 10 ethtool build error ++* kvargs: fix buffer overflow when parsing list ++* kvargs: fix invalid token parsing on FreeBSD ++* kvargs: fix strcmp helper documentation ++* log: fix level picked with globbing on type register ++* lpm6: fix comments spelling ++* lpm6: fix size of tbl8 group ++* mem: fix overflow on allocation ++* mem: mark pages as not accessed when freeing memory ++* mem: mark pages as not accessed when reserving VA ++* mempool/dpaa2: install missing header with meson ++* mlx5: fix build with -fno-common ++* net/avp: fix gcc 10 maybe-uninitialized warning ++* net/bnxt: do not use PMD log type ++* net/bnxt: fix error log for command timeout ++* net/bnxt: fix FW version query ++* net/bnxt: fix using RSS config struct ++* net/bnxt: fix VLAN add when port is stopped ++* net/dpaa2: fix 10G port negotiation ++* net/dpaa: use dynamic log type ++* net/e1000: fix port hotplug for multi-process ++* net/ena/base: fix documentation of functions ++* net/ena/base: fix indentation in CQ polling ++* net/ena/base: fix indentation of multiple defines ++* net/ena/base: fix testing for supported hash function ++* net/ena/base: make allocation macros thread-safe ++* net/ena/base: prevent allocation of zero sized memory ++* net/ena: set IO ring size to valid value ++* net/failsafe: fix fd leak ++* net/i40e/base: update copyright ++* net/i40e: fix flow director for ARP packets ++* net/i40e: fix flush of flow director filter ++* net/i40e: fix queue related exception handling ++* net/i40e: fix setting L2TAG ++* net/i40e: fix wild pointer ++* net/i40e: fix X722 performance ++* net/i40e: relax barrier in Tx for NEON ++* net/iavf: fix setting L2TAG ++* net/iavf: fix stats query error code ++* net/ixgbe/base: update copyright ++* net/ixgbe: check driver type in MACsec API ++* net/ixgbe: fix link status synchronization on BSD ++* net/ixgbe: fix statistics in flow control mode ++* net/mlx4: fix build with -fno-common ++* net/mlx4: fix drop queue error handling ++* net/mlx4: remove device register remap ++* net/mlx5: fix CVLAN tag set in IP item translation ++* net/mlx5: fix mask used for IPv6 item validation ++* net/mlx5: fix matching for UDP tunnels with Verbs ++* net/mlx5: fix mlx5 devices port naming ++* net/mlx5: fix recursive inclusion of header file ++* net/mlx5: fix RSS enablement ++* net/mlx5: fix Tx queue release debug log timing ++* net/mlx5: fix validation of VXLAN/VXLAN-GPE specs ++* net/mlx5: remove device register remap ++* net/mvneta: do not use PMD log type ++* net/mvpp2: fix build with gcc 10 ++* net/netvsc: avoid possible live lock ++* net/netvsc: fix comment spelling ++* net/netvsc: fix memory free on device close ++* net/netvsc: handle Rx packets during multi-channel setup ++* net/netvsc: handle Tx completions based on burst size ++* net/netvsc: propagate descriptor limits from VF ++* net/netvsc: remove process event optimization ++* net/netvsc: split send buffers from Tx descriptors ++* net/null: fix secondary burst function selection ++* net/null: remove redundant check ++* net/octeontx: fix dangling pointer on init failure ++* net/octeontx: fix meson build for disabled drivers ++* net/qede: fix link state configuration ++* net/qede: fix port reconfiguration ++* net/ring: fix device pointer on allocation ++* net/sfc/base: fix manual filter delete in EF10 ++* net/sfc/base: handle manual and auto filter clashes in EF10 
++* net/sfc/base: reduce filter priorities to implemented only ++* net/sfc/base: refactor filter lookup loop in EF10 ++* net/sfc/base: reject automatic filter creation by users ++* net/sfc: fix initialization error path ++* net/sfc: fix reported promiscuous/multicast mode ++* net/sfc: fix Rx queue start failure path ++* net/softnic: fix memory leak for thread ++* net/softnic: fix resource leak for pipeline ++* net/tap: do not use PMD log type ++* net/tap: fix check for mbuf number of segment ++* net/tap: fix crash in flow destroy ++* net/tap: fix fd leak on creation failure ++* net/tap: fix file close on remove ++* net/tap: fix mbuf and mem leak during queue release ++* net/tap: fix mbuf double free when writev fails ++* net/tap: fix queues fd check before close ++* net/tap: fix unexpected link handler ++* net/tap: remove unused assert ++* net/thunderx: use dynamic log type ++* net/vhost: fix potential memory leak on close ++* net/virtio: do not use PMD log type ++* net/virtio: fix outdated comment ++* net/virtio-user: fix devargs parsing ++* net/vmxnet3: handle bad host framing ++* pci: accept 32-bit domain numbers ++* pci: fix build on ppc ++* pci: reject negative values in PCI id ++* remove references to private PCI probe function ++* security: fix crash at accessing non-implemented ops ++* security: fix return types in documentation ++* security: fix session counter ++* security: fix verification of parameters ++* service: fix crash on exit ++* service: fix identification of service running on other lcore ++* service: fix race condition for MT unsafe service ++* service: remove rte prefix from static functions ++* test/crypto: fix flag check ++* test/flow_classify: enable multi-sockets system ++* test/kvargs: fix invalid cases check ++* test/kvargs: fix to consider empty elements as valid ++* test: remove redundant macro ++* test: skip some subtests in no-huge mode ++* usertools: check for pci.ids in /usr/share/misc ++* version: 18.11.9-rc1 ++* version: 18.11.9-rc2 ++* vfio: fix race condition with sysfs ++* vfio: fix use after free with multiprocess ++* vhost/crypto: add missing user protocol flag ++* vhost: fix peer close check ++* vhost: fix zero-copy server mode ++* vhost: make IOTLB cache name unique among processes ++* vhost: prevent zero-copy with incompatible client mode ++* vhost: remove unused variable ++* vhost: remove zero-copy and client mode restriction ++ ++18.11.9 Validation ++~~~~~~~~~~~~~~~~~~ ++ ++* Intel(R) Testing ++ ++ * Basic Intel(R) NIC(ixgbe and i40e) testing ++ ++ * PF (i40e) ++ * PF (ixgbe) ++ * VF (i40e) ++ * Compile Testing ++ * Intel NIC single core/NIC performance ++ ++ * Basic cryptodev and virtio testing ++ ++ * vhost/virtio basic loopback, PVP and performance test ++ * cryptodev function ++ * cryptodev performance ++ ++* Red Hat(R) Testing ++ ++ * RHEL 8 ++ * QEMU 5.0 ++ * Functionality ++ ++ * PF assignment ++ * VF assignment ++ * vhost single/multi queues and cross-NUMA ++ * vhostclient reconnect ++ * vhost live migration with single/multi queues and cross-NUMA ++ * OVS PVP ++ ++ * Tested NICs ++ ++ * X540-AT2 NIC(ixgbe, 10G) ++ ++* Mellanox(R) Testing ++ ++ * Basic functionality with testpmd ++ ++ * Tx/Rx ++ * xstats ++ * Timestamps ++ * Link status ++ * RTE flow and flow_director ++ * RSS ++ * VLAN stripping and insertion ++ * Checksum/TSO ++ * ptype ++ ++ * l3fwd-power example app tests ++ * Multi-process ++ ++ * ConnectX-4 Lx ++ ++ * RHEL 7.4 ++ * Driver MLNX_OFED_LINUX-5.0-2.1.8.0 ++ * fw 14.27.1016 ++ ++ * ConnectX-5 ++ ++ * RHEL 7.4 ++ * 
Driver MLNX_OFED_LINUX-5.0-1.0.0.0 ++ * fw 16.27.2008 ++ ++* Intel(R) Testing with Open vSwitch ++ ++ * OVS testing with OVS branches 2.12 and 2.11 with VSPERF ++ ++ * Tested NICs ++ ++ * i40e (X710) ++ * ixgbe (82599ES) ++ ++ * Functionality ++ ++ * P2P throughput ++ * P2P multiqueue ++ * PVP RSS ++ * Vhostuserclient reconnect ++ * Flow Control ++ ++18.11.9 Known Issues ++~~~~~~~~~~~~~~~~~~~~ ++ ++* DPDK 18.11.9 contains fixes up to DPDK 20.05 ++* Issues identified/fixed in DPDK master branch after DPDK 20.05 may be present in DPDK 18.11.9 ++ ++18.11.9 Fixes skipped and status unresolved ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++* dcfbc594f net/iavf: fix queue interrupt for ice ++* b149a7064 eal/freebsd: add config reattach in secondary process ++* a135e050a examples/ipsec-secgw: fix packet length ++* 9d10f53e4 test/metrics: fix second run ++* ea81c1b81 net/mlx5: fix NVGRE matching ++* 721c95301 net/mlx5: fix Rx scatter mode validation ++* be048a1aa net/virtio: fix descriptor addressed in Tx ++* 6080796f6 mem: make base address hint OS specific ++* 6d3f9917f eal: fix memory config allocation for multi-process ++* 1526dd053 net/virtio: fix Tx checksum offloads ++* f0617163b mempool/dpaa2: report error on endless loop in mbuf release ++* 05817057f net/ena: fix indication of bad L4 Rx checksums ++* 9e0d81c1a net/mlx5: fix selection between encap and decap ++* 7392ad06f app/testpmd: use better randomness for Tx split ++* dcd05da0a app/testpmd: fix GENEVE flow item ++* 2e02a2aff (Xavier) ethdev: fix VLAN offloads set if no driver callback ++* ec8615607 crypto/dpaa_sec: fix IOVA conversions ++* 06387be8e net/mlx5: fix encap/decap validation ++* 7593cf1d3 net/mlx5: fix legacy multi-packet write session ++* e21492a51 net/mlx: fix overlinking with meson and glue dlopen ++* 150c9ac2d (Xavier) app/testpmd: update Rx offload after setting MTU ++* 207b1c813 test: fix build without ring PMD ++* 819d0d1d5 net/ixgbe: fix blocking system events ++* 050bfe033 net/mlx5: fix tunnel flow priority ++* bade47a75 net/i40e: relax barrier in Tx ++* 48f9faddc net/bnxt: fix MAC address setting when port is stopped ++* f6752f660 net/sfc: set priority of created filters to manual ++* 27fb5dd28 test: skip some subtests in no-huge mode ++* 4236694f0 mem: preallocate VA space in no-huge mode ++* 4448a202b eal: remove useless makefiles ++* efa8c72f1 net/ixgbe: fix link status inconsistencies ++* 9c4971e52 net/mlx5: update VLAN and encap actions validation ++* 00437823c net/mlx5: use open/read/close for ib stats query ++* 24cb500c1 net/tap: fix mbuf double free when writev fails ++* ae08c73e6 net/i40e: fix flow director initialisation ++* d68ab7a9f net/ixgbe: fix resource leak after thread exits normally ++* 036d82365 mempool: remove inline functions from export list ++* d256c73c1 net/bnxt: fix memory leak during queue restart ++* bc75bdb60 net/bnxt: fix VNIC Rx queue count on VNIC free ++* 90ecace4f examples/fips_validation: fix parsing of algorithms ++* d70a869db net/ixgbe: fix link state timing on fiber ports ++* 92818d839 net/mlx5: fix match on empty VLAN item in DV mode ++* 29fdc5bf4 test/crypto: fix statistics case ++* 3ae4beb07 vhost: check log mmap offset and size overflow ++* ff55182ce net/mlx5: fix VLAN flow action with wildcard VLAN item ++* a60704d1a net/qede: fix assignment of Rx/Tx handlers ++* 205b74295 net/bnxt: fix allocation of LED config info ++* 96477b5dd net/bnxt: fix allocation of COS queue info ++* 986fa3ba8 net/bnxt: fix allocation of link info struct ++* e8fe0e067 net/bnxt: fix allocation 
of PF info struct ++* 86421846c net/bnxt: fix storing MAC address twice ++* da7018ec2 net/i40e: fix queue region in RSS flow ++* 3acf10719 common/mlx5: fix netlink buffer allocation from stack ++* 20cb28a0e net/bnxt: fix Rx ring producer index ++* 24cb500c1 net/tap: fix mbuf double free when writev fails ++* be4ef2e0d net/i40e: fix flow director enabling ++* f6e63e59e app/testpmd: fix global variable multiple definitions +diff --git a/dpdk/doc/guides/sample_app_ug/l2_forward_real_virtual.rst b/dpdk/doc/guides/sample_app_ug/l2_forward_real_virtual.rst +index 87653ec7bd..9729095872 100644 +--- a/dpdk/doc/guides/sample_app_ug/l2_forward_real_virtual.rst ++++ b/dpdk/doc/guides/sample_app_ug/l2_forward_real_virtual.rst +@@ -194,9 +194,6 @@ in the *DPDK Programmer's Guide* - Rel 1.4 EAR and the *DPDK API Reference*. + + .. code-block:: c + +- if (rte_pci_probe() < 0) +- rte_exit(EXIT_FAILURE, "Cannot probe PCI\n"); +- + /* reset l2fwd_dst_ports */ + + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) +@@ -226,12 +223,6 @@ in the *DPDK Programmer's Guide* - Rel 1.4 EAR and the *DPDK API Reference*. + rte_eth_dev_info_get((uint8_t) portid, &dev_info); + } + +-Observe that: +- +-* rte_igb_pmd_init_all() simultaneously registers the driver as a PCI driver and as an Ethernet* Poll Mode Driver. +- +-* rte_pci_probe() parses the devices on the PCI bus and initializes recognized devices. +- + The next step is to configure the RX and TX queues. + For each port, there is only one RX queue (only one lcore is able to poll a given port). + The number of TX queues depends on the number of available lcores. +diff --git a/dpdk/doc/guides/sample_app_ug/link_status_intr.rst b/dpdk/doc/guides/sample_app_ug/link_status_intr.rst +index 695c088e88..45f2439a32 100644 +--- a/dpdk/doc/guides/sample_app_ug/link_status_intr.rst ++++ b/dpdk/doc/guides/sample_app_ug/link_status_intr.rst +@@ -88,9 +88,6 @@ To fully understand this code, it is recommended to study the chapters that rela + + .. code-block:: c + +- if (rte_pci_probe() < 0) +- rte_exit(EXIT_FAILURE, "Cannot probe PCI\n"); +- + /* + * Each logical core is assigned a dedicated TX queue on each port. + */ +@@ -115,10 +112,6 @@ To fully understand this code, it is recommended to study the chapters that rela + rte_eth_dev_info_get((uint8_t) portid, &dev_info); + } + +-Observe that: +- +-* rte_pci_probe() parses the devices on the PCI bus and initializes recognized devices. +- + The next step is to configure the RX and TX queues. + For each port, there is only one RX queue (only one lcore is able to poll a given port). + The number of TX queues depends on the number of available lcores. +diff --git a/dpdk/doc/guides/sample_app_ug/multi_process.rst b/dpdk/doc/guides/sample_app_ug/multi_process.rst +index 9c374da6f7..f2a79a6397 100644 +--- a/dpdk/doc/guides/sample_app_ug/multi_process.rst ++++ b/dpdk/doc/guides/sample_app_ug/multi_process.rst +@@ -209,7 +209,7 @@ How the Application Works + ^^^^^^^^^^^^^^^^^^^^^^^^^ + + The initialization calls in both the primary and secondary instances are the same for the most part, +-calling the rte_eal_init(), 1 G and 10 G driver initialization and then rte_pci_probe() functions. ++calling the rte_eal_init(), 1 G and 10 G driver initialization and then probing devices. + Thereafter, the initialization done depends on whether the process is configured as a primary or secondary instance. 
+ + In the primary instance, a memory pool is created for the packet mbufs and the network ports to be used are initialized - +diff --git a/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst +index c327d1f4df..0d71e20c64 100644 +--- a/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst ++++ b/dpdk/doc/guides/testpmd_app_ug/testpmd_funcs.rst +@@ -238,7 +238,7 @@ Display the RSS hash functions and RSS hash key of a port:: + clear port + ~~~~~~~~~~ + +-Clear the port statistics for a given port or for all ports:: ++Clear the port statistics and forward engine statistics for a given port or for all ports:: + + testpmd> clear port (info|stats|xstats|fdir|stat_qmap) (port_id|all) + +diff --git a/dpdk/drivers/Makefile b/dpdk/drivers/Makefile +index 7d5da5d9f5..cfc24b2d0b 100644 +--- a/dpdk/drivers/Makefile ++++ b/dpdk/drivers/Makefile +@@ -19,7 +19,7 @@ DEPDIRS-common/qat := bus mempool + DIRS-$(CONFIG_RTE_LIBRTE_COMPRESSDEV) += compress + DEPDIRS-compress := bus mempool + DIRS-$(CONFIG_RTE_LIBRTE_EVENTDEV) += event +-DEPDIRS-event := common bus mempool net ++DEPDIRS-event := common bus mempool net crypto + DIRS-$(CONFIG_RTE_LIBRTE_RAWDEV) += raw + DEPDIRS-raw := common bus mempool net event + +diff --git a/dpdk/drivers/bus/fslmc/qbman/qbman_debug.c b/dpdk/drivers/bus/fslmc/qbman/qbman_debug.c +index 0bb2ce880f..34374ae4b6 100644 +--- a/dpdk/drivers/bus/fslmc/qbman/qbman_debug.c ++++ b/dpdk/drivers/bus/fslmc/qbman/qbman_debug.c +@@ -20,26 +20,27 @@ struct qbman_fq_query_desc { + uint8_t verb; + uint8_t reserved[3]; + uint32_t fqid; +- uint8_t reserved2[57]; ++ uint8_t reserved2[56]; + }; + + int qbman_fq_query_state(struct qbman_swp *s, uint32_t fqid, + struct qbman_fq_query_np_rslt *r) + { + struct qbman_fq_query_desc *p; ++ struct qbman_fq_query_np_rslt *var; + + p = (struct qbman_fq_query_desc *)qbman_swp_mc_start(s); + if (!p) + return -EBUSY; + + p->fqid = fqid; +- *r = *(struct qbman_fq_query_np_rslt *)qbman_swp_mc_complete(s, p, +- QBMAN_FQ_QUERY_NP); +- if (!r) { ++ var = qbman_swp_mc_complete(s, p, QBMAN_FQ_QUERY_NP); ++ if (!var) { + pr_err("qbman: Query FQID %d NP fields failed, no response\n", + fqid); + return -EIO; + } ++ *r = *var; + + /* Decode the outcome */ + QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != QBMAN_FQ_QUERY_NP); +diff --git a/dpdk/drivers/bus/ifpga/ifpga_bus.c b/dpdk/drivers/bus/ifpga/ifpga_bus.c +index 55d3abf9fe..1fddde60a5 100644 +--- a/dpdk/drivers/bus/ifpga/ifpga_bus.c ++++ b/dpdk/drivers/bus/ifpga/ifpga_bus.c +@@ -24,6 +24,7 @@ + #include <rte_kvargs.h> + #include <rte_alarm.h> + #include <rte_string_fns.h> ++#include <rte_debug.h> + + #include "rte_rawdev.h" + #include "rte_rawdev_pmd.h" +diff --git a/dpdk/drivers/bus/pci/bsd/pci.c b/dpdk/drivers/bus/pci/bsd/pci.c +index d09f8ee5a6..42f435334a 100644 +--- a/dpdk/drivers/bus/pci/bsd/pci.c ++++ b/dpdk/drivers/bus/pci/bsd/pci.c +@@ -376,13 +376,20 @@ rte_pci_scan(void) + return -1; + } + +-/* +- * Get iommu class of PCI devices on the bus. +- */ ++bool ++pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) ++{ ++ return false; ++} ++ + enum rte_iova_mode +-rte_pci_get_iommu_class(void) ++pci_device_iova_mode(const struct rte_pci_driver *pdrv __rte_unused, ++ const struct rte_pci_device *pdev) + { + /* Supports only RTE_KDRV_NIC_UIO */ ++ if (pdev->kdrv != RTE_KDRV_NIC_UIO) ++ RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? 
Defaulting to IOVA as 'PA'\n"); ++ + return RTE_IOVA_PA; + } + +diff --git a/dpdk/drivers/bus/pci/linux/pci.c b/dpdk/drivers/bus/pci/linux/pci.c +index 9b1c7c839a..f50c7090e4 100644 +--- a/dpdk/drivers/bus/pci/linux/pci.c ++++ b/dpdk/drivers/bus/pci/linux/pci.c +@@ -378,6 +378,11 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) + */ + RTE_LOG(ERR, EAL, "Unexpected device scan at %s!\n", + filename); ++ else if (dev2->device.devargs != ++ dev->device.devargs) { ++ rte_devargs_remove(dev2->device.devargs); ++ pci_name_set(dev2); ++ } + } + free(dev); + } +@@ -497,93 +502,13 @@ rte_pci_scan(void) + return -1; + } + +-/* +- * Is pci device bound to any kdrv +- */ +-static inline int +-pci_one_device_is_bound(void) +-{ +- struct rte_pci_device *dev = NULL; +- int ret = 0; +- +- FOREACH_DEVICE_ON_PCIBUS(dev) { +- if (dev->kdrv == RTE_KDRV_UNKNOWN || +- dev->kdrv == RTE_KDRV_NONE) { +- continue; +- } else { +- ret = 1; +- break; +- } +- } +- return ret; +-} +- +-/* +- * Any one of the device bound to uio +- */ +-static inline int +-pci_one_device_bound_uio(void) +-{ +- struct rte_pci_device *dev = NULL; +- struct rte_devargs *devargs; +- int need_check; +- +- FOREACH_DEVICE_ON_PCIBUS(dev) { +- devargs = dev->device.devargs; +- +- need_check = 0; +- switch (rte_pci_bus.bus.conf.scan_mode) { +- case RTE_BUS_SCAN_WHITELIST: +- if (devargs && devargs->policy == RTE_DEV_WHITELISTED) +- need_check = 1; +- break; +- case RTE_BUS_SCAN_UNDEFINED: +- case RTE_BUS_SCAN_BLACKLIST: +- if (devargs == NULL || +- devargs->policy != RTE_DEV_BLACKLISTED) +- need_check = 1; +- break; +- } +- +- if (!need_check) +- continue; +- +- if (dev->kdrv == RTE_KDRV_IGB_UIO || +- dev->kdrv == RTE_KDRV_UIO_GENERIC) { +- return 1; +- } +- } +- return 0; +-} +- +-/* +- * Any one of the device has iova as va +- */ +-static inline int +-pci_one_device_has_iova_va(void) +-{ +- struct rte_pci_device *dev = NULL; +- struct rte_pci_driver *drv = NULL; +- +- FOREACH_DRIVER_ON_PCIBUS(drv) { +- if (drv && drv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) { +- FOREACH_DEVICE_ON_PCIBUS(dev) { +- if (dev->kdrv == RTE_KDRV_VFIO && +- rte_pci_match(drv, dev)) +- return 1; +- } +- } +- } +- return 0; +-} +- + #if defined(RTE_ARCH_X86) +-static bool +-pci_one_device_iommu_support_va(struct rte_pci_device *dev) ++bool ++pci_device_iommu_support_va(const struct rte_pci_device *dev) + { + #define VTD_CAP_MGAW_SHIFT 16 + #define VTD_CAP_MGAW_MASK (0x3fULL << VTD_CAP_MGAW_SHIFT) +- struct rte_pci_addr *addr = &dev->addr; ++ const struct rte_pci_addr *addr = &dev->addr; + char filename[PATH_MAX]; + FILE *fp; + uint64_t mgaw, vtd_cap_reg = 0; +@@ -627,81 +552,55 @@ pci_one_device_iommu_support_va(struct rte_pci_device *dev) + return true; + } + #elif defined(RTE_ARCH_PPC_64) +-static bool +-pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev) ++bool ++pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) + { + return false; + } + #else +-static bool +-pci_one_device_iommu_support_va(__rte_unused struct rte_pci_device *dev) ++bool ++pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) + { + return true; + } + #endif + +-/* +- * All devices IOMMUs support VA as IOVA +- */ +-static bool +-pci_devices_iommu_support_va(void) ++enum rte_iova_mode ++pci_device_iova_mode(const struct rte_pci_driver *pdrv, ++ const struct rte_pci_device *pdev) + { +- struct rte_pci_device *dev = NULL; +- struct rte_pci_driver *drv = NULL; ++ enum rte_iova_mode iova_mode = RTE_IOVA_DC; + +- 
FOREACH_DRIVER_ON_PCIBUS(drv) { +- FOREACH_DEVICE_ON_PCIBUS(dev) { +- if (!rte_pci_match(drv, dev)) +- continue; +- /* +- * just one PCI device needs to be checked out because +- * the IOMMU hardware is the same for all of them. +- */ +- return pci_one_device_iommu_support_va(dev); ++ switch (pdev->kdrv) { ++ case RTE_KDRV_VFIO: { ++#ifdef VFIO_PRESENT ++ static int is_vfio_noiommu_enabled = -1; ++ ++ if (is_vfio_noiommu_enabled == -1) { ++ if (rte_vfio_noiommu_is_enabled() == 1) ++ is_vfio_noiommu_enabled = 1; ++ else ++ is_vfio_noiommu_enabled = 0; + } ++ if (is_vfio_noiommu_enabled != 0) ++ iova_mode = RTE_IOVA_PA; ++ else if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) != 0) ++ iova_mode = RTE_IOVA_VA; ++#endif ++ break; + } +- return true; +-} + +-/* +- * Get iommu class of PCI devices on the bus. +- */ +-enum rte_iova_mode +-rte_pci_get_iommu_class(void) +-{ +- bool is_bound; +- bool is_vfio_noiommu_enabled = true; +- bool has_iova_va; +- bool is_bound_uio; +- bool iommu_no_va; +- +- is_bound = pci_one_device_is_bound(); +- if (!is_bound) +- return RTE_IOVA_DC; +- +- has_iova_va = pci_one_device_has_iova_va(); +- is_bound_uio = pci_one_device_bound_uio(); +- iommu_no_va = !pci_devices_iommu_support_va(); +-#ifdef VFIO_PRESENT +- is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ? +- true : false; +-#endif ++ case RTE_KDRV_IGB_UIO: ++ case RTE_KDRV_UIO_GENERIC: ++ iova_mode = RTE_IOVA_PA; ++ break; + +- if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled && +- !iommu_no_va) +- return RTE_IOVA_VA; +- +- if (has_iova_va) { +- RTE_LOG(WARNING, EAL, "Some devices want iova as va but pa will be used because.. "); +- if (is_vfio_noiommu_enabled) +- RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n"); +- if (is_bound_uio) +- RTE_LOG(WARNING, EAL, "few device bound to UIO\n"); +- if (iommu_no_va) +- RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as VA\n"); ++ default: ++ if ((pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA) != 0) ++ iova_mode = RTE_IOVA_VA; ++ break; + } +- +- return RTE_IOVA_PA; ++ return iova_mode; + } + + /* Read PCI config space. 
*/ +diff --git a/dpdk/drivers/bus/pci/linux/pci_vfio.c b/dpdk/drivers/bus/pci/linux/pci_vfio.c +index 366d2ab2ff..cf61032a5d 100644 +--- a/dpdk/drivers/bus/pci/linux/pci_vfio.c ++++ b/dpdk/drivers/bus/pci/linux/pci_vfio.c +@@ -149,6 +149,38 @@ pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) + return 0; + } + ++/* enable PCI bus memory space */ ++static int ++pci_vfio_enable_bus_memory(int dev_fd) ++{ ++ uint16_t cmd; ++ int ret; ++ ++ ret = pread64(dev_fd, &cmd, sizeof(cmd), ++ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + ++ PCI_COMMAND); ++ ++ if (ret != sizeof(cmd)) { ++ RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); ++ return -1; ++ } ++ ++ if (cmd & PCI_COMMAND_MEMORY) ++ return 0; ++ ++ cmd |= PCI_COMMAND_MEMORY; ++ ret = pwrite64(dev_fd, &cmd, sizeof(cmd), ++ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + ++ PCI_COMMAND); ++ ++ if (ret != sizeof(cmd)) { ++ RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + /* set PCI bus mastering */ + static int + pci_vfio_set_bus_master(int dev_fd, bool op) +@@ -427,6 +459,11 @@ pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd) + return -1; + } + ++ if (pci_vfio_enable_bus_memory(vfio_dev_fd)) { ++ RTE_LOG(ERR, EAL, "Cannot enable bus memory!\n"); ++ return -1; ++ } ++ + /* set bus mastering for the device */ + if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { + RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); +diff --git a/dpdk/drivers/bus/pci/pci_common.c b/dpdk/drivers/bus/pci/pci_common.c +index 6276e5d695..954d638a99 100644 +--- a/dpdk/drivers/bus/pci/pci_common.c ++++ b/dpdk/drivers/bus/pci/pci_common.c +@@ -169,8 +169,22 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr, + * This needs to be before rte_pci_map_device(), as it enables to use + * driver flags for adjusting configuration. + */ +- if (!already_probed) ++ if (!already_probed) { ++ enum rte_iova_mode dev_iova_mode; ++ enum rte_iova_mode iova_mode; ++ ++ dev_iova_mode = pci_device_iova_mode(dr, dev); ++ iova_mode = rte_eal_iova_mode(); ++ if (dev_iova_mode != RTE_IOVA_DC && ++ dev_iova_mode != iova_mode) { ++ RTE_LOG(ERR, EAL, " Expecting '%s' IOVA mode but current mode is '%s', not initializing\n", ++ dev_iova_mode == RTE_IOVA_PA ? "PA" : "VA", ++ iova_mode == RTE_IOVA_PA ? "PA" : "VA"); ++ return -EINVAL; ++ } ++ + dev->driver = dr; ++ } + + if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) { + /* map resources for devices that use igb_uio */ +@@ -273,8 +287,8 @@ pci_probe_all_drivers(struct rte_pci_device *dev) + * all registered drivers that have a matching entry in its id_table + * for discovered devices. 
+ */ +-int +-rte_pci_probe(void) ++static int ++pci_probe(void) + { + struct rte_pci_device *dev = NULL; + size_t probed = 0, failed = 0; +@@ -528,10 +542,93 @@ pci_unplug(struct rte_device *dev) + return ret; + } + ++static bool ++pci_ignore_device(const struct rte_pci_device *dev) ++{ ++ struct rte_devargs *devargs = dev->device.devargs; ++ ++ switch (rte_pci_bus.bus.conf.scan_mode) { ++ case RTE_BUS_SCAN_WHITELIST: ++ if (devargs && devargs->policy == RTE_DEV_WHITELISTED) ++ return false; ++ break; ++ case RTE_BUS_SCAN_UNDEFINED: ++ case RTE_BUS_SCAN_BLACKLIST: ++ if (devargs == NULL || ++ devargs->policy != RTE_DEV_BLACKLISTED) ++ return false; ++ break; ++ } ++ return true; ++} ++ ++enum rte_iova_mode ++rte_pci_get_iommu_class(void) ++{ ++ enum rte_iova_mode iova_mode = RTE_IOVA_DC; ++ const struct rte_pci_device *dev; ++ const struct rte_pci_driver *drv; ++ bool devices_want_va = false; ++ bool devices_want_pa = false; ++ int iommu_no_va = -1; ++ ++ FOREACH_DEVICE_ON_PCIBUS(dev) { ++ /* ++ * We can check this only once, because the IOMMU hardware is ++ * the same for all of them. ++ */ ++ if (iommu_no_va == -1) ++ iommu_no_va = pci_device_iommu_support_va(dev) ++ ? 0 : 1; ++ if (pci_ignore_device(dev)) ++ continue; ++ if (dev->kdrv == RTE_KDRV_UNKNOWN || ++ dev->kdrv == RTE_KDRV_NONE) ++ continue; ++ FOREACH_DRIVER_ON_PCIBUS(drv) { ++ enum rte_iova_mode dev_iova_mode; ++ ++ if (!rte_pci_match(drv, dev)) ++ continue; ++ ++ dev_iova_mode = pci_device_iova_mode(drv, dev); ++ RTE_LOG(DEBUG, EAL, "PCI driver %s for device " ++ PCI_PRI_FMT " wants IOVA as '%s'\n", ++ drv->driver.name, ++ dev->addr.domain, dev->addr.bus, ++ dev->addr.devid, dev->addr.function, ++ dev_iova_mode == RTE_IOVA_DC ? "DC" : ++ (dev_iova_mode == RTE_IOVA_PA ? "PA" : "VA")); ++ if (dev_iova_mode == RTE_IOVA_PA) ++ devices_want_pa = true; ++ else if (dev_iova_mode == RTE_IOVA_VA) ++ devices_want_va = true; ++ } ++ } ++ if (iommu_no_va == 1) { ++ iova_mode = RTE_IOVA_PA; ++ if (devices_want_va) { ++ RTE_LOG(WARNING, EAL, "Some devices want 'VA' but IOMMU does not support 'VA'.\n"); ++ RTE_LOG(WARNING, EAL, "The devices that want 'VA' won't initialize.\n"); ++ } ++ } else if (devices_want_va && !devices_want_pa) { ++ iova_mode = RTE_IOVA_VA; ++ } else if (devices_want_pa && !devices_want_va) { ++ iova_mode = RTE_IOVA_PA; ++ } else { ++ iova_mode = RTE_IOVA_DC; ++ if (devices_want_va) { ++ RTE_LOG(WARNING, EAL, "Some devices want 'VA' but forcing 'DC' because other devices want 'PA'.\n"); ++ RTE_LOG(WARNING, EAL, "Depending on the final decision by the EAL, not all devices may be able to initialize.\n"); ++ } ++ } ++ return iova_mode; ++} ++ + struct rte_pci_bus rte_pci_bus = { + .bus = { + .scan = rte_pci_scan, +- .probe = rte_pci_probe, ++ .probe = pci_probe, + .find_device = pci_find_device, + .plug = pci_plug, + .unplug = pci_unplug, +diff --git a/dpdk/drivers/bus/pci/pci_common_uio.c b/dpdk/drivers/bus/pci/pci_common_uio.c +index 7ea73dbc5b..f4dca9da91 100644 +--- a/dpdk/drivers/bus/pci/pci_common_uio.c ++++ b/dpdk/drivers/bus/pci/pci_common_uio.c +@@ -70,6 +70,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev) + } + return -1; + } ++ dev->mem_resource[i].addr = mapaddr; + } + return 0; + } +diff --git a/dpdk/drivers/bus/pci/private.h b/dpdk/drivers/bus/pci/private.h +index 13c3324bb0..af1c7ae5fe 100644 +--- a/dpdk/drivers/bus/pci/private.h ++++ b/dpdk/drivers/bus/pci/private.h +@@ -17,16 +17,6 @@ struct rte_pci_device; + + extern struct rte_pci_bus rte_pci_bus; + +-/** +- * Probe the PCI bus +- * +- * @return 
+- * - 0 on success. +- * - !0 on error. +- */ +-int +-rte_pci_probe(void); +- + /** + * Scan the content of the PCI bus, and the devices in the devices + * list +@@ -172,6 +162,17 @@ int + rte_pci_match(const struct rte_pci_driver *pci_drv, + const struct rte_pci_device *pci_dev); + ++/** ++ * OS specific callbacks for rte_pci_get_iommu_class ++ * ++ */ ++bool ++pci_device_iommu_support_va(const struct rte_pci_device *dev); ++ ++enum rte_iova_mode ++pci_device_iova_mode(const struct rte_pci_driver *pci_drv, ++ const struct rte_pci_device *pci_dev); ++ + /** + * Get iommu class of PCI devices on the bus. + * And return their preferred iova mapping mode. +diff --git a/dpdk/drivers/bus/pci/rte_bus_pci.h b/dpdk/drivers/bus/pci/rte_bus_pci.h +index f0d6d81c00..f0fa3a17f3 100644 +--- a/dpdk/drivers/bus/pci/rte_bus_pci.h ++++ b/dpdk/drivers/bus/pci/rte_bus_pci.h +@@ -147,8 +147,8 @@ struct rte_pci_bus { + #define RTE_PCI_DRV_INTR_RMV 0x0010 + /** Device driver needs to keep mapped resources if unsupported dev detected */ + #define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020 +-/** Device driver supports IOVA as VA */ +-#define RTE_PCI_DRV_IOVA_AS_VA 0X0040 ++/** Device driver only supports IOVA as VA and cannot work with IOVA as PA */ ++#define RTE_PCI_DRV_IOVA_AS_VA 0x0040 + + /** + * Map the PCI device resources in user space virtual memory address +diff --git a/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c b/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c +index 7cab5c19f4..308ba00893 100644 +--- a/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c ++++ b/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c +@@ -166,7 +166,7 @@ vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev, int idx, + dev->resource[idx].addr = mapaddr; + vmbus_map_addr = RTE_PTR_ADD(mapaddr, size); + +- /* Record result of sucessful mapping for use by secondary */ ++ /* Record result of successful mapping for use by secondary */ + maps[idx].addr = mapaddr; + maps[idx].size = size; + +diff --git a/dpdk/drivers/bus/vmbus/vmbus_common.c b/dpdk/drivers/bus/vmbus/vmbus_common.c +index 48a219f735..3adef01c95 100644 +--- a/dpdk/drivers/bus/vmbus/vmbus_common.c ++++ b/dpdk/drivers/bus/vmbus/vmbus_common.c +@@ -131,7 +131,7 @@ vmbus_probe_one_driver(struct rte_vmbus_driver *dr, + } + + /* +- * IF device class GUID mathces, call the probe function of ++ * If device class GUID matches, call the probe function of + * registere drivers for the vmbus device. + * Return -1 if initialization failed, + * and 1 if no driver found for this device. +diff --git a/dpdk/drivers/common/cpt/cpt_pmd_logs.h b/dpdk/drivers/common/cpt/cpt_pmd_logs.h +index 4cbec4e36c..2681d12869 100644 +--- a/dpdk/drivers/common/cpt/cpt_pmd_logs.h ++++ b/dpdk/drivers/common/cpt/cpt_pmd_logs.h +@@ -45,6 +45,6 @@ + * cpt_logtype will be used for common logging. This field would be initialized + * by otx_* driver routines during PCI probe. 
+ */ +-int cpt_logtype; ++extern int cpt_logtype; + + #endif /* _CPT_PMD_LOGS_H_ */ +diff --git a/dpdk/drivers/compress/octeontx/otx_zip_pmd.c b/dpdk/drivers/compress/octeontx/otx_zip_pmd.c +index 9e00c86630..bff8ef035e 100644 +--- a/dpdk/drivers/compress/octeontx/otx_zip_pmd.c ++++ b/dpdk/drivers/compress/octeontx/otx_zip_pmd.c +@@ -406,7 +406,7 @@ zip_pmd_qp_setup(struct rte_compressdev *dev, uint16_t qp_id, + + qp->name = name; + +- /* Create completion queue upto max_inflight_ops */ ++ /* Create completion queue up to max_inflight_ops */ + qp->processed_pkts = zip_pmd_qp_create_processed_pkts_ring(qp, + max_inflight_ops, socket_id); + if (qp->processed_pkts == NULL) +diff --git a/dpdk/drivers/compress/zlib/zlib_pmd.c b/dpdk/drivers/compress/zlib/zlib_pmd.c +index 19f9200c22..e39be2ed86 100644 +--- a/dpdk/drivers/compress/zlib/zlib_pmd.c ++++ b/dpdk/drivers/compress/zlib/zlib_pmd.c +@@ -7,6 +7,8 @@ + + #include "zlib_pmd_private.h" + ++int zlib_logtype_driver; ++ + /** Compute next mbuf in the list, assign data buffer and length, + * returns 0 if mbuf is NULL + */ +diff --git a/dpdk/drivers/compress/zlib/zlib_pmd_private.h b/dpdk/drivers/compress/zlib/zlib_pmd_private.h +index 2c6e83d45c..05e80d94ef 100644 +--- a/dpdk/drivers/compress/zlib/zlib_pmd_private.h ++++ b/dpdk/drivers/compress/zlib/zlib_pmd_private.h +@@ -14,7 +14,7 @@ + + #define DEF_MEM_LEVEL 8 + +-int zlib_logtype_driver; ++extern int zlib_logtype_driver; + #define ZLIB_PMD_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, zlib_logtype_driver, "%s(): "fmt "\n", \ + __func__, ##args) +diff --git a/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c b/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c +index ebdf7c35a8..1975ad09d2 100644 +--- a/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c ++++ b/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c +@@ -13,6 +13,8 @@ + + #include "aesni_gcm_pmd_private.h" + ++int aesni_gcm_logtype_driver; ++ + static uint8_t cryptodev_driver_id; + + /** Parse crypto xform chain and set private session parameters */ +diff --git a/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd_private.h b/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd_private.h +index 92b0413547..20a619f1a9 100644 +--- a/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd_private.h ++++ b/dpdk/drivers/crypto/aesni_gcm/aesni_gcm_pmd_private.h +@@ -20,7 +20,7 @@ + /**< AES-NI GCM PMD device name */ + + /** AES-NI GCM PMD LOGTYPE DRIVER */ +-int aesni_gcm_logtype_driver; ++extern int aesni_gcm_logtype_driver; + #define AESNI_GCM_LOG(level, fmt, ...) 
\ + rte_log(RTE_LOG_ ## level, aesni_gcm_logtype_driver, \ + "%s() line %u: "fmt "\n", __func__, __LINE__, \ +diff --git a/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c b/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c +index 45dd86a756..19652469e7 100644 +--- a/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c ++++ b/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c +@@ -14,6 +14,8 @@ + + #include "rte_aesni_mb_pmd_private.h" + ++int aesni_mb_logtype_driver; ++ + #define AES_CCM_DIGEST_MIN_LEN 4 + #define AES_CCM_DIGEST_MAX_LEN 16 + #define HMAC_MAX_BLOCK_SIZE 128 +diff --git a/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h b/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h +index d8021cdaa2..a7a417b6b5 100644 +--- a/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h ++++ b/dpdk/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h +@@ -11,7 +11,7 @@ + /**< AES-NI Multi buffer PMD device name */ + + /** AESNI_MB PMD LOGTYPE DRIVER */ +-int aesni_mb_logtype_driver; ++extern int aesni_mb_logtype_driver; + + #define AESNI_MB_LOG(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, aesni_mb_logtype_driver, \ +diff --git a/dpdk/drivers/crypto/caam_jr/Makefile b/dpdk/drivers/crypto/caam_jr/Makefile +index 88cdf74108..c78901f5bb 100644 +--- a/dpdk/drivers/crypto/caam_jr/Makefile ++++ b/dpdk/drivers/crypto/caam_jr/Makefile +@@ -16,6 +16,13 @@ CFLAGS += -D _GNU_SOURCE + CFLAGS += -O3 + CFLAGS += $(WERROR_FLAGS) + ++# FIXME: temporary solution for Bugzilla 469 ++ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) ++ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1) ++CFLAGS += -fcommon ++endif ++endif ++ + CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa/include + CFLAGS += -I$(RTE_SDK)/drivers/crypto/caam_jr + #sharing the hw flib headers from dpaa2_sec pmd +diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr.c b/dpdk/drivers/crypto/caam_jr/caam_jr.c +index a4e70babf4..7aca8d6fbd 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr.c ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr.c +@@ -2110,7 +2110,7 @@ static void + close_job_ring(struct sec_job_ring_t *job_ring) + { + PMD_INIT_FUNC_TRACE(); +- if (job_ring->irq_fd) { ++ if (job_ring->irq_fd != -1) { + /* Producer index is frozen. If consumer index is not equal + * with producer index, then we have descs to flush. 
+ */ +@@ -2119,7 +2119,7 @@ close_job_ring(struct sec_job_ring_t *job_ring) + + /* free the uio job ring */ + free_job_ring(job_ring->irq_fd); +- job_ring->irq_fd = 0; ++ job_ring->irq_fd = -1; + caam_jr_dma_free(job_ring->input_ring); + caam_jr_dma_free(job_ring->output_ring); + g_job_rings_no--; +@@ -2223,7 +2223,7 @@ caam_jr_dev_uninit(struct rte_cryptodev *dev) + * + */ + static void * +-init_job_ring(void *reg_base_addr, uint32_t irq_id) ++init_job_ring(void *reg_base_addr, int irq_id) + { + struct sec_job_ring_t *job_ring = NULL; + int i, ret = 0; +@@ -2233,7 +2233,7 @@ init_job_ring(void *reg_base_addr, uint32_t irq_id) + int irq_coalescing_count = 0; + + for (i = 0; i < MAX_SEC_JOB_RINGS; i++) { +- if (g_job_rings[i].irq_fd == 0) { ++ if (g_job_rings[i].irq_fd == -1) { + job_ring = &g_job_rings[i]; + g_job_rings_no++; + break; +@@ -2486,6 +2486,15 @@ cryptodev_caam_jr_remove(struct rte_vdev_device *vdev) + return rte_cryptodev_pmd_destroy(cryptodev); + } + ++static void ++sec_job_rings_init(void) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_SEC_JOB_RINGS; i++) ++ g_job_rings[i].irq_fd = -1; ++} ++ + static struct rte_vdev_driver cryptodev_caam_jr_drv = { + .probe = cryptodev_caam_jr_probe, + .remove = cryptodev_caam_jr_remove +@@ -2500,6 +2509,12 @@ RTE_PMD_REGISTER_PARAM_STRING(CRYPTODEV_NAME_CAAM_JR_PMD, + RTE_PMD_REGISTER_CRYPTO_DRIVER(caam_jr_crypto_drv, cryptodev_caam_jr_drv.driver, + cryptodev_driver_id); + ++RTE_INIT(caam_jr_init) ++{ ++ sec_uio_job_rings_init(); ++ sec_job_rings_init(); ++} ++ + RTE_INIT(caam_jr_init_log) + { + caam_jr_logtype = rte_log_register("pmd.crypto.caam"); +diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h b/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h +index 5f58a585d7..bbe8bc3f90 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr_hw_specific.h +@@ -360,7 +360,7 @@ struct sec_job_ring_t { + * bitwise operations. 
+ */ + +- uint32_t irq_fd; /* The file descriptor used for polling from ++ int irq_fd; /* The file descriptor used for polling from + * user space for interrupts notifications + */ + uint32_t jr_mode; /* Model used by SEC Driver to receive +diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h b/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h +index 9f1adabc7d..579aacbb38 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr_pvt.h +@@ -215,7 +215,7 @@ calc_chksum(void *buffer, int len) + } + struct uio_job_ring { + uint32_t jr_id; +- uint32_t uio_fd; ++ int uio_fd; + void *register_base_addr; + int map_size; + int uio_minor_number; +@@ -223,8 +223,9 @@ struct uio_job_ring { + + int sec_cleanup(void); + int sec_configure(void); ++void sec_uio_job_rings_init(void); + struct uio_job_ring *config_job_ring(void); +-void free_job_ring(uint32_t uio_fd); ++void free_job_ring(int uio_fd); + + /* For Dma memory allocation of specified length and alignment */ + static inline void * +@@ -273,7 +274,7 @@ static inline rte_iova_t caam_jr_dma_vtop(void *ptr) + * @retval 0 for success + * @retval -1 value for error + */ +-uint32_t caam_jr_enable_irqs(uint32_t uio_fd); ++int caam_jr_enable_irqs(int uio_fd); + + /** @brief Request to SEC kernel driver to disable interrupts for descriptor + * finished processing +@@ -286,6 +287,6 @@ uint32_t caam_jr_enable_irqs(uint32_t uio_fd); + * @retval -1 value for error + * + */ +-uint32_t caam_jr_disable_irqs(uint32_t uio_fd); ++int caam_jr_disable_irqs(int uio_fd); + + #endif +diff --git a/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c b/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c +index afd75c9a62..913f5f10a1 100644 +--- a/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c ++++ b/dpdk/drivers/crypto/caam_jr/caam_jr_uio.c +@@ -150,7 +150,7 @@ file_read_first_line(const char root[], const char subdir[], + "%s/%s/%s", root, subdir, filename); + + fd = open(absolute_file_name, O_RDONLY); +- SEC_ASSERT(fd > 0, fd, "Error opening file %s", ++ SEC_ASSERT(fd >= 0, fd, "Error opening file %s", + absolute_file_name); + + /* read UIO device name from first line in file */ +@@ -184,7 +184,7 @@ file_read_first_line(const char root[], const char subdir[], + * kernel driver as well. No special return values are used. 
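The uint32_t-to-int signature changes and the relaxed comparisons in these caam_jr hunks fix two related bugs: open(2) returns -1 on failure, which a uint32_t silently converts to UINT32_MAX so error checks never fire, and 0 is a perfectly valid descriptor (the kernel hands out the lowest free fd number), so a "fd > 0" test both misses errors and rejects a legal fd. A sketch of the corrected handling (the device path and names are illustrative):

#include <fcntl.h>
#include <unistd.h>

static int uio_fd = -1;	/* -1, never 0, marks "not open" */

static int open_uio(const char *path)
{
	int fd = open(path, O_RDWR);

	if (fd < 0)	/* open() returns -1 on error */
		return -1;
	uio_fd = fd;	/* fd may legitimately be 0 */
	return 0;
}

static void close_uio(void)
{
	if (uio_fd != -1) {
		close(uio_fd);
		uio_fd = -1;
	}
}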
+ */ + static int +-sec_uio_send_command(uint32_t uio_fd, int32_t uio_command) ++sec_uio_send_command(int uio_fd, int32_t uio_command) + { + int ret; + +@@ -206,8 +206,8 @@ sec_uio_send_command(uint32_t uio_fd, int32_t uio_command) + * @retval 0 for success + * @retval -1 value for error + */ +-uint32_t +-caam_jr_enable_irqs(uint32_t uio_fd) ++int ++caam_jr_enable_irqs(int uio_fd) + { + int ret; + +@@ -237,8 +237,8 @@ caam_jr_enable_irqs(uint32_t uio_fd) + * @retval -1 value for error + * + */ +-uint32_t +-caam_jr_disable_irqs(uint32_t uio_fd) ++int ++caam_jr_disable_irqs(int uio_fd) + { + int ret; + +@@ -327,12 +327,12 @@ uio_map_registers(int uio_device_fd, int uio_device_id, + } + + void +-free_job_ring(uint32_t uio_fd) ++free_job_ring(int uio_fd) + { + struct uio_job_ring *job_ring = NULL; + int i; + +- if (!uio_fd) ++ if (uio_fd == -1) + return; + + for (i = 0; i < MAX_SEC_JOB_RINGS; i++) { +@@ -352,7 +352,7 @@ free_job_ring(uint32_t uio_fd) + job_ring->jr_id, job_ring->uio_fd); + close(job_ring->uio_fd); + g_uio_jr_num--; +- job_ring->uio_fd = 0; ++ job_ring->uio_fd = -1; + if (job_ring->register_base_addr == NULL) + return; + +@@ -375,7 +375,7 @@ uio_job_ring *config_job_ring(void) + int i; + + for (i = 0; i < MAX_SEC_JOB_RINGS; i++) { +- if (g_uio_job_ring[i].uio_fd == 0) { ++ if (g_uio_job_ring[i].uio_fd == -1) { + job_ring = &g_uio_job_ring[i]; + g_uio_jr_num++; + break; +@@ -394,7 +394,7 @@ uio_job_ring *config_job_ring(void) + + /* Open device file */ + job_ring->uio_fd = open(uio_device_file_name, O_RDWR); +- SEC_ASSERT(job_ring->uio_fd > 0, NULL, ++ SEC_ASSERT(job_ring->uio_fd >= 0, NULL, + "Failed to open UIO device file for job ring %d", + job_ring->jr_id); + +@@ -493,12 +493,22 @@ sec_cleanup(void) + /* I need to close the fd after shutdown UIO commands need to be + * sent using the fd + */ +- if (job_ring->uio_fd != 0) { ++ if (job_ring->uio_fd != -1) { + CAAM_JR_INFO( + "Closed device file for job ring %d , fd = %d", + job_ring->jr_id, job_ring->uio_fd); + close(job_ring->uio_fd); ++ job_ring->uio_fd = -1; + } + } + return 0; + } ++ ++void ++sec_uio_job_rings_init(void) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_SEC_JOB_RINGS; i++) ++ g_uio_job_ring[i].uio_fd = -1; ++} +diff --git a/dpdk/drivers/crypto/caam_jr/meson.build b/dpdk/drivers/crypto/caam_jr/meson.build +index 99b71aef15..ead7f42ff3 100644 +--- a/dpdk/drivers/crypto/caam_jr/meson.build ++++ b/dpdk/drivers/crypto/caam_jr/meson.build +@@ -11,6 +11,11 @@ sources = files('caam_jr_capabilities.c', + 'caam_jr_uio.c', + 'caam_jr.c') + ++# FIXME: temporary solution for Bugzilla 469 ++if (toolchain == 'gcc' and cc.version().version_compare('>=10.0.0')) ++ cflags += '-fcommon' ++endif ++ + allow_experimental_apis = true + + includes += include_directories('../dpaa2_sec/') +diff --git a/dpdk/drivers/crypto/ccp/ccp_dev.c b/dpdk/drivers/crypto/ccp/ccp_dev.c +index 80fe6a4533..7d98b2eb25 100644 +--- a/dpdk/drivers/crypto/ccp/ccp_dev.c ++++ b/dpdk/drivers/crypto/ccp/ccp_dev.c +@@ -760,7 +760,7 @@ ccp_probe_device(const char *dirname, uint16_t domain, + return 0; + fail: + CCP_LOG_ERR("CCP Device probe failed"); +- if (uio_fd > 0) ++ if (uio_fd >= 0) + close(uio_fd); + if (ccp_dev) + rte_free(ccp_dev); +diff --git a/dpdk/drivers/crypto/dpaa2_sec/Makefile b/dpdk/drivers/crypto/dpaa2_sec/Makefile +index f537f76a6f..0da83bfc16 100644 +--- a/dpdk/drivers/crypto/dpaa2_sec/Makefile ++++ b/dpdk/drivers/crypto/dpaa2_sec/Makefile +@@ -20,6 +20,13 @@ CFLAGS += -Wno-implicit-fallthrough + endif + endif + ++# FIXME: temporary solution for 
Bugzilla 469 ++ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) ++ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1) ++CFLAGS += -fcommon ++endif ++endif ++ + CFLAGS += -I$(RTE_SDK)/drivers/crypto/dpaa2_sec/ + CFLAGS += -I$(RTE_SDK)/drivers/crypto/dpaa2_sec/mc + CFLAGS += -I$(RTE_SDK)/drivers/bus/fslmc/ +diff --git a/dpdk/drivers/crypto/dpaa2_sec/meson.build b/dpdk/drivers/crypto/dpaa2_sec/meson.build +index 8fa4827edb..b999bbe433 100644 +--- a/dpdk/drivers/crypto/dpaa2_sec/meson.build ++++ b/dpdk/drivers/crypto/dpaa2_sec/meson.build +@@ -11,6 +11,11 @@ deps += ['security', 'mempool_dpaa2'] + sources = files('dpaa2_sec_dpseci.c', + 'mc/dpseci.c') + ++# FIXME: temporary solution for Bugzilla 469 ++if (toolchain == 'gcc' and cc.version().version_compare('>=10.0.0')) ++ cflags += '-fcommon' ++endif ++ + allow_experimental_apis = true + + includes += include_directories('mc', 'hw') +diff --git a/dpdk/drivers/crypto/dpaa_sec/Makefile b/dpdk/drivers/crypto/dpaa_sec/Makefile +index 5ce95c23fd..ed47f92b2b 100644 +--- a/dpdk/drivers/crypto/dpaa_sec/Makefile ++++ b/dpdk/drivers/crypto/dpaa_sec/Makefile +@@ -14,6 +14,13 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API + CFLAGS += -O3 + CFLAGS += $(WERROR_FLAGS) + ++# FIXME: temporary solution for Bugzilla 469 ++ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) ++ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1) ++CFLAGS += -fcommon ++endif ++endif ++ + CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa + CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa/include + CFLAGS += -I$(RTE_SDK)/drivers/crypto/dpaa_sec/ +diff --git a/dpdk/drivers/crypto/dpaa_sec/meson.build b/dpdk/drivers/crypto/dpaa_sec/meson.build +index 8a5709846e..e3daddbfd7 100644 +--- a/dpdk/drivers/crypto/dpaa_sec/meson.build ++++ b/dpdk/drivers/crypto/dpaa_sec/meson.build +@@ -8,6 +8,11 @@ endif + deps += ['bus_dpaa', 'security'] + sources = files('dpaa_sec.c') + ++# FIXME: temporary solution for Bugzilla 469 ++if (toolchain == 'gcc' and cc.version().version_compare('>=10.0.0')) ++ cflags += '-fcommon' ++endif ++ + allow_experimental_apis = true + + includes += include_directories('../dpaa2_sec/') +diff --git a/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd.c b/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd.c +index 239a1cf441..2d33f14e76 100644 +--- a/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd.c ++++ b/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd.c +@@ -17,6 +17,7 @@ + #define KASUMI_MAX_BURST 4 + #define BYTE_LEN 8 + ++int kasumi_logtype_driver; + static uint8_t cryptodev_driver_id; + + /** Get xform chain order. */ +diff --git a/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd_private.h b/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd_private.h +index 488777ca87..d26a86b62c 100644 +--- a/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd_private.h ++++ b/dpdk/drivers/crypto/kasumi/rte_kasumi_pmd_private.h +@@ -11,7 +11,7 @@ + /**< KASUMI PMD device name */ + + /** KASUMI PMD LOGTYPE DRIVER */ +-int kasumi_logtype_driver; ++extern int kasumi_logtype_driver; + + #define KASUMI_LOG(level, fmt, ...) 
\ + rte_log(RTE_LOG_ ## level, kasumi_logtype_driver, \ +@@ -70,6 +70,6 @@ kasumi_set_session_parameters(struct kasumi_session *sess, + + + /** device specific operations function pointer structure */ +-struct rte_cryptodev_ops *rte_kasumi_pmd_ops; ++extern struct rte_cryptodev_ops *rte_kasumi_pmd_ops; + + #endif /* _RTE_KASUMI_PMD_PRIVATE_H_ */ +diff --git a/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c b/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c +index 41301d7ec0..661e187e37 100644 +--- a/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c ++++ b/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd.c +@@ -19,6 +19,7 @@ + #define MRVL_PMD_MAX_NB_SESS_ARG ("max_nb_sessions") + #define MRVL_PMD_DEFAULT_MAX_NB_SESSIONS 2048 + ++int mrvl_logtype_driver; + static uint8_t cryptodev_driver_id; + + struct mrvl_pmd_init_params { +diff --git a/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd_private.h b/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd_private.h +index 6f8cf56248..200254cca6 100644 +--- a/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd_private.h ++++ b/dpdk/drivers/crypto/mvsam/rte_mrvl_pmd_private.h +@@ -13,7 +13,7 @@ + /**< Marvell PMD device name */ + + /** MRVL PMD LOGTYPE DRIVER */ +-int mrvl_logtype_driver; ++extern int mrvl_logtype_driver; + + #define MRVL_LOG(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, mrvl_logtype_driver, \ +diff --git a/dpdk/drivers/crypto/octeontx/otx_cryptodev.c b/dpdk/drivers/crypto/octeontx/otx_cryptodev.c +index c3076487f7..d334fbfdd5 100644 +--- a/dpdk/drivers/crypto/octeontx/otx_cryptodev.c ++++ b/dpdk/drivers/crypto/octeontx/otx_cryptodev.c +@@ -17,6 +17,10 @@ + + static int otx_cryptodev_logtype; + ++uint8_t otx_cryptodev_driver_id; ++ ++int cpt_logtype; ++ + static struct rte_pci_id pci_id_cpt_table[] = { + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, CPT_81XX_PCI_VF_DEVICE_ID), +diff --git a/dpdk/drivers/crypto/octeontx/otx_cryptodev.h b/dpdk/drivers/crypto/octeontx/otx_cryptodev.h +index 6c2871d712..0b204320a2 100644 +--- a/dpdk/drivers/crypto/octeontx/otx_cryptodev.h ++++ b/dpdk/drivers/crypto/octeontx/otx_cryptodev.h +@@ -15,6 +15,6 @@ + /* + * Crypto device driver ID + */ +-uint8_t otx_cryptodev_driver_id; ++extern uint8_t otx_cryptodev_driver_id; + + #endif /* _OTX_CRYPTODEV_H_ */ +diff --git a/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c b/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c +index 36122a43ce..a73c92ffd3 100644 +--- a/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c ++++ b/dpdk/drivers/crypto/openssl/rte_openssl_pmd.c +@@ -18,6 +18,7 @@ + + #define DES_BLOCK_SIZE 8 + ++int openssl_logtype_driver; + static uint8_t cryptodev_driver_id; + + #if (OPENSSL_VERSION_NUMBER < 0x10100000L) +@@ -1997,6 +1998,26 @@ process_asym_op(struct openssl_qp *qp, struct rte_crypto_op *op, + return retval; + } + ++static void ++copy_plaintext(struct rte_mbuf *m_src, struct rte_mbuf *m_dst, ++ struct rte_crypto_op *op) ++{ ++ uint8_t *p_src, *p_dst; ++ ++ p_src = rte_pktmbuf_mtod(m_src, uint8_t *); ++ p_dst = rte_pktmbuf_mtod(m_dst, uint8_t *); ++ ++ /** ++ * Copy the content between cipher offset and auth offset ++ * for generating correct digest. 
++ */ ++ if (op->sym->cipher.data.offset > op->sym->auth.data.offset) ++ memcpy(p_dst + op->sym->auth.data.offset, ++ p_src + op->sym->auth.data.offset, ++ op->sym->cipher.data.offset - ++ op->sym->auth.data.offset); ++} ++ + /** Process crypto operation for mbuf */ + static int + process_op(struct openssl_qp *qp, struct rte_crypto_op *op, +@@ -2019,6 +2040,9 @@ process_op(struct openssl_qp *qp, struct rte_crypto_op *op, + break; + case OPENSSL_CHAIN_CIPHER_AUTH: + process_openssl_cipher_op(op, sess, msrc, mdst); ++ /* OOP */ ++ if (msrc != mdst) ++ copy_plaintext(msrc, mdst, op); + process_openssl_auth_op(qp, op, sess, mdst, mdst); + break; + case OPENSSL_CHAIN_AUTH_CIPHER: +diff --git a/dpdk/drivers/crypto/openssl/rte_openssl_pmd_private.h b/dpdk/drivers/crypto/openssl/rte_openssl_pmd_private.h +index a8f2c8482b..2a9302bc19 100644 +--- a/dpdk/drivers/crypto/openssl/rte_openssl_pmd_private.h ++++ b/dpdk/drivers/crypto/openssl/rte_openssl_pmd_private.h +@@ -16,7 +16,7 @@ + /**< Open SSL Crypto PMD device name */ + + /** OPENSSL PMD LOGTYPE DRIVER */ +-int openssl_logtype_driver; ++extern int openssl_logtype_driver; + #define OPENSSL_LOG(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, openssl_logtype_driver, \ + "%s() line %u: " fmt "\n", __func__, __LINE__, \ +diff --git a/dpdk/drivers/crypto/qat/qat_sym_session.c b/dpdk/drivers/crypto/qat/qat_sym_session.c +index e147572e12..c4f39280c6 100644 +--- a/dpdk/drivers/crypto/qat/qat_sym_session.c ++++ b/dpdk/drivers/crypto/qat/qat_sym_session.c +@@ -1432,7 +1432,7 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + (struct icp_qat_fw_la_auth_req_params *) + ((char *)&req_tmpl->serv_specif_rqpars + + sizeof(struct icp_qat_fw_la_cipher_req_params)); +- uint16_t state1_size = 0, state2_size = 0; ++ uint16_t state1_size = 0, state2_size = 0, cd_extra_size = 0; + uint16_t hash_offset, cd_size; + uint32_t *aad_len = NULL; + uint32_t wordIndex = 0; +@@ -1608,7 +1608,7 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + memcpy(cipherconfig->key, authkey, authkeylen); + memset(cipherconfig->key + authkeylen, + 0, ICP_QAT_HW_SNOW_3G_UEA2_IV_SZ); +- cdesc->cd_cur_ptr += sizeof(struct icp_qat_hw_cipher_config) + ++ cd_extra_size += sizeof(struct icp_qat_hw_cipher_config) + + authkeylen + ICP_QAT_HW_SNOW_3G_UEA2_IV_SZ; + auth_param->hash_state_sz = ICP_QAT_HW_SNOW_3G_UEA2_IV_SZ >> 3; + break; +@@ -1624,8 +1624,7 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + + ICP_QAT_HW_ZUC_3G_EEA3_IV_SZ); + + memcpy(cdesc->cd_cur_ptr + state1_size, authkey, authkeylen); +- cdesc->cd_cur_ptr += state1_size + state2_size +- + ICP_QAT_HW_ZUC_3G_EEA3_IV_SZ; ++ cd_extra_size += ICP_QAT_HW_ZUC_3G_EEA3_IV_SZ; + auth_param->hash_state_sz = ICP_QAT_HW_ZUC_3G_EEA3_IV_SZ >> 3; + cdesc->min_qat_dev_gen = QAT_GEN2; + +@@ -1711,7 +1710,7 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc, + RTE_ALIGN_CEIL(hash_cd_ctrl->inner_state1_sz, 8)) + >> 3); + +- cdesc->cd_cur_ptr += state1_size + state2_size; ++ cdesc->cd_cur_ptr += state1_size + state2_size + cd_extra_size; + cd_size = cdesc->cd_cur_ptr-(uint8_t *)&cdesc->cd; + + cd_pars->u.s.content_desc_addr = cdesc->cd_paddr; +diff --git a/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd.c b/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd.c +index a17536b777..aa3277ff57 100644 +--- a/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd.c ++++ b/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd.c +@@ -16,6 +16,7 @@ + #define SNOW3G_MAX_BURST 8 + #define BYTE_LEN 8 + ++int 
snow3g_logtype_driver; + static uint8_t cryptodev_driver_id; + + /** Get xform chain order. */ +diff --git a/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd_private.h b/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd_private.h +index b7807b6216..debe0516b0 100644 +--- a/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd_private.h ++++ b/dpdk/drivers/crypto/snow3g/rte_snow3g_pmd_private.h +@@ -11,7 +11,7 @@ + /**< SNOW 3G PMD device name */ + + /** SNOW 3G PMD LOGTYPE DRIVER */ +-int snow3g_logtype_driver; ++extern int snow3g_logtype_driver; + + #define SNOW3G_LOG(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, snow3g_logtype_driver, \ +diff --git a/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c b/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c +index 313f4590bf..64a0d7a812 100644 +--- a/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c ++++ b/dpdk/drivers/crypto/zuc/rte_zuc_pmd.c +@@ -14,6 +14,7 @@ + #define ZUC_MAX_BURST 4 + #define BYTE_LEN 8 + ++int zuc_logtype_driver; + static uint8_t cryptodev_driver_id; + + /** Get xform chain order. */ +diff --git a/dpdk/drivers/crypto/zuc/rte_zuc_pmd_private.h b/dpdk/drivers/crypto/zuc/rte_zuc_pmd_private.h +index 5e5906ddb5..3d827052a5 100644 +--- a/dpdk/drivers/crypto/zuc/rte_zuc_pmd_private.h ++++ b/dpdk/drivers/crypto/zuc/rte_zuc_pmd_private.h +@@ -8,10 +8,10 @@ + #include <sso_zuc.h> + + #define CRYPTODEV_NAME_ZUC_PMD crypto_zuc +-/**< KASUMI PMD device name */ ++/**< ZUC PMD device name */ + + /** ZUC PMD LOGTYPE DRIVER */ +-int zuc_logtype_driver; ++extern int zuc_logtype_driver; + #define ZUC_LOG(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, zuc_logtype_driver, \ + "%s()... line %u: " fmt "\n", __func__, __LINE__, \ +diff --git a/dpdk/drivers/event/dpaa2/dpaa2_eventdev.c b/dpdk/drivers/event/dpaa2/dpaa2_eventdev.c +index b8cb437a0c..270ebbe850 100644 +--- a/dpdk/drivers/event/dpaa2/dpaa2_eventdev.c ++++ b/dpdk/drivers/event/dpaa2/dpaa2_eventdev.c +@@ -379,7 +379,7 @@ dpaa2_eventdev_info_get(struct rte_eventdev *dev, + dev_info->max_event_priority_levels = + DPAA2_EVENT_MAX_EVENT_PRIORITY_LEVELS; + dev_info->max_event_ports = rte_fslmc_get_device_count(DPAA2_IO); +- /* we only support dpio upto number of cores*/ ++ /* we only support dpio up to number of cores */ + if (dev_info->max_event_ports > rte_lcore_count()) + dev_info->max_event_ports = rte_lcore_count(); + dev_info->max_event_port_dequeue_depth = +diff --git a/dpdk/drivers/event/dsw/dsw_event.c b/dpdk/drivers/event/dsw/dsw_event.c +index eae53b2404..0df9209e4f 100644 +--- a/dpdk/drivers/event/dsw/dsw_event.c ++++ b/dpdk/drivers/event/dsw/dsw_event.c +@@ -658,6 +658,9 @@ dsw_port_consider_migration(struct dsw_evdev *dsw, + if (dsw->num_ports == 1) + return; + ++ if (seen_events_len < DSW_MAX_EVENTS_RECORDED) ++ return; ++ + DSW_LOG_DP_PORT(DEBUG, source_port->id, "Considering migration.\n"); + + /* Randomize interval to avoid having all threads considering +@@ -930,11 +933,6 @@ dsw_port_ctl_process(struct dsw_evdev *dsw, struct dsw_port *port) + { + struct dsw_ctl_msg msg; + +- /* So any table loads happens before the ring dequeue, in the +- * case of a 'paus' message. 
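On the dsw_event.c change below: the added early return makes a port defer migration decisions until its event history buffer is full, because the load-balancing choice is driven by per-flow statistics over the last DSW_MAX_EVENTS_RECORDED events, and acting on a partially filled window would migrate flows based on unrepresentative samples. A hedged sketch of the guard's intent (the constant and struct are illustrative, not DSW's internals):

#include <stdbool.h>
#include <stdint.h>

#define MAX_EVENTS_RECORDED 128	/* illustrative window size */

struct port_stats {
	uint32_t seen_events_len;	/* grows until the window wraps */
};

static bool stats_are_representative(const struct port_stats *s)
{
	/* A freshly started port has only a handful of samples;
	 * refuse to make migration decisions until the sliding
	 * window of recorded events is full. */
	return s->seen_events_len >= MAX_EVENTS_RECORDED;
}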
+- */ +- rte_smp_rmb(); +- + if (dsw_port_ctl_dequeue(port, &msg) == 0) { + switch (msg.type) { + case DSW_CTL_PAUS_REQ: +@@ -1099,7 +1097,7 @@ dsw_event_enqueue_burst_generic(struct dsw_port *source_port, + DSW_LOG_DP_PORT(DEBUG, source_port->id, "%d non-release events " + "accepted.\n", num_non_release); + +- return num_non_release; ++ return (num_non_release + num_release); + } + + uint16_t +@@ -1194,11 +1192,6 @@ static uint16_t + dsw_port_dequeue_burst(struct dsw_port *port, struct rte_event *events, + uint16_t num) + { +- struct dsw_port *source_port = port; +- struct dsw_evdev *dsw = source_port->dsw; +- +- dsw_port_ctl_process(dsw, source_port); +- + if (unlikely(port->in_buffer_len > 0)) { + uint16_t dequeued = RTE_MIN(num, port->in_buffer_len); + +diff --git a/dpdk/drivers/mempool/dpaa2/meson.build b/dpdk/drivers/mempool/dpaa2/meson.build +index 6b6ead617e..cdec39ddda 100644 +--- a/dpdk/drivers/mempool/dpaa2/meson.build ++++ b/dpdk/drivers/mempool/dpaa2/meson.build +@@ -12,3 +12,5 @@ sources = files('dpaa2_hw_mempool.c') + + # depends on fslmc bus which uses experimental API + allow_experimental_apis = true ++ ++install_headers('rte_dpaa2_mempool.h') +diff --git a/dpdk/drivers/net/atlantic/atl_ethdev.c b/dpdk/drivers/net/atlantic/atl_ethdev.c +index 761347fb59..9c98bd7f91 100644 +--- a/dpdk/drivers/net/atlantic/atl_ethdev.c ++++ b/dpdk/drivers/net/atlantic/atl_ethdev.c +@@ -152,8 +152,7 @@ static const struct rte_pci_id pci_id_atl_map[] = { + + static struct rte_pci_driver rte_atl_pmd = { + .id_table = pci_id_atl_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_atl_pci_probe, + .remove = eth_atl_pci_remove, + }; +diff --git a/dpdk/drivers/net/avf/avf_ethdev.c b/dpdk/drivers/net/avf/avf_ethdev.c +index e67621e4a9..3924289d8d 100644 +--- a/dpdk/drivers/net/avf/avf_ethdev.c ++++ b/dpdk/drivers/net/avf/avf_ethdev.c +@@ -998,7 +998,7 @@ avf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) + } else { + PMD_DRV_LOG(ERR, "Get statistics failed"); + } +- return -EIO; ++ return ret; + } + + static int +@@ -1330,8 +1330,7 @@ static int eth_avf_pci_remove(struct rte_pci_device *pci_dev) + /* Adaptive virtual function driver struct */ + static struct rte_pci_driver rte_avf_pmd = { + .id_table = pci_id_avf_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_avf_pci_probe, + .remove = eth_avf_pci_remove, + }; +diff --git a/dpdk/drivers/net/avf/avf_rxtx_vec_common.h b/dpdk/drivers/net/avf/avf_rxtx_vec_common.h +index 8057b9682d..b97ea5f2b7 100644 +--- a/dpdk/drivers/net/avf/avf_rxtx_vec_common.h ++++ b/dpdk/drivers/net/avf/avf_rxtx_vec_common.h +@@ -33,6 +33,7 @@ reassemble_packets(struct avf_rx_queue *rxq, struct rte_mbuf **rx_bufs, + if (!split_flags[buf_idx]) { + /* it's the last packet of the set */ + start->hash = end->hash; ++ start->vlan_tci = end->vlan_tci; + start->ol_flags = end->ol_flags; + /* we need to strip crc for the whole packet */ + start->pkt_len -= rxq->crc_len; +diff --git a/dpdk/drivers/net/avp/avp_ethdev.c b/dpdk/drivers/net/avp/avp_ethdev.c +index 09388d05f8..c5dfb3cab7 100644 +--- a/dpdk/drivers/net/avp/avp_ethdev.c ++++ b/dpdk/drivers/net/avp/avp_ethdev.c +@@ -1697,7 +1697,7 @@ avp_xmit_scattered_pkts(void *tx_queue, + uint16_t nb_pkts) + { + struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST * +- 
RTE_AVP_MAX_MBUF_SEGMENTS)]; ++ RTE_AVP_MAX_MBUF_SEGMENTS)] = {}; + struct avp_queue *txq = (struct avp_queue *)tx_queue; + struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST]; + struct avp_dev *avp = txq->avp; +diff --git a/dpdk/drivers/net/bnxt/bnxt.h b/dpdk/drivers/net/bnxt/bnxt.h +index 140089d37e..5e5f91d730 100644 +--- a/dpdk/drivers/net/bnxt/bnxt.h ++++ b/dpdk/drivers/net/bnxt/bnxt.h +@@ -296,7 +296,6 @@ struct bnxt { + void *bar0; + + struct rte_eth_dev *eth_dev; +- struct rte_eth_rss_conf rss_conf; + struct rte_pci_device *pdev; + void *doorbell_base; + +diff --git a/dpdk/drivers/net/bnxt/bnxt_ethdev.c b/dpdk/drivers/net/bnxt/bnxt_ethdev.c +index 90139bc837..223f98f9e9 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_ethdev.c ++++ b/dpdk/drivers/net/bnxt/bnxt_ethdev.c +@@ -1029,7 +1029,9 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev, + } + + bp->flags |= BNXT_FLAG_UPDATE_HASH; +- memcpy(&bp->rss_conf, rss_conf, sizeof(*rss_conf)); ++ memcpy(ð_dev->data->dev_conf.rx_adv_conf.rss_conf, ++ rss_conf, ++ sizeof(*rss_conf)); + + if (rss_conf->rss_hf & ETH_RSS_IPV4) + hash_type |= HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV4; +@@ -1111,7 +1113,7 @@ static int bnxt_rss_hash_conf_get_op(struct rte_eth_dev *eth_dev, + } + if (hash_types) { + PMD_DRV_LOG(ERR, +- "Unknwon RSS config from firmware (%08x), RSS disabled", ++ "Unknown RSS config from firmware (%08x), RSS disabled", + vnic->hash_type); + return -ENOTSUP; + } +@@ -1418,6 +1420,11 @@ static int bnxt_vlan_filter_set_op(struct rte_eth_dev *eth_dev, + { + struct bnxt *bp = eth_dev->data->dev_private; + ++ if (!eth_dev->data->dev_started) { ++ PMD_DRV_LOG(ERR, "port must be started before setting vlan\n"); ++ return -EINVAL; ++ } ++ + /* These operations apply to ALL existing MAC/VLAN filters */ + if (on) + return bnxt_add_vlan_filter(bp, vlan_id); +@@ -1544,10 +1551,11 @@ bnxt_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) + uint8_t fw_major = (bp->fw_ver >> 24) & 0xff; + uint8_t fw_minor = (bp->fw_ver >> 16) & 0xff; + uint8_t fw_updt = (bp->fw_ver >> 8) & 0xff; ++ uint8_t fw_rsvd = bp->fw_ver & 0xff; + int ret; + +- ret = snprintf(fw_version, fw_size, "%d.%d.%d", +- fw_major, fw_minor, fw_updt); ++ ret = snprintf(fw_version, fw_size, "%d.%d.%d.%d", ++ fw_major, fw_minor, fw_updt, fw_rsvd); + + ret += 1; /* add the size of '\0' */ + if (fw_size < (uint32_t)ret) +@@ -3537,8 +3545,7 @@ static int bnxt_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver bnxt_rte_pmd = { + .id_table = bnxt_pci_id_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | +- RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = bnxt_pci_probe, + .remove = bnxt_pci_remove, + }; +diff --git a/dpdk/drivers/net/bnxt/bnxt_hwrm.c b/dpdk/drivers/net/bnxt/bnxt_hwrm.c +index 6e6d47751e..cfb4cb63ad 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_hwrm.c ++++ b/dpdk/drivers/net/bnxt/bnxt_hwrm.c +@@ -141,8 +141,9 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg, + } + + if (i >= timeout) { +- PMD_DRV_LOG(ERR, "Error(timeout) sending msg 0x%04x\n", +- req->req_type); ++ PMD_DRV_LOG(ERR, ++ "Error(timeout) sending msg 0x%04x, seq_id %d\n", ++ req->req_type, req->seq_id); + return -ETIMEDOUT; + } + return 0; +diff --git a/dpdk/drivers/net/bnxt/bnxt_ring.c b/dpdk/drivers/net/bnxt/bnxt_ring.c +index 85bb7a623d..40edfe52df 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_ring.c ++++ b/dpdk/drivers/net/bnxt/bnxt_ring.c +@@ -331,7 +331,7 @@ int 
bnxt_alloc_hwrm_rx_ring(struct bnxt *bp, int queue_index) + + if (rxq->rx_started) { + if (bnxt_init_one_rx_ring(rxq)) { +- RTE_LOG(ERR, PMD, ++ PMD_DRV_LOG(ERR, + "bnxt_init_one_rx_ring failed!\n"); + bnxt_rx_queue_release_op(rxq); + rc = -ENOMEM; +diff --git a/dpdk/drivers/net/bnxt/bnxt_rxq.c b/dpdk/drivers/net/bnxt/bnxt_rxq.c +index 005c9f2c2c..d1664dbc09 100644 +--- a/dpdk/drivers/net/bnxt/bnxt_rxq.c ++++ b/dpdk/drivers/net/bnxt/bnxt_rxq.c +@@ -160,10 +160,8 @@ int bnxt_mq_rx_configure(struct bnxt *bp) + struct rte_eth_rss_conf *rss = &dev_conf->rx_adv_conf.rss_conf; + uint16_t hash_type = 0; + +- if (bp->flags & BNXT_FLAG_UPDATE_HASH) { +- rss = &bp->rss_conf; ++ if (bp->flags & BNXT_FLAG_UPDATE_HASH) + bp->flags &= ~BNXT_FLAG_UPDATE_HASH; +- } + + if (rss->rss_hf & ETH_RSS_IPV4) + hash_type |= HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV4; +diff --git a/dpdk/drivers/net/cxgbe/cxgbe_flow.c b/dpdk/drivers/net/cxgbe/cxgbe_flow.c +index b6250a2a90..d2466f29e4 100644 +--- a/dpdk/drivers/net/cxgbe/cxgbe_flow.c ++++ b/dpdk/drivers/net/cxgbe/cxgbe_flow.c +@@ -156,7 +156,7 @@ ch_rte_parsetype_port(const void *dmask, const struct rte_flow_item *item, + if (val->index > 0x7) + return rte_flow_error_set(e, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, +- "port index upto 0x7 is supported"); ++ "port index up to 0x7 is supported"); + + CXGBE_FILL_FS(val->index, mask->index, iport); + +diff --git a/dpdk/drivers/net/dpaa/dpaa_ethdev.c b/dpdk/drivers/net/dpaa/dpaa_ethdev.c +index f41a01e113..adbf5f6074 100644 +--- a/dpdk/drivers/net/dpaa/dpaa_ethdev.c ++++ b/dpdk/drivers/net/dpaa/dpaa_ethdev.c +@@ -827,8 +827,8 @@ dpaa_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) + PMD_INIT_FUNC_TRACE(); + + if (qman_query_fq_frm_cnt(rxq, &frm_cnt) == 0) { +- RTE_LOG(DEBUG, PMD, "RX frame count for q(%d) is %u\n", +- rx_queue_id, frm_cnt); ++ DPAA_PMD_DEBUG("RX frame count for q(%d) is %u", ++ rx_queue_id, frm_cnt); + } + return frm_cnt; + } +@@ -942,8 +942,7 @@ dpaa_dev_add_mac_addr(struct rte_eth_dev *dev, + ret = fman_if_add_mac_addr(dpaa_intf->fif, addr->addr_bytes, index); + + if (ret) +- RTE_LOG(ERR, PMD, "error: Adding the MAC ADDR failed:" +- " err = %d", ret); ++ DPAA_PMD_ERR("Adding the MAC ADDR failed: err = %d", ret); + return 0; + } + +@@ -969,7 +968,7 @@ dpaa_dev_set_mac_addr(struct rte_eth_dev *dev, + + ret = fman_if_add_mac_addr(dpaa_intf->fif, addr->addr_bytes, 0); + if (ret) +- RTE_LOG(ERR, PMD, "error: Setting the MAC ADDR failed %d", ret); ++ DPAA_PMD_ERR("Setting the MAC ADDR failed %d", ret); + + return ret; + } +@@ -1219,6 +1218,7 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev) + struct fman_if *fman_intf; + struct fman_if_bpool *bp, *tmp_bp; + uint32_t cgrid[DPAA_MAX_NUM_PCD_QUEUES]; ++ char eth_buf[ETHER_ADDR_FMT_SIZE]; + + PMD_INIT_FUNC_TRACE(); + +@@ -1365,15 +1365,9 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev) + + /* copy the primary mac address */ + ether_addr_copy(&fman_intf->mac_addr, ð_dev->data->mac_addrs[0]); ++ ether_format_addr(eth_buf, sizeof(eth_buf), &fman_intf->mac_addr); + +- RTE_LOG(INFO, PMD, "net: dpaa: %s: %02x:%02x:%02x:%02x:%02x:%02x\n", +- dpaa_device->name, +- fman_intf->mac_addr.addr_bytes[0], +- fman_intf->mac_addr.addr_bytes[1], +- fman_intf->mac_addr.addr_bytes[2], +- fman_intf->mac_addr.addr_bytes[3], +- fman_intf->mac_addr.addr_bytes[4], +- fman_intf->mac_addr.addr_bytes[5]); ++ DPAA_PMD_INFO("net: dpaa: %s: %s", dpaa_device->name, eth_buf); + + /* Disable RX mode */ + fman_if_discard_rx_errors(fman_intf); +@@ -1500,8 +1494,7 @@ rte_dpaa_probe(struct 
rte_dpaa_driver *dpaa_drv __rte_unused, + } + + if (access("/tmp/fmc.bin", F_OK) == -1) { +- RTE_LOG(INFO, PMD, +- "* FMC not configured.Enabling default mode\n"); ++ DPAA_PMD_INFO("* FMC not configured.Enabling default mode"); + default_q = 1; + } + +diff --git a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c +index e50467285e..c801d922cf 100644 +--- a/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c ++++ b/dpdk/drivers/net/dpaa2/dpaa2_ethdev.c +@@ -436,9 +436,6 @@ dpaa2_eth_dev_configure(struct rte_eth_dev *dev) + if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER) + dpaa2_vlan_offload_set(dev, ETH_VLAN_FILTER_MASK); + +- /* update the current status */ +- dpaa2_dev_link_update(dev, 0); +- + return 0; + } + +@@ -1480,6 +1477,7 @@ dpaa2_dev_set_link_up(struct rte_eth_dev *dev) + /* changing tx burst function to start enqueues */ + dev->tx_pkt_burst = dpaa2_dev_tx; + dev->data->dev_link.link_status = state.up; ++ dev->data->dev_link.link_speed = state.rate; + + if (state.up) + DPAA2_PMD_INFO("Port %d Link is Up", dev->data->port_id); +diff --git a/dpdk/drivers/net/e1000/em_ethdev.c b/dpdk/drivers/net/e1000/em_ethdev.c +index 28637c4945..db8eef9b49 100644 +--- a/dpdk/drivers/net/e1000/em_ethdev.c ++++ b/dpdk/drivers/net/e1000/em_ethdev.c +@@ -320,7 +320,7 @@ eth_em_dev_uninit(struct rte_eth_dev *eth_dev) + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) +- return -EPERM; ++ return 0; + + if (adapter->stopped == 0) + eth_em_close(eth_dev); +@@ -351,8 +351,7 @@ static int eth_em_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_em_pmd = { + .id_table = pci_id_em_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_em_pci_probe, + .remove = eth_em_pci_remove, + }; +diff --git a/dpdk/drivers/net/e1000/igb_ethdev.c b/dpdk/drivers/net/e1000/igb_ethdev.c +index 2c1f2314e8..5b69f3646f 100644 +--- a/dpdk/drivers/net/e1000/igb_ethdev.c ++++ b/dpdk/drivers/net/e1000/igb_ethdev.c +@@ -923,7 +923,7 @@ eth_igb_dev_uninit(struct rte_eth_dev *eth_dev) + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) +- return -EPERM; ++ return 0; + + hw = E1000_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); +@@ -1083,7 +1083,7 @@ eth_igbvf_dev_uninit(struct rte_eth_dev *eth_dev) + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) +- return -EPERM; ++ return 0; + + if (adapter->stopped == 0) + igbvf_dev_close(eth_dev); +@@ -1115,8 +1115,7 @@ static int eth_igb_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_igb_pmd = { + .id_table = pci_id_igb_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_igb_pci_probe, + .remove = eth_igb_pci_remove, + }; +@@ -1139,7 +1138,7 @@ static int eth_igbvf_pci_remove(struct rte_pci_device *pci_dev) + */ + static struct rte_pci_driver rte_igbvf_pmd = { + .id_table = pci_id_igbvf_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = eth_igbvf_pci_probe, + .remove = eth_igbvf_pci_remove, + }; +diff --git a/dpdk/drivers/net/ena/base/ena_com.c b/dpdk/drivers/net/ena/base/ena_com.c +index f22d67cd4d..19eec493ed 100644 +--- a/dpdk/drivers/net/ena/base/ena_com.c ++++ 
b/dpdk/drivers/net/ena/base/ena_com.c +@@ -532,11 +532,11 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c + timeout = ENA_GET_SYSTEM_TIMEOUT(admin_queue->completion_timeout); + + while (1) { +- ENA_SPINLOCK_LOCK(admin_queue->q_lock, flags); +- ena_com_handle_admin_completion(admin_queue); +- ENA_SPINLOCK_UNLOCK(admin_queue->q_lock, flags); ++ ENA_SPINLOCK_LOCK(admin_queue->q_lock, flags); ++ ena_com_handle_admin_completion(admin_queue); ++ ENA_SPINLOCK_UNLOCK(admin_queue->q_lock, flags); + +- if (comp_ctx->status != ENA_CMD_SUBMITTED) ++ if (comp_ctx->status != ENA_CMD_SUBMITTED) + break; + + if (ENA_TIME_EXPIRE(timeout)) { +@@ -2098,12 +2098,14 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions func, + const u8 *key, u16 key_len, u32 init_val) + { +- struct ena_rss *rss = &ena_dev->rss; ++ struct ena_admin_feature_rss_flow_hash_control *hash_key; + struct ena_admin_get_feat_resp get_resp; +- struct ena_admin_feature_rss_flow_hash_control *hash_key = +- rss->hash_key; ++ enum ena_admin_hash_functions old_func; ++ struct ena_rss *rss = &ena_dev->rss; + int rc; + ++ hash_key = rss->hash_key; ++ + /* Make sure size is a mult of DWs */ + if (unlikely(key_len & 0x3)) + return ENA_COM_INVAL; +@@ -2115,7 +2117,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + if (unlikely(rc)) + return rc; + +- if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) { ++ if (!(BIT(func) & get_resp.u.flow_hash_func.supported_func)) { + ena_trc_err("Flow hash function %d isn't supported\n", func); + return ENA_COM_UNSUPPORTED; + } +@@ -2140,11 +2142,12 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + return ENA_COM_INVAL; + } + ++ old_func = rss->hash_func; + rc = ena_com_set_hash_function(ena_dev); + + /* Restore the old function */ + if (unlikely(rc)) +- ena_com_get_hash_function(ena_dev, NULL, NULL); ++ rss->hash_func = old_func; + + return rc; + } +@@ -2166,7 +2169,11 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev, + if (unlikely(rc)) + return rc; + +- rss->hash_func = get_resp.u.flow_hash_func.selected_func; ++ /* ENA_FFS returns 1 in case the lsb is set */ ++ rss->hash_func = ENA_FFS(get_resp.u.flow_hash_func.selected_func); ++ if (rss->hash_func) ++ rss->hash_func--; ++ + if (func) + *func = rss->hash_func; + +diff --git a/dpdk/drivers/net/ena/base/ena_com.h b/dpdk/drivers/net/ena/base/ena_com.h +index f58cd86a87..f373d50c50 100644 +--- a/dpdk/drivers/net/ena/base/ena_com.h ++++ b/dpdk/drivers/net/ena/base/ena_com.h +@@ -37,9 +37,9 @@ + #include "ena_plat.h" + #include "ena_includes.h" + +-#define ENA_MAX_NUM_IO_QUEUES 128U ++#define ENA_MAX_NUM_IO_QUEUES 128U + /* We need to queues for each IO (on for Tx and one for Rx) */ +-#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES)) ++#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES)) + + #define ENA_MAX_HANDLERS 256 + +@@ -84,7 +84,7 @@ + #define ENA_INTR_MODER_LEVEL_STRIDE 1 + #define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED 0xFFFFFF + +-#define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF ++#define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF + + enum ena_intr_moder_level { + ENA_INTR_MODER_LOWEST = 0, +@@ -380,7 +380,7 @@ extern "C" { + */ + int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev); + +-/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism ++/* ena_com_set_mmio_read_mode - Enable/disable the indirect mmio reg read mechanism + * @ena_dev: ENA communication layer struct + * @readless_supported: readless 
mode (enable/disable) + */ +@@ -506,7 +506,7 @@ bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev); + /* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler + * @ena_dev: ENA communication layer struct + * +- * This method go over the admin completion queue and wake up all the pending ++ * This method goes over the admin completion queue and wakes up all the pending + * threads that wait on the commands wait event. + * + * @note: Should be called after MSI-X interrupt. +@@ -516,7 +516,7 @@ void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev); + /* ena_com_aenq_intr_handler - AENQ interrupt handler + * @ena_dev: ENA communication layer struct + * +- * This method go over the async event notification queue and call the proper ++ * This method goes over the async event notification queue and calls the proper + * aenq handler. + */ + void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data); +@@ -533,14 +533,14 @@ void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev); + /* ena_com_wait_for_abort_completion - Wait for admin commands abort. + * @ena_dev: ENA communication layer struct + * +- * This method wait until all the outstanding admin commands will be completed. ++ * This method waits until all the outstanding admin commands are completed. + */ + void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev); + + /* ena_com_validate_version - Validate the device parameters + * @ena_dev: ENA communication layer struct + * +- * This method validate the device parameters are the same as the saved ++ * This method verifies the device parameters are the same as the saved + * parameters in ena_dev. + * This method is useful after device reset, to validate the device mac address + * and the device offloads are the same as before the reset. +@@ -717,7 +717,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev); + * + * Retrieve the hash control from the device. + * +- * @note, If the caller called ena_com_fill_hash_ctrl but didn't flash ++ * @note: If the caller called ena_com_fill_hash_ctrl but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. +@@ -769,7 +769,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev); + * + * Retrieve the RSS indirection table from the device. + * +- * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flash ++ * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flush + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. +@@ -795,14 +795,14 @@ int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, + /* ena_com_delete_debug_area - Free the debug area resources. + * @ena_dev: ENA communication layer struct + * +- * Free the allocate debug area. ++ * Free the allocated debug area. + */ + void ena_com_delete_debug_area(struct ena_com_dev *ena_dev); + + /* ena_com_delete_host_info - Free the host info resources. + * @ena_dev: ENA communication layer struct + * +- * Free the allocate host info. ++ * Free the allocated host info. + */ + void ena_com_delete_host_info(struct ena_com_dev *ena_dev); + +@@ -843,9 +843,9 @@ int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, + * @cmd_completion: command completion return value. + * @cmd_comp_size: command completion size. 
+ +- * Submit an admin command and then wait until the device will return a ++ * Submit an admin command and then wait until the device returns a + * completion. +- * The completion will be copyed into cmd_comp. ++ * The completion will be copied into cmd_comp. + * + * @return - 0 on success, negative value on failure. + */ +@@ -1027,7 +1027,7 @@ static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev + * @intr_reg: interrupt register to update. + * @rx_delay_interval: Rx interval in usecs + * @tx_delay_interval: Tx interval in usecs +- * @unmask: unask enable/disable ++ * @unmask: unmask enable/disable + * + * Prepare interrupt update register with the supplied parameters. + */ +diff --git a/dpdk/drivers/net/ena/base/ena_plat_dpdk.h b/dpdk/drivers/net/ena/base/ena_plat_dpdk.h +index 900ba1a6b0..03ea2cd0dd 100644 +--- a/dpdk/drivers/net/ena/base/ena_plat_dpdk.h ++++ b/dpdk/drivers/net/ena/base/ena_plat_dpdk.h +@@ -1,35 +1,7 @@ +-/*- +-* BSD LICENSE +-* +-* Copyright (c) 2015-2016 Amazon.com, Inc. or its affiliates. +-* All rights reserved. +-* +-* Redistribution and use in source and binary forms, with or without +-* modification, are permitted provided that the following conditions +-* are met: +-* +-* * Redistributions of source code must retain the above copyright +-* notice, this list of conditions and the following disclaimer. +-* * Redistributions in binary form must reproduce the above copyright +-* notice, this list of conditions and the following disclaimer in +-* the documentation and/or other materials provided with the +-* distribution. +-* * Neither the name of copyright holder nor the names of its +-* contributors may be used to endorse or promote products derived +-* from this software without specific prior written permission. +-* +-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-*/ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. ++ * All rights reserved. ++ */ + + #ifndef DPDK_ENA_COM_ENA_PLAT_DPDK_H_ + #define DPDK_ENA_COM_ENA_PLAT_DPDK_H_ +@@ -201,18 +173,22 @@ do { \ + * Each rte_memzone should have unique name. + * To satisfy it, count number of allocations and add it to name. 
+ */ +-extern uint32_t ena_alloc_cnt; ++extern rte_atomic32_t ena_alloc_cnt; + + #define ENA_MEM_ALLOC_COHERENT(dmadev, size, virt, phys, handle) \ + do { \ +- const struct rte_memzone *mz; \ +- char z_name[RTE_MEMZONE_NAMESIZE]; \ ++ const struct rte_memzone *mz = NULL; \ + ENA_TOUCH(dmadev); ENA_TOUCH(handle); \ +- snprintf(z_name, sizeof(z_name), \ +- "ena_alloc_%d", ena_alloc_cnt++); \ +- mz = rte_memzone_reserve(z_name, size, SOCKET_ID_ANY, \ +- RTE_MEMZONE_IOVA_CONTIG); \ +- handle = mz; \ ++ if (size > 0) { \ ++ char z_name[RTE_MEMZONE_NAMESIZE]; \ ++ snprintf(z_name, sizeof(z_name), \ ++ "ena_alloc_%d", \ ++ rte_atomic32_add_return(&ena_alloc_cnt, 1)); \ ++ mz = rte_memzone_reserve(z_name, size, \ ++ SOCKET_ID_ANY, \ ++ RTE_MEMZONE_IOVA_CONTIG); \ ++ handle = mz; \ ++ } \ + if (mz == NULL) { \ + virt = NULL; \ + phys = 0; \ +@@ -230,14 +206,17 @@ extern uint32_t ena_alloc_cnt; + #define ENA_MEM_ALLOC_COHERENT_NODE( \ + dmadev, size, virt, phys, mem_handle, node, dev_node) \ + do { \ +- const struct rte_memzone *mz; \ +- char z_name[RTE_MEMZONE_NAMESIZE]; \ ++ const struct rte_memzone *mz = NULL; \ + ENA_TOUCH(dmadev); ENA_TOUCH(dev_node); \ +- snprintf(z_name, sizeof(z_name), \ +- "ena_alloc_%d", ena_alloc_cnt++); \ +- mz = rte_memzone_reserve(z_name, size, node, \ ++ if (size > 0) { \ ++ char z_name[RTE_MEMZONE_NAMESIZE]; \ ++ snprintf(z_name, sizeof(z_name), \ ++ "ena_alloc_%d", \ ++ rte_atomic32_add_return(&ena_alloc_cnt, 1)); \ ++ mz = rte_memzone_reserve(z_name, size, node, \ + RTE_MEMZONE_IOVA_CONTIG); \ +- mem_handle = mz; \ ++ mem_handle = mz; \ ++ } \ + if (mz == NULL) { \ + virt = NULL; \ + phys = 0; \ +@@ -283,4 +262,6 @@ extern uint32_t ena_alloc_cnt; + #define READ_ONCE(var) (*((volatile typeof(var) *)(&(var)))) + #endif + ++#define ENA_FFS(x) ffs(x) ++ + #endif /* DPDK_ENA_COM_ENA_PLAT_DPDK_H_ */ +diff --git a/dpdk/drivers/net/ena/ena_ethdev.c b/dpdk/drivers/net/ena/ena_ethdev.c +index 0779bb7713..8afd82bc87 100644 +--- a/dpdk/drivers/net/ena/ena_ethdev.c ++++ b/dpdk/drivers/net/ena/ena_ethdev.c +@@ -120,7 +120,7 @@ struct ena_stats { + * Each rte_memzone should have unique name. + * To satisfy it, count number of allocation and add it to name. 
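The ena_alloc_cnt conversion below is a thread-safety fix: every rte_memzone needs a process-unique name, and the counter generating the "ena_alloc_%d" suffix can be bumped from several threads at once, so a plain uint32_t post-increment is a data race that can hand two allocations the same name and make the second rte_memzone_reserve() fail. rte_atomic32_add_return() closes the race; the added size check also avoids rte_memzone_reserve()'s documented "reserve the biggest available zone" behaviour when a zero-length allocation is requested. A sketch of the naming scheme under those assumptions:

#include <stdio.h>
#include <rte_atomic.h>
#include <rte_memzone.h>

static rte_atomic32_t alloc_cnt;

static const struct rte_memzone *
alloc_coherent(size_t size)
{
	char z_name[RTE_MEMZONE_NAMESIZE];

	if (size == 0)
		return NULL;	/* len 0 would mean "biggest zone" */

	/* add_return is atomic: concurrent callers each get a
	 * distinct suffix, hence a distinct memzone name */
	snprintf(z_name, sizeof(z_name), "ena_alloc_%d",
		 rte_atomic32_add_return(&alloc_cnt, 1));
	return rte_memzone_reserve(z_name, size, SOCKET_ID_ANY,
				   RTE_MEMZONE_IOVA_CONTIG);
}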
+ */ +-uint32_t ena_alloc_cnt; ++rte_atomic32_t ena_alloc_cnt; + + static const struct ena_stats ena_stats_global_strings[] = { + ENA_STAT_GLOBAL_ENTRY(tx_timeout), +@@ -1043,16 +1043,15 @@ static int ena_create_io_queue(struct ena_ring *ring) + ena_qid = ENA_IO_TXQ_IDX(ring->id); + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; + ctx.mem_queue_type = ena_dev->tx_mem_queue_type; +- ctx.queue_size = adapter->tx_ring_size; + for (i = 0; i < ring->ring_size; i++) + ring->empty_tx_reqs[i] = i; + } else { + ena_qid = ENA_IO_RXQ_IDX(ring->id); + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; +- ctx.queue_size = adapter->rx_ring_size; + for (i = 0; i < ring->ring_size; i++) + ring->empty_rx_reqs[i] = i; + } ++ ctx.queue_size = ring->ring_size; + ctx.qid = ena_qid; + ctx.msix_vector = -1; /* interrupts not used */ + ctx.numa_node = ring->numa_socket_id; +diff --git a/dpdk/drivers/net/enic/enic_ethdev.c b/dpdk/drivers/net/enic/enic_ethdev.c +index ed6dd70c84..5efaa4de43 100644 +--- a/dpdk/drivers/net/enic/enic_ethdev.c ++++ b/dpdk/drivers/net/enic/enic_ethdev.c +@@ -1082,8 +1082,7 @@ static int eth_enic_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_enic_pmd = { + .id_table = pci_id_enic_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_enic_pci_probe, + .remove = eth_enic_pci_remove, + }; +diff --git a/dpdk/drivers/net/failsafe/failsafe.c b/dpdk/drivers/net/failsafe/failsafe.c +index 06e859e9fd..0180954123 100644 +--- a/dpdk/drivers/net/failsafe/failsafe.c ++++ b/dpdk/drivers/net/failsafe/failsafe.c +@@ -182,6 +182,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev) + } + priv = PRIV(dev); + priv->dev = dev; ++ priv->rxp = FS_RX_PROXY_INIT; + dev->dev_ops = &failsafe_ops; + dev->data->mac_addrs = &PRIV(dev)->mac_addrs[0]; + dev->data->dev_link = eth_link; +diff --git a/dpdk/drivers/net/failsafe/failsafe_intr.c b/dpdk/drivers/net/failsafe/failsafe_intr.c +index 1c2cb71c41..de229b2c2f 100644 +--- a/dpdk/drivers/net/failsafe/failsafe_intr.c ++++ b/dpdk/drivers/net/failsafe/failsafe_intr.c +@@ -394,7 +394,7 @@ fs_rx_event_proxy_uninstall(struct fs_priv *priv) + free(priv->rxp.evec); + priv->rxp.evec = NULL; + } +- if (priv->rxp.efd > 0) { ++ if (priv->rxp.efd >= 0) { + close(priv->rxp.efd); + priv->rxp.efd = -1; + } +diff --git a/dpdk/drivers/net/failsafe/failsafe_ops.c b/dpdk/drivers/net/failsafe/failsafe_ops.c +index 595278bbf7..a3c30c25a9 100644 +--- a/dpdk/drivers/net/failsafe/failsafe_ops.c ++++ b/dpdk/drivers/net/failsafe/failsafe_ops.c +@@ -379,7 +379,7 @@ fs_rx_queue_release(void *queue) + rxq = queue; + dev = rxq->priv->dev; + fs_lock(dev, 0); +- if (rxq->event_fd > 0) ++ if (rxq->event_fd >= 0) + close(rxq->event_fd); + FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) { + if (ETH(sdev)->data->rx_queues != NULL && +diff --git a/dpdk/drivers/net/failsafe/failsafe_private.h b/dpdk/drivers/net/failsafe/failsafe_private.h +index 3264bff2f8..cfe29c3f33 100644 +--- a/dpdk/drivers/net/failsafe/failsafe_private.h ++++ b/dpdk/drivers/net/failsafe/failsafe_private.h +@@ -58,6 +58,14 @@ struct rx_proxy { + enum rxp_service_state sstate; + }; + ++#define FS_RX_PROXY_INIT (struct rx_proxy){ \ ++ .efd = -1, \ ++ .evec = NULL, \ ++ .sid = 0, \ ++ .scid = 0, \ ++ .sstate = SS_NO_SERVICE, \ ++} ++ + struct rxq { + struct fs_priv *priv; + uint16_t qid; +diff --git a/dpdk/drivers/net/fm10k/fm10k_ethdev.c b/dpdk/drivers/net/fm10k/fm10k_ethdev.c +index 
f64d07bbad..b90eda7d83 100644 +--- a/dpdk/drivers/net/fm10k/fm10k_ethdev.c ++++ b/dpdk/drivers/net/fm10k/fm10k_ethdev.c +@@ -3267,8 +3267,7 @@ static const struct rte_pci_id pci_id_fm10k_map[] = { + + static struct rte_pci_driver rte_pmd_fm10k = { + .id_table = pci_id_fm10k_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_fm10k_pci_probe, + .remove = eth_fm10k_pci_remove, + }; +diff --git a/dpdk/drivers/net/i40e/base/README b/dpdk/drivers/net/i40e/base/README +index 84f191fad1..b46593566b 100644 +--- a/dpdk/drivers/net/i40e/base/README ++++ b/dpdk/drivers/net/i40e/base/README +@@ -1,34 +1,6 @@ +-.. +- BSD LICENSE +- +- Copyright(c) 2017 Intel Corporation. All rights reserved. +- All rights reserved. +- +- Redistribution and use in source and binary forms, with or without +- modification, are permitted provided that the following conditions +- are met: +- +- * Redistributions of source code must retain the above copyright +- notice, this list of conditions and the following disclaimer. +- * Redistributions in binary form must reproduce the above copyright +- notice, this list of conditions and the following disclaimer in +- the documentation and/or other materials provided with the +- distribution. +- * Neither the name of Intel Corporation nor the names of its +- contributors may be used to endorse or promote products derived +- from this software without specific prior written permission. +- +- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2017-2020 Intel Corporation ++ */ + + Intel® I40E driver + ================== +diff --git a/dpdk/drivers/net/i40e/base/i40e_adminq.c b/dpdk/drivers/net/i40e/base/i40e_adminq.c +index b2fc6f5900..584da0383c 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_adminq.c ++++ b/dpdk/drivers/net/i40e/base/i40e_adminq.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_status.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_adminq.h b/dpdk/drivers/net/i40e/base/i40e_adminq.h +index 769d84809e..6ce262ad4b 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_adminq.h ++++ b/dpdk/drivers/net/i40e/base/i40e_adminq.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_ADMINQ_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h b/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h +index cf6ef63e39..ec1ba7825b 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h ++++ b/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_ADMINQ_CMD_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_alloc.h b/dpdk/drivers/net/i40e/base/i40e_alloc.h +index 4fc1860155..ae14e4d932 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_alloc.h ++++ b/dpdk/drivers/net/i40e/base/i40e_alloc.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_ALLOC_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_common.c b/dpdk/drivers/net/i40e/base/i40e_common.c +index 1f8bb603df..b256fb47d3 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_common.c ++++ b/dpdk/drivers/net/i40e/base/i40e_common.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_type.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_dcb.c b/dpdk/drivers/net/i40e/base/i40e_dcb.c +index a26f82b3a6..d99bd6e3f8 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_dcb.c ++++ b/dpdk/drivers/net/i40e/base/i40e_dcb.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_adminq.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_dcb.h b/dpdk/drivers/net/i40e/base/i40e_dcb.h +index 85b0eed3ad..8d36fce430 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_dcb.h ++++ b/dpdk/drivers/net/i40e/base/i40e_dcb.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_DCB_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_devids.h b/dpdk/drivers/net/i40e/base/i40e_devids.h +index 8b667c2afb..545e35d243 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_devids.h ++++ b/dpdk/drivers/net/i40e/base/i40e_devids.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_DEVIDS_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_diag.c b/dpdk/drivers/net/i40e/base/i40e_diag.c +index 3ccbea4829..b3c4cfd3aa 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_diag.c ++++ b/dpdk/drivers/net/i40e/base/i40e_diag.c +@@ -1,5 +1,5 @@ + 
/* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_diag.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_diag.h b/dpdk/drivers/net/i40e/base/i40e_diag.h +index 4434fc960b..cb59285d9c 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_diag.h ++++ b/dpdk/drivers/net/i40e/base/i40e_diag.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_DIAG_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_hmc.c b/dpdk/drivers/net/i40e/base/i40e_hmc.c +index 11c9ae2072..a47d6e0d79 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_hmc.c ++++ b/dpdk/drivers/net/i40e/base/i40e_hmc.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_osdep.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_hmc.h b/dpdk/drivers/net/i40e/base/i40e_hmc.h +index 289264ed99..f9aad7dc31 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_hmc.h ++++ b/dpdk/drivers/net/i40e/base/i40e_hmc.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_HMC_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c b/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c +index 0afee49b13..d3969396f0 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c ++++ b/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_osdep.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_lan_hmc.h b/dpdk/drivers/net/i40e/base/i40e_lan_hmc.h +index e531ec490a..aa5dceb792 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_lan_hmc.h ++++ b/dpdk/drivers/net/i40e/base/i40e_lan_hmc.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_LAN_HMC_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_nvm.c b/dpdk/drivers/net/i40e/base/i40e_nvm.c +index 6c8ca87718..d87a6e56ff 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_nvm.c ++++ b/dpdk/drivers/net/i40e/base/i40e_nvm.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "i40e_prototype.h" +diff --git a/dpdk/drivers/net/i40e/base/i40e_osdep.h b/dpdk/drivers/net/i40e/base/i40e_osdep.h +index 8a2d82a8d0..64b15e1b61 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_osdep.h ++++ b/dpdk/drivers/net/i40e/base/i40e_osdep.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_OSDEP_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_prototype.h b/dpdk/drivers/net/i40e/base/i40e_prototype.h +index 0cf006dadc..055b38e73d 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_prototype.h ++++ b/dpdk/drivers/net/i40e/base/i40e_prototype.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_PROTOTYPE_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_register.h b/dpdk/drivers/net/i40e/base/i40e_register.h +index e93ec3f58f..2408dcb117 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_register.h ++++ b/dpdk/drivers/net/i40e/base/i40e_register.h +@@ -1,5 
+1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_REGISTER_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_status.h b/dpdk/drivers/net/i40e/base/i40e_status.h +index 1dad4f4b83..cd72169f14 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_status.h ++++ b/dpdk/drivers/net/i40e/base/i40e_status.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_STATUS_H_ +diff --git a/dpdk/drivers/net/i40e/base/i40e_type.h b/dpdk/drivers/net/i40e/base/i40e_type.h +index 1a637e40e8..3a46e3e1fa 100644 +--- a/dpdk/drivers/net/i40e/base/i40e_type.h ++++ b/dpdk/drivers/net/i40e/base/i40e_type.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _I40E_TYPE_H_ +diff --git a/dpdk/drivers/net/i40e/base/meson.build b/dpdk/drivers/net/i40e/base/meson.build +index d4c8f872d5..07aa91c08f 100644 +--- a/dpdk/drivers/net/i40e/base/meson.build ++++ b/dpdk/drivers/net/i40e/base/meson.build +@@ -1,5 +1,5 @@ + # SPDX-License-Identifier: BSD-3-Clause +-# Copyright(c) 2017 Intel Corporation ++# Copyright(c) 2017-2020 Intel Corporation + + sources = [ + 'i40e_adminq.c', +diff --git a/dpdk/drivers/net/i40e/base/virtchnl.h b/dpdk/drivers/net/i40e/base/virtchnl.h +index 88096cb45c..483020d4e5 100644 +--- a/dpdk/drivers/net/i40e/base/virtchnl.h ++++ b/dpdk/drivers/net/i40e/base/virtchnl.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _VIRTCHNL_H_ +diff --git a/dpdk/drivers/net/i40e/i40e_ethdev.c b/dpdk/drivers/net/i40e/i40e_ethdev.c +index 040bedfad9..8f372d1c09 100644 +--- a/dpdk/drivers/net/i40e/i40e_ethdev.c ++++ b/dpdk/drivers/net/i40e/i40e_ethdev.c +@@ -694,8 +694,7 @@ static int eth_i40e_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_i40e_pmd = { + .id_table = pci_id_i40e_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_i40e_pci_probe, + .remove = eth_i40e_pci_remove, + }; +@@ -2241,6 +2240,7 @@ i40e_dev_start(struct rte_eth_dev *dev) + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + uint32_t intr_vector = 0; + struct i40e_vsi *vsi; ++ uint16_t nb_rxq, nb_txq; + + hw->adapter_stopped = 0; + +@@ -2279,7 +2279,7 @@ i40e_dev_start(struct rte_eth_dev *dev) + ret = i40e_dev_rxtx_init(pf); + if (ret != I40E_SUCCESS) { + PMD_DRV_LOG(ERR, "Failed to init rx/tx queues"); +- goto err_up; ++ return ret; + } + + /* Map queues with MSIX interrupt */ +@@ -2304,10 +2304,16 @@ i40e_dev_start(struct rte_eth_dev *dev) + } + + /* Enable all queues which have been configured */ +- ret = i40e_dev_switch_queues(pf, TRUE); +- if (ret != I40E_SUCCESS) { +- PMD_DRV_LOG(ERR, "Failed to enable VSI"); +- goto err_up; ++ for (nb_rxq = 0; nb_rxq < dev->data->nb_rx_queues; nb_rxq++) { ++ ret = i40e_dev_rx_queue_start(dev, nb_rxq); ++ if (ret) ++ goto rx_err; ++ } ++ ++ for (nb_txq = 0; nb_txq < dev->data->nb_tx_queues; nb_txq++) { ++ ret = i40e_dev_tx_queue_start(dev, nb_txq); ++ if (ret) ++ goto tx_err; + } + + /* Enable receiving broadcast packets */ +@@ -2337,7 +2343,7 @@ i40e_dev_start(struct rte_eth_dev *dev) + ret = i40e_aq_set_lb_modes(hw, dev->data->dev_conf.lpbk_mode, NULL); + 
if (ret != I40E_SUCCESS) { + PMD_DRV_LOG(ERR, "fail to set loopback link"); +- goto err_up; ++ goto tx_err; + } + } + +@@ -2345,7 +2351,7 @@ i40e_dev_start(struct rte_eth_dev *dev) + ret = i40e_apply_link_speed(dev); + if (I40E_SUCCESS != ret) { + PMD_DRV_LOG(ERR, "Fail to apply link setting"); +- goto err_up; ++ goto tx_err; + } + + if (!rte_intr_allow_others(intr_handle)) { +@@ -2388,9 +2394,12 @@ i40e_dev_start(struct rte_eth_dev *dev) + + return I40E_SUCCESS; + +-err_up: +- i40e_dev_switch_queues(pf, FALSE); +- i40e_dev_clear_queues(dev); ++tx_err: ++ for (i = 0; i < nb_txq; i++) ++ i40e_dev_tx_queue_stop(dev, i); ++rx_err: ++ for (i = 0; i < nb_rxq; i++) ++ i40e_dev_rx_queue_stop(dev, i); + + return ret; + } +@@ -2414,7 +2423,11 @@ i40e_dev_stop(struct rte_eth_dev *dev) + } + + /* Disable all queues */ +- i40e_dev_switch_queues(pf, FALSE); ++ for (i = 0; i < dev->data->nb_tx_queues; i++) ++ i40e_dev_tx_queue_stop(dev, i); ++ ++ for (i = 0; i < dev->data->nb_rx_queues; i++) ++ i40e_dev_rx_queue_stop(dev, i); + + /* un-map queues with interrupt registers */ + i40e_vsi_disable_queues_intr(main_vsi); +@@ -4806,6 +4819,7 @@ i40e_res_pool_free(struct i40e_res_pool_info *pool, + { + struct pool_entry *entry, *next, *prev, *valid_entry = NULL; + uint32_t pool_offset; ++ uint16_t len; + int insert; + + if (pool == NULL) { +@@ -4844,12 +4858,13 @@ i40e_res_pool_free(struct i40e_res_pool_info *pool, + } + + insert = 0; ++ len = valid_entry->len; + /* Try to merge with next one*/ + if (next != NULL) { + /* Merge with next one */ +- if (valid_entry->base + valid_entry->len == next->base) { ++ if (valid_entry->base + len == next->base) { + next->base = valid_entry->base; +- next->len += valid_entry->len; ++ next->len += len; + rte_free(valid_entry); + valid_entry = next; + insert = 1; +@@ -4859,13 +4874,15 @@ i40e_res_pool_free(struct i40e_res_pool_info *pool, + if (prev != NULL) { + /* Merge with previous one */ + if (prev->base + prev->len == valid_entry->base) { +- prev->len += valid_entry->len; ++ prev->len += len; + /* If it merge with next one, remove next node */ + if (insert == 1) { + LIST_REMOVE(valid_entry, next); + rte_free(valid_entry); ++ valid_entry = NULL; + } else { + rte_free(valid_entry); ++ valid_entry = NULL; + insert = 1; + } + } +@@ -4881,8 +4898,8 @@ i40e_res_pool_free(struct i40e_res_pool_info *pool, + LIST_INSERT_HEAD(&pool->free_list, valid_entry, next); + } + +- pool->num_free += valid_entry->len; +- pool->num_alloc -= valid_entry->len; ++ pool->num_free += len; ++ pool->num_alloc -= len; + + return 0; + } +@@ -6153,33 +6170,6 @@ i40e_switch_tx_queue(struct i40e_hw *hw, uint16_t q_idx, bool on) + return I40E_SUCCESS; + } + +-/* Swith on or off the tx queues */ +-static int +-i40e_dev_switch_tx_queues(struct i40e_pf *pf, bool on) +-{ +- struct rte_eth_dev_data *dev_data = pf->dev_data; +- struct i40e_tx_queue *txq; +- struct rte_eth_dev *dev = pf->adapter->eth_dev; +- uint16_t i; +- int ret; +- +- for (i = 0; i < dev_data->nb_tx_queues; i++) { +- txq = dev_data->tx_queues[i]; +- /* Don't operate the queue if not configured or +- * if starting only per queue */ +- if (!txq || !txq->q_set || (on && txq->tx_deferred_start)) +- continue; +- if (on) +- ret = i40e_dev_tx_queue_start(dev, i); +- else +- ret = i40e_dev_tx_queue_stop(dev, i); +- if ( ret != I40E_SUCCESS) +- return ret; +- } +- +- return I40E_SUCCESS; +-} +- + int + i40e_switch_rx_queue(struct i40e_hw *hw, uint16_t q_idx, bool on) + { +@@ -6231,59 +6221,6 @@ i40e_switch_rx_queue(struct i40e_hw *hw, uint16_t q_idx, 
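The i40e_res_pool_free() hunk above is a use-after-free fix: the merge logic can rte_free(valid_entry) and the old code then read valid_entry->len to update the pool counters. Caching the length up front (and clearing the pointer after the free) removes every read of freed memory. A minimal standalone sketch of the pattern, with simplified stand-in types rather than the driver's real structures:

    #include <stdint.h>
    #include <stdlib.h>

    struct pool_entry {
        uint32_t base;
        uint32_t len;
        struct pool_entry *next;
    };

    struct res_pool {
        struct pool_entry *free_list;
        uint32_t num_free;
        uint32_t num_alloc;
    };

    static void
    pool_free_entry(struct res_pool *pool, struct pool_entry *entry)
    {
        uint32_t len = entry->len;  /* cache before entry may be freed */
        struct pool_entry *next = pool->free_list;

        if (next != NULL && entry->base + len == next->base) {
            /* Merge into the neighbour and drop the node. */
            next->base = entry->base;
            next->len += len;
            free(entry);
            entry = NULL;           /* entry is dangling from here on */
        } else {
            entry->next = pool->free_list;
            pool->free_list = entry;
        }
        /* Accounting uses the cached value, never entry->len. */
        pool->num_free += len;
        pool->num_alloc -= len;
    }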
bool on) + + return I40E_SUCCESS; + } +-/* Switch on or off the rx queues */ +-static int +-i40e_dev_switch_rx_queues(struct i40e_pf *pf, bool on) +-{ +- struct rte_eth_dev_data *dev_data = pf->dev_data; +- struct i40e_rx_queue *rxq; +- struct rte_eth_dev *dev = pf->adapter->eth_dev; +- uint16_t i; +- int ret; +- +- for (i = 0; i < dev_data->nb_rx_queues; i++) { +- rxq = dev_data->rx_queues[i]; +- /* Don't operate the queue if not configured or +- * if starting only per queue */ +- if (!rxq || !rxq->q_set || (on && rxq->rx_deferred_start)) +- continue; +- if (on) +- ret = i40e_dev_rx_queue_start(dev, i); +- else +- ret = i40e_dev_rx_queue_stop(dev, i); +- if (ret != I40E_SUCCESS) +- return ret; +- } +- +- return I40E_SUCCESS; +-} +- +-/* Switch on or off all the rx/tx queues */ +-int +-i40e_dev_switch_queues(struct i40e_pf *pf, bool on) +-{ +- int ret; +- +- if (on) { +- /* enable rx queues before enabling tx queues */ +- ret = i40e_dev_switch_rx_queues(pf, on); +- if (ret) { +- PMD_DRV_LOG(ERR, "Failed to switch rx queues"); +- return ret; +- } +- ret = i40e_dev_switch_tx_queues(pf, on); +- } else { +- /* Stop tx queues before stopping rx queues */ +- ret = i40e_dev_switch_tx_queues(pf, on); +- if (ret) { +- PMD_DRV_LOG(ERR, "Failed to switch tx queues"); +- return ret; +- } +- ret = i40e_dev_switch_rx_queues(pf, on); +- } +- +- return ret; +-} + + /* Initialize VSI for TX */ + static int +@@ -10276,6 +10213,7 @@ i40e_get_swr_pm_cfg(struct i40e_hw *hw, uint32_t *value) + { I40E_GL_SWR_PM_EF_DEVICE(I40E_DEV_ID_KX_C) }, + { I40E_GL_SWR_PM_EF_DEVICE(I40E_DEV_ID_10G_BASE_T) }, + { I40E_GL_SWR_PM_EF_DEVICE(I40E_DEV_ID_10G_BASE_T4) }, ++ { I40E_GL_SWR_PM_EF_DEVICE(I40E_DEV_ID_SFP_X722) }, + + { I40E_GL_SWR_PM_SF_DEVICE(I40E_DEV_ID_KX_B) }, + { I40E_GL_SWR_PM_SF_DEVICE(I40E_DEV_ID_QSFP_A) }, +diff --git a/dpdk/drivers/net/i40e/i40e_ethdev_vf.c b/dpdk/drivers/net/i40e/i40e_ethdev_vf.c +index 16d67b5938..ee2d8540ca 100644 +--- a/dpdk/drivers/net/i40e/i40e_ethdev_vf.c ++++ b/dpdk/drivers/net/i40e/i40e_ethdev_vf.c +@@ -738,7 +738,6 @@ i40evf_stop_queues(struct rte_eth_dev *dev) + for (i = 0; i < dev->data->nb_tx_queues; i++) { + if (i40evf_dev_tx_queue_stop(dev, i) != 0) { + PMD_DRV_LOG(ERR, "Fail to stop queue %u", i); +- return -1; + } + } + +@@ -746,7 +745,6 @@ i40evf_stop_queues(struct rte_eth_dev *dev) + for (i = 0; i < dev->data->nb_rx_queues; i++) { + if (i40evf_dev_rx_queue_stop(dev, i) != 0) { + PMD_DRV_LOG(ERR, "Fail to stop queue %u", i); +- return -1; + } + } + +@@ -1511,7 +1509,7 @@ static int eth_i40evf_pci_remove(struct rte_pci_device *pci_dev) + */ + static struct rte_pci_driver rte_i40evf_pmd = { + .id_table = pci_id_i40evf_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = eth_i40evf_pci_probe, + .remove = eth_i40evf_pci_remove, + }; +@@ -2094,10 +2092,6 @@ i40evf_dev_promiscuous_enable(struct rte_eth_dev *dev) + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + int ret; + +- /* If enabled, just return */ +- if (vf->promisc_unicast_enabled) +- return; +- + ret = i40evf_config_promisc(dev, 1, vf->promisc_multicast_enabled); + if (ret == 0) + vf->promisc_unicast_enabled = TRUE; +@@ -2109,10 +2103,6 @@ i40evf_dev_promiscuous_disable(struct rte_eth_dev *dev) + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + int ret; + +- /* If disabled, just return */ +- if (!vf->promisc_unicast_enabled) +- return; +- + ret = i40evf_config_promisc(dev, 0, 
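The i40e_dev_start()/i40e_dev_stop() rework above, together with the deletion of the i40e_dev_switch_queues() helpers, moves to per-queue start calls that unwind exactly the queues already started when something fails, instead of blanket-disabling the whole VSI. A compilable sketch of that goto-based rollback shape; the queue_*_start/stop stubs are hypothetical stand-ins for the driver's per-queue ops:

    #include <stdint.h>

    static int queue_rx_start(uint16_t q) { (void)q; return 0; }
    static int queue_rx_stop(uint16_t q)  { (void)q; return 0; }
    static int queue_tx_start(uint16_t q) { (void)q; return 0; }
    static int queue_tx_stop(uint16_t q)  { (void)q; return 0; }

    static int
    start_all_queues(uint16_t nb_rx, uint16_t nb_tx)
    {
        uint16_t rxq = 0, txq = 0, i;
        int ret = 0;

        for (rxq = 0; rxq < nb_rx; rxq++) {
            ret = queue_rx_start(rxq);
            if (ret)
                goto rx_err;   /* queues 0..rxq-1 are up, rxq failed */
        }
        for (txq = 0; txq < nb_tx; txq++) {
            ret = queue_tx_start(txq);
            if (ret)
                goto tx_err;   /* all Rx up, Tx 0..txq-1 up */
        }
        return 0;

    tx_err:
        for (i = 0; i < txq; i++)
            queue_tx_stop(i);
    rx_err:
        for (i = 0; i < rxq; i++)
            queue_rx_stop(i);
        return ret;
    }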
vf->promisc_multicast_enabled);
+ if (ret == 0)
+ vf->promisc_unicast_enabled = FALSE;
+@@ -2124,10 +2114,6 @@ i40evf_dev_allmulticast_enable(struct rte_eth_dev *dev)
+ struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+ int ret;
+
+- /* If enabled, just return */
+- if (vf->promisc_multicast_enabled)
+- return;
+-
+ ret = i40evf_config_promisc(dev, vf->promisc_unicast_enabled, 1);
+ if (ret == 0)
+ vf->promisc_multicast_enabled = TRUE;
+@@ -2139,10 +2125,6 @@ i40evf_dev_allmulticast_disable(struct rte_eth_dev *dev)
+ struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+ int ret;
+
+- /* If enabled, just return */
+- if (!vf->promisc_multicast_enabled)
+- return;
+-
+ ret = i40evf_config_promisc(dev, vf->promisc_unicast_enabled, 0);
+ if (ret == 0)
+ vf->promisc_multicast_enabled = FALSE;
+diff --git a/dpdk/drivers/net/i40e/i40e_flow.c b/dpdk/drivers/net/i40e/i40e_flow.c
+index 642532ba96..088b92fdd1 100644
+--- a/dpdk/drivers/net/i40e/i40e_flow.c
++++ b/dpdk/drivers/net/i40e/i40e_flow.c
+@@ -2543,7 +2543,6 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
+ if (next_type == RTE_FLOW_ITEM_TYPE_VLAN ||
+ ether_type == ETHER_TYPE_IPv4 ||
+ ether_type == ETHER_TYPE_IPv6 ||
+- ether_type == ETHER_TYPE_ARP ||
+ ether_type == outer_tpid) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+@@ -2588,7 +2587,6 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev,
+
+ if (ether_type == ETHER_TYPE_IPv4 ||
+ ether_type == ETHER_TYPE_IPv6 ||
+- ether_type == ETHER_TYPE_ARP ||
+ ether_type == outer_tpid) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+@@ -4768,7 +4766,6 @@ i40e_flow_destroy(struct rte_eth_dev *dev,
+
+ /* If the last flow is destroyed, disable fdir. */
+ if (!ret && TAILQ_EMPTY(&pf->fdir.fdir_list)) {
+- i40e_fdir_teardown(pf);
+ dev->data->dev_conf.fdir_conf.mode =
+ RTE_FDIR_MODE_NONE;
+ }
+@@ -4964,8 +4961,6 @@ i40e_flow_flush_fdir_filter(struct i40e_pf *pf)
+ pf->fdir.inset_flag[pctype] = 0;
+ }
+
+- i40e_fdir_teardown(pf);
+-
+ return ret;
+ }
+
+diff --git a/dpdk/drivers/net/i40e/i40e_rxtx.c b/dpdk/drivers/net/i40e/i40e_rxtx.c
+index e4a314f4ab..7c620a76b0 100644
+--- a/dpdk/drivers/net/i40e/i40e_rxtx.c
++++ b/dpdk/drivers/net/i40e/i40e_rxtx.c
+@@ -1586,6 +1586,15 @@ i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+ PMD_INIT_FUNC_TRACE();
+
+ rxq = dev->data->rx_queues[rx_queue_id];
++ if (!rxq || !rxq->q_set) {
++ PMD_DRV_LOG(ERR, "RX queue %u not available or setup",
++ rx_queue_id);
++ return -EINVAL;
++ }
++
++ if (rxq->rx_deferred_start)
++ PMD_DRV_LOG(WARNING, "RX queue %u is deferred start",
++ rx_queue_id);
+
+ err = i40e_alloc_rx_queue_mbufs(rxq);
+ if (err) {
+@@ -1620,6 +1629,11 @@ i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+ struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+ rxq = dev->data->rx_queues[rx_queue_id];
++ if (!rxq || !rxq->q_set) {
++ PMD_DRV_LOG(ERR, "RX queue %u not available or setup",
++ rx_queue_id);
++ return -EINVAL;
++ }
+
+ /*
+ * rx_queue_id is queue id application refers to, while
+@@ -1648,6 +1662,15 @@ i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
+ PMD_INIT_FUNC_TRACE();
+
+ txq = dev->data->tx_queues[tx_queue_id];
++ if (!txq || !txq->q_set) {
++ PMD_DRV_LOG(ERR, "TX queue %u is not available or setup",
++ tx_queue_id);
++ return -EINVAL;
++ }
++
++ if (txq->tx_deferred_start)
++ PMD_DRV_LOG(WARNING, "TX queue %u is deferred start",
++ tx_queue_id);
+
+ /*
+ * 
tx_queue_id is queue id application refers to, while +@@ -1672,6 +1695,11 @@ i40e_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id) + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + txq = dev->data->tx_queues[tx_queue_id]; ++ if (!txq || !txq->q_set) { ++ PMD_DRV_LOG(ERR, "TX queue %u is not available or setup", ++ tx_queue_id); ++ return -EINVAL; ++ } + + /* + * tx_queue_id is queue id application refers to, while +diff --git a/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h b/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h +index 0e6ffa0078..31f73f6054 100644 +--- a/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h ++++ b/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h +@@ -33,6 +33,7 @@ reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs, + if (!split_flags[buf_idx]) { + /* it's the last packet of the set */ + start->hash = end->hash; ++ start->vlan_tci = end->vlan_tci; + start->ol_flags = end->ol_flags; + /* we need to strip crc for the whole packet */ + start->pkt_len -= rxq->crc_len; +diff --git a/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c b/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c +index deb185fe2f..4376d8911c 100644 +--- a/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c ++++ b/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c +@@ -72,8 +72,9 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq) + rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? + (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); + ++ rte_cio_wmb(); + /* Update the tail pointer on the NIC */ +- I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id); ++ I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); + } + + static inline void +@@ -564,7 +565,8 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts, + + txq->tx_tail = tx_id; + +- I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail); ++ rte_cio_wmb(); ++ I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id); + + return nb_pkts; + } +diff --git a/dpdk/drivers/net/ixgbe/base/README b/dpdk/drivers/net/ixgbe/base/README +index 431be0260e..a48b14ed27 100644 +--- a/dpdk/drivers/net/ixgbe/base/README ++++ b/dpdk/drivers/net/ixgbe/base/README +@@ -1,34 +1,6 @@ +-.. +- BSD LICENSE +- +- Copyright(c) 2010-2018 Intel Corporation. All rights reserved. +- All rights reserved. +- +- Redistribution and use in source and binary forms, with or without +- modification, are permitted provided that the following conditions +- are met: +- +- * Redistributions of source code must retain the above copyright +- notice, this list of conditions and the following disclaimer. +- * Redistributions in binary form must reproduce the above copyright +- notice, this list of conditions and the following disclaimer in +- the documentation and/or other materials provided with the +- distribution. +- * Neither the name of Intel Corporation nor the names of its +- contributors may be used to endorse or promote products derived +- from this software without specific prior written permission. +- +- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +- A PARTICULAR PURPOSE ARE DISCLAIMED. 
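The i40e_rxtx_vec_neon.c hunks above split the tail-pointer update into an explicit rte_cio_wmb() followed by a relaxed register write: the descriptor stores only need to be ordered before the doorbell as observed by the device, which is cheaper on arm64 than the stronger ordering implied by I40E_PCI_REG_WRITE(). A minimal sketch of the idiom; dma_wmb() here is a stand-in for rte_cio_wmb() (DMB OSHST on arm64), not DPDK's actual macro:

    #include <stdint.h>

    /* Order all prior (descriptor) stores before the doorbell store,
     * as observed by the device doing DMA. */
    #if defined(__aarch64__)
    #define dma_wmb() __asm__ volatile("dmb oshst" : : : "memory")
    #else
    #define dma_wmb() __sync_synchronize()
    #endif

    static inline void
    ring_doorbell(volatile uint32_t *doorbell, uint32_t tail)
    {
        dma_wmb();          /* descriptors visible to the NIC first... */
        *doorbell = tail;   /* ...then the relaxed tail update */
    }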
IN NO EVENT SHALL THE COPYRIGHT +- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright(c) 2010-2020 Intel Corporation ++ */ + + Intel® IXGBE driver + =================== +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c b/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c +index 245ff75d55..c83e1c6b30 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_type.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_82598.h b/dpdk/drivers/net/ixgbe/base/ixgbe_82598.h +index 8013f495ec..7bad5e12d3 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_82598.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_82598.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_82598_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c b/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c +index 96bdde62c8..9cd0b1428c 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_type.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_82599.h b/dpdk/drivers/net/ixgbe/base/ixgbe_82599.h +index a32eb1f517..238481983f 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_82599.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_82599.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_82599_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_api.c b/dpdk/drivers/net/ixgbe/base/ixgbe_api.c +index 873c07999c..0a22df3d06 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_api.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_api.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_api.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_api.h b/dpdk/drivers/net/ixgbe/base/ixgbe_api.h +index ff8f7b2611..33e7c3c215 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_api.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_api.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_API_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_common.c b/dpdk/drivers/net/ixgbe/base/ixgbe_common.c +index 62ff767230..4eb98dc198 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_common.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_common.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_common.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_common.h 
b/dpdk/drivers/net/ixgbe/base/ixgbe_common.h +index 3bb2475119..7a31f088c4 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_common.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_common.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_COMMON_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.c b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.c +index a590e0e07c..53def2146e 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.h b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.h +index 503d06018f..c2a1013ac0 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_DCB_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.c b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.c +index d87cb58857..bb309e28fd 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.h b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.h +index 1a14744482..8f36881378 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82598.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_DCB_82598_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.c b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.c +index f4f0ff0190..04e0d1fb7d 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.h b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.h +index 085ada27f7..7bd1d6a325 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_dcb_82599.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_DCB_82599_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c b/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c +index 67a124d8d1..6005c4ac93 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_vf.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h b/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h +index 9664f3bdbf..dd2e1eee4e 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_HV_VF_H_ +diff 
--git a/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.c b/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.c +index cb82942dfa..13bdb5f68f 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_type.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h b/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h +index 5d32cbc074..1a45e49c2f 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_MBX_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h b/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h +index ea8dc1cbe5..a4eb71777c 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_OS_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c b/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c +index dd118f9170..a8243fa974 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_api.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h b/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h +index f1605f2cc9..a06c3be170 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_PHY_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_type.h b/dpdk/drivers/net/ixgbe/base/ixgbe_type.h +index 077b8f01c7..15e9370105 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_type.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_type.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_TYPE_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c b/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c +index aac37822e4..7f69ece107 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h b/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h +index dba643fced..be58b4f76e 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_VF_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c b/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c +index f00f0eae7e..d65f47c181 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_x540.h" +diff --git 
a/dpdk/drivers/net/ixgbe/base/ixgbe_x540.h b/dpdk/drivers/net/ixgbe/base/ixgbe_x540.h +index 231dfe56e5..ba79847d11 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_x540.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_x540.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_X540_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c b/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c +index a920a146e7..f2c8e5425e 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #include "ixgbe_x550.h" +diff --git a/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h b/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h +index 3bd98f243d..10086ab423 100644 +--- a/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h ++++ b/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h +@@ -1,5 +1,5 @@ + /* SPDX-License-Identifier: BSD-3-Clause +- * Copyright(c) 2001-2018 ++ * Copyright(c) 2001-2020 Intel Corporation + */ + + #ifndef _IXGBE_X550_H_ +diff --git a/dpdk/drivers/net/ixgbe/base/meson.build b/dpdk/drivers/net/ixgbe/base/meson.build +index 21ac64bf5c..e4807e59a5 100644 +--- a/dpdk/drivers/net/ixgbe/base/meson.build ++++ b/dpdk/drivers/net/ixgbe/base/meson.build +@@ -1,5 +1,5 @@ + # SPDX-License-Identifier: BSD-3-Clause +-# Copyright(c) 2017 Intel Corporation ++# Copyright(c) 2017-2020 Intel Corporation + + sources = [ + 'ixgbe_82598.c', +diff --git a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c +index 2d49ea011b..aa0e90a13c 100644 +--- a/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c ++++ b/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c +@@ -1866,8 +1866,7 @@ static int eth_ixgbe_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_ixgbe_pmd = { + .id_table = pci_id_ixgbe_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_ixgbe_pci_probe, + .remove = eth_ixgbe_pci_remove, + }; +@@ -1889,7 +1888,7 @@ static int eth_ixgbevf_pci_remove(struct rte_pci_device *pci_dev) + */ + static struct rte_pci_driver rte_ixgbevf_pmd = { + .id_table = pci_id_ixgbevf_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = eth_ixgbevf_pci_probe, + .remove = eth_ixgbevf_pci_remove, + }; +@@ -2580,6 +2579,8 @@ ixgbe_flow_ctrl_enable(struct rte_eth_dev *dev, struct ixgbe_hw *hw) + int err; + uint32_t mflcn; + ++ ixgbe_setup_fc(hw); ++ + err = ixgbe_fc_enable(hw); + + /* Not negotiated is not an error case */ +@@ -4187,6 +4188,11 @@ ixgbe_dev_link_update_share(struct rte_eth_dev *dev, + if (wait_to_complete == 0 || dev->data->dev_conf.intr_conf.lsc != 0) + wait = 0; + ++/* BSD has no interrupt mechanism, so force NIC status synchronization. 
*/ ++#ifdef RTE_EXEC_ENV_FREEBSD ++ wait = 1; ++#endif ++ + if (vf) + diag = ixgbevf_check_link(hw, &link_speed, &link_up, wait); + else +diff --git a/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.c b/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.c +index b946808bc2..62376bdad7 100644 +--- a/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.c ++++ b/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.c +@@ -521,6 +521,9 @@ rte_pmd_ixgbe_macsec_enable(uint16_t port, uint8_t en, uint8_t rp) + + dev = &rte_eth_devices[port]; + ++ if (!is_ixgbe_supported(dev)) ++ return -ENOTSUP; ++ + macsec_setting.offload_en = 1; + macsec_setting.encrypt_en = en; + macsec_setting.replayprotect_en = rp; +@@ -541,6 +544,9 @@ rte_pmd_ixgbe_macsec_disable(uint16_t port) + + dev = &rte_eth_devices[port]; + ++ if (!is_ixgbe_supported(dev)) ++ return -ENOTSUP; ++ + ixgbe_dev_macsec_setting_reset(dev); + + ixgbe_dev_macsec_register_disable(dev); +diff --git a/dpdk/drivers/net/mlx4/mlx4.c b/dpdk/drivers/net/mlx4/mlx4.c +index 4428edf108..5e22ee4381 100644 +--- a/dpdk/drivers/net/mlx4/mlx4.c ++++ b/dpdk/drivers/net/mlx4/mlx4.c +@@ -48,6 +48,10 @@ + #include "mlx4_rxtx.h" + #include "mlx4_utils.h" + ++#ifdef MLX4_GLUE ++const struct mlx4_glue *mlx4_glue; ++#endif ++ + struct mlx4_dev_list mlx4_mem_event_cb_list = + LIST_HEAD_INITIALIZER(mlx4_mem_event_cb_list); + +@@ -69,6 +73,53 @@ const char *pmd_mlx4_init_params[] = { + + static void mlx4_dev_stop(struct rte_eth_dev *dev); + ++/** ++ * Initialize process private data structure. ++ * ++ * @param dev ++ * Pointer to Ethernet device structure. ++ * ++ * @return ++ * 0 on success, a negative errno value otherwise and rte_errno is set. ++ */ ++static int ++mlx4_proc_priv_init(struct rte_eth_dev *dev) ++{ ++ struct mlx4_proc_priv *ppriv; ++ size_t ppriv_size; ++ ++ /* ++ * UAR register table follows the process private structure. BlueFlame ++ * registers for Tx queues are stored in the table. ++ */ ++ ppriv_size = sizeof(struct mlx4_proc_priv) + ++ RTE_MAX_QUEUES_PER_PORT * sizeof(void *); ++ ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size, ++ RTE_CACHE_LINE_SIZE, dev->device->numa_node); ++ if (!ppriv) { ++ rte_errno = ENOMEM; ++ return -rte_errno; ++ } ++ ppriv->uar_table_sz = ppriv_size; ++ dev->process_private = ppriv; ++ return 0; ++} ++ ++/** ++ * Un-initialize process private data structure. ++ * ++ * @param dev ++ * Pointer to Ethernet device structure. ++ */ ++static void ++mlx4_proc_priv_uninit(struct rte_eth_dev *dev) ++{ ++ if (!dev->process_private) ++ return; ++ rte_free(dev->process_private); ++ dev->process_private = NULL; ++} ++ + /** + * DPDK callback for Ethernet device configuration. 
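mlx4_proc_priv_init() above sizes a single allocation as a header plus one UAR pointer per possible Tx queue, using a C99 flexible array member, and hangs it off dev->process_private so each process gets its own doorbell table. The sizing pattern reduced to standalone form; MAX_QUEUES stands in for RTE_MAX_QUEUES_PER_PORT and plain calloc() for rte_malloc_socket():

    #include <stddef.h>
    #include <stdlib.h>

    #define MAX_QUEUES 1024  /* stand-in for RTE_MAX_QUEUES_PER_PORT */

    struct proc_priv {
        size_t uar_table_sz;  /* total size of this allocation */
        void *uar_table[];    /* one doorbell pointer per Tx queue */
    };

    static struct proc_priv *
    proc_priv_alloc(void)
    {
        size_t sz = sizeof(struct proc_priv) + MAX_QUEUES * sizeof(void *);
        struct proc_priv *ppriv = calloc(1, sz);

        if (ppriv != NULL)
            ppriv->uar_table_sz = sz;
        return ppriv;
    }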
+ * +@@ -95,9 +146,17 @@ mlx4_dev_configure(struct rte_eth_dev *dev) + goto exit; + } + ret = mlx4_intr_install(priv); +- if (ret) ++ if (ret) { + ERROR("%p: interrupt handler installation failed", + (void *)dev); ++ goto exit; ++ } ++ ret = mlx4_proc_priv_init(dev); ++ if (ret) { ++ ERROR("%p: process private data allocation failed", ++ (void *)dev); ++ goto exit; ++ } + exit: + return ret; + } +@@ -209,6 +268,7 @@ mlx4_dev_close(struct rte_eth_dev *dev) + mlx4_rx_queue_release(dev->data->rx_queues[i]); + for (i = 0; i != dev->data->nb_tx_queues; ++i) + mlx4_tx_queue_release(dev->data->tx_queues[i]); ++ mlx4_proc_priv_uninit(dev); + mlx4_mr_release(dev); + if (priv->pd != NULL) { + assert(priv->ctx != NULL); +diff --git a/dpdk/drivers/net/mlx4/mlx4.h b/dpdk/drivers/net/mlx4/mlx4.h +index 758b7aa435..8454a5fcec 100644 +--- a/dpdk/drivers/net/mlx4/mlx4.h ++++ b/dpdk/drivers/net/mlx4/mlx4.h +@@ -75,6 +75,17 @@ struct rte_flow; + LIST_HEAD(mlx4_dev_list, mlx4_priv); + LIST_HEAD(mlx4_mr_list, mlx4_mr); + ++/* Per-process private structure. */ ++struct mlx4_proc_priv { ++ size_t uar_table_sz; ++ /* Size of UAR register table. */ ++ void *uar_table[]; ++ /* Table of UAR registers for each process. */ ++}; ++ ++#define MLX4_PROC_PRIV(port_id) \ ++ ((struct mlx4_proc_priv *)rte_eth_devices[port_id].process_private) ++ + /** Private data structure. */ + struct mlx4_priv { + LIST_ENTRY(mlx4_priv) mem_event_cb; +diff --git a/dpdk/drivers/net/mlx4/mlx4_flow.c b/dpdk/drivers/net/mlx4/mlx4_flow.c +index 5136d136ff..dfdc790aaf 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_flow.c ++++ b/dpdk/drivers/net/mlx4/mlx4_flow.c +@@ -976,12 +976,13 @@ mlx4_drop_get(struct mlx4_priv *priv) + priv->drop = drop; + return drop; + error: +- if (drop->qp) +- claim_zero(mlx4_glue->destroy_qp(drop->qp)); +- if (drop->cq) +- claim_zero(mlx4_glue->destroy_cq(drop->cq)); +- if (drop) ++ if (drop) { ++ if (drop->qp) ++ claim_zero(mlx4_glue->destroy_qp(drop->qp)); ++ if (drop->cq) ++ claim_zero(mlx4_glue->destroy_cq(drop->cq)); + rte_free(drop); ++ } + rte_errno = ENOMEM; + return NULL; + } +diff --git a/dpdk/drivers/net/mlx4/mlx4_glue.h b/dpdk/drivers/net/mlx4/mlx4_glue.h +index 668ca86700..5d9e985495 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_glue.h ++++ b/dpdk/drivers/net/mlx4/mlx4_glue.h +@@ -84,6 +84,6 @@ struct mlx4_glue { + void *attr); + }; + +-const struct mlx4_glue *mlx4_glue; ++extern const struct mlx4_glue *mlx4_glue; + + #endif /* MLX4_GLUE_H_ */ +diff --git a/dpdk/drivers/net/mlx4/mlx4_prm.h b/dpdk/drivers/net/mlx4/mlx4_prm.h +index aef77ba06e..16ae6db82d 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_prm.h ++++ b/dpdk/drivers/net/mlx4/mlx4_prm.h +@@ -77,7 +77,8 @@ struct mlx4_sq { + uint32_t owner_opcode; + /**< Default owner opcode with HW valid owner bit. */ + uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */ +- volatile uint32_t *db; /**< Pointer to the doorbell. */ ++ uint32_t *db; /**< Pointer to the doorbell. */ ++ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ + uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */ + }; + +diff --git a/dpdk/drivers/net/mlx4/mlx4_rxtx.c b/dpdk/drivers/net/mlx4/mlx4_rxtx.c +index 8c88effcd1..d5290162dc 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_rxtx.c ++++ b/dpdk/drivers/net/mlx4/mlx4_rxtx.c +@@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) + /* Make sure that descriptors are written before doorbell record. */ + rte_wmb(); + /* Ring QP doorbell. 
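The mlx4_drop_get() hunk above is a textbook error-path fix: the old cleanup tested drop->qp and drop->cq before checking drop itself, so it dereferenced NULL precisely on the allocation-failure path the label exists to handle. The corrected ordering, reduced to a standalone helper with stand-in types and teardown stubs:

    #include <stdlib.h>

    struct drop_res {
        void *qp;
        void *cq;
    };

    static void destroy_qp(void *qp) { (void)qp; }  /* stand-in teardown */
    static void destroy_cq(void *cq) { (void)cq; }

    static void
    drop_cleanup(struct drop_res *drop)
    {
        if (drop == NULL)   /* check the handle before any member */
            return;
        if (drop->qp)
            destroy_qp(drop->qp);
        if (drop->cq)
            destroy_cq(drop->cq);
        free(drop);
    }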
*/ +- rte_write32(txq->msq.doorbell_qpn, txq->msq.db); ++ rte_write32(txq->msq.doorbell_qpn, MLX4_TX_BFREG(txq)); + txq->elts_head += i; + return i; + } +diff --git a/dpdk/drivers/net/mlx4/mlx4_rxtx.h b/dpdk/drivers/net/mlx4/mlx4_rxtx.h +index 29389f1ea7..7a7cc36cd4 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_rxtx.h ++++ b/dpdk/drivers/net/mlx4/mlx4_rxtx.h +@@ -97,6 +97,7 @@ struct mlx4_txq_stats { + struct txq { + struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */ + struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */ ++ uint16_t port_id; /**< Port ID of device. */ + unsigned int elts_head; /**< Current index in (*elts)[]. */ + unsigned int elts_tail; /**< First element awaiting completion. */ + int elts_comp_cd; /**< Countdown for next completion. */ +@@ -118,9 +119,12 @@ struct txq { + uint8_t data[]; /**< Remaining queue resources. */ + }; + ++#define MLX4_TX_BFREG(txq) \ ++ (MLX4_PROC_PRIV((txq)->port_id)->uar_table[(txq)->stats.idx]) ++ + /* mlx4_rxq.c */ + +-uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE]; ++extern uint8_t mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE]; + int mlx4_rss_init(struct mlx4_priv *priv); + void mlx4_rss_deinit(struct mlx4_priv *priv); + struct mlx4_rss *mlx4_rss_get(struct mlx4_priv *priv, uint64_t fields, +diff --git a/dpdk/drivers/net/mlx4/mlx4_txq.c b/dpdk/drivers/net/mlx4/mlx4_txq.c +index 352700820d..92c9c03d36 100644 +--- a/dpdk/drivers/net/mlx4/mlx4_txq.c ++++ b/dpdk/drivers/net/mlx4/mlx4_txq.c +@@ -37,6 +37,23 @@ + #include "mlx4_rxtx.h" + #include "mlx4_utils.h" + ++/** ++ * Initialize Tx UAR registers for primary process. ++ * ++ * @param txq ++ * Pointer to Tx queue structure. ++ */ ++static void ++txq_uar_init(struct txq *txq) ++{ ++ struct mlx4_priv *priv = txq->priv; ++ struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv)); ++ ++ assert(rte_eal_process_type() == RTE_PROC_PRIMARY); ++ assert(ppriv); ++ ppriv->uar_table[txq->stats.idx] = txq->msq.db; ++} ++ + /** + * Free Tx queue elements. + * +@@ -89,6 +106,7 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv) + sq->owner_opcode = MLX4_OPCODE_SEND | (0u << MLX4_SQ_OWNER_BIT); + sq->stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL | + (0u << MLX4_SQ_OWNER_BIT)); ++ sq->uar_mmap_offset = -1; /* Make mmap() fail. */ + sq->db = dqp->sdb; + sq->doorbell_qpn = dqp->doorbell_qpn; + cq->buf = dcq->buf.buf; +@@ -214,6 +232,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + } + *txq = (struct txq){ + .priv = priv, ++ .port_id = dev->data->port_id, + .stats = { + .idx = idx, + }, +@@ -319,6 +338,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + goto error; + } + mlx4_txq_fill_dv_obj_info(txq, &mlxdv); ++ txq_uar_init(txq); + /* Save first wqe pointer in the first element. */ + (&(*txq->elts)[0])->wqe = + (volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf; +diff --git a/dpdk/drivers/net/mlx5/mlx5.c b/dpdk/drivers/net/mlx5/mlx5.c +index 29370f58f0..01fff19224 100644 +--- a/dpdk/drivers/net/mlx5/mlx5.c ++++ b/dpdk/drivers/net/mlx5/mlx5.c +@@ -131,6 +131,10 @@ static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER; + /** Driver-specific log messages type. */ + int mlx5_logtype; + ++#ifdef MLX5_GLUE ++const struct mlx5_glue *mlx5_glue; ++#endif ++ + /** + * Prepare shared data between primary and secondary process. + */ +@@ -238,6 +242,53 @@ mlx5_free_verbs_buf(void *ptr, void *data __rte_unused) + rte_free(ptr); + } + ++/** ++ * Initialize process private data structure. 
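The one-line mlx4_glue.h and mlx4_rxtx.h changes above fix a common C linkage bug: writing "const struct mlx4_glue *mlx4_glue;" in a header creates a tentative definition in every translation unit that includes it, which fails to link under -fno-common (the GCC 10 default). The rule the patch applies, in miniature and with illustrative names:

    /* glue.h -- shared header: a declaration only */
    struct glue_ops;
    extern const struct glue_ops *glue;

    /* glue.c -- exactly one translation unit owns the definition */
    const struct glue_ops *glue;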
++ * ++ * @param dev ++ * Pointer to Ethernet device structure. ++ * ++ * @return ++ * 0 on success, a negative errno value otherwise and rte_errno is set. ++ */ ++int ++mlx5_proc_priv_init(struct rte_eth_dev *dev) ++{ ++ struct mlx5_proc_priv *ppriv; ++ size_t ppriv_size; ++ ++ /* ++ * UAR register table follows the process private structure. BlueFlame ++ * registers for Tx queues are stored in the table. ++ */ ++ ppriv_size = sizeof(struct mlx5_proc_priv) + ++ RTE_MAX_QUEUES_PER_PORT * sizeof(void *); ++ ppriv = rte_malloc_socket("mlx5_proc_priv", ppriv_size, ++ RTE_CACHE_LINE_SIZE, dev->device->numa_node); ++ if (!ppriv) { ++ rte_errno = ENOMEM; ++ return -rte_errno; ++ } ++ ppriv->uar_table_sz = ppriv_size; ++ dev->process_private = ppriv; ++ return 0; ++} ++ ++/** ++ * Un-initialize process private data structure. ++ * ++ * @param dev ++ * Pointer to Ethernet device structure. ++ */ ++static void ++mlx5_proc_priv_uninit(struct rte_eth_dev *dev) ++{ ++ if (!dev->process_private) ++ return; ++ rte_free(dev->process_private); ++ dev->process_private = NULL; ++} ++ + /** + * DPDK callback to close the device. + * +@@ -279,6 +330,7 @@ mlx5_dev_close(struct rte_eth_dev *dev) + priv->txqs_n = 0; + priv->txqs = NULL; + } ++ mlx5_proc_priv_uninit(dev); + mlx5_mprq_free_mp(dev); + mlx5_mr_release(dev); + if (priv->pd != NULL) { +@@ -596,121 +648,6 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) + + static struct rte_pci_driver mlx5_driver; + +-/* +- * Reserved UAR address space for TXQ UAR(hw doorbell) mapping, process +- * local resource used by both primary and secondary to avoid duplicate +- * reservation. +- * The space has to be available on both primary and secondary process, +- * TXQ UAR maps to this area using fixed mmap w/o double check. +- */ +-static void *uar_base; +- +-static int +-find_lower_va_bound(const struct rte_memseg_list *msl, +- const struct rte_memseg *ms, void *arg) +-{ +- void **addr = arg; +- +- if (msl->external) +- return 0; +- if (*addr == NULL) +- *addr = ms->addr; +- else +- *addr = RTE_MIN(*addr, ms->addr); +- +- return 0; +-} +- +-/** +- * Reserve UAR address space for primary process. +- * +- * @param[in] dev +- * Pointer to Ethernet device. +- * +- * @return +- * 0 on success, a negative errno value otherwise and rte_errno is set. +- */ +-static int +-mlx5_uar_init_primary(struct rte_eth_dev *dev) +-{ +- struct mlx5_priv *priv = dev->data->dev_private; +- void *addr = (void *)0; +- +- if (uar_base) { /* UAR address space mapped. */ +- priv->uar_base = uar_base; +- return 0; +- } +- /* find out lower bound of hugepage segments */ +- rte_memseg_walk(find_lower_va_bound, &addr); +- +- /* keep distance to hugepages to minimize potential conflicts. */ +- addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE)); +- /* anonymous mmap, no real memory consumption. */ +- addr = mmap(addr, MLX5_UAR_SIZE, +- PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +- if (addr == MAP_FAILED) { +- DRV_LOG(ERR, +- "port %u failed to reserve UAR address space, please" +- " adjust MLX5_UAR_SIZE or try --base-virtaddr", +- dev->data->port_id); +- rte_errno = ENOMEM; +- return -rte_errno; +- } +- /* Accept either same addr or a new addr returned from mmap if target +- * range occupied. +- */ +- DRV_LOG(INFO, "port %u reserved UAR address space: %p", +- dev->data->port_id, addr); +- priv->uar_base = addr; /* for primary and secondary UAR re-mmap. */ +- uar_base = addr; /* process local, don't reserve again. 
*/ +- return 0; +-} +- +-/** +- * Reserve UAR address space for secondary process, align with +- * primary process. +- * +- * @param[in] dev +- * Pointer to Ethernet device. +- * +- * @return +- * 0 on success, a negative errno value otherwise and rte_errno is set. +- */ +-static int +-mlx5_uar_init_secondary(struct rte_eth_dev *dev) +-{ +- struct mlx5_priv *priv = dev->data->dev_private; +- void *addr; +- +- assert(priv->uar_base); +- if (uar_base) { /* already reserved. */ +- assert(uar_base == priv->uar_base); +- return 0; +- } +- /* anonymous mmap, no real memory consumption. */ +- addr = mmap(priv->uar_base, MLX5_UAR_SIZE, +- PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +- if (addr == MAP_FAILED) { +- DRV_LOG(ERR, "port %u UAR mmap failed: %p size: %llu", +- dev->data->port_id, priv->uar_base, MLX5_UAR_SIZE); +- rte_errno = ENXIO; +- return -rte_errno; +- } +- if (priv->uar_base != addr) { +- DRV_LOG(ERR, +- "port %u UAR address %p size %llu occupied, please" +- " adjust MLX5_UAR_OFFSET or try EAL parameter" +- " --base-virtaddr", +- dev->data->port_id, priv->uar_base, MLX5_UAR_SIZE); +- rte_errno = ENXIO; +- return -rte_errno; +- } +- uar_base = addr; /* process local, don't reserve again */ +- DRV_LOG(INFO, "port %u reserved UAR address space: %p", +- dev->data->port_id, addr); +- return 0; +-} +- + /** + * Spawn an Ethernet device from Verbs information. + * +@@ -916,7 +853,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + } + eth_dev->device = dpdk_dev; + eth_dev->dev_ops = &mlx5_dev_sec_ops; +- err = mlx5_uar_init_secondary(eth_dev); ++ err = mlx5_proc_priv_init(eth_dev); + if (err) { + err = rte_errno; + goto error; +@@ -928,7 +865,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + goto error; + } + /* Remap UAR for Tx queues. */ +- err = mlx5_tx_uar_remap(eth_dev, err); ++ err = mlx5_tx_uar_init_secondary(eth_dev, err); + if (err) { + err = rte_errno; + goto error; +@@ -1143,11 +1080,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + priv->dev_data = eth_dev->data; + eth_dev->data->mac_addrs = priv->mac; + eth_dev->device = dpdk_dev; +- err = mlx5_uar_init_primary(eth_dev); +- if (err) { +- err = rte_errno; +- goto error; +- } + /* Configure the first MAC address by default. */ + if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) { + DRV_LOG(ERR, +@@ -1278,12 +1210,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, + if (own_domain_id) + claim_zero(rte_eth_switch_domain_free(priv->domain_id)); + rte_free(priv); +- if (eth_dev != NULL) +- eth_dev->data->dev_private = NULL; + } + if (pd) + claim_zero(mlx5_glue->dealloc_pd(pd)); + if (eth_dev != NULL) { ++ mlx5_proc_priv_uninit(eth_dev); + /* mac_addrs must not be freed alone because part of dev_private */ + eth_dev->data->mac_addrs = NULL; + rte_eth_dev_release_port(eth_dev); +@@ -1497,6 +1428,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + for (i = 0; i != n; ++i) { + uint32_t restore; + ++ if (!list[i].ifindex) ++ continue; + list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device, + list[i].ibv_dev, dev_config, + &list[i].info, list[i].ifindex); +diff --git a/dpdk/drivers/net/mlx5/mlx5.h b/dpdk/drivers/net/mlx5/mlx5.h +index 26cbdbc389..cf38b6544d 100644 +--- a/dpdk/drivers/net/mlx5/mlx5.h ++++ b/dpdk/drivers/net/mlx5/mlx5.h +@@ -33,7 +33,6 @@ + + #include "mlx5_utils.h" + #include "mlx5_mr.h" +-#include "mlx5_rxtx.h" + #include "mlx5_autoconf.h" + #include "mlx5_defs.h" + +@@ -54,10 +53,22 @@ enum { + PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF = 0xa2d3, + }; + ++/* Recognized Infiniband device physical port name types. 
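The large deletion above removes the scheme where primary and secondary processes each tried to mmap() the same fixed virtual range for UAR doorbells. Its weakness is visible in the removed check: without MAP_FIXED the hint is only advisory, so the secondary failed outright whenever the range was already taken, which is what the per-process UAR table replaces. The fragile part in miniature:

    #include <stddef.h>
    #include <sys/mman.h>

    /* Old approach: succeed only if the kernel grants exactly the hinted
     * address; any collision in this process aborts device setup. */
    static int
    reserve_at(void *hint, size_t size)
    {
        void *addr = mmap(hint, size, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (addr == MAP_FAILED)
            return -1;
        if (addr != hint) {
            munmap(addr, size);
            return -1;  /* virtual range already occupied */
        }
        return 0;
    }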
*/ ++enum mlx5_nl_phys_port_name_type { ++ MLX5_PHYS_PORT_NAME_TYPE_NOTSET = 0, /* Not set. */ ++ MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* before kernel ver < 5.0 */ ++ MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */ ++ MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */ ++ MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */ ++}; ++ ++ + /** Switch information returned by mlx5_nl_switch_info(). */ + struct mlx5_switch_info { + uint32_t master:1; /**< Master device. */ + uint32_t representor:1; /**< Representor device. */ ++ enum mlx5_nl_phys_port_name_type name_type; /** < Port name type. */ ++ int32_t pf_num; /**< PF number (valid for pfxvfx format only). */ + int32_t port_name; /**< Representor port name. */ + uint64_t switch_id; /**< Switch identifier. */ + }; +@@ -177,6 +188,17 @@ struct mlx5_drop { + + struct mlx5_flow_tcf_context; + ++/* Per-process private structure. */ ++struct mlx5_proc_priv { ++ size_t uar_table_sz; ++ /* Size of UAR register table. */ ++ void *uar_table[]; ++ /* Table of UAR registers for each process. */ ++}; ++ ++#define MLX5_PROC_PRIV(port_id) \ ++ ((struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private) ++ + struct mlx5_priv { + LIST_ENTRY(mlx5_priv) mem_event_cb; + /**< Called by memory event callback. */ +@@ -255,6 +277,7 @@ struct mlx5_priv { + /* mlx5.c */ + + int mlx5_getenv_int(const char *); ++int mlx5_proc_priv_init(struct rte_eth_dev *dev); + + /* mlx5_ethdev.c */ + +@@ -290,6 +313,8 @@ unsigned int mlx5_dev_to_port_id(const struct rte_device *dev, + unsigned int port_list_n); + int mlx5_sysfs_switch_info(unsigned int ifindex, + struct mlx5_switch_info *info); ++void mlx5_translate_port_name(const char *port_name_in, ++ struct mlx5_switch_info *port_info_out); + + /* mlx5_mac.c */ + +diff --git a/dpdk/drivers/net/mlx5/mlx5_defs.h b/dpdk/drivers/net/mlx5/mlx5_defs.h +index 480b33c812..13801a5c2d 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_defs.h ++++ b/dpdk/drivers/net/mlx5/mlx5_defs.h +@@ -92,16 +92,6 @@ + /* Timeout in seconds to get a valid link status. */ + #define MLX5_LINK_STATUS_TIMEOUT 10 + +-/* Reserved address space for UAR mapping. */ +-#define MLX5_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4)) +- +-/* Offset of reserved UAR address space to hugepage memory. Offset is used here +- * to minimize possibility of address next to hugepage being used by other code +- * in either primary or secondary process, failing to map TX UAR would make TX +- * packets invisible to HW. +- */ +-#define MLX5_UAR_OFFSET (1ULL << (sizeof(uintptr_t) * 4)) +- + /* Maximum number of UAR pages used by a port, + * These are the size and mask for an array of mutexes used to synchronize + * the access to port's UARs on platforms that do not support 64 bit writes. +diff --git a/dpdk/drivers/net/mlx5/mlx5_ethdev.c b/dpdk/drivers/net/mlx5/mlx5_ethdev.c +index d49cb59b34..e35ff327c8 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_ethdev.c ++++ b/dpdk/drivers/net/mlx5/mlx5_ethdev.c +@@ -446,6 +446,9 @@ mlx5_dev_configure(struct rte_eth_dev *dev) + if (++j == rxqs_n) + j = 0; + } ++ ret = mlx5_proc_priv_init(dev); ++ if (ret) ++ return ret; + return 0; + } + +@@ -1311,6 +1314,110 @@ mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list, + return n; + } + ++/** ++ * Extract port name, as a number, from sysfs or netlink information. ++ * ++ * @param[in] port_name_in ++ * String representing the port name. 
++ * @param[out] port_info_out
++ * Port information, including port name as a number and port name
++ * type if recognized.
++ *
++ * @return
++ * port_name field set according to recognized name format.
++ */
++void
++mlx5_translate_port_name(const char *port_name_in,
++ struct mlx5_switch_info *port_info_out)
++{
++ char pf_c1, pf_c2, vf_c1, vf_c2;
++ char *end;
++ int sc_items;
++
++ /*
++ * Check for port-name as a string of the form pf0vf0
++ * (support kernel ver >= 5.0 or OFED ver >= 4.6).
++ */
++ sc_items = sscanf(port_name_in, "%c%c%d%c%c%d",
++ &pf_c1, &pf_c2, &port_info_out->pf_num,
++ &vf_c1, &vf_c2, &port_info_out->port_name);
++ if (sc_items == 6 &&
++ pf_c1 == 'p' && pf_c2 == 'f' &&
++ vf_c1 == 'v' && vf_c2 == 'f') {
++ port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF;
++ return;
++ }
++ /*
++ * Check for port-name as a string of the form p0
++ * (support kernel ver >= 5.0, or OFED ver >= 4.6).
++ */
++ sc_items = sscanf(port_name_in, "%c%d",
++ &pf_c1, &port_info_out->port_name);
++ if (sc_items == 2 && pf_c1 == 'p') {
++ port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
++ return;
++ }
++ /* Check for port-name as a number (support kernel ver < 5.0). */
++ errno = 0;
++ port_info_out->port_name = strtol(port_name_in, &end, 0);
++ if (!errno &&
++ (size_t)(end - port_name_in) == strlen(port_name_in)) {
++ port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
++ return;
++ }
++ port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
++}
++
++/**
++ * Analyze gathered port parameters via sysfs to recognize master
++ * and representor devices for E-Switch configuration.
++ *
++ * @param[in] device_dir
++ * flag of presence of "device" directory under port device key.
++ * @param[inout] switch_info
++ * Port information, including port name as a number and port name
++ * type if recognized.
++ *
++ * @return
++ * master and representor flags are set in switch_info according to
++ * recognized parameters (if any).
++ */
++static void
++mlx5_sysfs_check_switch_info(bool device_dir,
++ struct mlx5_switch_info *switch_info)
++{
++ switch (switch_info->name_type) {
++ case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
++ /*
++ * Name is not recognized, assume the master,
++ * check the device directory presence.
++ */
++ switch_info->master = device_dir;
++ break;
++ case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
++ /*
++ * Name is not set, this assumes the legacy naming
++ * schema for master, just check if there is
++ * a device directory.
++ */
++ switch_info->master = device_dir;
++ break;
++ case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
++ /* New uplink naming schema recognized. */
++ switch_info->master = 1;
++ break;
++ case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
++ /* Legacy representors naming schema. */
++ switch_info->representor = !device_dir;
++ break;
++ case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
++ /* New representors naming schema. */
++ switch_info->representor = 1;
++ break;
++ }
++}
++
++
+ /**
+ * Get switch information associated with network interface. 
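mlx5_translate_port_name() above classifies the three phys_port_name formats a kernel may expose. A compilable reduction of the same logic with simplified result types; note the sscanf trick of capturing the literal characters with %c and verifying them afterwards, so one pattern covers the whole pf0vf0 token:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    enum name_type { NAME_LEGACY, NAME_UPLINK, NAME_PFVF, NAME_UNKNOWN };

    static enum name_type
    classify_port_name(const char *name, int *pf, int *port)
    {
        char c1, c2, c3, c4;
        char *end;

        /* "pf0vf0": representor naming, kernel >= 5.0. */
        if (sscanf(name, "%c%c%d%c%c%d",
                   &c1, &c2, pf, &c3, &c4, port) == 6 &&
            c1 == 'p' && c2 == 'f' && c3 == 'v' && c4 == 'f')
            return NAME_PFVF;
        /* "p0": uplink naming, kernel >= 5.0. */
        if (sscanf(name, "%c%d", &c1, port) == 2 && c1 == 'p')
            return NAME_UPLINK;
        /* Plain number: legacy naming, kernel < 5.0. */
        errno = 0;
        *port = strtol(name, &end, 0);
        if (errno == 0 && (size_t)(end - name) == strlen(name))
            return NAME_LEGACY;
        return NAME_UNKNOWN;
    }

    int
    main(void)
    {
        const char *names[] = { "p0", "pf0vf1", "3", "eth0" };
        size_t i;

        for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
            int pf = -1, port = -1;
            printf("%-7s -> type %d (pf %d, port %d)\n", names[i],
                   classify_port_name(names[i], &pf, &port), pf, port);
        }
        return 0;
    }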
+ * +@@ -1326,11 +1433,20 @@ int + mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) + { + char ifname[IF_NAMESIZE]; ++ char port_name[IF_NAMESIZE]; + FILE *file; +- struct mlx5_switch_info data = { .master = 0, }; +- bool port_name_set = false; ++ struct mlx5_switch_info data = { ++ .master = 0, ++ .representor = 0, ++ .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, ++ .port_name = 0, ++ .switch_id = 0, ++ }; ++ DIR *dir; + bool port_switch_id_set = false; ++ bool device_dir = false; + char c; ++ int ret; + + if (!if_indextoname(ifindex, ifname)) { + rte_errno = errno; +@@ -1341,13 +1457,15 @@ mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) + ifname); + MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id", + ifname); ++ MKSTR(pci_device, "/sys/class/net/%s/device", ++ ifname); + + file = fopen(phys_port_name, "rb"); + if (file != NULL) { +- port_name_set = +- fscanf(file, "%d%c", &data.port_name, &c) == 2 && +- c == '\n'; ++ ret = fscanf(file, "%s", port_name); + fclose(file); ++ if (ret == 1) ++ mlx5_translate_port_name(port_name, &data); + } + file = fopen(phys_switch_id, "rb"); + if (file == NULL) { +@@ -1358,8 +1476,22 @@ mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) + fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 && + c == '\n'; + fclose(file); +- data.master = port_switch_id_set && !port_name_set; +- data.representor = port_switch_id_set && port_name_set; ++ dir = opendir(pci_device); ++ if (dir != NULL) { ++ closedir(dir); ++ device_dir = true; ++ } ++ if (port_switch_id_set) { ++ /* We have some E-Switch configuration. */ ++ mlx5_sysfs_check_switch_info(device_dir, &data); ++ } + *info = data; ++ assert(!(data.master && data.representor)); ++ if (data.master && data.representor) { ++ DRV_LOG(ERR, "ifindex %u device is recognized as master" ++ " and as representor", ifindex); ++ rte_errno = ENODEV; ++ return -rte_errno; ++ } + return 0; + } +diff --git a/dpdk/drivers/net/mlx5/mlx5_flow.c b/dpdk/drivers/net/mlx5/mlx5_flow.c +index 7f518fcd26..30d5b66e64 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_flow.c ++++ b/dpdk/drivers/net/mlx5/mlx5_flow.c +@@ -31,8 +31,9 @@ + #include "mlx5.h" + #include "mlx5_defs.h" + #include "mlx5_prm.h" +-#include "mlx5_glue.h" + #include "mlx5_flow.h" ++#include "mlx5_glue.h" ++#include "mlx5_rxtx.h" + + /* Dev ops structure defined in mlx5.c */ + extern const struct eth_dev_ops mlx5_dev_ops; +@@ -1266,7 +1267,6 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, + "\xff\xff\xff\xff\xff\xff\xff\xff", + .vtc_flow = RTE_BE32(0xffffffff), + .proto = 0xff, +- .hop_limits = 0xff, + }, + }; + const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); +@@ -1441,7 +1441,6 @@ mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, + uint32_t vlan_id; + uint8_t vni[4]; + } id = { .vlan_id = 0, }; +- uint32_t vlan_id = 0; + + + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) +@@ -1468,23 +1467,8 @@ mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, + return ret; + if (spec) { + memcpy(&id.vni[1], spec->vni, 3); +- vlan_id = id.vlan_id; + memcpy(&id.vni[1], mask->vni, 3); +- vlan_id &= id.vlan_id; + } +- /* +- * Tunnel id 0 is equivalent as not adding a VXLAN layer, if +- * only this layer is defined in the Verbs specification it is +- * interpreted as wildcard and all packets will match this +- * rule, if it follows a full stack layer (ex: eth / ipv4 / +- * udp), all packets matching the layers before will also +- * match this rule. 
To avoid such situation, VNI 0 is
+-	 * currently refused.
+-	 */
+-	if (!vlan_id)
+-		return rte_flow_error_set(error, ENOTSUP,
+-					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+-					  "VXLAN vni cannot be 0");
+ 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
+ 		return rte_flow_error_set(error, ENOTSUP,
+ 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+@@ -1523,7 +1507,6 @@ mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
+ 		uint32_t vlan_id;
+ 		uint8_t vni[4];
+ 	} id = { .vlan_id = 0, };
+-	uint32_t vlan_id = 0;
+ 
+ 	if (!priv->config.l3_vxlan_en)
+ 		return rte_flow_error_set(error, ENOTSUP,
+@@ -1561,22 +1544,8 @@
+ 					  "VxLAN-GPE protocol"
+ 					  " not supported");
+ 		memcpy(&id.vni[1], spec->vni, 3);
+-		vlan_id = id.vlan_id;
+ 		memcpy(&id.vni[1], mask->vni, 3);
+-		vlan_id &= id.vlan_id;
+ 	}
+-	/*
+-	 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this
+-	 * layer is defined in the Verbs specification it is interpreted as
+-	 * wildcard and all packets will match this rule, if it follows a full
+-	 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
+-	 * before will also match this rule. To avoid such situation, VNI 0
+-	 * is currently refused.
+-	 */
+-	if (!vlan_id)
+-		return rte_flow_error_set(error, ENOTSUP,
+-					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+-					  "VXLAN-GPE vni cannot be 0");
+ 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
+ 		return rte_flow_error_set(error, ENOTSUP,
+ 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
+@@ -2377,6 +2346,8 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
+ 		rte_errno = EINVAL;
+ 		return -rte_errno;
+ 	}
++	if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
++		action_rss.types = 0;
+ 	for (i = 0; i != priv->reta_idx_n; ++i)
+ 		queue[i] = (*priv->reta_idx)[i];
+ 	flow = flow_list_create(dev, &priv->ctrl_flows,
+diff --git a/dpdk/drivers/net/mlx5/mlx5_flow_dv.c b/dpdk/drivers/net/mlx5/mlx5_flow_dv.c
+index 54b8770ffc..460461ab0d 100644
+--- a/dpdk/drivers/net/mlx5/mlx5_flow_dv.c
++++ b/dpdk/drivers/net/mlx5/mlx5_flow_dv.c
+@@ -29,9 +29,10 @@
+ 
+ #include "mlx5.h"
+ #include "mlx5_defs.h"
+-#include "mlx5_prm.h"
+ #include "mlx5_glue.h"
++#include "mlx5_prm.h"
+ #include "mlx5_flow.h"
++#include "mlx5_rxtx.h"
+ 
+ #ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ 
+@@ -1307,6 +1308,13 @@ flow_dv_translate_item_ipv4(void *matcher, void *key,
+ 	}
+ 	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0xf);
+ 	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 4);
++	/*
++	 * On outer header (which must contain L2), or inner header with L2,
++	 * set cvlan_tag mask bit to mark this packet as untagged.
++	 * This should be done even if item->spec is empty.
++	 */
++	if (!inner || item_flags & MLX5_FLOW_LAYER_INNER_L2)
++		MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1);
+ 	if (!ipv4_v)
+ 		return;
+ 	if (!ipv4_m)
+@@ -1334,12 +1342,6 @@ flow_dv_translate_item_ipv4(void *matcher, void *key,
+ 		 ipv4_m->hdr.next_proto_id);
+ 	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ 		 ipv4_v->hdr.next_proto_id & ipv4_m->hdr.next_proto_id);
+-	/*
+-	 * On outer header (which must contains L2), or inner header with L2,
+-	 * set cvlan_tag mask bit to mark this packet as untagged. 
+-	 */
+-	if (!inner || item_flags & MLX5_FLOW_LAYER_INNER_L2)
+-		MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1);
+ }
+ 
+ /**
+@@ -1399,6 +1401,13 @@ flow_dv_translate_item_ipv6(void *matcher, void *key,
+ 	}
+ 	MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0xf);
+ 	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 6);
++	/*
++	 * On outer header (which must contain L2), or inner header with L2,
++	 * set cvlan_tag mask bit to mark this packet as untagged.
++	 * This should be done even if item->spec is empty.
++	 */
++	if (!inner || item_flags & MLX5_FLOW_LAYER_INNER_L2)
++		MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1);
+ 	if (!ipv6_v)
+ 		return;
+ 	if (!ipv6_m)
+@@ -1442,12 +1451,6 @@ flow_dv_translate_item_ipv6(void *matcher, void *key,
+ 		 ipv6_m->hdr.proto);
+ 	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ 		 ipv6_v->hdr.proto & ipv6_m->hdr.proto);
+-	/*
+-	 * On outer header (which must contains L2), or inner header with L2,
+-	 * set cvlan_tag mask bit to mark this packet as untagged.
+-	 */
+-	if (!inner || item_flags & MLX5_FLOW_LAYER_INNER_L2)
+-		MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1);
+ }
+ 
+ /**
+diff --git a/dpdk/drivers/net/mlx5/mlx5_flow_tcf.c b/dpdk/drivers/net/mlx5/mlx5_flow_tcf.c
+index e9c4fe9d75..8456e033d6 100644
+--- a/dpdk/drivers/net/mlx5/mlx5_flow_tcf.c
++++ b/dpdk/drivers/net/mlx5/mlx5_flow_tcf.c
+@@ -31,6 +31,7 @@
+ #include <rte_cycles.h>
+ 
+ #include "mlx5.h"
++#include "mlx5_prm.h"
+ #include "mlx5_flow.h"
+ #include "mlx5_autoconf.h"
+ 
+diff --git a/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c b/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c
+index 1a8a9e63ca..f4b43d50b9 100644
+--- a/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c
++++ b/dpdk/drivers/net/mlx5/mlx5_flow_verbs.c
+@@ -30,8 +30,9 @@
+ #include "mlx5.h"
+ #include "mlx5_defs.h"
+ #include "mlx5_prm.h"
+-#include "mlx5_glue.h"
+ #include "mlx5_flow.h"
++#include "mlx5_glue.h"
++#include "mlx5_rxtx.h"
+ 
+ #define VERBS_SPEC_INNER(item_flags) \
+ 	(!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? IBV_FLOW_SPEC_INNER : 0)
+@@ -479,14 +480,12 @@ flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow,
+ 	ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
+ 				 IPV6_HDR_TC_SHIFT;
+ 	ipv6.val.next_hdr = spec->hdr.proto;
+-	ipv6.val.hop_limit = spec->hdr.hop_limits;
+ 	ipv6.mask.flow_label =
+ 		rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
+ 				 IPV6_HDR_FL_SHIFT);
+ 	ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
+ 				  IPV6_HDR_TC_SHIFT;
+ 	ipv6.mask.next_hdr = mask->hdr.proto;
+-	ipv6.mask.hop_limit = mask->hdr.hop_limits;
+ 	/* Remove unwanted bits from values. 
*/ + for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) { + ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i]; +@@ -495,7 +494,6 @@ flow_verbs_translate_item_ipv6(struct mlx5_flow *dev_flow, + ipv6.val.flow_label &= ipv6.mask.flow_label; + ipv6.val.traffic_class &= ipv6.mask.traffic_class; + ipv6.val.next_hdr &= ipv6.mask.next_hdr; +- ipv6.val.hop_limit &= ipv6.mask.hop_limit; + } + flow_verbs_spec_add(&dev_flow->verbs, &ipv6, size); + } +@@ -575,6 +573,28 @@ flow_verbs_translate_item_udp(struct mlx5_flow *dev_flow, + udp.val.src_port &= udp.mask.src_port; + udp.val.dst_port &= udp.mask.dst_port; + } ++ item++; ++ while (item->type == RTE_FLOW_ITEM_TYPE_VOID) ++ item++; ++ if (!(udp.val.dst_port & udp.mask.dst_port)) { ++ switch ((item)->type) { ++ case RTE_FLOW_ITEM_TYPE_VXLAN: ++ udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN); ++ udp.mask.dst_port = 0xffff; ++ break; ++ case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: ++ udp.val.dst_port = htons(MLX5_UDP_PORT_VXLAN_GPE); ++ udp.mask.dst_port = 0xffff; ++ break; ++ case RTE_FLOW_ITEM_TYPE_MPLS: ++ udp.val.dst_port = htons(MLX5_UDP_PORT_MPLS); ++ udp.mask.dst_port = 0xffff; ++ break; ++ default: ++ break; ++ } ++ } ++ + flow_verbs_spec_add(&dev_flow->verbs, &udp, size); + } + +diff --git a/dpdk/drivers/net/mlx5/mlx5_glue.h b/dpdk/drivers/net/mlx5/mlx5_glue.h +index 2d92ba8bcf..0ed754a070 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_glue.h ++++ b/dpdk/drivers/net/mlx5/mlx5_glue.h +@@ -166,6 +166,6 @@ struct mlx5_glue { + enum mlx5dv_flow_table_type ft_type); + }; + +-const struct mlx5_glue *mlx5_glue; ++extern const struct mlx5_glue *mlx5_glue; + + #endif /* MLX5_GLUE_H_ */ +diff --git a/dpdk/drivers/net/mlx5/mlx5_nl.c b/dpdk/drivers/net/mlx5/mlx5_nl.c +index fe5a27461e..7098d31322 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_nl.c ++++ b/dpdk/drivers/net/mlx5/mlx5_nl.c +@@ -84,6 +84,7 @@ struct mlx5_nl_ifindex_data { + const char *name; /**< IB device name (in). */ + uint32_t ibindex; /**< IB device index (out). */ + uint32_t ifindex; /**< Network interface index (out). */ ++ uint32_t ibfound; /**< Found IB index for matching device. */ + }; + + /** +@@ -695,7 +696,7 @@ mlx5_nl_ifindex_cb(struct nlmsghdr *nh, void *arg) + size_t off = NLMSG_HDRLEN; + uint32_t ibindex = 0; + uint32_t ifindex = 0; +- int found = 0; ++ uint32_t found = 0, ibfound = 0; + + if (nh->nlmsg_type != + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && +@@ -711,6 +712,7 @@ mlx5_nl_ifindex_cb(struct nlmsghdr *nh, void *arg) + switch (na->nla_type) { + case RDMA_NLDEV_ATTR_DEV_INDEX: + ibindex = *(uint32_t *)payload; ++ ibfound = 1; + break; + case RDMA_NLDEV_ATTR_DEV_NAME: + if (!strcmp(payload, data->name)) +@@ -727,6 +729,7 @@ mlx5_nl_ifindex_cb(struct nlmsghdr *nh, void *arg) + if (found) { + data->ibindex = ibindex; + data->ifindex = ifindex; ++ data->ibfound = ibfound; + } + return 0; + error: +@@ -759,6 +762,7 @@ mlx5_nl_ifindex(int nl, const char *name) + .name = name, + .ibindex = 0, /* Determined during first pass. */ + .ifindex = 0, /* Determined during second pass. 
*/ ++ .ibfound = 0, + }; + union { + struct nlmsghdr nh; +@@ -782,7 +786,7 @@ mlx5_nl_ifindex(int nl, const char *name) + ret = mlx5_nl_recv(nl, seq, mlx5_nl_ifindex_cb, &data); + if (ret < 0) + return 0; +- if (!data.ibindex) ++ if (!data.ibfound) + goto error; + ++seq; + req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, +@@ -813,6 +817,55 @@ mlx5_nl_ifindex(int nl, const char *name) + return 0; + } + ++/** ++ * Analyze gathered port parameters via Netlink to recognize master ++ * and representor devices for E-Switch configuration. ++ * ++ * @param[in] num_vf_set ++ * flag of presence of number of VFs port attribute. ++ * @param[inout] switch_info ++ * Port information, including port name as a number and port name ++ * type if recognized ++ * ++ * @return ++ * master and representor flags are set in switch_info according to ++ * recognized parameters (if any). ++ */ ++static void ++mlx5_nl_check_switch_info(bool num_vf_set, ++ struct mlx5_switch_info *switch_info) ++{ ++ switch (switch_info->name_type) { ++ case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: ++ /* ++ * Name is not recognized, assume the master, ++ * check the number of VFs key presence. ++ */ ++ switch_info->master = num_vf_set; ++ break; ++ case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: ++ /* ++ * Name is not set, this assumes the legacy naming ++ * schema for master, just check if there is a ++ * number of VFs key. ++ */ ++ switch_info->master = num_vf_set; ++ break; ++ case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: ++ /* New uplink naming schema recognized. */ ++ switch_info->master = 1; ++ break; ++ case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: ++ /* Legacy representors naming schema. */ ++ switch_info->representor = !num_vf_set; ++ break; ++ case MLX5_PHYS_PORT_NAME_TYPE_PFVF: ++ /* New representors naming schema. */ ++ switch_info->representor = 1; ++ break; ++ } ++} ++ + /** + * Process switch information from Netlink message. + * +@@ -830,31 +883,29 @@ mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) + struct mlx5_switch_info info = { + .master = 0, + .representor = 0, ++ .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, + .port_name = 0, + .switch_id = 0, + }; + size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); +- bool port_name_set = false; + bool switch_id_set = false; ++ bool num_vf_set = false; + + if (nh->nlmsg_type != RTM_NEWLINK) + goto error; + while (off < nh->nlmsg_len) { + struct rtattr *ra = (void *)((uintptr_t)nh + off); + void *payload = RTA_DATA(ra); +- char *end; + unsigned int i; + + if (ra->rta_len > nh->nlmsg_len - off) + goto error; + switch (ra->rta_type) { ++ case IFLA_NUM_VF: ++ num_vf_set = true; ++ break; + case IFLA_PHYS_PORT_NAME: +- errno = 0; +- info.port_name = strtol(payload, &end, 0); +- if (errno || +- (size_t)(end - (char *)payload) != strlen(payload)) +- goto error; +- port_name_set = true; ++ mlx5_translate_port_name((char *)payload, &info); + break; + case IFLA_PHYS_SWITCH_ID: + info.switch_id = 0; +@@ -867,8 +918,11 @@ mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) + } + off += RTA_ALIGN(ra->rta_len); + } +- info.master = switch_id_set && !port_name_set; +- info.representor = switch_id_set && port_name_set; ++ if (switch_id_set) { ++ /* We have some E-Switch configuration. */ ++ mlx5_nl_check_switch_info(num_vf_set, &info); ++ } ++ assert(!(info.master && info.representor)); + memcpy(arg, &info, sizeof(info)); + return 0; + error: +@@ -890,15 +944,19 @@ mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg) + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ + int +-mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info) ++mlx5_nl_switch_info(int nl, unsigned int ifindex, ++ struct mlx5_switch_info *info) + { +- uint32_t seq = random(); + struct { + struct nlmsghdr nh; + struct ifinfomsg info; ++ struct rtattr rta; ++ uint32_t extmask; + } req = { + .nh = { +- .nlmsg_len = NLMSG_LENGTH(sizeof(req.info)), ++ .nlmsg_len = NLMSG_LENGTH ++ (sizeof(req.info) + ++ RTA_LENGTH(sizeof(uint32_t))), + .nlmsg_type = RTM_GETLINK, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + }, +@@ -906,11 +964,23 @@ mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info) + .ifi_family = AF_UNSPEC, + .ifi_index = ifindex, + }, ++ .rta = { ++ .rta_type = IFLA_EXT_MASK, ++ .rta_len = RTA_LENGTH(sizeof(int32_t)), ++ }, ++ .extmask = RTE_LE32(1), + }; ++ uint32_t sn = random(); + int ret; + +- ret = mlx5_nl_send(nl, &req.nh, seq); ++ ret = mlx5_nl_send(nl, &req.nh, sn); + if (ret >= 0) +- ret = mlx5_nl_recv(nl, seq, mlx5_nl_switch_info_cb, info); ++ ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info); ++ if (info->master && info->representor) { ++ DRV_LOG(ERR, "ifindex %u device is recognized as master" ++ " and as representor", ifindex); ++ rte_errno = ENODEV; ++ ret = -rte_errno; ++ } + return ret; + } +diff --git a/dpdk/drivers/net/mlx5/mlx5_rxtx.h b/dpdk/drivers/net/mlx5/mlx5_rxtx.h +index 820675b824..dc2ca5e4ad 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_rxtx.h ++++ b/dpdk/drivers/net/mlx5/mlx5_rxtx.h +@@ -202,8 +202,9 @@ struct mlx5_txq_data { + volatile void *wqes; /* Work queue (use volatile to write into). */ + volatile uint32_t *qp_db; /* Work queue doorbell. */ + volatile uint32_t *cq_db; /* Completion queue doorbell. */ +- volatile void *bf_reg; /* Blueflame register remapped. */ + struct rte_mbuf *(*elts)[]; /* TX elements. */ ++ uint16_t port_id; /* Port ID of device. */ ++ uint16_t idx; /* Queue index. */ + struct mlx5_txq_stats stats; /* TX queue counters. */ + #ifndef RTE_ARCH_64 + rte_spinlock_t *uar_lock; +@@ -231,10 +232,12 @@ struct mlx5_txq_ctrl { + struct mlx5_priv *priv; /* Back pointer to private data. */ + struct mlx5_txq_data txq; /* Data path structure. */ + off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ +- volatile void *bf_reg_orig; /* Blueflame register from verbs. */ +- uint16_t idx; /* Queue index. */ ++ void *bf_reg; /* BlueFlame register from Verbs. 
*/ + }; + ++#define MLX5_TX_BFREG(txq) \ ++ (MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx]) ++ + /* mlx5_rxq.c */ + + extern uint8_t rss_hash_default_key[]; +@@ -302,7 +305,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev); + int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + unsigned int socket, const struct rte_eth_txconf *conf); + void mlx5_tx_queue_release(void *dpdk_txq); +-int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd); ++int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); + struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx); + struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx); + int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv); +@@ -701,7 +704,7 @@ static __rte_always_inline void + mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe, + int cond) + { +- uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg); ++ uint64_t *dst = MLX5_TX_BFREG(txq); + volatile uint64_t *src = ((volatile uint64_t *)wqe); + + rte_cio_wmb(); +diff --git a/dpdk/drivers/net/mlx5/mlx5_trigger.c b/dpdk/drivers/net/mlx5/mlx5_trigger.c +index 2137bdc461..6b0f8b2056 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_trigger.c ++++ b/dpdk/drivers/net/mlx5/mlx5_trigger.c +@@ -58,12 +58,6 @@ mlx5_txq_start(struct rte_eth_dev *dev) + goto error; + } + } +- ret = mlx5_tx_uar_remap(dev, priv->ctx->cmd_fd); +- if (ret) { +- /* Adjust index for rollback. */ +- i = priv->txqs_n - 1; +- goto error; +- } + return 0; + error: + ret = rte_errno; /* Save rte_errno before cleanup. */ +diff --git a/dpdk/drivers/net/mlx5/mlx5_txq.c b/dpdk/drivers/net/mlx5/mlx5_txq.c +index e6020fbcbf..42ab34aa77 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_txq.c ++++ b/dpdk/drivers/net/mlx5/mlx5_txq.c +@@ -49,7 +49,7 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl) + for (i = 0; (i != elts_n); ++i) + (*txq_ctrl->txq.elts)[i] = NULL; + DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs", +- PORT_ID(txq_ctrl->priv), txq_ctrl->idx, elts_n); ++ PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx, elts_n); + txq_ctrl->txq.elts_head = 0; + txq_ctrl->txq.elts_tail = 0; + txq_ctrl->txq.elts_comp = 0; +@@ -71,7 +71,7 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl) + struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts; + + DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs", +- PORT_ID(txq_ctrl->priv), txq_ctrl->idx); ++ PORT_ID(txq_ctrl->priv), txq_ctrl->txq.idx); + txq_ctrl->txq.elts_head = 0; + txq_ctrl->txq.elts_tail = 0; + txq_ctrl->txq.elts_comp = 0; +@@ -223,20 +223,107 @@ mlx5_tx_queue_release(void *dpdk_txq) + priv = txq_ctrl->priv; + for (i = 0; (i != priv->txqs_n); ++i) + if ((*priv->txqs)[i] == txq) { +- mlx5_txq_release(ETH_DEV(priv), i); + DRV_LOG(DEBUG, "port %u removing Tx queue %u from list", +- PORT_ID(priv), txq_ctrl->idx); ++ PORT_ID(priv), txq_ctrl->txq.idx); ++ mlx5_txq_release(ETH_DEV(priv), i); + break; + } + } + ++/** ++ * Initialize Tx UAR registers for primary process. ++ * ++ * @param txq_ctrl ++ * Pointer to Tx queue control structure. 
++ */
++static void
++txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl)
++{
++	struct mlx5_priv *priv = txq_ctrl->priv;
++	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
++#ifndef RTE_ARCH_64
++	unsigned int lock_idx;
++	const size_t page_size = sysconf(_SC_PAGESIZE);
++#endif
++
++	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
++	assert(ppriv);
++	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
++#ifndef RTE_ARCH_64
++	/* Assign a UAR lock according to UAR page number */
++	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
++		   MLX5_UAR_PAGE_NUM_MASK;
++	txq_ctrl->txq.uar_lock = &priv->uar_lock[lock_idx];
++#endif
++}
++
++/**
++ * Remap UAR register of a Tx queue for secondary process.
++ *
++ * Remapped address is stored in the table in the process private structure of
++ * the device, indexed by queue index.
++ *
++ * @param txq_ctrl
++ *   Pointer to Tx queue control structure.
++ * @param fd
++ *   Verbs file descriptor to map UAR pages.
++ *
++ * @return
++ *   0 on success, a negative errno value otherwise and rte_errno is set.
++ */
++static int
++txq_uar_init_secondary(struct mlx5_txq_ctrl *txq_ctrl, int fd)
++{
++	struct mlx5_priv *priv = txq_ctrl->priv;
++	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
++	struct mlx5_txq_data *txq = &txq_ctrl->txq;
++	void *addr;
++	uintptr_t uar_va;
++	uintptr_t offset;
++	const size_t page_size = sysconf(_SC_PAGESIZE);
++
++	assert(ppriv);
++	/*
++	 * As in rdma-core, UARs are mapped at OS page size
++	 * granularity. Ref to libmlx5 function: mlx5_init_context()
++	 */
++	uar_va = (uintptr_t)txq_ctrl->bf_reg;
++	offset = uar_va & (page_size - 1); /* Offset in page. */
++	addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
++		    txq_ctrl->uar_mmap_offset);
++	if (addr == MAP_FAILED) {
++		DRV_LOG(ERR,
++			"port %u mmap failed for BF reg of txq %u",
++			txq->port_id, txq->idx);
++		rte_errno = ENXIO;
++		return -rte_errno;
++	}
++	addr = RTE_PTR_ADD(addr, offset);
++	ppriv->uar_table[txq->idx] = addr;
++	return 0;
++}
++
++/**
++ * Unmap UAR register of a Tx queue for secondary process.
++ *
++ * @param txq_ctrl
++ *   Pointer to Tx queue control structure.
++ */
++static void
++txq_uar_uninit_secondary(struct mlx5_txq_ctrl *txq_ctrl)
++{
++	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(txq_ctrl->priv));
++	const size_t page_size = sysconf(_SC_PAGESIZE);
++	void *addr;
++
++	addr = ppriv->uar_table[txq_ctrl->txq.idx];
++	munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
++}
+ 
+ /**
+- * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
+- * Both primary and secondary process do mmap to make UAR address
+- * aligned.
++ * Initialize Tx UAR registers for secondary process.
+  *
+- * @param[in] dev
++ * @param dev
+  *   Pointer to Ethernet device.
+  * @param fd
+  *   Verbs file descriptor to map UAR pages.
+@@ -245,81 +332,36 @@ mlx5_tx_queue_release(void *dpdk_txq)
+  *   0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ + int +-mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd) ++mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd) + { + struct mlx5_priv *priv = dev->data->dev_private; +- unsigned int i, j; +- uintptr_t pages[priv->txqs_n]; +- unsigned int pages_n = 0; +- uintptr_t uar_va; +- uintptr_t off; +- void *addr; +- void *ret; + struct mlx5_txq_data *txq; + struct mlx5_txq_ctrl *txq_ctrl; +- int already_mapped; +- size_t page_size = sysconf(_SC_PAGESIZE); +-#ifndef RTE_ARCH_64 +- unsigned int lock_idx; +-#endif ++ unsigned int i; ++ int ret; + +- memset(pages, 0, priv->txqs_n * sizeof(uintptr_t)); +- /* +- * As rdma-core, UARs are mapped in size of OS page size. +- * Use aligned address to avoid duplicate mmap. +- * Ref to libmlx5 function: mlx5_init_context() +- */ ++ assert(rte_eal_process_type() == RTE_PROC_SECONDARY); + for (i = 0; i != priv->txqs_n; ++i) { + if (!(*priv->txqs)[i]) + continue; + txq = (*priv->txqs)[i]; + txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); +- assert(txq_ctrl->idx == (uint16_t)i); +- /* UAR addr form verbs used to find dup and offset in page. */ +- uar_va = (uintptr_t)txq_ctrl->bf_reg_orig; +- off = uar_va & (page_size - 1); /* offset in page. */ +- uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */ +- already_mapped = 0; +- for (j = 0; j != pages_n; ++j) { +- if (pages[j] == uar_va) { +- already_mapped = 1; +- break; +- } +- } +- /* new address in reserved UAR address space. */ +- addr = RTE_PTR_ADD(priv->uar_base, +- uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1)); +- if (!already_mapped) { +- pages[pages_n++] = uar_va; +- /* fixed mmap to specified address in reserved +- * address space. +- */ +- ret = mmap(addr, page_size, +- PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, +- txq_ctrl->uar_mmap_offset); +- if (ret != addr) { +- /* fixed mmap have to return same address */ +- DRV_LOG(ERR, +- "port %u call to mmap failed on UAR" +- " for txq %u", +- dev->data->port_id, txq_ctrl->idx); +- rte_errno = ENXIO; +- return -rte_errno; +- } +- } +- if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */ +- txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off); +- else +- assert(txq_ctrl->txq.bf_reg == +- RTE_PTR_ADD((void *)addr, off)); +-#ifndef RTE_ARCH_64 +- /* Assign a UAR lock according to UAR page number */ +- lock_idx = (txq_ctrl->uar_mmap_offset / page_size) & +- MLX5_UAR_PAGE_NUM_MASK; +- txq->uar_lock = &priv->uar_lock[lock_idx]; +-#endif ++ assert(txq->idx == (uint16_t)i); ++ ret = txq_uar_init_secondary(txq_ctrl, fd); ++ if (ret) ++ goto error; + } + return 0; ++error: ++ /* Rollback. 
*/ ++ do { ++ if (!(*priv->txqs)[i]) ++ continue; ++ txq = (*priv->txqs)[i]; ++ txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); ++ txq_uar_uninit_secondary(txq_ctrl); ++ } while (i--); ++ return -rte_errno; + } + + /** +@@ -507,7 +549,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx) + txq_data->wqes = qp.sq.buf; + txq_data->wqe_n = log2above(qp.sq.wqe_cnt); + txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR]; +- txq_ctrl->bf_reg_orig = qp.bf.reg; + txq_data->cq_db = cq_info.dbrec; + txq_data->cqes = + (volatile struct mlx5_cqe (*)[]) +@@ -521,6 +562,8 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx) + txq_ibv->qp = tmpl.qp; + txq_ibv->cq = tmpl.cq; + rte_atomic32_inc(&txq_ibv->refcnt); ++ txq_ctrl->bf_reg = qp.bf.reg; ++ txq_uar_init(txq_ctrl); + if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) { + txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset; + DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%"PRIx64, +@@ -631,7 +674,7 @@ mlx5_txq_ibv_verify(struct rte_eth_dev *dev) + + LIST_FOREACH(txq_ibv, &priv->txqsibv, next) { + DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced", +- dev->data->port_id, txq_ibv->txq_ctrl->idx); ++ dev->data->port_id, txq_ibv->txq_ctrl->txq.idx); + ++ret; + } + return ret; +@@ -801,7 +844,8 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + tmpl->priv = priv; + tmpl->socket = socket; + tmpl->txq.elts_n = log2above(desc); +- tmpl->idx = idx; ++ tmpl->txq.port_id = dev->data->port_id; ++ tmpl->txq.idx = idx; + txq_set_params(tmpl); + if (txq_calc_wqebb_cnt(tmpl) > + priv->device_attr.orig_attr.max_qp_wr) { +@@ -866,16 +910,12 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx) + { + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_txq_ctrl *txq; +- size_t page_size = sysconf(_SC_PAGESIZE); + + if (!(*priv->txqs)[idx]) + return 0; + txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq); + if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv)) + txq->ibv = NULL; +- if (priv->uar_base) +- munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, +- page_size), page_size); + if (rte_atomic32_dec_and_test(&txq->refcnt)) { + txq_free_elts(txq); + mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh); +@@ -928,7 +968,7 @@ mlx5_txq_verify(struct rte_eth_dev *dev) + + LIST_FOREACH(txq, &priv->txqsctrl, next) { + DRV_LOG(DEBUG, "port %u Tx queue %u still referenced", +- dev->data->port_id, txq->idx); ++ dev->data->port_id, txq->txq.idx); + ++ret; + } + return ret; +diff --git a/dpdk/drivers/net/mlx5/mlx5_vlan.c b/dpdk/drivers/net/mlx5/mlx5_vlan.c +index 6568a3a475..4004930942 100644 +--- a/dpdk/drivers/net/mlx5/mlx5_vlan.c ++++ b/dpdk/drivers/net/mlx5/mlx5_vlan.c +@@ -27,10 +27,11 @@ + #include <rte_ethdev_driver.h> + #include <rte_common.h> + +-#include "mlx5_utils.h" + #include "mlx5.h" + #include "mlx5_autoconf.h" + #include "mlx5_glue.h" ++#include "mlx5_rxtx.h" ++#include "mlx5_utils.h" + + /** + * DPDK callback to configure a VLAN filter. 
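
The UAR rework above relies on a single invariant: mmap() operates at page granularity, while BlueFlame doorbell registers live at arbitrary offsets inside a page, so a secondary process must re-apply the primary's in-page offset to its own freshly mapped page before storing the result in uar_table (this is what txq_uar_init_secondary() does). A minimal standalone sketch of that arithmetic, using made-up addresses rather than a real Verbs mapping:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	const uintptr_t page_size = 4096;	/* assume 4 KiB pages */
    	/* Hypothetical bf_reg VA recorded by the primary process. */
    	uintptr_t primary_bf_reg = 0x7f32a4c01d00;
    	/* Offset of the register inside its page: 0xd00 here. */
    	uintptr_t in_page_off = primary_bf_reg & (page_size - 1);
    	/* Pretend this came back from mmap(NULL, page_size, ...). */
    	uintptr_t secondary_page = 0x7f0000002000;
    	/* The secondary's usable doorbell address. */
    	uintptr_t secondary_bf_reg = secondary_page + in_page_off;

    	printf("offset 0x%" PRIxPTR " -> secondary VA 0x%" PRIxPTR "\n",
    	       in_page_off, secondary_bf_reg);
    	return 0;
    }

The offset survives because both mappings start on a page boundary; only the page base differs between processes.
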
+diff --git a/dpdk/drivers/net/mvneta/mvneta_ethdev.c b/dpdk/drivers/net/mvneta/mvneta_ethdev.c +index 919856885a..950a4be75b 100644 +--- a/dpdk/drivers/net/mvneta/mvneta_ethdev.c ++++ b/dpdk/drivers/net/mvneta/mvneta_ethdev.c +@@ -730,7 +730,7 @@ mvneta_stats_reset(struct rte_eth_dev *dev) + + ret = mvneta_stats_get(dev, &priv->prev_stats); + if (unlikely(ret)) +- RTE_LOG(ERR, PMD, "Failed to reset port statistics"); ++ MVNETA_LOG(ERR, "Failed to reset port statistics"); + } + + +diff --git a/dpdk/drivers/net/mvpp2/mrvl_flow.c b/dpdk/drivers/net/mvpp2/mrvl_flow.c +index ffd1dab9b5..8b1ec7e2bc 100644 +--- a/dpdk/drivers/net/mvpp2/mrvl_flow.c ++++ b/dpdk/drivers/net/mvpp2/mrvl_flow.c +@@ -2511,14 +2511,14 @@ mrvl_create_cls_table(struct rte_eth_dev *dev, struct rte_flow *first_flow) + + if (first_flow->pattern & F_UDP_SPORT) { + key->proto_field[key->num_fields].proto = MV_NET_PROTO_UDP; +- key->proto_field[key->num_fields].field.tcp = MV_NET_TCP_F_SP; ++ key->proto_field[key->num_fields].field.udp = MV_NET_UDP_F_SP; + key->key_size += 2; + key->num_fields += 1; + } + + if (first_flow->pattern & F_UDP_DPORT) { + key->proto_field[key->num_fields].proto = MV_NET_PROTO_UDP; +- key->proto_field[key->num_fields].field.udp = MV_NET_TCP_F_DP; ++ key->proto_field[key->num_fields].field.udp = MV_NET_UDP_F_DP; + key->key_size += 2; + key->num_fields += 1; + } +diff --git a/dpdk/drivers/net/netvsc/hn_ethdev.c b/dpdk/drivers/net/netvsc/hn_ethdev.c +index 04efd092ec..55382907a4 100644 +--- a/dpdk/drivers/net/netvsc/hn_ethdev.c ++++ b/dpdk/drivers/net/netvsc/hn_ethdev.c +@@ -118,8 +118,6 @@ eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size) + static void + eth_dev_vmbus_release(struct rte_eth_dev *eth_dev) + { +- /* mac_addrs must not be freed alone because part of dev_private */ +- eth_dev->data->mac_addrs = NULL; + /* free ether device */ + rte_eth_dev_release_port(eth_dev); + +@@ -240,6 +238,9 @@ static void hn_dev_info_get(struct rte_eth_dev *dev, + dev_info->max_rx_queues = hv->max_queues; + dev_info->max_tx_queues = hv->max_queues; + ++ dev_info->tx_desc_lim.nb_min = 1; ++ dev_info->tx_desc_lim.nb_max = 4096; ++ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return; + +@@ -728,9 +729,6 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) + eth_dev->tx_pkt_burst = &hn_xmit_pkts; + eth_dev->rx_pkt_burst = &hn_recv_pkts; + +- /* Since Hyper-V only supports one MAC address, just use local data */ +- eth_dev->data->mac_addrs = &hv->mac_addr; +- + /* + * for secondary processes, we don't initialize any further as primary + * has already done this work. 
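
The netvsc MAC address hunks around here hinge on an ownership rule: in this DPDK version rte_eth_dev_release_port() frees data->mac_addrs with rte_free(), so the array must be its own allocation; the old layout, where mac_addrs pointed into dev_private, forced the driver to NULL the pointer before release. A plain-C model of that rule (illustrative names only, not driver code):

    #include <stdlib.h>

    struct priv { unsigned char mac[6]; };	/* stand-in for hn_data */
    struct port {
    	struct priv *dev_private;
    	void *mac_addrs;
    };

    /* Generic release path: frees mac_addrs unconditionally, the way
     * rte_eth_dev_release_port() does here. */
    static void release_port(struct port *p)
    {
    	free(p->mac_addrs);
    	free(p->dev_private);
    }

    int main(void)
    {
    	struct port p;

    	p.dev_private = malloc(sizeof(struct priv));
    	/* Correct: a separate allocation the release path may free. */
    	p.mac_addrs = calloc(1, 6);
    	/* Incorrect (the old layout): p.mac_addrs = p.dev_private->mac
    	 * would make free(p.mac_addrs) undefined behavior unless the
    	 * pointer were cleared before release. */
    	release_port(&p);
    	return 0;
    }
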
+@@ -738,6 +736,15 @@
+ 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ 		return 0;
+ 
++	/* Since Hyper-V only supports one MAC address */
++	eth_dev->data->mac_addrs = rte_calloc("hv_mac", HN_MAX_MAC_ADDRS,
++					      sizeof(struct ether_addr), 0);
++	if (eth_dev->data->mac_addrs == NULL) {
++		PMD_INIT_LOG(ERR,
++			     "Failed to allocate memory to store MAC addresses");
++		return -ENOMEM;
++	}
++
+ 	hv->vmbus = vmbus;
+ 	hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP];
+ 	hv->chim_res  = &vmbus->resource[HV_SEND_BUF_MAP];
+@@ -776,11 +783,11 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
+ 	if (err)
+ 		goto failed;
+ 
+-	err = hn_tx_pool_init(eth_dev);
++	err = hn_chim_init(eth_dev);
+ 	if (err)
+ 		goto failed;
+ 
+-	err = hn_rndis_get_eaddr(hv, hv->mac_addr.addr_bytes);
++	err = hn_rndis_get_eaddr(hv, eth_dev->data->mac_addrs->addr_bytes);
+ 	if (err)
+ 		goto failed;
+ 
+@@ -812,7 +819,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
+ failed:
+ 	PMD_INIT_LOG(NOTICE, "device init failed");
+ 
+-	hn_tx_pool_uninit(eth_dev);
++	hn_chim_uninit(eth_dev);
+ 	hn_detach(hv);
+ 	return err;
+ }
+@@ -835,7 +842,7 @@ eth_hn_dev_uninit(struct rte_eth_dev *eth_dev)
+ 	eth_dev->rx_pkt_burst = NULL;
+ 
+ 	hn_detach(hv);
+-	hn_tx_pool_uninit(eth_dev);
++	hn_chim_uninit(eth_dev);
+ 	rte_vmbus_chan_close(hv->primary->chan);
+ 	rte_free(hv->primary);
+ 	rte_eth_dev_owner_delete(hv->owner.id);
+diff --git a/dpdk/drivers/net/netvsc/hn_nvs.c b/dpdk/drivers/net/netvsc/hn_nvs.c
+index d58770e045..76a7281f1b 100644
+--- a/dpdk/drivers/net/netvsc/hn_nvs.c
++++ b/dpdk/drivers/net/netvsc/hn_nvs.c
+@@ -54,7 +54,7 @@ static int hn_nvs_req_send(struct hn_data *hv,
+ }
+ 
+ static int
+-hn_nvs_execute(struct hn_data *hv,
++__hn_nvs_execute(struct hn_data *hv,
+ 	       void *req, uint32_t reqlen,
+ 	       void *resp, uint32_t resplen,
+ 	       uint32_t type)
+@@ -62,6 +62,7 @@ hn_nvs_execute(struct hn_data *hv,
+ 	struct vmbus_channel *chan = hn_primary_chan(hv);
+ 	char buffer[NVS_RESPSIZE_MAX];
+ 	const struct hn_nvs_hdr *hdr;
++	uint64_t xactid;
+ 	uint32_t len;
+ 	int ret;
+ 
+@@ -77,7 +78,7 @@ hn_nvs_execute(struct hn_data *hv,
+ 
+  retry:
+ 	len = sizeof(buffer);
+-	ret = rte_vmbus_chan_recv(chan, buffer, &len, NULL);
++	ret = rte_vmbus_chan_recv(chan, buffer, &len, &xactid);
+ 	if (ret == -EAGAIN) {
+ 		rte_delay_us(HN_CHAN_INTERVAL_US);
+ 		goto retry;
+@@ -88,7 +89,20 @@ hn_nvs_execute(struct hn_data *hv,
+ 		return ret;
+ 	}
+ 
++	if (len < sizeof(*hdr)) {
++		PMD_DRV_LOG(ERR, "response missing NVS header");
++		return -EINVAL;
++	}
++
+ 	hdr = (struct hn_nvs_hdr *)buffer;
++
++	/* Silently drop received packets while waiting for response */
++	if (hdr->type == NVS_TYPE_RNDIS) {
++		hn_nvs_ack_rxbuf(chan, xactid);
++		--hv->rxbuf_outstanding;
++		goto retry;
++	}
++
+ 	if (hdr->type != type) {
+ 		PMD_DRV_LOG(ERR, "unexpected NVS resp %#x, expect %#x",
+ 			    hdr->type, type);
+@@ -108,6 +122,29 @@ hn_nvs_execute(struct hn_data *hv,
+ 	return 0;
+ }
+ 
++
++/*
++ * Execute one control command and get the response.
++ * Only one command can be active on a channel at once.
++ * Unlike BSD, DPDK does not have an interrupt context
++ * so polling is required to wait for response. 
++ */ ++static int ++hn_nvs_execute(struct hn_data *hv, ++ void *req, uint32_t reqlen, ++ void *resp, uint32_t resplen, ++ uint32_t type) ++{ ++ struct hn_rx_queue *rxq = hv->primary; ++ int ret; ++ ++ rte_spinlock_lock(&rxq->ring_lock); ++ ret = __hn_nvs_execute(hv, req, reqlen, resp, resplen, type); ++ rte_spinlock_unlock(&rxq->ring_lock); ++ ++ return ret; ++} ++ + static int + hn_nvs_doinit(struct hn_data *hv, uint32_t nvs_ver) + { +diff --git a/dpdk/drivers/net/netvsc/hn_nvs.h b/dpdk/drivers/net/netvsc/hn_nvs.h +index 2563fd8d86..015839e364 100644 +--- a/dpdk/drivers/net/netvsc/hn_nvs.h ++++ b/dpdk/drivers/net/netvsc/hn_nvs.h +@@ -37,7 +37,7 @@ + #define NVS_RNDIS_MTYPE_CTRL 1 + + /* +- * NVS message transacion status codes. ++ * NVS message transaction status codes. + */ + #define NVS_STATUS_OK 1 + #define NVS_STATUS_FAILED 2 +diff --git a/dpdk/drivers/net/netvsc/hn_rxtx.c b/dpdk/drivers/net/netvsc/hn_rxtx.c +index 5ffc0ee145..b526b490b2 100644 +--- a/dpdk/drivers/net/netvsc/hn_rxtx.c ++++ b/dpdk/drivers/net/netvsc/hn_rxtx.c +@@ -18,6 +18,7 @@ + #include <rte_memzone.h> + #include <rte_malloc.h> + #include <rte_atomic.h> ++#include <rte_bitmap.h> + #include <rte_branch_prediction.h> + #include <rte_ether.h> + #include <rte_common.h> +@@ -83,7 +84,7 @@ struct hn_txdesc { + struct rte_mbuf *m; + + uint16_t queue_id; +- uint16_t chim_index; ++ uint32_t chim_index; + uint32_t chim_size; + uint32_t data_size; + uint32_t packets; +@@ -98,11 +99,13 @@ struct hn_txdesc { + RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ + RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) + ++#define HN_RNDIS_PKT_ALIGNED RTE_ALIGN(HN_RNDIS_PKT_LEN, RTE_CACHE_LINE_SIZE) ++ + /* Minimum space required for a packet */ + #define HN_PKTSIZE_MIN(align) \ + RTE_ALIGN(ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align) + +-#define DEFAULT_TX_FREE_THRESH 32U ++#define DEFAULT_TX_FREE_THRESH 32 + + static void + hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m) +@@ -150,63 +153,77 @@ hn_rndis_pktmsg_offset(uint32_t ofs) + static void hn_txd_init(struct rte_mempool *mp __rte_unused, + void *opaque, void *obj, unsigned int idx) + { ++ struct hn_tx_queue *txq = opaque; + struct hn_txdesc *txd = obj; +- struct rte_eth_dev *dev = opaque; +- struct rndis_packet_msg *pkt; + + memset(txd, 0, sizeof(*txd)); +- txd->chim_index = idx; + +- pkt = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN, +- rte_align32pow2(HN_RNDIS_PKT_LEN), +- dev->device->numa_node); +- if (!pkt) +- rte_exit(EXIT_FAILURE, "can not allocate RNDIS header"); +- +- txd->rndis_pkt = pkt; ++ txd->queue_id = txq->queue_id; ++ txd->chim_index = NVS_CHIM_IDX_INVALID; ++ txd->rndis_pkt = (struct rndis_packet_msg *)(char *)txq->tx_rndis ++ + idx * HN_RNDIS_PKT_ALIGNED; + } + +-/* +- * Unlike Linux and FreeBSD, this driver uses a mempool +- * to limit outstanding transmits and reserve buffers +- */ + int +-hn_tx_pool_init(struct rte_eth_dev *dev) ++hn_chim_init(struct rte_eth_dev *dev) + { + struct hn_data *hv = dev->data->dev_private; +- char name[RTE_MEMPOOL_NAMESIZE]; +- struct rte_mempool *mp; ++ uint32_t i, chim_bmp_size; ++ ++ rte_spinlock_init(&hv->chim_lock); ++ chim_bmp_size = rte_bitmap_get_memory_footprint(hv->chim_cnt); ++ hv->chim_bmem = rte_zmalloc("hn_chim_bitmap", chim_bmp_size, ++ RTE_CACHE_LINE_SIZE); ++ if (hv->chim_bmem == NULL) { ++ PMD_INIT_LOG(ERR, "failed to allocate bitmap size %u", ++ chim_bmp_size); ++ return -1; ++ } + +- snprintf(name, sizeof(name), +- "hn_txd_%u", dev->data->port_id); +- +- PMD_INIT_LOG(DEBUG, "create a TX send pool %s 
n=%u size=%zu socket=%d", +- name, hv->chim_cnt, sizeof(struct hn_txdesc), +- dev->device->numa_node); +- +- mp = rte_mempool_create(name, hv->chim_cnt, sizeof(struct hn_txdesc), +- HN_TXD_CACHE_SIZE, 0, +- NULL, NULL, +- hn_txd_init, dev, +- dev->device->numa_node, 0); +- if (!mp) { +- PMD_DRV_LOG(ERR, +- "mempool %s create failed: %d", name, rte_errno); +- return -rte_errno; ++ hv->chim_bmap = rte_bitmap_init(hv->chim_cnt, ++ hv->chim_bmem, chim_bmp_size); ++ if (hv->chim_bmap == NULL) { ++ PMD_INIT_LOG(ERR, "failed to init chim bitmap"); ++ return -1; + } + +- hv->tx_pool = mp; ++ for (i = 0; i < hv->chim_cnt; i++) ++ rte_bitmap_set(hv->chim_bmap, i); ++ + return 0; + } + + void +-hn_tx_pool_uninit(struct rte_eth_dev *dev) ++hn_chim_uninit(struct rte_eth_dev *dev) + { + struct hn_data *hv = dev->data->dev_private; + +- if (hv->tx_pool) { +- rte_mempool_free(hv->tx_pool); +- hv->tx_pool = NULL; ++ rte_bitmap_free(hv->chim_bmap); ++ rte_free(hv->chim_bmem); ++ hv->chim_bmem = NULL; ++} ++ ++static uint32_t hn_chim_alloc(struct hn_data *hv) ++{ ++ uint32_t index = NVS_CHIM_IDX_INVALID; ++ uint64_t slab; ++ ++ rte_spinlock_lock(&hv->chim_lock); ++ if (rte_bitmap_scan(hv->chim_bmap, &index, &slab)) ++ rte_bitmap_clear(hv->chim_bmap, index); ++ rte_spinlock_unlock(&hv->chim_lock); ++ ++ return index; ++} ++ ++static void hn_chim_free(struct hn_data *hv, uint32_t chim_idx) ++{ ++ if (chim_idx >= hv->chim_cnt) { ++ PMD_DRV_LOG(ERR, "Invalid chimney index %u", chim_idx); ++ } else { ++ rte_spinlock_lock(&hv->chim_lock); ++ rte_bitmap_set(hv->chim_bmap, chim_idx); ++ rte_spinlock_unlock(&hv->chim_lock); + } + } + +@@ -220,15 +237,16 @@ static void hn_reset_txagg(struct hn_tx_queue *txq) + + int + hn_dev_tx_queue_setup(struct rte_eth_dev *dev, +- uint16_t queue_idx, uint16_t nb_desc __rte_unused, ++ uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) + + { + struct hn_data *hv = dev->data->dev_private; + struct hn_tx_queue *txq; ++ char name[RTE_MEMPOOL_NAMESIZE]; + uint32_t tx_free_thresh; +- int err; ++ int err = -ENOMEM; + + PMD_INIT_FUNC_TRACE(); + +@@ -244,14 +262,42 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev, + + tx_free_thresh = tx_conf->tx_free_thresh; + if (tx_free_thresh == 0) +- tx_free_thresh = RTE_MIN(hv->chim_cnt / 4, ++ tx_free_thresh = RTE_MIN(nb_desc / 4, + DEFAULT_TX_FREE_THRESH); + +- if (tx_free_thresh >= hv->chim_cnt - 3) +- tx_free_thresh = hv->chim_cnt - 3; ++ if (tx_free_thresh + 3 >= nb_desc) { ++ PMD_INIT_LOG(ERR, ++ "tx_free_thresh must be less than the number of TX entries minus 3(%u)." 
++ " (tx_free_thresh=%u port=%u queue=%u)\n", ++ nb_desc - 3, ++ tx_free_thresh, dev->data->port_id, queue_idx); ++ return -EINVAL; ++ } + + txq->free_thresh = tx_free_thresh; + ++ snprintf(name, sizeof(name), ++ "hn_txd_%u_%u", dev->data->port_id, queue_idx); ++ ++ PMD_INIT_LOG(DEBUG, "TX descriptor pool %s n=%u size=%zu", ++ name, nb_desc, sizeof(struct hn_txdesc)); ++ ++ txq->tx_rndis = rte_calloc("hn_txq_rndis", nb_desc, ++ HN_RNDIS_PKT_ALIGNED, RTE_CACHE_LINE_SIZE); ++ if (txq->tx_rndis == NULL) ++ goto error; ++ ++ txq->txdesc_pool = rte_mempool_create(name, nb_desc, ++ sizeof(struct hn_txdesc), ++ 0, 0, NULL, NULL, ++ hn_txd_init, txq, ++ dev->device->numa_node, 0); ++ if (txq->txdesc_pool == NULL) { ++ PMD_DRV_LOG(ERR, ++ "mempool %s create failed: %d", name, rte_errno); ++ goto error; ++ } ++ + txq->agg_szmax = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size); + txq->agg_pktmax = hv->rndis_agg_pkts; + txq->agg_align = hv->rndis_agg_align; +@@ -260,31 +306,57 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev, + + err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc, + socket_id, tx_conf); +- if (err) { +- rte_free(txq); +- return err; ++ if (err == 0) { ++ dev->data->tx_queues[queue_idx] = txq; ++ return 0; + } + +- dev->data->tx_queues[queue_idx] = txq; +- return 0; ++error: ++ if (txq->txdesc_pool) ++ rte_mempool_free(txq->txdesc_pool); ++ rte_free(txq->tx_rndis); ++ rte_free(txq); ++ return err; ++} ++ ++ ++static struct hn_txdesc *hn_txd_get(struct hn_tx_queue *txq) ++{ ++ struct hn_txdesc *txd; ++ ++ if (rte_mempool_get(txq->txdesc_pool, (void **)&txd)) { ++ ++txq->stats.ring_full; ++ PMD_TX_LOG(DEBUG, "tx pool exhausted!"); ++ return NULL; ++ } ++ ++ txd->m = NULL; ++ txd->packets = 0; ++ txd->data_size = 0; ++ txd->chim_size = 0; ++ ++ return txd; ++} ++ ++static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd) ++{ ++ rte_mempool_put(txq->txdesc_pool, txd); + } + + void + hn_dev_tx_queue_release(void *arg) + { + struct hn_tx_queue *txq = arg; +- struct hn_txdesc *txd; + + PMD_INIT_FUNC_TRACE(); + + if (!txq) + return; + +- /* If any pending data is still present just drop it */ +- txd = txq->agg_txd; +- if (txd) +- rte_mempool_put(txq->hv->tx_pool, txd); ++ if (txq->txdesc_pool) ++ rte_mempool_free(txq->txdesc_pool); + ++ rte_free(txq->tx_rndis); + rte_free(txq); + } + +@@ -292,6 +364,7 @@ static void + hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id, + unsigned long xactid, const struct hn_nvs_rndis_ack *ack) + { ++ struct hn_data *hv = dev->data->dev_private; + struct hn_txdesc *txd = (struct hn_txdesc *)xactid; + struct hn_tx_queue *txq; + +@@ -312,9 +385,11 @@ hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id, + ++txq->stats.errors; + } + +- rte_pktmbuf_free(txd->m); ++ if (txd->chim_index != NVS_CHIM_IDX_INVALID) ++ hn_chim_free(hv, txd->chim_index); + +- rte_mempool_put(txq->hv->tx_pool, txd); ++ rte_pktmbuf_free(txd->m); ++ hn_txd_put(txq, txd); + } + + /* Handle transmit completion events */ +@@ -878,10 +953,6 @@ uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id, + + rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id]; + +- /* If no pending data then nothing to do */ +- if (rte_vmbus_chan_rx_empty(rxq->chan)) +- return 0; +- + /* + * Since channel is shared between Rx and TX queue need to have a lock + * since DPDK does not force same CPU to be used for Rx/Tx. 
+@@ -945,9 +1016,6 @@ uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
+ 
+ 		if (tx_limit && tx_done >= tx_limit)
+ 			break;
+-
+-		if (rxq->rx_ring && rte_ring_full(rxq->rx_ring))
+-			break;
+ 	}
+ 
+ 	if (bytes_read > 0)
+@@ -1020,28 +1088,15 @@ static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
+ 	return ret;
+ }
+ 
+-static struct hn_txdesc *hn_new_txd(struct hn_data *hv,
+-				    struct hn_tx_queue *txq)
+-{
+-	struct hn_txdesc *txd;
+-
+-	if (rte_mempool_get(hv->tx_pool, (void **)&txd)) {
+-		++txq->stats.ring_full;
+-		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
+-		return NULL;
+-	}
+-
+-	txd->m = NULL;
+-	txd->queue_id = txq->queue_id;
+-	txd->packets = 0;
+-	txd->data_size = 0;
+-	txd->chim_size = 0;
+-
+-	return txd;
+-}
+-
++/*
++ * Try and find a place in a send chimney buffer to put
++ * the small packet. If space is available, this routine
++ * returns a pointer to where to place the data.
++ * If no space, caller should try direct transmit.
++ */
+ static void *
+-hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
++hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq,
++	     struct hn_txdesc *txd, uint32_t pktsize)
+ {
+ 	struct hn_txdesc *agg_txd = txq->agg_txd;
+ 	struct rndis_packet_msg *pkt;
+@@ -1069,7 +1124,7 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
+ 	}
+ 
+ 	chim = (uint8_t *)pkt + pkt->len;
+-
++	txq->agg_prevpkt = chim;
+ 	txq->agg_pktleft--;
+ 	txq->agg_szleft -= pktsize;
+ 	if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
+@@ -1079,18 +1134,21 @@ hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
+ 		 */
+ 		txq->agg_pktleft = 0;
+ 	}
+-	} else {
+-		agg_txd = hn_new_txd(hv, txq);
+-		if (!agg_txd)
+-			return NULL;
+-
+-		chim = (uint8_t *)hv->chim_res->addr
+-			+ agg_txd->chim_index * hv->chim_szmax;
+ 
+-		txq->agg_txd = agg_txd;
+-		txq->agg_pktleft = txq->agg_pktmax - 1;
+-		txq->agg_szleft = txq->agg_szmax - pktsize;
++		hn_txd_put(txq, txd);
++		return chim;
+ 	}
++
++	txd->chim_index = hn_chim_alloc(hv);
++	if (txd->chim_index == NVS_CHIM_IDX_INVALID)
++		return NULL;
++
++	chim = (uint8_t *)hv->chim_res->addr
++		+ txd->chim_index * hv->chim_szmax;
++
++	txq->agg_txd = txd;
++	txq->agg_pktleft = txq->agg_pktmax - 1;
++	txq->agg_szleft = txq->agg_szmax - pktsize;
+ 	txq->agg_prevpkt = chim;
+ 
+ 	return chim;
+@@ -1298,7 +1356,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+ 	struct hn_data *hv = txq->hv;
+ 	struct rte_eth_dev *vf_dev;
+ 	bool need_sig = false;
+-	uint16_t nb_tx;
++	uint16_t nb_tx, avail;
+ 	int ret;
+ 
+ 	if (unlikely(hv->closed))
+@@ -1313,13 +1371,19 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+ 		return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
+ 	}
+ 
+-	if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
++	avail = rte_mempool_avail_count(txq->txdesc_pool);
++	if (nb_pkts > avail || avail <= txq->free_thresh)
+ 		hn_process_events(hv, txq->queue_id, 0);
+ 
+ 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+ 		struct rte_mbuf *m = tx_pkts[nb_tx];
+ 		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
+ 		struct rndis_packet_msg *pkt;
++		struct hn_txdesc *txd;
++
++		txd = hn_txd_get(txq);
++		if (txd == NULL)
++			break;
+ 
+ 		/* For small packets aggregate them in chimney buffer */
+ 		if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) {
+@@ -1330,7 +1394,8 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+ 				goto fail;
+ 			}
+ 
+-			pkt = hn_try_txagg(hv, txq, pkt_size);
++
++			pkt = hn_try_txagg(hv, txq, txd, pkt_size);
+ 			if (unlikely(!pkt))
+ 				break;
+ 
+@@ -1344,21 +1409,13 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+ 			    hn_flush_txagg(txq, &need_sig))
+ 				goto fail;
+ 		} else {
+-			struct hn_txdesc *txd;
+-
+-			/* can send chimney data and large packet at once */
+-			txd = txq->agg_txd;
+-			if (txd) {
+-				hn_reset_txagg(txq);
+-			} else {
+-				txd = hn_new_txd(hv, txq);
+-				if (unlikely(!txd))
+-					break;
+-			}
++			/* Send any outstanding packets in buffer */
++			if (txq->agg_txd && hn_flush_txagg(txq, &need_sig))
++				goto fail;
+ 
+ 			pkt = txd->rndis_pkt;
+ 			txd->m = m;
+-			txd->data_size += m->pkt_len;
++			txd->data_size = m->pkt_len;
+ 			++txd->packets;
+ 
+ 			hn_encap(pkt, queue_id, m);
+@@ -1367,7 +1424,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+ 			if (unlikely(ret != 0)) {
+ 				PMD_TX_LOG(NOTICE, "sg send failed: %d", ret);
+ 				++txq->stats.errors;
+-				rte_mempool_put(hv->tx_pool, txd);
++				hn_txd_put(txq, txd);
+ 				goto fail;
+ 			}
+ 		}
+diff --git a/dpdk/drivers/net/netvsc/hn_var.h b/dpdk/drivers/net/netvsc/hn_var.h
+index d10e164e68..5dcd53638a 100644
+--- a/dpdk/drivers/net/netvsc/hn_var.h
++++ b/dpdk/drivers/net/netvsc/hn_var.h
+@@ -52,6 +52,8 @@ struct hn_tx_queue {
+ 	uint16_t port_id;
+ 	uint16_t queue_id;
+ 	uint32_t free_thresh;
++	struct rte_mempool *txdesc_pool;
++	void *tx_rndis;
+ 
+ 	/* Applied packet transmission aggregation limits. */
+ 	uint32_t agg_szmax;
+@@ -114,8 +116,10 @@ struct hn_data {
+ 	uint16_t num_queues;
+ 	uint64_t rss_offloads;
+ 
++	rte_spinlock_t chim_lock;
+ 	struct rte_mem_resource *chim_res;	/* UIO resource for Tx */
+-	struct rte_mempool *tx_pool;		/* Tx descriptors */
++	struct rte_bitmap *chim_bmap;		/* Send buffer map */
++	void *chim_bmem;
+ 	uint32_t chim_szmax;			/* Max size per buffer */
+ 	uint32_t chim_cnt;			/* Max packets per buffer */
+ 
+@@ -130,8 +134,6 @@ struct hn_data {
+ 	rte_atomic32_t rndis_req_id;
+ 	uint8_t rndis_resp[256];
+ 
+-	struct ether_addr mac_addr;
+-
+ 	struct rte_eth_dev_owner owner;
+ 	struct rte_intr_handle vf_intr;
+ 
+@@ -152,8 +154,8 @@ uint16_t hn_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+ 		      uint16_t nb_pkts);
+ 
+-int hn_tx_pool_init(struct rte_eth_dev *dev);
+-void hn_tx_pool_uninit(struct rte_eth_dev *dev);
++int hn_chim_init(struct rte_eth_dev *dev);
++void hn_chim_uninit(struct rte_eth_dev *dev);
+ int hn_dev_link_update(struct rte_eth_dev *dev, int wait);
+ int hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+ 			  uint16_t nb_desc, unsigned int socket_id,
+diff --git a/dpdk/drivers/net/netvsc/hn_vf.c b/dpdk/drivers/net/netvsc/hn_vf.c
+index 50f92a00ae..86485235ae 100644
+--- a/dpdk/drivers/net/netvsc/hn_vf.c
++++ b/dpdk/drivers/net/netvsc/hn_vf.c
+@@ -170,6 +170,17 @@ hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
+ 		hn_vf_remove(hv);
+ }
+ 
++static void
++hn_vf_merge_desc_lim(struct rte_eth_desc_lim *lim,
++		     const struct rte_eth_desc_lim *vf_lim)
++{
++	lim->nb_max = RTE_MIN(vf_lim->nb_max, lim->nb_max);
++	lim->nb_min = RTE_MAX(vf_lim->nb_min, lim->nb_min);
++	lim->nb_align = RTE_MAX(vf_lim->nb_align, lim->nb_align);
++	lim->nb_seg_max = RTE_MIN(vf_lim->nb_seg_max, lim->nb_seg_max);
++	lim->nb_mtu_seg_max = RTE_MIN(vf_lim->nb_mtu_seg_max, lim->nb_mtu_seg_max);
++}
++
+ /*
+  * Merge the info from the VF and synthetic path. 
+ * use the default config of the VF +@@ -196,11 +207,13 @@ static void hn_vf_info_merge(struct rte_eth_dev *vf_dev, + info->max_tx_queues); + info->tx_offload_capa &= vf_info.tx_offload_capa; + info->tx_queue_offload_capa &= vf_info.tx_queue_offload_capa; ++ hn_vf_merge_desc_lim(&info->tx_desc_lim, &vf_info.tx_desc_lim); + + info->min_rx_bufsize = RTE_MAX(vf_info.min_rx_bufsize, + info->min_rx_bufsize); + info->max_rx_pktlen = RTE_MAX(vf_info.max_rx_pktlen, + info->max_rx_pktlen); ++ hn_vf_merge_desc_lim(&info->rx_desc_lim, &vf_info.rx_desc_lim); + } + + void hn_vf_info_get(struct hn_data *hv, struct rte_eth_dev_info *info) +diff --git a/dpdk/drivers/net/nfp/nfp_net.c b/dpdk/drivers/net/nfp/nfp_net.c +index 68c853c94f..ddd9d9fca9 100644 +--- a/dpdk/drivers/net/nfp/nfp_net.c ++++ b/dpdk/drivers/net/nfp/nfp_net.c +@@ -3297,16 +3297,14 @@ static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_nfp_net_pf_pmd = { + .id_table = pci_id_nfp_pf_net_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = nfp_pf_pci_probe, + .remove = eth_nfp_pci_remove, + }; + + static struct rte_pci_driver rte_nfp_net_vf_pmd = { + .id_table = pci_id_nfp_vf_net_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = eth_nfp_pci_probe, + .remove = eth_nfp_pci_remove, + }; +diff --git a/dpdk/drivers/net/null/rte_eth_null.c b/dpdk/drivers/net/null/rte_eth_null.c +index 1067e6e4ea..1de26b0f69 100644 +--- a/dpdk/drivers/net/null/rte_eth_null.c ++++ b/dpdk/drivers/net/null/rte_eth_null.c +@@ -613,6 +613,7 @@ rte_pmd_null_probe(struct rte_vdev_device *dev) + PMD_LOG(INFO, "Initializing pmd_null for %s", name); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { ++ struct pmd_internals *internals; + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + PMD_LOG(ERR, "Failed to probe %s", name); +@@ -621,7 +622,8 @@ rte_pmd_null_probe(struct rte_vdev_device *dev) + /* TODO: request info from primary to set up Rx and Tx */ + eth_dev->dev_ops = &ops; + eth_dev->device = &dev->device; +- if (packet_copy) { ++ internals = eth_dev->data->dev_private; ++ if (internals->packet_copy) { + eth_dev->rx_pkt_burst = eth_null_copy_rx; + eth_dev->tx_pkt_burst = eth_null_copy_tx; + } else { +@@ -637,23 +639,18 @@ rte_pmd_null_probe(struct rte_vdev_device *dev) + if (kvlist == NULL) + return -1; + +- if (rte_kvargs_count(kvlist, ETH_NULL_PACKET_SIZE_ARG) == 1) { ++ ret = rte_kvargs_process(kvlist, ++ ETH_NULL_PACKET_SIZE_ARG, ++ &get_packet_size_arg, &packet_size); ++ if (ret < 0) ++ goto free_kvlist; + +- ret = rte_kvargs_process(kvlist, +- ETH_NULL_PACKET_SIZE_ARG, +- &get_packet_size_arg, &packet_size); +- if (ret < 0) +- goto free_kvlist; +- } +- +- if (rte_kvargs_count(kvlist, ETH_NULL_PACKET_COPY_ARG) == 1) { + +- ret = rte_kvargs_process(kvlist, +- ETH_NULL_PACKET_COPY_ARG, +- &get_packet_copy_arg, &packet_copy); +- if (ret < 0) +- goto free_kvlist; +- } ++ ret = rte_kvargs_process(kvlist, ++ ETH_NULL_PACKET_COPY_ARG, ++ &get_packet_copy_arg, &packet_copy); ++ if (ret < 0) ++ goto free_kvlist; + } + + PMD_LOG(INFO, "Configure pmd_null: packet size is %d, " +diff --git a/dpdk/drivers/net/octeontx/base/meson.build b/dpdk/drivers/net/octeontx/base/meson.build +index a06a2c89c9..e1060fc4ec 100644 +--- a/dpdk/drivers/net/octeontx/base/meson.build 
++++ b/dpdk/drivers/net/octeontx/base/meson.build +@@ -10,7 +10,10 @@ sources = [ + depends = ['ethdev', 'mempool_octeontx'] + static_objs = [] + foreach d: depends +- static_objs += [get_variable('static_rte_' + d)] ++ if not is_variable('shared_rte_' + d) ++ subdir_done() ++ endif ++ static_objs += get_variable('static_rte_' + d) + endforeach + + c_args = cflags +diff --git a/dpdk/drivers/net/octeontx/octeontx_ethdev.c b/dpdk/drivers/net/octeontx/octeontx_ethdev.c +index ac193ace43..17128da994 100644 +--- a/dpdk/drivers/net/octeontx/octeontx_ethdev.c ++++ b/dpdk/drivers/net/octeontx/octeontx_ethdev.c +@@ -1106,6 +1106,7 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev, + + free_mac_addrs: + rte_free(data->mac_addrs); ++ data->mac_addrs = NULL; + err: + if (nic) + octeontx_port_close(nic); +diff --git a/dpdk/drivers/net/qede/qede_ethdev.c b/dpdk/drivers/net/qede/qede_ethdev.c +index 49f7b32056..d269e75cc0 100644 +--- a/dpdk/drivers/net/qede/qede_ethdev.c ++++ b/dpdk/drivers/net/qede/qede_ethdev.c +@@ -1046,7 +1046,7 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev) + qede_reset_queue_stats(qdev, true); + + /* Newer SR-IOV PF driver expects RX/TX queues to be started before +- * enabling RSS. Hence RSS configuration is deferred upto this point. ++ * enabling RSS. Hence RSS configuration is deferred up to this point. + * Also, we would like to retain similar behavior in PF case, so we + * don't do PF/VF specific check here. + */ +@@ -1058,6 +1058,9 @@ static int qede_dev_start(struct rte_eth_dev *eth_dev) + if (qede_activate_vport(eth_dev, true)) + goto err; + ++ /* Bring-up the link */ ++ qede_dev_set_link_state(eth_dev, true); ++ + /* Update link status */ + qede_link_update(eth_dev, 0); + +@@ -1079,6 +1082,12 @@ static void qede_dev_stop(struct rte_eth_dev *eth_dev) + + PMD_INIT_FUNC_TRACE(edev); + ++ /* Bring the link down */ ++ qede_dev_set_link_state(eth_dev, false); ++ ++ /* Update link status */ ++ qede_link_update(eth_dev, 0); ++ + /* Disable vport */ + if (qede_activate_vport(eth_dev, false)) + return; +@@ -1164,6 +1173,8 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev) + struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev); + struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); + struct rte_eth_rxmode *rxmode = ð_dev->data->dev_conf.rxmode; ++ uint8_t num_rxqs; ++ uint8_t num_txqs; + int ret; + + PMD_INIT_FUNC_TRACE(edev); +@@ -1193,12 +1204,17 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev) + if (qede_check_fdir_support(eth_dev)) + return -ENOTSUP; + +- qede_dealloc_fp_resc(eth_dev); +- qdev->num_tx_queues = eth_dev->data->nb_tx_queues * edev->num_hwfns; +- qdev->num_rx_queues = eth_dev->data->nb_rx_queues * edev->num_hwfns; +- +- if (qede_alloc_fp_resc(qdev)) +- return -ENOMEM; ++ /* Allocate/reallocate fastpath resources only for new queue config */ ++ num_txqs = eth_dev->data->nb_tx_queues * edev->num_hwfns; ++ num_rxqs = eth_dev->data->nb_rx_queues * edev->num_hwfns; ++ if (qdev->num_tx_queues != num_txqs || ++ qdev->num_rx_queues != num_rxqs) { ++ qede_dealloc_fp_resc(eth_dev); ++ qdev->num_tx_queues = num_txqs; ++ qdev->num_rx_queues = num_rxqs; ++ if (qede_alloc_fp_resc(qdev)) ++ return -ENOMEM; ++ } + + /* If jumbo enabled adjust MTU */ + if (rxmode->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) +@@ -1451,8 +1467,6 @@ static void qede_dev_close(struct rte_eth_dev *eth_dev) + eth_dev->data->nb_rx_queues = 0; + eth_dev->data->nb_tx_queues = 0; + +- /* Bring the link down */ +- qede_dev_set_link_state(eth_dev, false); + 
qdev->ops->common->slowpath_stop(edev); + qdev->ops->common->remove(edev); + rte_intr_disable(&pci_dev->intr_handle); +@@ -2551,9 +2565,6 @@ static int qede_common_dev_init(struct rte_eth_dev *eth_dev, bool is_vf) + do_once = false; + } + +- /* Bring-up the link */ +- qede_dev_set_link_state(eth_dev, true); +- + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + SLIST_INIT(&adapter->arfs_info.arfs_list_head); +@@ -2702,8 +2713,7 @@ static int qedevf_eth_dev_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_qedevf_pmd = { + .id_table = pci_id_qedevf_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = qedevf_eth_dev_pci_probe, + .remove = qedevf_eth_dev_pci_remove, + }; +@@ -2722,8 +2732,7 @@ static int qede_eth_dev_pci_remove(struct rte_pci_device *pci_dev) + + static struct rte_pci_driver rte_qede_pmd = { + .id_table = pci_id_qede_map, +- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | +- RTE_PCI_DRV_IOVA_AS_VA, ++ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = qede_eth_dev_pci_probe, + .remove = qede_eth_dev_pci_remove, + }; +diff --git a/dpdk/drivers/net/qede/qede_rxtx.c b/dpdk/drivers/net/qede/qede_rxtx.c +index 52ebc8b8b3..a72c9aa333 100644 +--- a/dpdk/drivers/net/qede/qede_rxtx.c ++++ b/dpdk/drivers/net/qede/qede_rxtx.c +@@ -593,12 +593,14 @@ qede_alloc_mem_sb(struct qede_dev *qdev, struct ecore_sb_info *sb_info, + + int qede_alloc_fp_resc(struct qede_dev *qdev) + { +- struct ecore_dev *edev = &qdev->edev; ++ struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); + struct qede_fastpath *fp; + uint32_t num_sbs; + uint16_t sb_idx; + int i; + ++ PMD_INIT_FUNC_TRACE(edev); ++ + if (IS_VF(edev)) + ecore_vf_get_num_sbs(ECORE_LEADING_HWFN(edev), &num_sbs); + else +diff --git a/dpdk/drivers/net/ring/rte_eth_ring.c b/dpdk/drivers/net/ring/rte_eth_ring.c +index 452114d0c5..7298ee93e2 100644 +--- a/dpdk/drivers/net/ring/rte_eth_ring.c ++++ b/dpdk/drivers/net/ring/rte_eth_ring.c +@@ -251,6 +251,7 @@ static const struct eth_dev_ops ops = { + + static int + do_eth_dev_ring_create(const char *name, ++ struct rte_vdev_device *vdev, + struct rte_ring * const rx_queues[], + const unsigned int nb_rx_queues, + struct rte_ring *const tx_queues[], +@@ -296,12 +297,15 @@ do_eth_dev_ring_create(const char *name, + } + + /* now put it all together ++ * - store EAL device in eth_dev, + * - store queue data in internals, + * - store numa_node info in eth_dev_data + * - point eth_dev_data to internals + * - and point eth_dev structure to new eth_dev_data structure + */ + ++ eth_dev->device = &vdev->device; ++ + data = eth_dev->data; + data->rx_queues = rx_queues_local; + data->tx_queues = tx_queues_local; +@@ -411,7 +415,9 @@ rte_eth_from_ring(struct rte_ring *r) + } + + static int +-eth_dev_ring_create(const char *name, const unsigned int numa_node, ++eth_dev_ring_create(const char *name, ++ struct rte_vdev_device *vdev, ++ const unsigned int numa_node, + enum dev_action action, struct rte_eth_dev **eth_dev) + { + /* rx and tx are so-called from point of view of first port. 
+@@ -441,7 +447,7 @@ eth_dev_ring_create(const char *name, const unsigned int numa_node,
+ return -1;
+ }
+
+- if (do_eth_dev_ring_create(name, rxtx, num_rings, rxtx, num_rings,
++ if (do_eth_dev_ring_create(name, vdev, rxtx, num_rings, rxtx, num_rings,
+ numa_node, action, eth_dev) < 0)
+ return -1;
+
+@@ -562,12 +568,12 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+ PMD_LOG(INFO, "Initializing pmd_ring for %s", name);
+
+ if (params == NULL || params[0] == '\0') {
+- ret = eth_dev_ring_create(name, rte_socket_id(), DEV_CREATE,
++ ret = eth_dev_ring_create(name, dev, rte_socket_id(), DEV_CREATE,
+ &eth_dev);
+ if (ret == -1) {
+ PMD_LOG(INFO,
+ "Attach to pmd_ring for %s", name);
+- ret = eth_dev_ring_create(name, rte_socket_id(),
++ ret = eth_dev_ring_create(name, dev, rte_socket_id(),
+ DEV_ATTACH, &eth_dev);
+ }
+ } else {
+@@ -576,19 +582,16 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+ if (!kvlist) {
+ PMD_LOG(INFO,
+ "Ignoring unsupported parameters when creatingrings-backed ethernet device");
+- ret = eth_dev_ring_create(name, rte_socket_id(),
++ ret = eth_dev_ring_create(name, dev, rte_socket_id(),
+ DEV_CREATE, &eth_dev);
+ if (ret == -1) {
+ PMD_LOG(INFO,
+ "Attach to pmd_ring for %s",
+ name);
+- ret = eth_dev_ring_create(name, rte_socket_id(),
++ ret = eth_dev_ring_create(name, dev, rte_socket_id(),
+ DEV_ATTACH, &eth_dev);
+ }
+
+- if (eth_dev)
+- eth_dev->device = &dev->device;
+-
+ return ret;
+ }
+
+@@ -599,7 +602,7 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+ if (ret < 0)
+ goto out_free;
+
+- ret = do_eth_dev_ring_create(name,
++ ret = do_eth_dev_ring_create(name, dev,
+ internal_args->rx_queues,
+ internal_args->nb_rx_queues,
+ internal_args->tx_queues,
+@@ -629,6 +632,7 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+
+ for (info->count = 0; info->count < info->total; info->count++) {
+ ret = eth_dev_ring_create(info->list[info->count].name,
++ dev,
+ info->list[info->count].node,
+ info->list[info->count].action,
+ &eth_dev);
+@@ -637,7 +641,7 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+ PMD_LOG(INFO,
+ "Attach to pmd_ring for %s",
+ name);
+- ret = eth_dev_ring_create(name,
++ ret = eth_dev_ring_create(name, dev,
+ info->list[info->count].node,
+ DEV_ATTACH,
+ &eth_dev);
+@@ -646,9 +650,6 @@ rte_pmd_ring_probe(struct rte_vdev_device *dev)
+ }
+ }
+
+- if (eth_dev)
+- eth_dev->device = &dev->device;
+-
+ out_free:
+ rte_kvargs_free(kvlist);
+ rte_free(info);
+diff --git a/dpdk/drivers/net/sfc/base/ef10_filter.c b/dpdk/drivers/net/sfc/base/ef10_filter.c
+index afe4064d9f..cf3446805a 100644
+--- a/dpdk/drivers/net/sfc/base/ef10_filter.c
++++ b/dpdk/drivers/net/sfc/base/ef10_filter.c
+@@ -597,6 +597,231 @@ ef10_filter_restore(
+ return (rc);
+ }
+
++enum ef10_filter_add_action_e {
++ /* Insert a new filter */
++ EF10_FILTER_ADD_NEW,
++ /*
++ * Replace old filter with a new one, overriding the old one
++ * if it has lower priority.
++ */ ++ EF10_FILTER_ADD_REPLACE, ++ /* Store new, lower priority filter as overridden by old filter */ ++ EF10_FILTER_ADD_STORE, ++ /* Special case for AUTO filters, remove AUTO_OLD flag */ ++ EF10_FILTER_ADD_REFRESH, ++}; ++ ++static __checkReturn efx_rc_t ++ef10_filter_add_lookup_equal_spec( ++ __in efx_filter_spec_t *spec, ++ __in efx_filter_spec_t *probe_spec, ++ __in efx_filter_replacement_policy_t policy, ++ __out boolean_t *found) ++{ ++ efx_rc_t rc; ++ ++ /* Refreshing AUTO filter */ ++ if (spec->efs_priority == EFX_FILTER_PRI_AUTO && ++ probe_spec->efs_priority == EFX_FILTER_PRI_AUTO) { ++ *found = B_TRUE; ++ return (0); ++ } ++ ++ /* ++ * With exclusive filters, higher priority ones ++ * override lower priority ones, and lower priority ++ * ones are stored in case the higher priority one ++ * is removed. ++ */ ++ if (ef10_filter_is_exclusive(spec)) { ++ switch (policy) { ++ case EFX_FILTER_REPLACEMENT_HIGHER_OR_EQUAL_PRIORITY: ++ if (spec->efs_priority == probe_spec->efs_priority) { ++ *found = B_TRUE; ++ break; ++ } ++ /* Fall-through */ ++ case EFX_FILTER_REPLACEMENT_HIGHER_PRIORITY: ++ if (spec->efs_priority > probe_spec->efs_priority) { ++ *found = B_TRUE; ++ break; ++ } ++ /* Fall-through */ ++ case EFX_FILTER_REPLACEMENT_NEVER: ++ /* ++ * Lower priority filter needs to be ++ * stored. It does *not* replace the ++ * old one. That is why EEXIST is not ++ * returned in that case. ++ */ ++ if (spec->efs_priority < probe_spec->efs_priority) { ++ *found = B_TRUE; ++ break; ++ } else { ++ rc = EEXIST; ++ goto fail1; ++ } ++ default: ++ EFSYS_ASSERT(0); ++ rc = EEXIST; ++ goto fail2; ++ } ++ } else { ++ *found = B_FALSE; ++ } ++ ++ return (0); ++ ++fail2: ++ EFSYS_PROBE(fail2); ++ ++fail1: ++ EFSYS_PROBE1(fail1, efx_rc_t, rc); ++ ++ return (rc); ++} ++ ++ ++static void ++ef10_filter_add_select_action( ++ __in efx_filter_spec_t *saved_spec, ++ __in efx_filter_spec_t *spec, ++ __out enum ef10_filter_add_action_e *action, ++ __out efx_filter_spec_t **overridden_spec) ++{ ++ efx_filter_spec_t *overridden = NULL; ++ ++ if (saved_spec == NULL) { ++ *action = EF10_FILTER_ADD_NEW; ++ } else if (ef10_filter_is_exclusive(spec) == B_FALSE) { ++ /* ++ * Non-exclusive filters are always stored in separate entries ++ * in the table. The only case involving a saved spec is ++ * refreshing an AUTO filter. ++ */ ++ EFSYS_ASSERT(saved_spec->efs_overridden_spec == NULL); ++ EFSYS_ASSERT(spec->efs_priority == EFX_FILTER_PRI_AUTO); ++ EFSYS_ASSERT(saved_spec->efs_priority == EFX_FILTER_PRI_AUTO); ++ *action = EF10_FILTER_ADD_REFRESH; ++ } else { ++ /* Exclusive filters stored in the same entry */ ++ if (spec->efs_priority > saved_spec->efs_priority) { ++ /* ++ * Insert a high priority filter over a lower priority ++ * one. Only two priority levels are implemented, so ++ * there must not already be an overridden filter. ++ */ ++ EFX_STATIC_ASSERT(EFX_FILTER_NPRI == 2); ++ EFSYS_ASSERT(saved_spec->efs_overridden_spec == NULL); ++ overridden = saved_spec; ++ *action = EF10_FILTER_ADD_REPLACE; ++ } else if (spec->efs_priority == saved_spec->efs_priority) { ++ /* Replace in-place or refresh an existing filter */ ++ if (spec->efs_priority == EFX_FILTER_PRI_AUTO) ++ *action = EF10_FILTER_ADD_REFRESH; ++ else ++ *action = EF10_FILTER_ADD_REPLACE; ++ } else { ++ /* ++ * Insert a lower priority filter, storing it in case ++ * the higher priority filter is removed. ++ * ++ * Currently there are only two priority levels, so this ++ * must be an AUTO filter. 
++ */ ++ EFX_STATIC_ASSERT(EFX_FILTER_NPRI == 2); ++ EFSYS_ASSERT(spec->efs_priority == EFX_FILTER_PRI_AUTO); ++ if (saved_spec->efs_overridden_spec != NULL) { ++ *action = EF10_FILTER_ADD_REFRESH; ++ } else { ++ overridden = spec; ++ *action = EF10_FILTER_ADD_STORE; ++ } ++ } ++ } ++ ++ *overridden_spec = overridden; ++} ++ ++static __checkReturn efx_rc_t ++ef10_filter_add_execute_action( ++ __in efx_nic_t *enp, ++ __in efx_filter_spec_t *saved_spec, ++ __in efx_filter_spec_t *spec, ++ __in efx_filter_spec_t *overridden_spec, ++ __in enum ef10_filter_add_action_e action, ++ __in int ins_index) ++{ ++ ef10_filter_table_t *eftp = enp->en_filter.ef_ef10_filter_table; ++ efsys_lock_state_t state; ++ efx_rc_t rc; ++ ++ EFSYS_LOCK(enp->en_eslp, state); ++ ++ if (action == EF10_FILTER_ADD_REFRESH) { ++ ef10_filter_set_entry_not_auto_old(eftp, ins_index); ++ goto out_unlock; ++ } else if (action == EF10_FILTER_ADD_STORE) { ++ EFSYS_ASSERT(overridden_spec != NULL); ++ saved_spec->efs_overridden_spec = overridden_spec; ++ goto out_unlock; ++ } ++ ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ ++ switch (action) { ++ case EF10_FILTER_ADD_REPLACE: ++ /* ++ * On replacing the filter handle may change after a ++ * successful replace operation. ++ */ ++ rc = efx_mcdi_filter_op_add(enp, spec, ++ MC_CMD_FILTER_OP_IN_OP_REPLACE, ++ &eftp->eft_entry[ins_index].efe_handle); ++ break; ++ case EF10_FILTER_ADD_NEW: ++ if (ef10_filter_is_exclusive(spec)) { ++ rc = efx_mcdi_filter_op_add(enp, spec, ++ MC_CMD_FILTER_OP_IN_OP_INSERT, ++ &eftp->eft_entry[ins_index].efe_handle); ++ } else { ++ rc = efx_mcdi_filter_op_add(enp, spec, ++ MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE, ++ &eftp->eft_entry[ins_index].efe_handle); ++ } ++ break; ++ default: ++ rc = EINVAL; ++ EFSYS_ASSERT(0); ++ break; ++ } ++ if (rc != 0) ++ goto fail1; ++ ++ EFSYS_LOCK(enp->en_eslp, state); ++ ++ if (action == EF10_FILTER_ADD_REPLACE) { ++ /* Update the fields that may differ */ ++ saved_spec->efs_priority = spec->efs_priority; ++ saved_spec->efs_flags = spec->efs_flags; ++ saved_spec->efs_rss_context = spec->efs_rss_context; ++ saved_spec->efs_dmaq_id = spec->efs_dmaq_id; ++ ++ if (overridden_spec != NULL) ++ saved_spec->efs_overridden_spec = overridden_spec; ++ } ++ ++out_unlock: ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ ++ return (0); ++ ++fail1: ++ EFSYS_PROBE1(fail1, efx_rc_t, rc); ++ ++ return (rc); ++} ++ + /* + * An arbitrary search limit for the software hash table. As per the linux net + * driver. +@@ -607,17 +832,17 @@ static __checkReturn efx_rc_t + ef10_filter_add_internal( + __in efx_nic_t *enp, + __inout efx_filter_spec_t *spec, +- __in boolean_t may_replace, ++ __in efx_filter_replacement_policy_t policy, + __out_opt uint32_t *filter_id) + { + efx_rc_t rc; + ef10_filter_table_t *eftp = enp->en_filter.ef_ef10_filter_table; ++ enum ef10_filter_add_action_e action; ++ efx_filter_spec_t *overridden_spec = NULL; + efx_filter_spec_t *saved_spec; + uint32_t hash; + unsigned int depth; + int ins_index; +- boolean_t replacing = B_FALSE; +- unsigned int i; + efsys_lock_state_t state; + boolean_t locked = B_FALSE; + +@@ -625,6 +850,8 @@ ef10_filter_add_internal( + enp->en_family == EFX_FAMILY_MEDFORD || + enp->en_family == EFX_FAMILY_MEDFORD2); + ++ EFSYS_ASSERT(spec->efs_overridden_spec == NULL); ++ + hash = ef10_filter_hash(spec); + + /* +@@ -637,145 +864,136 @@ ef10_filter_add_internal( + * else a free slot to insert at. If any of them are busy, + * we have to wait and retry. 
+ */ +- for (;;) { +- ins_index = -1; +- depth = 1; +- EFSYS_LOCK(enp->en_eslp, state); +- locked = B_TRUE; ++retry: ++ EFSYS_LOCK(enp->en_eslp, state); ++ locked = B_TRUE; ++ ++ ins_index = -1; ++ ++ for (depth = 1; depth <= EF10_FILTER_SEARCH_LIMIT; depth++) { ++ unsigned int probe_index; ++ efx_filter_spec_t *probe_spec; + +- for (;;) { +- i = (hash + depth) & (EFX_EF10_FILTER_TBL_ROWS - 1); +- saved_spec = ef10_filter_entry_spec(eftp, i); +- +- if (!saved_spec) { +- if (ins_index < 0) { +- ins_index = i; +- } +- } else if (ef10_filter_equal(spec, saved_spec)) { +- if (ef10_filter_entry_is_busy(eftp, i)) +- break; +- if (saved_spec->efs_priority +- == EFX_FILTER_PRI_AUTO) { +- ins_index = i; +- goto found; +- } else if (ef10_filter_is_exclusive(spec)) { +- if (may_replace) { +- ins_index = i; +- goto found; +- } else { +- rc = EEXIST; +- goto fail1; +- } +- } +- +- /* Leave existing */ ++ probe_index = (hash + depth) & (EFX_EF10_FILTER_TBL_ROWS - 1); ++ probe_spec = ef10_filter_entry_spec(eftp, probe_index); ++ ++ if (probe_spec == NULL) { ++ if (ins_index < 0) ++ ins_index = probe_index; ++ } else if (ef10_filter_equal(spec, probe_spec)) { ++ boolean_t found; ++ ++ if (ef10_filter_entry_is_busy(eftp, probe_index)) { ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ locked = B_FALSE; ++ goto retry; + } + +- /* +- * Once we reach the maximum search depth, use +- * the first suitable slot or return EBUSY if +- * there was none. +- */ +- if (depth == EF10_FILTER_SEARCH_LIMIT) { +- if (ins_index < 0) { +- rc = EBUSY; +- goto fail2; +- } +- goto found; ++ rc = ef10_filter_add_lookup_equal_spec(spec, ++ probe_spec, policy, &found); ++ if (rc != 0) ++ goto fail1; ++ ++ if (found != B_FALSE) { ++ ins_index = probe_index; ++ break; + } +- depth++; + } +- EFSYS_UNLOCK(enp->en_eslp, state); +- locked = B_FALSE; + } + +-found: + /* +- * Create a software table entry if necessary, and mark it +- * busy. We might yet fail to insert, but any attempt to +- * insert a conflicting filter while we're waiting for the +- * firmware must find the busy entry. ++ * Once we reach the maximum search depth, use the first suitable slot ++ * or return EBUSY if there was none. + */ +- saved_spec = ef10_filter_entry_spec(eftp, ins_index); +- if (saved_spec) { +- if (saved_spec->efs_priority == EFX_FILTER_PRI_AUTO) { +- /* This is a filter we are refreshing */ +- ef10_filter_set_entry_not_auto_old(eftp, ins_index); +- goto out_unlock; +- +- } +- replacing = B_TRUE; +- } else { +- EFSYS_KMEM_ALLOC(enp->en_esip, sizeof (*spec), saved_spec); +- if (!saved_spec) { +- rc = ENOMEM; +- goto fail3; +- } +- *saved_spec = *spec; +- ef10_filter_set_entry(eftp, ins_index, saved_spec); ++ if (ins_index < 0) { ++ rc = EBUSY; ++ goto fail2; + } ++ ++ /* ++ * Mark software table entry busy. We might yet fail to insert, ++ * but any attempt to insert a conflicting filter while we're ++ * waiting for the firmware must find the busy entry. ++ */ + ef10_filter_set_entry_busy(eftp, ins_index); + +- EFSYS_UNLOCK(enp->en_eslp, state); +- locked = B_FALSE; ++ saved_spec = ef10_filter_entry_spec(eftp, ins_index); ++ ef10_filter_add_select_action(saved_spec, spec, &action, ++ &overridden_spec); + + /* +- * On replacing the filter handle may change after after a successful +- * replace operation. ++ * Allocate a new filter if found entry is empty or ++ * a filter should be overridden. 
+ */ +- if (replacing) { +- rc = efx_mcdi_filter_op_add(enp, spec, +- MC_CMD_FILTER_OP_IN_OP_REPLACE, +- &eftp->eft_entry[ins_index].efe_handle); +- } else if (ef10_filter_is_exclusive(spec)) { +- rc = efx_mcdi_filter_op_add(enp, spec, +- MC_CMD_FILTER_OP_IN_OP_INSERT, +- &eftp->eft_entry[ins_index].efe_handle); +- } else { +- rc = efx_mcdi_filter_op_add(enp, spec, +- MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE, +- &eftp->eft_entry[ins_index].efe_handle); +- } +- +- if (rc != 0) +- goto fail4; ++ if (overridden_spec != NULL || saved_spec == NULL) { ++ efx_filter_spec_t *new_spec; + +- EFSYS_LOCK(enp->en_eslp, state); +- locked = B_TRUE; ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ locked = B_FALSE; + +- if (replacing) { +- /* Update the fields that may differ */ +- saved_spec->efs_priority = spec->efs_priority; +- saved_spec->efs_flags = spec->efs_flags; +- saved_spec->efs_rss_context = spec->efs_rss_context; +- saved_spec->efs_dmaq_id = spec->efs_dmaq_id; +- } ++ EFSYS_KMEM_ALLOC(enp->en_esip, sizeof (*new_spec), new_spec); ++ if (new_spec == NULL) { ++ rc = ENOMEM; ++ overridden_spec = NULL; ++ goto fail3; ++ } + +- ef10_filter_set_entry_not_busy(eftp, ins_index); ++ EFSYS_LOCK(enp->en_eslp, state); ++ locked = B_TRUE; + +-out_unlock: ++ if (saved_spec == NULL) { ++ *new_spec = *spec; ++ ef10_filter_set_entry(eftp, ins_index, new_spec); ++ } else { ++ *new_spec = *overridden_spec; ++ overridden_spec = new_spec; ++ } ++ } + + EFSYS_UNLOCK(enp->en_eslp, state); + locked = B_FALSE; + ++ rc = ef10_filter_add_execute_action(enp, saved_spec, spec, ++ overridden_spec, action, ins_index); ++ if (rc != 0) ++ goto fail4; ++ + if (filter_id) + *filter_id = ins_index; + ++ EFSYS_LOCK(enp->en_eslp, state); ++ ef10_filter_set_entry_not_busy(eftp, ins_index); ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ + return (0); + + fail4: + EFSYS_PROBE(fail4); + +- if (!replacing) { +- EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), saved_spec); +- saved_spec = NULL; ++ EFSYS_ASSERT(locked == B_FALSE); ++ EFSYS_LOCK(enp->en_eslp, state); ++ ++ if (action == EF10_FILTER_ADD_NEW) { ++ EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), ++ ef10_filter_entry_spec(eftp, ins_index)); ++ ef10_filter_set_entry(eftp, ins_index, NULL); + } +- ef10_filter_set_entry_not_busy(eftp, ins_index); +- ef10_filter_set_entry(eftp, ins_index, NULL); ++ ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ ++ if (overridden_spec != NULL) ++ EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), overridden_spec); + + fail3: + EFSYS_PROBE(fail3); + ++ EFSYS_ASSERT(locked == B_FALSE); ++ EFSYS_LOCK(enp->en_eslp, state); ++ ++ ef10_filter_set_entry_not_busy(eftp, ins_index); ++ ++ EFSYS_UNLOCK(enp->en_eslp, state); ++ + fail2: + EFSYS_PROBE(fail2); + +@@ -792,11 +1010,11 @@ ef10_filter_add_internal( + ef10_filter_add( + __in efx_nic_t *enp, + __inout efx_filter_spec_t *spec, +- __in boolean_t may_replace) ++ __in enum efx_filter_replacement_policy_e policy) + { + efx_rc_t rc; + +- rc = ef10_filter_add_internal(enp, spec, may_replace, NULL); ++ rc = ef10_filter_add_internal(enp, spec, policy, NULL); + if (rc != 0) + goto fail1; + +@@ -808,11 +1026,15 @@ ef10_filter_add( + return (rc); + } + +- ++/* ++ * Delete a filter by index from the filter table with priority ++ * that is not higher than specified. 
++ */ + static __checkReturn efx_rc_t + ef10_filter_delete_internal( + __in efx_nic_t *enp, +- __in uint32_t filter_id) ++ __in uint32_t filter_id, ++ __in efx_filter_priority_t priority) + { + efx_rc_t rc; + ef10_filter_table_t *table = enp->en_filter.ef_ef10_filter_table; +@@ -834,7 +1056,8 @@ ef10_filter_delete_internal( + EFSYS_LOCK(enp->en_eslp, state); + } + if ((spec = ef10_filter_entry_spec(table, filter_idx)) != NULL) { +- ef10_filter_set_entry_busy(table, filter_idx); ++ if (spec->efs_priority <= priority) ++ ef10_filter_set_entry_busy(table, filter_idx); + } + EFSYS_UNLOCK(enp->en_eslp, state); + +@@ -843,31 +1066,53 @@ ef10_filter_delete_internal( + goto fail1; + } + +- /* +- * Try to remove the hardware filter. This may fail if the MC has +- * rebooted (which frees all hardware filter resources). +- */ +- if (ef10_filter_is_exclusive(spec)) { +- rc = efx_mcdi_filter_op_delete(enp, +- MC_CMD_FILTER_OP_IN_OP_REMOVE, +- &table->eft_entry[filter_idx].efe_handle); ++ if (spec->efs_priority > priority) { ++ /* ++ * Applied filter stays, but overridden filter is removed since ++ * next user request to delete the applied filter should not ++ * restore outdated filter. ++ */ ++ if (spec->efs_overridden_spec != NULL) { ++ EFSYS_ASSERT(spec->efs_overridden_spec->efs_overridden_spec == ++ NULL); ++ EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), ++ spec->efs_overridden_spec); ++ spec->efs_overridden_spec = NULL; ++ } + } else { +- rc = efx_mcdi_filter_op_delete(enp, +- MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE, +- &table->eft_entry[filter_idx].efe_handle); +- } ++ /* ++ * Try to remove the hardware filter or replace it with the ++ * saved automatic filter. This may fail if the MC has ++ * rebooted (which frees all hardware filter resources). ++ */ ++ if (spec->efs_overridden_spec != NULL) { ++ rc = efx_mcdi_filter_op_add(enp, ++ spec->efs_overridden_spec, ++ MC_CMD_FILTER_OP_IN_OP_REPLACE, ++ &table->eft_entry[filter_idx].efe_handle); ++ } else if (ef10_filter_is_exclusive(spec)) { ++ rc = efx_mcdi_filter_op_delete(enp, ++ MC_CMD_FILTER_OP_IN_OP_REMOVE, ++ &table->eft_entry[filter_idx].efe_handle); ++ } else { ++ rc = efx_mcdi_filter_op_delete(enp, ++ MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE, ++ &table->eft_entry[filter_idx].efe_handle); ++ } + +- /* Free the software table entry */ +- EFSYS_LOCK(enp->en_eslp, state); +- ef10_filter_set_entry_not_busy(table, filter_idx); +- ef10_filter_set_entry(table, filter_idx, NULL); +- EFSYS_UNLOCK(enp->en_eslp, state); ++ /* Free the software table entry */ ++ EFSYS_LOCK(enp->en_eslp, state); ++ ef10_filter_set_entry_not_busy(table, filter_idx); ++ ef10_filter_set_entry(table, filter_idx, ++ spec->efs_overridden_spec); ++ EFSYS_UNLOCK(enp->en_eslp, state); + +- EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), spec); ++ EFSYS_KMEM_FREE(enp->en_esip, sizeof (*spec), spec); + +- /* Check result of hardware filter removal */ +- if (rc != 0) +- goto fail2; ++ /* Check result of hardware filter removal */ ++ if (rc != 0) ++ goto fail2; ++ } + + return (0); + +@@ -880,6 +1125,25 @@ ef10_filter_delete_internal( + return (rc); + } + ++static void ++ef10_filter_delete_auto( ++ __in efx_nic_t *enp, ++ __in uint32_t filter_id) ++{ ++ ef10_filter_table_t *table = enp->en_filter.ef_ef10_filter_table; ++ uint32_t filter_idx = filter_id % EFX_EF10_FILTER_TBL_ROWS; ++ ++ /* ++ * AUTO_OLD flag is cleared since the auto filter that is to be removed ++ * may not be the filter at the specified index itself, but the filter ++ * that is overridden by it. 
++ */ ++ ef10_filter_set_entry_not_auto_old(table, filter_idx); ++ ++ (void) ef10_filter_delete_internal(enp, filter_idx, ++ EFX_FILTER_PRI_AUTO); ++} ++ + __checkReturn efx_rc_t + ef10_filter_delete( + __in efx_nic_t *enp, +@@ -908,7 +1172,8 @@ ef10_filter_delete( + i = (hash + depth) & (EFX_EF10_FILTER_TBL_ROWS - 1); + saved_spec = ef10_filter_entry_spec(table, i); + if (saved_spec && ef10_filter_equal(spec, saved_spec) && +- ef10_filter_same_dest(spec, saved_spec)) { ++ ef10_filter_same_dest(spec, saved_spec) && ++ saved_spec->efs_priority == EFX_FILTER_PRI_MANUAL) { + break; + } + if (depth == EF10_FILTER_SEARCH_LIMIT) { +@@ -921,7 +1186,7 @@ ef10_filter_delete( + EFSYS_UNLOCK(enp->en_eslp, state); + locked = B_FALSE; + +- rc = ef10_filter_delete_internal(enp, i); ++ rc = ef10_filter_delete_internal(enp, i, EFX_FILTER_PRI_MANUAL); + if (rc != 0) + goto fail2; + +@@ -1146,7 +1411,7 @@ ef10_filter_insert_unicast( + if (rc != 0) + goto fail1; + +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, ++ rc = ef10_filter_add_internal(enp, &spec, EFX_FILTER_REPLACEMENT_NEVER, + &eftp->eft_unicst_filter_indexes[eftp->eft_unicst_filter_count]); + if (rc != 0) + goto fail2; +@@ -1180,7 +1445,7 @@ ef10_filter_insert_all_unicast( + rc = efx_filter_spec_set_uc_def(&spec); + if (rc != 0) + goto fail1; +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, ++ rc = ef10_filter_add_internal(enp, &spec, EFX_FILTER_REPLACEMENT_NEVER, + &eftp->eft_unicst_filter_indexes[eftp->eft_unicst_filter_count]); + if (rc != 0) + goto fail2; +@@ -1250,8 +1515,8 @@ ef10_filter_insert_multicast_list( + } + } + +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, +- &filter_index); ++ rc = ef10_filter_add_internal(enp, &spec, ++ EFX_FILTER_REPLACEMENT_NEVER, &filter_index); + + if (rc == 0) { + eftp->eft_mulcst_filter_indexes[filter_count] = +@@ -1278,8 +1543,8 @@ ef10_filter_insert_multicast_list( + goto rollback; + } + +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, +- &filter_index); ++ rc = ef10_filter_add_internal(enp, &spec, ++ EFX_FILTER_REPLACEMENT_NEVER, &filter_index); + + if (rc == 0) { + eftp->eft_mulcst_filter_indexes[filter_count] = +@@ -1300,7 +1565,7 @@ ef10_filter_insert_multicast_list( + /* Remove any filters we have inserted */ + i = filter_count; + while (i--) { +- (void) ef10_filter_delete_internal(enp, ++ ef10_filter_delete_auto(enp, + eftp->eft_mulcst_filter_indexes[i]); + } + eftp->eft_mulcst_filter_count = 0; +@@ -1328,7 +1593,7 @@ ef10_filter_insert_all_multicast( + if (rc != 0) + goto fail1; + +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, ++ rc = ef10_filter_add_internal(enp, &spec, EFX_FILTER_REPLACEMENT_NEVER, + &eftp->eft_mulcst_filter_indexes[0]); + if (rc != 0) + goto fail2; +@@ -1431,8 +1696,9 @@ ef10_filter_insert_encap_filters( + if (rc != 0) + goto fail1; + +- rc = ef10_filter_add_internal(enp, &spec, B_TRUE, +- &table->eft_encap_filter_indexes[ ++ rc = ef10_filter_add_internal(enp, &spec, ++ EFX_FILTER_REPLACEMENT_NEVER, ++ &table->eft_encap_filter_indexes[ + table->eft_encap_filter_count]); + if (rc != 0) { + if (rc != EACCES) +@@ -1461,7 +1727,7 @@ ef10_filter_remove_old( + + for (i = 0; i < EFX_ARRAY_SIZE(table->eft_entry); i++) { + if (ef10_filter_entry_is_auto_old(table, i)) { +- (void) ef10_filter_delete_internal(enp, i); ++ ef10_filter_delete_auto(enp, i); + } + } + } +@@ -1536,19 +1802,19 @@ ef10_filter_reconfigure( + * has rebooted, which removes hardware filters). 
+ */ + for (i = 0; i < table->eft_unicst_filter_count; i++) { +- (void) ef10_filter_delete_internal(enp, ++ ef10_filter_delete_auto(enp, + table->eft_unicst_filter_indexes[i]); + } + table->eft_unicst_filter_count = 0; + + for (i = 0; i < table->eft_mulcst_filter_count; i++) { +- (void) ef10_filter_delete_internal(enp, ++ ef10_filter_delete_auto(enp, + table->eft_mulcst_filter_indexes[i]); + } + table->eft_mulcst_filter_count = 0; + + for (i = 0; i < table->eft_encap_filter_count; i++) { +- (void) ef10_filter_delete_internal(enp, ++ ef10_filter_delete_auto(enp, + table->eft_encap_filter_indexes[i]); + } + table->eft_encap_filter_count = 0; +diff --git a/dpdk/drivers/net/sfc/base/ef10_impl.h b/dpdk/drivers/net/sfc/base/ef10_impl.h +index 6f5d0f9aae..4183325c0f 100644 +--- a/dpdk/drivers/net/sfc/base/ef10_impl.h ++++ b/dpdk/drivers/net/sfc/base/ef10_impl.h +@@ -1030,6 +1030,8 @@ ef10_rx_fini( + + #if EFSYS_OPT_FILTER + ++enum efx_filter_replacement_policy_e; ++ + typedef struct ef10_filter_handle_s { + uint32_t efh_lo; + uint32_t efh_hi; +@@ -1099,7 +1101,7 @@ ef10_filter_restore( + ef10_filter_add( + __in efx_nic_t *enp, + __inout efx_filter_spec_t *spec, +- __in boolean_t may_replace); ++ __in enum efx_filter_replacement_policy_e policy); + + __checkReturn efx_rc_t + ef10_filter_delete( +diff --git a/dpdk/drivers/net/sfc/base/efx.h b/dpdk/drivers/net/sfc/base/efx.h +index 444f6d1db3..9babf8d40b 100644 +--- a/dpdk/drivers/net/sfc/base/efx.h ++++ b/dpdk/drivers/net/sfc/base/efx.h +@@ -2867,17 +2867,15 @@ typedef uint8_t efx_filter_flags_t; + + typedef uint32_t efx_filter_match_flags_t; + ++/* Filter priority from lowest to highest */ + typedef enum efx_filter_priority_s { +- EFX_FILTER_PRI_HINT = 0, /* Performance hint */ +- EFX_FILTER_PRI_AUTO, /* Automatic filter based on device ++ EFX_FILTER_PRI_AUTO = 0, /* Automatic filter based on device + * address list or hardware + * requirements. This may only be used + * by the filter implementation for + * each NIC type. */ + EFX_FILTER_PRI_MANUAL, /* Manually configured filter */ +- EFX_FILTER_PRI_REQUIRED, /* Required for correct behaviour of the +- * client (e.g. SR-IOV, HyperV VMQ etc.) +- */ ++ EFX_FILTER_NPRI, + } efx_filter_priority_t; + + /* +@@ -2892,6 +2890,11 @@ typedef struct efx_filter_spec_s { + uint16_t efs_dmaq_id; + uint32_t efs_rss_context; + uint32_t efs_mark; ++ /* ++ * Saved lower-priority filter. If it is set, it is restored on ++ * filter delete operation. 
++ */
++ struct efx_filter_spec_s *efs_overridden_spec;
+ /* Fields below here are hashed for software filter lookup */
+ uint16_t efs_outer_vid;
+ uint16_t efs_inner_vid;
+diff --git a/dpdk/drivers/net/sfc/base/efx_filter.c b/dpdk/drivers/net/sfc/base/efx_filter.c
+index a7523b38b6..f70717cbc2 100644
+--- a/dpdk/drivers/net/sfc/base/efx_filter.c
++++ b/dpdk/drivers/net/sfc/base/efx_filter.c
+@@ -28,7 +28,7 @@ static __checkReturn efx_rc_t
+ siena_filter_add(
+ __in efx_nic_t *enp,
+ __inout efx_filter_spec_t *spec,
+- __in boolean_t may_replace);
++ __in efx_filter_replacement_policy_t policy);
+
+ static __checkReturn efx_rc_t
+ siena_filter_delete(
+@@ -93,8 +93,16 @@ efx_filter_insert(
+ goto fail2;
+ }
+
+- return (efop->efo_add(enp, spec, B_FALSE));
++ if (spec->efs_priority == EFX_FILTER_PRI_AUTO) {
++ rc = EINVAL;
++ goto fail3;
++ }
+
++ return (efop->efo_add(enp, spec,
++ EFX_FILTER_REPLACEMENT_HIGHER_PRIORITY));
++
++fail3:
++ EFSYS_PROBE(fail3);
+ fail2:
+ EFSYS_PROBE(fail2);
+ fail1:
+@@ -314,7 +322,7 @@ efx_filter_spec_init_tx(
+ EFSYS_ASSERT3P(etp, !=, NULL);
+
+ memset(spec, 0, sizeof (*spec));
+- spec->efs_priority = EFX_FILTER_PRI_REQUIRED;
++ spec->efs_priority = EFX_FILTER_PRI_MANUAL;
+ spec->efs_flags = EFX_FILTER_FLAG_TX;
+ spec->efs_dmaq_id = (uint16_t)etp->et_index;
+ }
+@@ -1437,7 +1445,7 @@ static __checkReturn efx_rc_t
+ siena_filter_add(
+ __in efx_nic_t *enp,
+ __inout efx_filter_spec_t *spec,
+- __in boolean_t may_replace)
++ __in efx_filter_replacement_policy_t policy)
+ {
+ efx_rc_t rc;
+ siena_filter_spec_t sf_spec;
+@@ -1478,9 +1486,17 @@ siena_filter_add(
+ saved_sf_spec = &sftp->sft_spec[filter_idx];
+
+ if (siena_filter_test_used(sftp, filter_idx)) {
+- if (may_replace == B_FALSE) {
++ /* All Siena filters are considered the same priority */
++ switch (policy) {
++ case EFX_FILTER_REPLACEMENT_NEVER:
++ case EFX_FILTER_REPLACEMENT_HIGHER_PRIORITY:
+ rc = EEXIST;
+ goto fail4;
++ case EFX_FILTER_REPLACEMENT_HIGHER_OR_EQUAL_PRIORITY:
++ break;
++ default:
++ EFSYS_ASSERT(0);
++ break;
+ }
+ }
+ siena_filter_set_used(sftp, filter_idx);
+diff --git a/dpdk/drivers/net/sfc/base/efx_impl.h b/dpdk/drivers/net/sfc/base/efx_impl.h
+index bad23f8198..a61b9460f0 100644
+--- a/dpdk/drivers/net/sfc/base/efx_impl.h
++++ b/dpdk/drivers/net/sfc/base/efx_impl.h
+@@ -240,12 +240,31 @@ typedef struct efx_phy_ops_s {
+ } efx_phy_ops_t;
+
+ #if EFSYS_OPT_FILTER
++
++/*
++ * Policy for replacing an existing filter when inserting a new one.
++ * Note that all policies allow for storing the new lower priority
++ * filters as overridden by existing higher priority ones. This is
++ * needed to restore the lower priority filters when the higher
++ * priority ones are removed.
++ */
++typedef enum efx_filter_replacement_policy_e {
++ /* Cannot replace existing filter */
++ EFX_FILTER_REPLACEMENT_NEVER,
++ /* Higher priority filters can replace lower priority ones */
++ EFX_FILTER_REPLACEMENT_HIGHER_PRIORITY,
++ /*
++ * Higher priority filters can replace lower priority ones and
++ * equal priority filters can replace each other.
++ */ ++ EFX_FILTER_REPLACEMENT_HIGHER_OR_EQUAL_PRIORITY, ++} efx_filter_replacement_policy_t; ++ + typedef struct efx_filter_ops_s { + efx_rc_t (*efo_init)(efx_nic_t *); + void (*efo_fini)(efx_nic_t *); + efx_rc_t (*efo_restore)(efx_nic_t *); + efx_rc_t (*efo_add)(efx_nic_t *, efx_filter_spec_t *, +- boolean_t may_replace); ++ efx_filter_replacement_policy_t policy); + efx_rc_t (*efo_delete)(efx_nic_t *, efx_filter_spec_t *); + efx_rc_t (*efo_supported_filters)(efx_nic_t *, uint32_t *, + size_t, size_t *); +diff --git a/dpdk/drivers/net/sfc/sfc.c b/dpdk/drivers/net/sfc/sfc.c +index d056d12164..48d95845d6 100644 +--- a/dpdk/drivers/net/sfc/sfc.c ++++ b/dpdk/drivers/net/sfc/sfc.c +@@ -240,8 +240,8 @@ sfc_estimate_resource_limits(struct sfc_adapter *sa) + return 0; + + fail_get_vi_pool: +-fail_nic_init: + efx_nic_fini(sa->nic); ++fail_nic_init: + return rc; + } + +diff --git a/dpdk/drivers/net/sfc/sfc_rx.c b/dpdk/drivers/net/sfc/sfc_rx.c +index 960ab62c19..2d506c3860 100644 +--- a/dpdk/drivers/net/sfc/sfc_rx.c ++++ b/dpdk/drivers/net/sfc/sfc_rx.c +@@ -646,6 +646,7 @@ sfc_rx_default_rxq_set_filter(struct sfc_adapter *sa, struct sfc_rxq *rxq) + sfc_warn(sa, "promiscuous mode will be disabled"); + + port->promisc = B_FALSE; ++ sa->eth_dev->data->promiscuous = 0; + rc = sfc_set_rx_mode(sa); + if (rc != 0) + return rc; +@@ -659,6 +660,7 @@ sfc_rx_default_rxq_set_filter(struct sfc_adapter *sa, struct sfc_rxq *rxq) + sfc_warn(sa, "all-multicast mode will be disabled"); + + port->allmulti = B_FALSE; ++ sa->eth_dev->data->all_multicast = 0; + rc = sfc_set_rx_mode(sa); + if (rc != 0) + return rc; +@@ -748,10 +750,12 @@ sfc_rx_qstart(struct sfc_adapter *sa, unsigned int sw_index) + return 0; + + fail_mac_filter_default_rxq_set: ++ sfc_rx_qflush(sa, sw_index); + sa->dp_rx->qstop(rxq->dp, &rxq->evq->read_ptr); ++ rxq->state = SFC_RXQ_INITIALIZED; + + fail_dp_qstart: +- sfc_rx_qflush(sa, sw_index); ++ efx_rx_qdestroy(rxq->common); + + fail_rx_qcreate: + fail_bad_contig_block_size: +diff --git a/dpdk/drivers/net/softnic/rte_eth_softnic_thread.c b/dpdk/drivers/net/softnic/rte_eth_softnic_thread.c +index 4572adfa6a..6a888d20ec 100644 +--- a/dpdk/drivers/net/softnic/rte_eth_softnic_thread.c ++++ b/dpdk/drivers/net/softnic/rte_eth_softnic_thread.c +@@ -267,8 +267,6 @@ softnic_thread_pipeline_enable(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = thread_msg_send_recv(softnic, thread_id, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -355,8 +353,6 @@ softnic_thread_pipeline_disable(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = thread_msg_send_recv(softnic, thread_id, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -751,8 +747,6 @@ softnic_pipeline_port_in_stats_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -800,8 +794,6 @@ softnic_pipeline_port_in_enable(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -847,8 +839,6 @@ softnic_pipeline_port_in_disable(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = 
rsp->status; +@@ -902,8 +892,6 @@ softnic_pipeline_port_out_stats_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -959,8 +947,6 @@ softnic_pipeline_table_stats_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1239,8 +1225,6 @@ softnic_pipeline_table_rule_add(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1323,8 +1307,6 @@ softnic_pipeline_table_rule_add_default(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1481,8 +1463,6 @@ softnic_pipeline_table_rule_add_bulk(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1546,8 +1526,6 @@ softnic_pipeline_table_rule_delete(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1596,8 +1574,6 @@ softnic_pipeline_table_rule_delete_default(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1656,8 +1632,6 @@ softnic_pipeline_table_rule_stats_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1737,10 +1711,6 @@ softnic_pipeline_table_mtr_profile_add(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) { +- free(mp); +- return -1; +- } + + /* Read response */ + status = rsp->status; +@@ -1796,8 +1766,6 @@ softnic_pipeline_table_mtr_profile_delete(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1859,8 +1827,6 @@ softnic_pipeline_table_rule_mtr_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1924,8 +1890,6 @@ softnic_pipeline_table_dscp_table_update(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1989,8 +1953,6 @@ softnic_pipeline_table_rule_ttl_read(struct pmd_internals *softnic, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +diff --git a/dpdk/drivers/net/tap/rte_eth_tap.c b/dpdk/drivers/net/tap/rte_eth_tap.c +index 3a8aa44a73..62fb542865 100644 +--- 
a/dpdk/drivers/net/tap/rte_eth_tap.c ++++ b/dpdk/drivers/net/tap/rte_eth_tap.c +@@ -18,8 +18,8 @@ + #include <rte_string_fns.h> + #include <rte_ethdev.h> + #include <rte_errno.h> ++#include <rte_cycles.h> + +-#include <assert.h> + #include <sys/types.h> + #include <sys/stat.h> + #include <sys/socket.h> +@@ -341,6 +341,23 @@ tap_rx_offload_get_queue_capa(void) + DEV_RX_OFFLOAD_TCP_CKSUM; + } + ++static void ++tap_rxq_pool_free(struct rte_mbuf *pool) ++{ ++ struct rte_mbuf *mbuf = pool; ++ uint16_t nb_segs = 1; ++ ++ if (mbuf == NULL) ++ return; ++ ++ while (mbuf->next) { ++ mbuf = mbuf->next; ++ nb_segs++; ++ } ++ pool->nb_segs = nb_segs; ++ rte_pktmbuf_free(pool); ++} ++ + /* Callback to handle the rx burst of packets to the correct interface and + * file descriptor(s) in a multi-queue setup. + */ +@@ -391,7 +408,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + goto end; + + seg->next = NULL; +- rte_pktmbuf_free(mbuf); ++ tap_rxq_pool_free(mbuf); + + goto end; + } +@@ -523,7 +540,7 @@ tap_tx_l3_cksum(char *packet, uint64_t ol_flags, unsigned int l2_len, + } + } + +-static inline void ++static inline int + tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, + struct rte_mbuf **pmbufs, + uint16_t *num_packets, unsigned long *num_tx_bytes) +@@ -590,7 +607,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, + seg_len = rte_pktmbuf_data_len(mbuf); + l234_hlen = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len; + if (seg_len < l234_hlen) +- break; ++ return -1; + + /* To change checksums, work on a * copy of l2, l3 + * headers + l4 pseudo header +@@ -636,10 +653,12 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs, + /* copy the tx frame data */ + n = writev(process_private->txq_fds[txq->queue_id], iovecs, j); + if (n <= 0) +- break; ++ return -1; ++ + (*num_packets)++; + (*num_tx_bytes) += rte_pktmbuf_pkt_len(mbuf); + } ++ return 0; + } + + /* Callback to handle sending packets from the tap interface +@@ -665,6 +684,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + uint16_t num_mbufs = 0; + uint16_t tso_segsz = 0; + int ret; ++ int num_tso_mbufs; + uint16_t hdrs_len; + int j; + uint64_t tso; +@@ -673,8 +693,6 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + if (tso) { + struct rte_gso_ctx *gso_ctx = &txq->gso_ctx; + +- assert(gso_ctx != NULL); +- + /* TCP segmentation implies TCP checksum offload */ + mbuf_in->ol_flags |= PKT_TX_TCP_CKSUM; + +@@ -688,35 +706,43 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + break; + } + gso_ctx->gso_size = tso_segsz; +- ret = rte_gso_segment(mbuf_in, /* packet to segment */ ++ /* 'mbuf_in' packet to segment */ ++ num_tso_mbufs = rte_gso_segment(mbuf_in, + gso_ctx, /* gso control block */ + (struct rte_mbuf **)&gso_mbufs, /* out mbufs */ + RTE_DIM(gso_mbufs)); /* max tso mbufs */ + + /* ret contains the number of new created mbufs */ +- if (ret < 0) ++ if (num_tso_mbufs < 0) + break; + + mbuf = gso_mbufs; +- num_mbufs = ret; ++ num_mbufs = num_tso_mbufs; + } else { + /* stats.errs will be incremented */ + if (rte_pktmbuf_pkt_len(mbuf_in) > max_size) + break; + + /* ret 0 indicates no new mbufs were created */ +- ret = 0; ++ num_tso_mbufs = 0; + mbuf = &mbuf_in; + num_mbufs = 1; + } + +- tap_write_mbufs(txq, num_mbufs, mbuf, ++ ret = tap_write_mbufs(txq, num_mbufs, mbuf, + &num_packets, &num_tx_bytes); ++ if (ret == -1) { ++ txq->stats.errs++; ++ /* free tso mbufs */ ++ for (j = 0; j < num_tso_mbufs; j++) ++ rte_pktmbuf_free(mbuf[j]); ++ 
break; ++ } + num_tx++; + /* free original mbuf */ + rte_pktmbuf_free(mbuf_in); + /* free tso mbufs */ +- for (j = 0; j < ret; j++) ++ for (j = 0; j < num_tso_mbufs; j++) + rte_pktmbuf_free(mbuf[j]); + } + +@@ -724,7 +750,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) + txq->stats.errs += nb_pkts - num_tx; + txq->stats.obytes += num_tx_bytes; + +- return num_packets; ++ return num_tx; + } + + static const char * +@@ -780,7 +806,7 @@ tap_ioctl(struct pmd_internals *pmd, unsigned long request, + case SIOCSIFMTU: + break; + default: +- RTE_LOG(WARNING, PMD, "%s: ioctl() called with wrong arg\n", ++ TAP_LOG(WARNING, "%s: ioctl() called with wrong arg", + pmd->name); + return -EINVAL; + } +@@ -1010,15 +1036,25 @@ tap_dev_close(struct rte_eth_dev *dev) + int i; + struct pmd_internals *internals = dev->data->dev_private; + struct pmd_process_private *process_private = dev->process_private; ++ struct rx_queue *rxq; + + tap_link_set_down(dev); +- tap_flow_flush(dev, NULL); +- tap_flow_implicit_flush(internals, NULL); ++ if (internals->nlsk_fd != -1) { ++ tap_flow_flush(dev, NULL); ++ tap_flow_implicit_flush(internals, NULL); ++ tap_nl_final(internals->nlsk_fd); ++ internals->nlsk_fd = -1; ++ } + + for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { + if (process_private->rxq_fds[i] != -1) { ++ rxq = &internals->rxq[i]; + close(process_private->rxq_fds[i]); + process_private->rxq_fds[i] = -1; ++ tap_rxq_pool_free(rxq->pool); ++ rte_free(rxq->iovecs); ++ rxq->pool = NULL; ++ rxq->iovecs = NULL; + } + if (process_private->txq_fds[i] != -1) { + close(process_private->txq_fds[i]); +@@ -1051,10 +1087,10 @@ tap_rx_queue_release(void *queue) + if (!rxq) + return; + process_private = rte_eth_devices[rxq->in_port].process_private; +- if (process_private->rxq_fds[rxq->queue_id] > 0) { ++ if (process_private->rxq_fds[rxq->queue_id] != -1) { + close(process_private->rxq_fds[rxq->queue_id]); + process_private->rxq_fds[rxq->queue_id] = -1; +- rte_pktmbuf_free(rxq->pool); ++ tap_rxq_pool_free(rxq->pool); + rte_free(rxq->iovecs); + rxq->pool = NULL; + rxq->iovecs = NULL; +@@ -1071,7 +1107,7 @@ tap_tx_queue_release(void *queue) + return; + process_private = rte_eth_devices[txq->out_port].process_private; + +- if (process_private->txq_fds[txq->queue_id] > 0) { ++ if (process_private->txq_fds[txq->queue_id] != -1) { + close(process_private->txq_fds[txq->queue_id]); + process_private->txq_fds[txq->queue_id] = -1; + } +@@ -1232,7 +1268,9 @@ tap_gso_ctx_setup(struct rte_gso_ctx *gso_ctx, struct rte_eth_dev *dev) + SOCKET_ID_ANY); + if (!mp) { + struct pmd_internals *pmd = dev->data->dev_private; +- RTE_LOG(DEBUG, PMD, "%s: failed to create mbuf pool for device %s\n", ++ ++ TAP_LOG(ERR, ++ "%s: failed to create mbuf pool for device %s\n", + pmd->name, dev->device->name); + return -1; + } +@@ -1396,7 +1434,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev, + return 0; + + error: +- rte_pktmbuf_free(rxq->pool); ++ tap_rxq_pool_free(rxq->pool); + rxq->pool = NULL; + rte_free(rxq->iovecs); + rxq->iovecs = NULL; +@@ -1494,13 +1532,12 @@ static int + tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set) + { + struct pmd_internals *pmd = dev->data->dev_private; ++ int ret; + + /* In any case, disable interrupt if the conf is no longer there. 
*/ + if (!dev->data->dev_conf.intr_conf.lsc) { + if (pmd->intr_handle.fd != -1) { +- tap_nl_final(pmd->intr_handle.fd); +- rte_intr_callback_unregister(&pmd->intr_handle, +- tap_dev_intr_handler, dev); ++ goto clean; + } + return 0; + } +@@ -1511,9 +1548,26 @@ tap_lsc_intr_handle_set(struct rte_eth_dev *dev, int set) + return rte_intr_callback_register( + &pmd->intr_handle, tap_dev_intr_handler, dev); + } ++ ++clean: ++ do { ++ ret = rte_intr_callback_unregister(&pmd->intr_handle, ++ tap_dev_intr_handler, dev); ++ if (ret >= 0) { ++ break; ++ } else if (ret == -EAGAIN) { ++ rte_delay_ms(100); ++ } else { ++ TAP_LOG(ERR, "intr callback unregister failed: %d", ++ ret); ++ break; ++ } ++ } while (true); ++ + tap_nl_final(pmd->intr_handle.fd); +- return rte_intr_callback_unregister(&pmd->intr_handle, +- tap_dev_intr_handler, dev); ++ pmd->intr_handle.fd = -1; ++ ++ return 0; + } + + static int +@@ -1714,6 +1768,8 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, + pmd->dev = dev; + snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name); + pmd->type = type; ++ pmd->ka_fd = -1; ++ pmd->nlsk_fd = -1; + + pmd->ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0); + if (pmd->ioctl_sock == -1) { +@@ -1744,7 +1800,6 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, + dev->intr_handle = &pmd->intr_handle; + + /* Presetup the fds to -1 as being not valid */ +- pmd->ka_fd = -1; + for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { + process_private->rxq_fds[i] = -1; + process_private->txq_fds[i] = -1; +@@ -1885,7 +1940,11 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, + tap_flow_implicit_flush(pmd, NULL); + + error_exit: +- if (pmd->ioctl_sock > 0) ++ if (pmd->nlsk_fd != -1) ++ close(pmd->nlsk_fd); ++ if (pmd->ka_fd != -1) ++ close(pmd->ka_fd); ++ if (pmd->ioctl_sock != -1) + close(pmd->ioctl_sock); + /* mac_addrs must not be freed alone because part of dev_private */ + dev->data->mac_addrs = NULL; +@@ -2291,8 +2350,6 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev) + { + struct rte_eth_dev *eth_dev = NULL; + struct pmd_internals *internals; +- struct pmd_process_private *process_private; +- int i; + + /* find the ethdev entry */ + eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev)); +@@ -2305,29 +2362,13 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev) + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return rte_eth_dev_release_port(eth_dev); + +- internals = eth_dev->data->dev_private; +- process_private = eth_dev->process_private; ++ tap_dev_close(eth_dev); + ++ internals = eth_dev->data->dev_private; + TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u", + (internals->type == ETH_TUNTAP_TYPE_TAP) ? 
"TAP" : "TUN", + rte_socket_id()); + +- if (internals->nlsk_fd) { +- tap_flow_flush(eth_dev, NULL); +- tap_flow_implicit_flush(internals, NULL); +- tap_nl_final(internals->nlsk_fd); +- } +- for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) { +- if (process_private->rxq_fds[i] != -1) { +- close(process_private->rxq_fds[i]); +- process_private->rxq_fds[i] = -1; +- } +- if (process_private->txq_fds[i] != -1) { +- close(process_private->txq_fds[i]); +- process_private->txq_fds[i] = -1; +- } +- } +- + close(internals->ioctl_sock); + rte_free(eth_dev->process_private); + if (tap_devices_count == 1) +@@ -2335,10 +2376,6 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev) + tap_devices_count--; + rte_eth_dev_release_port(eth_dev); + +- if (internals->ka_fd != -1) { +- close(internals->ka_fd); +- internals->ka_fd = -1; +- } + return 0; + } + +diff --git a/dpdk/drivers/net/tap/tap_flow.c b/dpdk/drivers/net/tap/tap_flow.c +index d155618fc8..0c5043b63f 100644 +--- a/dpdk/drivers/net/tap/tap_flow.c ++++ b/dpdk/drivers/net/tap/tap_flow.c +@@ -1378,7 +1378,7 @@ tap_flow_create(struct rte_eth_dev *dev, + NULL, "priority value too big"); + goto fail; + } +- flow = rte_malloc(__func__, sizeof(struct rte_flow), 0); ++ flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0); + if (!flow) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate memory for rte_flow"); +@@ -1414,7 +1414,7 @@ tap_flow_create(struct rte_eth_dev *dev, + * to the local pmd->if_index. + */ + if (pmd->remote_if_index) { +- remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0); ++ remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0); + if (!remote_flow) { + rte_flow_error_set( + error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, +@@ -1691,7 +1691,7 @@ int tap_flow_implicit_create(struct pmd_internals *pmd, + } + }; + +- remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0); ++ remote_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0); + if (!remote_flow) { + TAP_LOG(ERR, "Cannot allocate memory for rte_flow"); + goto fail; +@@ -1894,7 +1894,7 @@ static int rss_enable(struct pmd_internals *pmd, + return -ENOTSUP; + } + +- rss_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0); ++ rss_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0); + if (!rss_flow) { + TAP_LOG(ERR, + "Cannot allocate memory for rte_flow"); +diff --git a/dpdk/drivers/net/tap/tap_intr.c b/dpdk/drivers/net/tap/tap_intr.c +index 7af0010e37..5cf4f173a0 100644 +--- a/dpdk/drivers/net/tap/tap_intr.c ++++ b/dpdk/drivers/net/tap/tap_intr.c +@@ -7,7 +7,6 @@ + * Interrupts handling for tap driver. + */ + +-#include <assert.h> + #include <errno.h> + #include <fcntl.h> + #include <signal.h> +@@ -72,7 +71,7 @@ tap_rx_intr_vec_install(struct rte_eth_dev *dev) + struct rx_queue *rxq = pmd->dev->data->rx_queues[i]; + + /* Skip queues that cannot request interrupts. */ +- if (!rxq || process_private->rxq_fds[i] <= 0) { ++ if (!rxq || process_private->rxq_fds[i] == -1) { + /* Use invalid intr_vec[] index to disable entry. 
*/
+ intr_handle->intr_vec[i] =
+ RTE_INTR_VEC_RXTX_OFFSET +
+diff --git a/dpdk/drivers/net/thunderx/nicvf_ethdev.c b/dpdk/drivers/net/thunderx/nicvf_ethdev.c
+index ae5a33e3a1..d627a3bb70 100644
+--- a/dpdk/drivers/net/thunderx/nicvf_ethdev.c
++++ b/dpdk/drivers/net/thunderx/nicvf_ethdev.c
+@@ -488,9 +488,10 @@ nicvf_dev_reta_query(struct rte_eth_dev *dev,
+ int ret, i, j;
+
+ if (reta_size != NIC_MAX_RSS_IDR_TBL_SIZE) {
+- RTE_LOG(ERR, PMD, "The size of hash lookup table configured "
+- "(%d) doesn't match the number hardware can supported "
+- "(%d)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE);
++ PMD_DRV_LOG(ERR,
++ "The size of hash lookup table configured "
++ "(%u) doesn't match the number the hardware can support "
++ "(%u)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE);
+ return -EINVAL;
+ }
+
+@@ -518,9 +519,9 @@ nicvf_dev_reta_update(struct rte_eth_dev *dev,
+ int ret, i, j;
+
+ if (reta_size != NIC_MAX_RSS_IDR_TBL_SIZE) {
+- RTE_LOG(ERR, PMD, "The size of hash lookup table configured "
+- "(%d) doesn't match the number hardware can supported "
+- "(%d)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE);
++ PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
++ "(%u) doesn't match the number the hardware can support "
++ "(%u)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE);
+ return -EINVAL;
+ }
+
+@@ -561,8 +562,8 @@ nicvf_dev_rss_hash_update(struct rte_eth_dev *dev,
+
+ if (rss_conf->rss_key &&
+ rss_conf->rss_key_len != RSS_HASH_KEY_BYTE_SIZE) {
+- RTE_LOG(ERR, PMD, "Hash key size mismatch %d",
+- rss_conf->rss_key_len);
++ PMD_DRV_LOG(ERR, "Hash key size mismatch %u",
++ rss_conf->rss_key_len);
+ return -EINVAL;
+ }
+
+diff --git a/dpdk/drivers/net/vhost/rte_eth_vhost.c b/dpdk/drivers/net/vhost/rte_eth_vhost.c
+index 52b9e0c102..93ca8ebe4b 100644
+--- a/dpdk/drivers/net/vhost/rte_eth_vhost.c
++++ b/dpdk/drivers/net/vhost/rte_eth_vhost.c
+@@ -985,16 +985,14 @@ eth_dev_close(struct rte_eth_dev *dev)
+
+ eth_dev_stop(dev);
+
+- rte_vhost_driver_unregister(internal->iface_name);
+-
+ list = find_internal_resource(internal->iface_name);
+- if (!list)
+- return;
+-
+- pthread_mutex_lock(&internal_list_lock);
+- TAILQ_REMOVE(&internal_list, list, next);
+- pthread_mutex_unlock(&internal_list_lock);
+- rte_free(list);
++ if (list) {
++ rte_vhost_driver_unregister(internal->iface_name);
++ pthread_mutex_lock(&internal_list_lock);
++ TAILQ_REMOVE(&internal_list, list, next);
++ pthread_mutex_unlock(&internal_list_lock);
++ rte_free(list);
++ }
+
+ if (dev->data->rx_queues)
+ for (i = 0; i < dev->data->nb_rx_queues; i++)
+diff --git a/dpdk/drivers/net/virtio/virtio_ethdev.c b/dpdk/drivers/net/virtio/virtio_ethdev.c
+index 2d4c6c7fa9..be24677093 100644
+--- a/dpdk/drivers/net/virtio/virtio_ethdev.c
++++ b/dpdk/drivers/net/virtio/virtio_ethdev.c
+@@ -349,7 +349,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
+ }
+
+ if (!rte_is_power_of_2(vq_size)) {
+- PMD_INIT_LOG(ERR, "virtqueue size is not powerof 2");
++ PMD_INIT_LOG(ERR, "virtqueue size is not power of 2");
+ return -EINVAL;
+ }
+
+@@ -464,8 +464,8 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
+ hw->cvq = cvq;
+ }
+
+- /* For virtio_user case (that is when hw->dev is NULL), we use
+- * virtual address. And we need properly set _offset_, please see
++ /* For virtio_user case (that is when hw->virtio_user_dev is not NULL),
++ * we use virtual address. And we need to properly set _offset_, please see
+ * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information.
+ */
+ if (!hw->virtio_user_dev)
+diff --git a/dpdk/drivers/net/virtio/virtio_rxtx.c b/dpdk/drivers/net/virtio/virtio_rxtx.c
+index db2d1f1c1e..8ce5c33940 100644
+--- a/dpdk/drivers/net/virtio/virtio_rxtx.c
++++ b/dpdk/drivers/net/virtio/virtio_rxtx.c
+@@ -708,7 +708,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
+ RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
+
+ if (tx_free_thresh >= (vq->vq_nentries - 3)) {
+- RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
++ PMD_DRV_LOG(ERR, "tx_free_thresh must be less than the "
+ "number of TX entries minus 3 (%u)."
+ " (tx_free_thresh=%u port=%u queue=%u)\n",
+ vq->vq_nentries - 3,
+@@ -751,7 +751,7 @@ virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
+ error = virtqueue_enqueue_recv_refill(vq, m);
+
+ if (unlikely(error)) {
+- RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
++ PMD_DRV_LOG(ERR, "cannot requeue discarded mbuf");
+ rte_pktmbuf_free(m);
+ }
+ }
+@@ -763,7 +763,7 @@ virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
+
+ error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
+ if (unlikely(error)) {
+- RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
++ PMD_DRV_LOG(ERR, "cannot requeue discarded mbuf");
+ rte_pktmbuf_free(m);
+ }
+ }
+diff --git a/dpdk/drivers/net/virtio/virtio_user_ethdev.c b/dpdk/drivers/net/virtio/virtio_user_ethdev.c
+index 63b647dc52..7c275c7397 100644
+--- a/dpdk/drivers/net/virtio/virtio_user_ethdev.c
++++ b/dpdk/drivers/net/virtio/virtio_user_ethdev.c
+@@ -388,12 +388,17 @@ static int
+ get_integer_arg(const char *key __rte_unused,
+ const char *value, void *extra_args)
+ {
++ uint64_t integer = 0;
+ if (!value || !extra_args)
+ return -EINVAL;
+-
+- *(uint64_t *)extra_args = strtoull(value, NULL, 0);
+-
+- return 0;
++ errno = 0;
++ integer = strtoull(value, NULL, 0);
++ /* extra_args keeps default value, it should be replaced
++ * only in case of successful parsing of the 'value' arg
++ */
++ if (errno == 0)
++ *(uint64_t *)extra_args = integer;
++ return -errno;
+ }
+
+ static struct rte_eth_dev *
+@@ -471,7 +476,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
+ const char *name = rte_vdev_device_name(dev);
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (!eth_dev) {
+- RTE_LOG(ERR, PMD, "Failed to probe %s\n", name);
++ PMD_INIT_LOG(ERR, "Failed to probe %s", name);
+ return -1;
+ }
+
+@@ -614,7 +619,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
+ goto end;
+ }
+
+- /* previously called by rte_pci_probe() for physical dev */
++ /* previously called by pci probing for physical dev */
+ if (eth_virtio_dev_init(eth_dev) < 0) {
+ PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails");
+ virtio_user_eth_dev_free(eth_dev);
+diff --git a/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c b/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c
+index 6efa3ac217..f15fb6368e 100644
+--- a/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c
++++ b/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c
+@@ -947,13 +947,17 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+
+ RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
+
+- if (rxm->data_len) {
++ if (likely(start && rxm->data_len > 0)) {
+ start->pkt_len += rxm->data_len;
+ start->nb_segs++;
+
+ rxq->last_seg->next = rxm;
+ rxq->last_seg = rxm;
+ } else {
++ PMD_RX_LOG(ERR, "Error: received an empty or out-of-order frame.");
++ rxq->stats.drop_total++;
++ rxq->stats.drop_err++;
++
+ rte_pktmbuf_free_seg(rxm);
+ }
+ }
+diff --git a/dpdk/examples/eventdev_pipeline/main.c b/dpdk/examples/eventdev_pipeline/main.c
+index 92e08bc0c8..85d8a624bc 100644
+--- a/dpdk/examples/eventdev_pipeline/main.c
++++ b/dpdk/examples/eventdev_pipeline/main.c
+@@ -10,6 +10,8 @@
+
+ #include "pipeline_common.h"
+
++struct fastpath_data *fdata;
++
+ struct config_data cdata = {
+ .num_packets = (1L << 25), /* do ~32M packets */
+ .num_fids = 512,
+@@ -417,11 +419,6 @@ signal_handler(int signum)
+
+ rte_eal_mp_wait_lcore();
+
+- RTE_ETH_FOREACH_DEV(portid) {
+- rte_eth_dev_close(portid);
+- }
+-
+- rte_event_dev_close(0);
+ }
+ if (signum == SIGTSTP)
+ rte_event_dev_dump(0, stdout);
+@@ -439,6 +436,7 @@ int
+ main(int argc, char **argv)
+ {
+ struct worker_data *worker_data;
++ uint16_t portid;
+ uint16_t num_ports;
+ int lcore_id;
+ int err;
+@@ -576,5 +574,13 @@ main(int argc, char **argv)
+
+ }
+
++ RTE_ETH_FOREACH_DEV(portid) {
++ rte_eth_dev_close(portid);
++ }
++
++ rte_event_dev_close(0);
++
++ rte_eal_cleanup();
++
+ return 0;
+ }
+diff --git a/dpdk/examples/eventdev_pipeline/pipeline_common.h b/dpdk/examples/eventdev_pipeline/pipeline_common.h
+index a6cc912fbb..016a3f702b 100644
+--- a/dpdk/examples/eventdev_pipeline/pipeline_common.h
++++ b/dpdk/examples/eventdev_pipeline/pipeline_common.h
+@@ -93,8 +93,8 @@ struct port_link {
+ uint8_t priority;
+ };
+
+-struct fastpath_data *fdata;
+-struct config_data cdata;
++extern struct fastpath_data *fdata;
++extern struct config_data cdata;
+
+ static __rte_always_inline void
+ exchange_mac(struct rte_mbuf *m)
+diff --git a/dpdk/examples/ip_fragmentation/main.c b/dpdk/examples/ip_fragmentation/main.c
+index 68d40c19ac..0631a1a2d4 100644
+--- a/dpdk/examples/ip_fragmentation/main.c
++++ b/dpdk/examples/ip_fragmentation/main.c
+@@ -591,7 +591,7 @@ check_all_ports_link_status(uint32_t port_mask)
+ "Port%d Link Up .Speed %u Mbps - %s\n",
+ portid, link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/ip_pipeline/thread.c b/dpdk/examples/ip_pipeline/thread.c +index 272fbbeed1..adb83167cd 100644 +--- a/dpdk/examples/ip_pipeline/thread.c ++++ b/dpdk/examples/ip_pipeline/thread.c +@@ -325,8 +325,6 @@ thread_pipeline_enable(uint32_t thread_id, + + /* Send request and wait for response */ + rsp = thread_msg_send_recv(thread_id, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -412,8 +410,6 @@ thread_pipeline_disable(uint32_t thread_id, + + /* Send request and wait for response */ + rsp = thread_msg_send_recv(thread_id, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -815,8 +811,6 @@ pipeline_port_in_stats_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -863,8 +857,6 @@ pipeline_port_in_enable(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -909,8 +901,6 @@ pipeline_port_in_disable(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -963,8 +953,6 @@ pipeline_port_out_stats_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1019,8 +1007,6 @@ pipeline_table_stats_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1436,10 +1422,6 @@ pipeline_table_rule_add(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) { +- free(rule); +- return -1; +- } + + /* Read response */ + status = rsp->status; +@@ -1538,10 +1520,6 @@ pipeline_table_rule_add_default(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) { +- free(rule); +- return -1; +- } + + /* Read response */ + status = rsp->status; +@@ -1655,10 +1633,6 @@ pipeline_table_rule_add_bulk(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) { +- table_rule_list_free(list); +- return -ENOMEM; +- } + + /* Read response */ + status = rsp->status; +@@ -1733,8 +1707,6 @@ pipeline_table_rule_delete(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1790,8 +1762,6 @@ pipeline_table_rule_delete_default(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1857,8 +1827,6 @@ pipeline_table_rule_stats_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + 
+ /* Read response */ + status = rsp->status; +@@ -1915,8 +1883,6 @@ pipeline_table_mtr_profile_add(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -1967,8 +1933,6 @@ pipeline_table_mtr_profile_delete(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -2037,8 +2001,6 @@ pipeline_table_rule_mtr_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -2096,8 +2058,6 @@ pipeline_table_dscp_table_update(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -2164,8 +2124,6 @@ pipeline_table_rule_ttl_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +@@ -2229,8 +2187,6 @@ pipeline_table_rule_time_read(const char *pipeline_name, + + /* Send request and wait for response */ + rsp = pipeline_msg_send_recv(p, req); +- if (rsp == NULL) +- return -1; + + /* Read response */ + status = rsp->status; +diff --git a/dpdk/examples/ip_reassembly/main.c b/dpdk/examples/ip_reassembly/main.c +index 17b55d4c76..d29efc5354 100644 +--- a/dpdk/examples/ip_reassembly/main.c ++++ b/dpdk/examples/ip_reassembly/main.c +@@ -724,7 +724,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/ipsec-secgw/ipsec-secgw.c b/dpdk/examples/ipsec-secgw/ipsec-secgw.c +index c55606e078..be77a839d3 100644 +--- a/dpdk/examples/ipsec-secgw/ipsec-secgw.c ++++ b/dpdk/examples/ipsec-secgw/ipsec-secgw.c +@@ -1325,7 +1325,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up - speed %u Mbps -%s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/ipv4_multicast/main.c b/dpdk/examples/ipv4_multicast/main.c +index 428ca4694e..e7cb4ba566 100644 +--- a/dpdk/examples/ipv4_multicast/main.c ++++ b/dpdk/examples/ipv4_multicast/main.c +@@ -598,7 +598,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/kni/main.c b/dpdk/examples/kni/main.c +index 5dff7d3b52..54bd69491b 100644 +--- a/dpdk/examples/kni/main.c ++++ b/dpdk/examples/kni/main.c +@@ -660,7 +660,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up - speed %uMbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
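The long run of ip_pipeline hunks above all delete the same dead branch: the request/response helpers retry their dequeue until a message arrives, so they can never return NULL and the removed checks were unreachable. The shape of such a blocking helper, sketched with rte_ring (illustrative names, not the exact DPDK ones):

#include <rte_ring.h>

static void *
msg_send_recv(struct rte_ring *req_q, struct rte_ring *rsp_q, void *req)
{
        void *rsp;

        while (rte_ring_sp_enqueue(req_q, req) != 0)
                ; /* retry until the request is queued */
        while (rte_ring_sc_dequeue(rsp_q, &rsp) != 0)
                ; /* spin until the response shows up, hence no NULL */
        return rsp;
}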
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +@@ -738,15 +738,16 @@ monitor_all_ports_link_status(void *arg) + return NULL; + } + +-/* Callback for request of changing MTU */ + static int +-kni_change_mtu(uint16_t port_id, unsigned int new_mtu) ++kni_change_mtu_(uint16_t port_id, unsigned int new_mtu) + { + int ret; + uint16_t nb_rxd = NB_RXD; ++ uint16_t nb_txd = NB_TXD; + struct rte_eth_conf conf; + struct rte_eth_dev_info dev_info; + struct rte_eth_rxconf rxq_conf; ++ struct rte_eth_txconf txq_conf; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id); +@@ -774,7 +775,7 @@ kni_change_mtu(uint16_t port_id, unsigned int new_mtu) + return ret; + } + +- ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, NULL); ++ ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Could not adjust number of descriptors " + "for port%u (%d)\n", (unsigned int)port_id, +@@ -791,6 +792,16 @@ kni_change_mtu(uint16_t port_id, unsigned int new_mtu) + return ret; + } + ++ txq_conf = dev_info.default_txconf; ++ txq_conf.offloads = conf.txmode.offloads; ++ ret = rte_eth_tx_queue_setup(port_id, 0, nb_txd, ++ rte_eth_dev_socket_id(port_id), &txq_conf); ++ if (ret < 0) { ++ RTE_LOG(ERR, APP, "Fail to setup Tx queue of port %d\n", ++ port_id); ++ return ret; ++ } ++ + /* Restart specific port */ + ret = rte_eth_dev_start(port_id); + if (ret < 0) { +@@ -801,6 +812,19 @@ kni_change_mtu(uint16_t port_id, unsigned int new_mtu) + return 0; + } + ++/* Callback for request of changing MTU */ ++static int ++kni_change_mtu(uint16_t port_id, unsigned int new_mtu) ++{ ++ int ret; ++ ++ rte_atomic32_inc(&kni_pause); ++ ret = kni_change_mtu_(port_id, new_mtu); ++ rte_atomic32_dec(&kni_pause); ++ ++ return ret; ++} ++ + /* Callback for request of configuring network interface up/down */ + static int + kni_config_network_interface(uint16_t port_id, uint8_t if_up) +diff --git a/dpdk/examples/l2fwd-crypto/main.c b/dpdk/examples/l2fwd-crypto/main.c +index f12fd266e6..2d79327875 100644 +--- a/dpdk/examples/l2fwd-crypto/main.c ++++ b/dpdk/examples/l2fwd-crypto/main.c +@@ -1745,7 +1745,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/l2fwd-jobstats/main.c b/dpdk/examples/l2fwd-jobstats/main.c +index a4d28e1782..8443f685d3 100644 +--- a/dpdk/examples/l2fwd-jobstats/main.c ++++ b/dpdk/examples/l2fwd-jobstats/main.c +@@ -702,7 +702,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/l2fwd-keepalive/main.c b/dpdk/examples/l2fwd-keepalive/main.c +index 0bf2b53364..e74eb1f53e 100644 +--- a/dpdk/examples/l2fwd-keepalive/main.c ++++ b/dpdk/examples/l2fwd-keepalive/main.c +@@ -44,7 +44,7 @@ + + #define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1 + +-#define NB_MBUF 8192 ++#define NB_MBUF_PER_PORT 3000 + + #define MAX_PKT_BURST 32 + #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */ +@@ -467,7 +467,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +@@ -528,6 +528,7 @@ main(int argc, char **argv) + uint16_t portid, last_port; + unsigned lcore_id, rx_lcore_id; + unsigned nb_ports_in_mask = 0; ++ unsigned int total_nb_mbufs; + struct sigaction signal_handler; + struct rte_keepalive_shm *ka_shm; + +@@ -553,16 +554,19 @@ main(int argc, char **argv) + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n"); + +- /* create the mbuf pool */ +- l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 32, +- 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); +- if (l2fwd_pktmbuf_pool == NULL) +- rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); +- + nb_ports = rte_eth_dev_count_avail(); + if (nb_ports == 0) + rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n"); + ++ /* create the mbuf pool */ ++ total_nb_mbufs = NB_MBUF_PER_PORT * nb_ports; ++ ++ l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", ++ total_nb_mbufs, 32, 0, RTE_MBUF_DEFAULT_BUF_SIZE, ++ rte_socket_id()); ++ if (l2fwd_pktmbuf_pool == NULL) ++ rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); ++ + /* reset l2fwd_dst_ports */ + for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) + l2fwd_dst_ports[portid] = 0; +diff --git a/dpdk/examples/l2fwd/main.c b/dpdk/examples/l2fwd/main.c +index 6c23215a54..6ddf94b005 100644 +--- a/dpdk/examples/l2fwd/main.c ++++ b/dpdk/examples/l2fwd/main.c +@@ -470,7 +470,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/l3fwd-acl/main.c b/dpdk/examples/l3fwd-acl/main.c +index 8ed0c07ec1..f2fe20e48f 100644 +--- a/dpdk/examples/l3fwd-acl/main.c ++++ b/dpdk/examples/l3fwd-acl/main.c +@@ -1827,7 +1827,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/l3fwd-power/main.c b/dpdk/examples/l3fwd-power/main.c +index 77009ce809..c2a9d36cb6 100644 +--- a/dpdk/examples/l3fwd-power/main.c ++++ b/dpdk/examples/l3fwd-power/main.c +@@ -1781,7 +1781,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); +diff --git a/dpdk/examples/l3fwd/main.c b/dpdk/examples/l3fwd/main.c +index 71a67f422b..3ca84c80d9 100644 +--- a/dpdk/examples/l3fwd/main.c ++++ b/dpdk/examples/l3fwd/main.c +@@ -720,7 +720,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps -%s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/link_status_interrupt/main.c b/dpdk/examples/link_status_interrupt/main.c +index f3346d23b4..17e77427a9 100644 +--- a/dpdk/examples/link_status_interrupt/main.c ++++ b/dpdk/examples/link_status_interrupt/main.c +@@ -482,7 +482,7 @@ check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/multi_process/client_server_mp/mp_server/init.c b/dpdk/examples/multi_process/client_server_mp/mp_server/init.c +index 1b0569937b..bf209522f8 100644 +--- a/dpdk/examples/multi_process/client_server_mp/mp_server/init.c ++++ b/dpdk/examples/multi_process/client_server_mp/mp_server/init.c +@@ -199,7 +199,7 @@ check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) + "Mbps - %s\n", ports->id[portid], + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", + (uint8_t)ports->id[portid]); +diff --git a/dpdk/examples/multi_process/symmetric_mp/main.c b/dpdk/examples/multi_process/symmetric_mp/main.c +index 62771e036c..762c3cdfc7 100644 +--- a/dpdk/examples/multi_process/symmetric_mp/main.c ++++ b/dpdk/examples/multi_process/symmetric_mp/main.c +@@ -373,7 +373,7 @@ check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/performance-thread/l3fwd-thread/main.c b/dpdk/examples/performance-thread/l3fwd-thread/main.c +index 79523d23d3..8ec819dcbe 100644 +--- a/dpdk/examples/performance-thread/l3fwd-thread/main.c ++++ b/dpdk/examples/performance-thread/l3fwd-thread/main.c +@@ -3438,7 +3438,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Port%d Link Up. Speed %u Mbps - %s\n", + portid, link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", portid); + continue; +diff --git a/dpdk/examples/qos_sched/init.c b/dpdk/examples/qos_sched/init.c +index 37c2b95fd6..8f53bafbcb 100644 +--- a/dpdk/examples/qos_sched/init.c ++++ b/dpdk/examples/qos_sched/init.c +@@ -153,7 +153,7 @@ app_init_port(uint16_t portid, struct rte_mempool *mp) + printf(" Link Up - speed %u Mbps - %s\n", + (uint32_t) link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + } else { + printf(" Link Down\n"); + } +diff --git a/dpdk/examples/server_node_efd/server/init.c b/dpdk/examples/server_node_efd/server/init.c +index af5a18e285..ff5b08351e 100644 +--- a/dpdk/examples/server_node_efd/server/init.c ++++ b/dpdk/examples/server_node_efd/server/init.c +@@ -259,7 +259,7 @@ check_all_ports_link_status(uint16_t port_num, uint32_t port_mask) + info->id[portid], + link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? +- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", + info->id[portid]); +diff --git a/dpdk/examples/vm_power_manager/channel_manager.c b/dpdk/examples/vm_power_manager/channel_manager.c +index 4fac099dfd..c53ad4bf1b 100644 +--- a/dpdk/examples/vm_power_manager/channel_manager.c ++++ b/dpdk/examples/vm_power_manager/channel_manager.c +@@ -4,7 +4,6 @@ + + #include <stdio.h> + #include <stdlib.h> +-#include <sys/un.h> + #include <fcntl.h> + #include <unistd.h> + #include <inttypes.h> +@@ -37,6 +36,8 @@ + for (i = 0; mask_u64b; mask_u64b &= ~(1ULL << i++)) \ + if ((mask_u64b >> i) & 1) \ + ++struct libvirt_vm_info lvm_info[MAX_CLIENTS]; ++ + /* Global pointer to libvirt connection */ + static virConnectPtr global_vir_conn_ptr; + +diff --git a/dpdk/examples/vm_power_manager/channel_manager.h b/dpdk/examples/vm_power_manager/channel_manager.h +index d948b304c4..3c48d6ae66 100644 +--- a/dpdk/examples/vm_power_manager/channel_manager.h ++++ b/dpdk/examples/vm_power_manager/channel_manager.h +@@ -10,7 +10,7 @@ extern "C" { + #endif + + #include <linux/limits.h> +-#include <sys/un.h> ++#include <linux/un.h> + #include <rte_atomic.h> + + /* Maximum number of CPUs */ +@@ -32,10 +32,6 @@ extern "C" { + /* File socket directory */ + #define CHANNEL_MGR_SOCKET_PATH "/tmp/powermonitor/" + +-#ifndef UNIX_PATH_MAX +-struct sockaddr_un _sockaddr_un; +-#define UNIX_PATH_MAX sizeof(_sockaddr_un.sun_path) +-#endif + + #define MAX_CLIENTS 64 + #define MAX_VCPUS 20 +@@ -47,7 +43,7 @@ struct libvirt_vm_info { + uint8_t num_cpus; + }; + +-struct libvirt_vm_info lvm_info[MAX_CLIENTS]; ++extern struct libvirt_vm_info lvm_info[MAX_CLIENTS]; + /* Communication Channel Status */ + enum channel_status { CHANNEL_MGR_CHANNEL_DISCONNECTED = 0, + CHANNEL_MGR_CHANNEL_CONNECTED, +diff --git a/dpdk/examples/vm_power_manager/channel_monitor.c b/dpdk/examples/vm_power_manager/channel_monitor.c +index 1d6d7ec6d5..7881c51885 100644 +--- a/dpdk/examples/vm_power_manager/channel_monitor.c ++++ b/dpdk/examples/vm_power_manager/channel_monitor.c +@@ -30,6 +30,7 @@ + #ifdef RTE_LIBRTE_I40E_PMD + #include <rte_pmd_i40e.h> + #endif ++#include <rte_string_fns.h> + + #include <libvirt/libvirt.h> + #include "channel_monitor.h" +diff --git a/dpdk/examples/vm_power_manager/main.c b/dpdk/examples/vm_power_manager/main.c +index 5fa13fe621..30f9ceb73e 100644 +--- a/dpdk/examples/vm_power_manager/main.c ++++ b/dpdk/examples/vm_power_manager/main.c +@@ -250,7 +250,7 @@ check_all_ports_link_status(uint32_t port_mask) + "Mbps - %s\n", (uint16_t)portid, + (unsigned int)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+- ("full-duplex") : ("half-duplex\n")); ++ ("full-duplex") : ("half-duplex")); + else + printf("Port %d Link Down\n", + (uint16_t)portid); +diff --git a/dpdk/examples/vm_power_manager/power_manager.c b/dpdk/examples/vm_power_manager/power_manager.c +index a7e98cf40b..ecdf9a5583 100644 +--- a/dpdk/examples/vm_power_manager/power_manager.c ++++ b/dpdk/examples/vm_power_manager/power_manager.c +@@ -6,7 +6,6 @@ + #include <stdlib.h> + #include <stdint.h> + #include <inttypes.h> +-#include <sys/un.h> + #include <fcntl.h> + #include <unistd.h> + #include <dirent.h> +diff --git a/dpdk/examples/vmdq/main.c b/dpdk/examples/vmdq/main.c +index 627a5da485..2c0e8a596c 100644 +--- a/dpdk/examples/vmdq/main.c ++++ b/dpdk/examples/vmdq/main.c +@@ -59,6 +59,7 @@ static uint32_t enabled_port_mask; + /* number of pools (if user does not specify any, 8 by default */ + static uint32_t num_queues = 8; + static uint32_t num_pools = 8; ++static uint8_t rss_enable; + + /* empty vmdq configuration structure. Filled in programatically */ + static const struct rte_eth_conf vmdq_conf_default = { +@@ -143,6 +144,13 @@ get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools) + (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf))); + (void)(rte_memcpy(ð_conf->rx_adv_conf.vmdq_rx_conf, &conf, + sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf))); ++ if (rss_enable) { ++ eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS; ++ eth_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ++ ETH_RSS_UDP | ++ ETH_RSS_TCP | ++ ETH_RSS_SCTP; ++ } + return 0; + } + +@@ -164,6 +172,7 @@ port_init(uint16_t port, struct rte_mempool *mbuf_pool) + uint16_t q; + uint16_t queues_per_pool; + uint32_t max_nb_pools; ++ uint64_t rss_hf_tmp; + + /* + * The max pool number from dev_info will be used to validate the pool +@@ -203,6 +212,17 @@ port_init(uint16_t port, struct rte_mempool *mbuf_pool) + if (!rte_eth_dev_is_valid_port(port)) + return -1; + ++ rss_hf_tmp = port_conf.rx_adv_conf.rss_conf.rss_hf; ++ port_conf.rx_adv_conf.rss_conf.rss_hf &= ++ dev_info.flow_type_rss_offloads; ++ if (port_conf.rx_adv_conf.rss_conf.rss_hf != rss_hf_tmp) { ++ printf("Port %u modified RSS hash function based on hardware support," ++ "requested:%#"PRIx64" configured:%#"PRIx64"\n", ++ port, ++ rss_hf_tmp, ++ port_conf.rx_adv_conf.rss_conf.rss_hf); ++ } ++ + /* + * Though in this example, we only receive packets from the first queue + * of each pool and send packets through first rte_lcore_count() tx +@@ -346,7 +366,8 @@ static void + vmdq_usage(const char *prgname) + { + printf("%s [EAL options] -- -p PORTMASK]\n" +- " --nb-pools NP: number of pools\n", ++ " --nb-pools NP: number of pools\n" ++ " --enable-rss: enable RSS (disabled by default)\n", + prgname); + } + +@@ -360,6 +381,7 @@ vmdq_parse_args(int argc, char **argv) + const char *prgname = argv[0]; + static struct option long_option[] = { + {"nb-pools", required_argument, NULL, 0}, ++ {"enable-rss", 0, NULL, 0}, + {NULL, 0, 0, 0} + }; + +@@ -377,11 +399,18 @@ vmdq_parse_args(int argc, char **argv) + } + break; + case 0: +- if (vmdq_parse_num_pools(optarg) == -1) { +- printf("invalid number of pools\n"); +- vmdq_usage(prgname); +- return -1; ++ if (!strcmp(long_option[option_index].name, ++ "nb-pools")) { ++ if (vmdq_parse_num_pools(optarg) == -1) { ++ printf("invalid number of pools\n"); ++ vmdq_usage(prgname); ++ return -1; ++ } + } ++ ++ if (!strcmp(long_option[option_index].name, ++ "enable-rss")) ++ rss_enable = 1; + break; + + default: +@@ -424,10 +453,11 @@ update_mac_address(struct rte_mbuf 
*m, unsigned dst_port) + static void + sighup_handler(int signum) + { +- unsigned q; +- for (q = 0; q < num_queues; q++) { +- if (q % (num_queues/num_pools) == 0) +- printf("\nPool %u: ", q/(num_queues/num_pools)); ++ unsigned int q = vmdq_queue_base; ++ for (; q < num_queues; q++) { ++ if ((q - vmdq_queue_base) % (num_vmdq_queues / num_pools) == 0) ++ printf("\nPool %u: ", (q - vmdq_queue_base) / ++ (num_vmdq_queues / num_pools)); + printf("%lu ", rxPackets[q]); + } + printf("\nFinished handling signal %d\n", signum); +diff --git a/dpdk/kernel/freebsd/contigmem/contigmem.c b/dpdk/kernel/freebsd/contigmem/contigmem.c +index 64e0a7fecd..abb76f241e 100644 +--- a/dpdk/kernel/freebsd/contigmem/contigmem.c ++++ b/dpdk/kernel/freebsd/contigmem/contigmem.c +@@ -165,9 +165,11 @@ contigmem_load() + + error: + for (i = 0; i < contigmem_num_buffers; i++) { +- if (contigmem_buffers[i].addr != NULL) ++ if (contigmem_buffers[i].addr != NULL) { + contigfree(contigmem_buffers[i].addr, + contigmem_buffer_size, M_CONTIGMEM); ++ contigmem_buffers[i].addr = NULL; ++ } + if (mtx_initialized(&contigmem_buffers[i].mtx)) + mtx_destroy(&contigmem_buffers[i].mtx); + } +diff --git a/dpdk/kernel/linux/kni/ethtool/igb/e1000_phy.c b/dpdk/kernel/linux/kni/ethtool/igb/e1000_phy.c +index 5257b9141e..1510211e3f 100644 +--- a/dpdk/kernel/linux/kni/ethtool/igb/e1000_phy.c ++++ b/dpdk/kernel/linux/kni/ethtool/igb/e1000_phy.c +@@ -1586,7 +1586,7 @@ static s32 e1000_copper_link_autoneg(struct e1000_hw *hw) + s32 e1000_setup_copper_link_generic(struct e1000_hw *hw) + { + s32 ret_val; +- bool link; ++ bool link = true; + + DEBUGFUNC("e1000_setup_copper_link_generic"); + +@@ -1641,7 +1641,7 @@ s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw) + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; +- bool link; ++ bool link = true; + + DEBUGFUNC("e1000_phy_force_speed_duplex_igp"); + +@@ -1707,7 +1707,7 @@ s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw) + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; +- bool link; ++ bool link = true; + + DEBUGFUNC("e1000_phy_force_speed_duplex_m88"); + +@@ -1844,7 +1844,7 @@ s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw) + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; +- bool link; ++ bool link = true; + + DEBUGFUNC("e1000_phy_force_speed_duplex_ife"); + +@@ -2256,12 +2256,16 @@ s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, + * it across the board. + */ + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); +- if (ret_val) ++ if (ret_val) { + /* If the first read fails, another entity may have + * ownership of the resources, wait and try again to + * see if they have relinquished the resources yet. 
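The vmdq hunks above add the usual RSS negotiation: mask the requested hash types with what the device advertises in dev_info.flow_type_rss_offloads, and tell the user when the request was narrowed. As a standalone sketch (error handling omitted, helper name hypothetical):

#include <inttypes.h>
#include <stdio.h>
#include <rte_ethdev.h>

static void
clamp_rss_hf(uint16_t port, struct rte_eth_conf *conf)
{
        struct rte_eth_dev_info dev_info;
        uint64_t requested = conf->rx_adv_conf.rss_conf.rss_hf;

        rte_eth_dev_info_get(port, &dev_info);
        conf->rx_adv_conf.rss_conf.rss_hf &= dev_info.flow_type_rss_offloads;
        if (conf->rx_adv_conf.rss_conf.rss_hf != requested)
                printf("Port %u: RSS hash reduced from %#"PRIx64" to %#"PRIx64"\n",
                       port, requested, conf->rx_adv_conf.rss_conf.rss_hf);
}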
+ */ +- usec_delay(usec_interval); ++ if (usec_interval >= 1000) ++ msec_delay_irq(usec_interval/1000); ++ else ++ usec_delay(usec_interval); ++ } + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); + if (ret_val) + break; +@@ -2516,7 +2520,7 @@ s32 e1000_get_phy_info_m88(struct e1000_hw *hw) + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; +- bool link; ++ bool link = true; + + DEBUGFUNC("e1000_get_phy_info_m88"); + +@@ -2591,7 +2595,7 @@ s32 e1000_get_phy_info_igp(struct e1000_hw *hw) + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; +- bool link; ++ bool link = true; + + DEBUGFUNC("e1000_get_phy_info_igp"); + +@@ -2653,7 +2657,7 @@ s32 e1000_get_phy_info_ife(struct e1000_hw *hw) + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; +- bool link; ++ bool link = true; + + DEBUGFUNC("e1000_get_phy_info_ife"); + +@@ -3042,7 +3046,7 @@ s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw) + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; +- bool link; ++ bool link = true; + + DEBUGFUNC("e1000_phy_force_speed_duplex_82577"); + +@@ -3091,7 +3095,7 @@ s32 e1000_get_phy_info_82577(struct e1000_hw *hw) + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; +- bool link; ++ bool link = true; + + DEBUGFUNC("e1000_get_phy_info_82577"); + +diff --git a/dpdk/kernel/linux/kni/ethtool/igb/igb_main.c b/dpdk/kernel/linux/kni/ethtool/igb/igb_main.c +index cb1b536775..5a07d007a2 100644 +--- a/dpdk/kernel/linux/kni/ethtool/igb/igb_main.c ++++ b/dpdk/kernel/linux/kni/ethtool/igb/igb_main.c +@@ -36,6 +36,7 @@ + #endif /* CONFIG_PM_RUNTIME */ + + #include <linux/if_bridge.h> ++#include "compat.h" + #include "igb.h" + #include "igb_vmdq.h" + +@@ -154,7 +155,11 @@ static int igb_poll(struct napi_struct *, int); + static bool igb_clean_tx_irq(struct igb_q_vector *); + static bool igb_clean_rx_irq(struct igb_q_vector *, int); + static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); ++#ifdef HAVE_TX_TIMEOUT_TXQUEUE ++static void igb_tx_timeout(struct net_device *, unsigned int); ++#else + static void igb_tx_timeout(struct net_device *); ++#endif /* HAVE_TX_TIMEOUT_TXQUEUE */ + static void igb_reset_task(struct work_struct *); + #ifdef HAVE_VLAN_RX_REGISTER + static void igb_vlan_mode(struct net_device *, struct vlan_group *); +@@ -5623,7 +5628,11 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, + * igb_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure + **/ ++#ifdef HAVE_TX_TIMEOUT_TXQUEUE ++static void igb_tx_timeout(struct net_device *netdev, unsigned int txqueue) ++#else + static void igb_tx_timeout(struct net_device *netdev) ++#endif + { + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; +diff --git a/dpdk/kernel/linux/kni/ethtool/igb/kcompat.h b/dpdk/kernel/linux/kni/ethtool/igb/kcompat.h +index 964317508d..611a5b7c49 100644 +--- a/dpdk/kernel/linux/kni/ethtool/igb/kcompat.h ++++ b/dpdk/kernel/linux/kni/ethtool/igb/kcompat.h +@@ -3947,14 +3947,18 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) + #define HAVE_PCI_ENABLE_MSIX + #endif + +-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(5,0,0) ) ++#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)) \ ++ || (defined(RHEL_RELEASE_CODE) \ ++ && (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 1)))) + #define dev_open(x) dev_open(x, NULL) + #define HAVE_NDO_BRIDGE_SETLINK_EXTACK +-#endif /* >= 5.0.0 */ ++#endif /* >= 5.0.0 or >= RHEL/CentOS 8.1 */ + 
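The igb_main.c hunks above use the usual kcompat technique for a kernel API break: ndo_tx_timeout gained a txqueue argument in Linux 5.6 (and in RHEL backports), so the callback prototype is selected by a feature macro probed in the compat header. A kernel-side sketch, with illustrative names (my_tx_timeout, trigger_reset):

#ifdef HAVE_TX_TIMEOUT_TXQUEUE
static void my_tx_timeout(struct net_device *netdev, unsigned int txqueue)
#else
static void my_tx_timeout(struct net_device *netdev)
#endif
{
        trigger_reset(netdev); /* shared recovery path for both prototypes */
}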
+-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(5,1,0) ) ++#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0)) \ ++ || (defined(RHEL_RELEASE_CODE) \ ++ && (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 1)))) + #define HAVE_NDO_FDB_ADD_EXTACK +-#endif /* >= 5.1.0 */ ++#endif /* >= 5.1.0 or >= RHEL/CentOS 8.1 */ + + #if defined(timer_setup) && defined(from_timer) + #define HAVE_TIMER_SETUP +diff --git a/dpdk/kernel/linux/kni/ethtool/ixgbe/kcompat.h b/dpdk/kernel/linux/kni/ethtool/ixgbe/kcompat.h +index e1671e91a9..73e2c3fb96 100644 +--- a/dpdk/kernel/linux/kni/ethtool/ixgbe/kcompat.h ++++ b/dpdk/kernel/linux/kni/ethtool/ixgbe/kcompat.h +@@ -3131,9 +3131,11 @@ static inline int __kc_pci_vfs_assigned(struct pci_dev *dev) + #define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops)) + #endif /* >= 3.16.0 */ + +-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(5,0,0) ) ++#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)) \ ++ || (defined(RHEL_RELEASE_CODE) \ ++ && (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 1)))) + #define dev_open(x) dev_open(x, NULL) +-#endif /* >= 5.0.0 */ ++#endif /* >= 5.0.0 or >= RHEL/CentOS 8.1 */ + + /* + * vlan_tx_tag_* macros renamed to skb_vlan_tag_* (Linux commit: df8a39defad4) +diff --git a/dpdk/lib/librte_bbdev/rte_bbdev.h b/dpdk/lib/librte_bbdev/rte_bbdev.h +index 4a2873b2fe..f53ee101da 100644 +--- a/dpdk/lib/librte_bbdev/rte_bbdev.h ++++ b/dpdk/lib/librte_bbdev/rte_bbdev.h +@@ -420,13 +420,13 @@ TAILQ_HEAD(rte_bbdev_cb_list, rte_bbdev_callback); + * these fields, but should only write to the *_ops fields. + */ + struct __rte_cache_aligned rte_bbdev { +- /**< Enqueue encode function */ ++ /** Enqueue encode function */ + rte_bbdev_enqueue_enc_ops_t enqueue_enc_ops; +- /**< Enqueue decode function */ ++ /** Enqueue decode function */ + rte_bbdev_enqueue_dec_ops_t enqueue_dec_ops; +- /**< Dequeue encode function */ ++ /** Dequeue encode function */ + rte_bbdev_dequeue_enc_ops_t dequeue_enc_ops; +- /**< Dequeue decode function */ ++ /** Dequeue decode function */ + rte_bbdev_dequeue_dec_ops_t dequeue_dec_ops; + const struct rte_bbdev_ops *dev_ops; /**< Functions exported by PMD */ + struct rte_bbdev_data *data; /**< Pointer to device data */ +diff --git a/dpdk/lib/librte_bbdev/rte_bbdev_op.h b/dpdk/lib/librte_bbdev/rte_bbdev_op.h +index 83f62c2dc2..14cb87cd96 100644 +--- a/dpdk/lib/librte_bbdev/rte_bbdev_op.h ++++ b/dpdk/lib/librte_bbdev/rte_bbdev_op.h +@@ -277,11 +277,12 @@ struct rte_bbdev_op_turbo_dec { + */ + uint8_t num_maps; + +- uint8_t code_block_mode; /**< [0 - TB : 1 - CB] */ ++ /** [0 - TB : 1 - CB] */ ++ uint8_t code_block_mode; + union { +- /**< Struct which stores Code Block specific parameters */ ++ /** Struct which stores Code Block specific parameters */ + struct rte_bbdev_op_dec_cb_params cb_params; +- /**< Struct which stores Transport Block specific parameters */ ++ /** Struct which stores Transport Block specific parameters */ + struct rte_bbdev_op_dec_tb_params tb_params; + }; + }; +@@ -338,7 +339,7 @@ struct rte_bbdev_op_enc_tb_params { + * the Turbo operation when r >= C-, [K:3*Kpi] + */ + uint16_t ncb_pos; +- /**< The index of the first CB in the inbound mbuf data, default is 0 */ ++ /** The index of the first CB in the inbound mbuf data, default is 0 */ + uint8_t r; + }; + +@@ -419,10 +420,13 @@ enum { + + /**< Structure specifying a single encode operation */ + struct rte_bbdev_enc_op { +- int status; /**< Status of operation that was performed */ +- struct rte_mempool *mempool; /**< Mempool which op instance is in */ +- void 
*opaque_data; /**< Opaque pointer for user data */ +- /**< Contains encoder specific parameters */ ++ /** Status of operation that was performed */ ++ int status; ++ /** Mempool which op instance is in */ ++ struct rte_mempool *mempool; ++ /** Opaque pointer for user data */ ++ void *opaque_data; ++ /** Contains encoder specific parameters */ + struct rte_bbdev_op_turbo_enc turbo_enc; + }; + +@@ -444,7 +448,7 @@ struct rte_bbdev_op_cap { + } cap; /**< Operation-type specific capabilities */ + }; + +-/**< @internal Private data structure stored with operation pool. */ ++/** @internal Private data structure stored with operation pool. */ + struct rte_bbdev_op_pool_private { + enum rte_bbdev_op_type type; /**< Type of operations in a pool */ + }; +diff --git a/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h b/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h +index db9a04cdf9..f6091a5e99 100644 +--- a/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h ++++ b/dpdk/lib/librte_bbdev/rte_bbdev_pmd.h +@@ -143,18 +143,18 @@ typedef int (*rte_bbdev_queue_intr_disable_t)(struct rte_bbdev *dev, + * fields are for non-vital operations + */ + struct rte_bbdev_ops { +- /**< Allocate and configure device memory. Optional. */ ++ /** Allocate and configure device memory. Optional. */ + rte_bbdev_setup_queues_t setup_queues; +- /**< Configure interrupts. Optional. */ ++ /** Configure interrupts. Optional. */ + rte_bbdev_intr_enable_t intr_enable; +- /**< Start device. Optional. */ ++ /** Start device. Optional. */ + rte_bbdev_start_t start; +- /**< Stop device. Optional. */ ++ /** Stop device. Optional. */ + rte_bbdev_stop_t stop; +- /**< Close device. Optional. */ ++ /** Close device. Optional. */ + rte_bbdev_close_t close; + +- /**< Get device info. Required. */ ++ /** Get device info. Required. */ + rte_bbdev_info_get_t info_get; + /** Get device statistics. Optional. */ + rte_bbdev_stats_get_t stats_get; +@@ -167,7 +167,7 @@ struct rte_bbdev_ops { + rte_bbdev_queue_release_t queue_release; + /** Start a queue. Optional. */ + rte_bbdev_queue_start_t queue_start; +- /**< Stop a queue pair. Optional. */ ++ /** Stop a queue pair. Optional. */ + rte_bbdev_queue_stop_t queue_stop; + + /** Enable queue interrupt. Optional */ +diff --git a/dpdk/lib/librte_cryptodev/rte_crypto_sym.h b/dpdk/lib/librte_cryptodev/rte_crypto_sym.h +index eb5afc5ef0..70038db776 100644 +--- a/dpdk/lib/librte_cryptodev/rte_crypto_sym.h ++++ b/dpdk/lib/librte_cryptodev/rte_crypto_sym.h +@@ -219,9 +219,12 @@ enum rte_crypto_auth_algorithm { + /**< HMAC using MD5 algorithm */ + + RTE_CRYPTO_AUTH_SHA1, +- /**< 128 bit SHA algorithm. */ ++ /**< 160 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA1_HMAC, +- /**< HMAC using 128 bit SHA algorithm. */ ++ /**< HMAC using 160 bit SHA algorithm. ++ * HMAC-SHA-1-96 can be generated by setting ++ * digest_length to 12 bytes in auth/aead xforms. ++ */ + RTE_CRYPTO_AUTH_SHA224, + /**< 224 bit SHA algorithm. 
*/ + RTE_CRYPTO_AUTH_SHA224_HMAC, +diff --git a/dpdk/lib/librte_eal/bsdapp/eal/eal.c b/dpdk/lib/librte_eal/bsdapp/eal/eal.c +index 30b4cf7bf0..0e3452210b 100644 +--- a/dpdk/lib/librte_eal/bsdapp/eal/eal.c ++++ b/dpdk/lib/librte_eal/bsdapp/eal/eal.c +@@ -691,13 +691,19 @@ rte_eal_init(int argc, char **argv) + /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */ + if (internal_config.iova_mode == RTE_IOVA_DC) { + /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */ +- rte_eal_get_configuration()->iova_mode = +- rte_bus_get_iommu_class(); ++ enum rte_iova_mode iova_mode = rte_bus_get_iommu_class(); ++ ++ if (iova_mode == RTE_IOVA_DC) ++ iova_mode = RTE_IOVA_PA; ++ rte_eal_get_configuration()->iova_mode = iova_mode; + } else { + rte_eal_get_configuration()->iova_mode = + internal_config.iova_mode; + } + ++ RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n", ++ rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA"); ++ + if (internal_config.no_hugetlbfs == 0) { + /* rte_config isn't initialized yet */ + ret = internal_config.process_type == RTE_PROC_PRIMARY ? +diff --git a/dpdk/lib/librte_eal/bsdapp/eal/eal_memory.c b/dpdk/lib/librte_eal/bsdapp/eal/eal_memory.c +index 4b092e1f21..418c4bb0aa 100644 +--- a/dpdk/lib/librte_eal/bsdapp/eal/eal_memory.c ++++ b/dpdk/lib/librte_eal/bsdapp/eal/eal_memory.c +@@ -435,7 +435,7 @@ memseg_primary_init(void) + * + * we need (N*2)-1 segments because we cannot guarantee that + * each segment will be IOVA-contiguous with the previous one, +- * so we will allocate more and put spaces inbetween segments ++ * so we will allocate more and put spaces between segments + * that are non-contiguous. + */ + avail_segs = (hpi->num_pages[0] * 2) - 1; +diff --git a/dpdk/lib/librte_eal/common/eal_common_bus.c b/dpdk/lib/librte_eal/common/eal_common_bus.c +index c8f1901f0b..04590485b1 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_bus.c ++++ b/dpdk/lib/librte_eal/common/eal_common_bus.c +@@ -228,19 +228,39 @@ rte_bus_find_by_device_name(const char *str) + enum rte_iova_mode + rte_bus_get_iommu_class(void) + { +- int mode = RTE_IOVA_DC; ++ enum rte_iova_mode mode = RTE_IOVA_DC; ++ bool buses_want_va = false; ++ bool buses_want_pa = false; + struct rte_bus *bus; + + TAILQ_FOREACH(bus, &rte_bus_list, next) { ++ enum rte_iova_mode bus_iova_mode; + +- if (bus->get_iommu_class) +- mode |= bus->get_iommu_class(); +- } ++ if (bus->get_iommu_class == NULL) ++ continue; + +- if (mode != RTE_IOVA_VA) { +- /* Use default IOVA mode */ ++ bus_iova_mode = bus->get_iommu_class(); ++ RTE_LOG(DEBUG, EAL, "Bus %s wants IOVA as '%s'\n", ++ bus->name, ++ bus_iova_mode == RTE_IOVA_DC ? "DC" : ++ (bus_iova_mode == RTE_IOVA_PA ? 
"PA" : "VA")); ++ if (bus_iova_mode == RTE_IOVA_PA) ++ buses_want_pa = true; ++ else if (bus_iova_mode == RTE_IOVA_VA) ++ buses_want_va = true; ++ } ++ if (buses_want_va && !buses_want_pa) { ++ mode = RTE_IOVA_VA; ++ } else if (buses_want_pa && !buses_want_va) { + mode = RTE_IOVA_PA; ++ } else { ++ mode = RTE_IOVA_DC; ++ if (buses_want_va) { ++ RTE_LOG(WARNING, EAL, "Some buses want 'VA' but forcing 'DC' because other buses want 'PA'.\n"); ++ RTE_LOG(WARNING, EAL, "Depending on the final decision by the EAL, not all buses may be able to initialize.\n"); ++ } + } ++ + return mode; + } + +diff --git a/dpdk/lib/librte_eal/common/eal_common_log.c b/dpdk/lib/librte_eal/common/eal_common_log.c +index 2c1200310d..9453fa3e9a 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_log.c ++++ b/dpdk/lib/librte_eal/common/eal_common_log.c +@@ -284,7 +284,7 @@ rte_log_register_type_and_pick_level(const char *name, uint32_t level_def) + continue; + + if (opt_ll->pattern) { +- if (fnmatch(opt_ll->pattern, name, 0)) ++ if (fnmatch(opt_ll->pattern, name, 0) == 0) + level = opt_ll->level; + } else { + if (regexec(&opt_ll->re_match, name, 0, NULL, 0) == 0) +diff --git a/dpdk/lib/librte_eal/common/eal_common_memory.c b/dpdk/lib/librte_eal/common/eal_common_memory.c +index 9a14698aae..030f1b261c 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_memory.c ++++ b/dpdk/lib/librte_eal/common/eal_common_memory.c +@@ -112,7 +112,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size, + return NULL; + } + +- mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_READ, ++ mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE, + mmap_flags, -1, 0); + if (mapped_addr == MAP_FAILED && allow_shrink) + *size -= page_sz; +diff --git a/dpdk/lib/librte_eal/common/eal_common_options.c b/dpdk/lib/librte_eal/common/eal_common_options.c +index f742d4d384..f38888edf3 100644 +--- a/dpdk/lib/librte_eal/common/eal_common_options.c ++++ b/dpdk/lib/librte_eal/common/eal_common_options.c +@@ -1036,7 +1036,7 @@ eal_parse_log_level(const char *arg) + if (regex) { + if (rte_log_set_level_regexp(regex, priority) < 0) { + fprintf(stderr, "cannot set log level %s,%d\n", +- pattern, priority); ++ regex, priority); + goto fail; + } + if (rte_log_save_regexp(regex, priority) < 0) +diff --git a/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h b/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h +index 859b09748c..f79718ce8c 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h ++++ b/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h +@@ -57,7 +57,7 @@ __rte_rdtsc_syscall(void) + * asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(29)); + * asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x8000000f)); + * +- * which is possible only from the priviledged mode (kernel space). ++ * which is possible only from the privileged mode (kernel space). 
+ */ + static inline uint64_t + __rte_rdtsc_pmccntr(void) +diff --git a/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h +index 68e7c73384..da557b6a10 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h ++++ b/dpdk/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h +@@ -62,7 +62,7 @@ rte_rdtsc(void) + static inline uint64_t + rte_rdtsc_precise(void) + { +- rte_mb(); ++ asm volatile("isb" : : : "memory"); + return rte_rdtsc(); + } + +diff --git a/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h b/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h +index 75f74897b3..b194564b55 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h ++++ b/dpdk/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h +@@ -37,6 +37,7 @@ + #include <string.h> + /*To include altivec.h, GCC version must >= 4.8 */ + #include <altivec.h> ++#include "rte_common.h" + + #ifdef __cplusplus + extern "C" { +@@ -44,6 +45,11 @@ extern "C" { + + #include "generic/rte_memcpy.h" + ++#if (GCC_VERSION >= 90000 && GCC_VERSION < 90400) ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Warray-bounds" ++#endif ++ + static inline void + rte_mov16(uint8_t *dst, const uint8_t *src) + { +@@ -219,6 +225,10 @@ rte_memcpy_func(void *dst, const void *src, size_t n) + return ret; + } + ++#if (GCC_VERSION >= 90000 && GCC_VERSION < 90400) ++#pragma GCC diagnostic pop ++#endif ++ + #ifdef __cplusplus + } + #endif +diff --git a/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h +index 148398f50a..b9dcd30aba 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h ++++ b/dpdk/lib/librte_eal/common/include/arch/x86/rte_atomic.h +@@ -55,7 +55,7 @@ extern "C" { + * + * As pointed by Java guys, that makes possible to use lock-prefixed + * instructions to get the same effect as mfence and on most modern HW +- * that gives a better perfomance then using mfence: ++ * that gives a better performance then using mfence: + * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ + * Basic idea is to use lock prefixed add with some dummy memory location + * as the destination. From their experiments 128B(2 cache lines) below +diff --git a/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +index ba44c4a328..9c67232df9 100644 +--- a/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h ++++ b/dpdk/lib/librte_eal/common/include/arch/x86/rte_memcpy.h +@@ -22,6 +22,11 @@ + extern "C" { + #endif + ++#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 100000) ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wstringop-overflow" ++#endif ++ + /** + * Copy bytes from one location to another. The locations must not overlap. 
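The eal_common_log fix above corrects an inverted test: fnmatch(3) returns 0 on a match (FNM_NOMATCH otherwise), so the old code applied the log level to every name that did not match the pattern. The convention, spelled out:

#include <fnmatch.h>

static int
name_matches(const char *pattern, const char *name)
{
        return fnmatch(pattern, name, 0) == 0; /* 0 means "matched" */
}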
+ * +@@ -869,6 +874,10 @@ rte_memcpy(void *dst, const void *src, size_t n) + return rte_memcpy_generic(dst, src, n); + } + ++#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 100000) ++#pragma GCC diagnostic pop ++#endif ++ + #ifdef __cplusplus + } + #endif +diff --git a/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h b/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h +index 7d9a1463c4..ac167936de 100644 +--- a/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h ++++ b/dpdk/lib/librte_eal/common/include/generic/rte_byteorder.h +@@ -93,9 +93,9 @@ + #define RTE_BE16(v) (rte_be16_t)(RTE_STATIC_BSWAP16(v)) + #define RTE_BE32(v) (rte_be32_t)(RTE_STATIC_BSWAP32(v)) + #define RTE_BE64(v) (rte_be64_t)(RTE_STATIC_BSWAP64(v)) +-#define RTE_LE16(v) (rte_be16_t)(v) +-#define RTE_LE32(v) (rte_be32_t)(v) +-#define RTE_LE64(v) (rte_be64_t)(v) ++#define RTE_LE16(v) (rte_le16_t)(v) ++#define RTE_LE32(v) (rte_le32_t)(v) ++#define RTE_LE64(v) (rte_le64_t)(v) + #else + #error Unsupported endianness. + #endif +diff --git a/dpdk/lib/librte_eal/common/include/rte_bus.h b/dpdk/lib/librte_eal/common/include/rte_bus.h +index 6be4b5cabe..b87e23b19e 100644 +--- a/dpdk/lib/librte_eal/common/include/rte_bus.h ++++ b/dpdk/lib/librte_eal/common/include/rte_bus.h +@@ -348,7 +348,7 @@ struct rte_bus *rte_bus_find_by_name(const char *busname); + + /** + * Get the common iommu class of devices bound on to buses available in the +- * system. The default mode is PA. ++ * system. RTE_IOVA_DC means that no preferrence has been expressed. + * + * @return + * enum rte_iova_mode value. +diff --git a/dpdk/lib/librte_eal/common/include/rte_common.h b/dpdk/lib/librte_eal/common/include/rte_common.h +index 48bf28ca5d..5006ba8cad 100644 +--- a/dpdk/lib/librte_eal/common/include/rte_common.h ++++ b/dpdk/lib/librte_eal/common/include/rte_common.h +@@ -283,7 +283,7 @@ rte_is_aligned(void *ptr, unsigned align) + * The combined value. + */ + static inline uint32_t +-rte_combine32ms1b(register uint32_t x) ++rte_combine32ms1b(uint32_t x) + { + x |= x >> 1; + x |= x >> 2; +@@ -305,7 +305,7 @@ rte_combine32ms1b(register uint32_t x) + * The combined value. + */ + static inline uint64_t +-rte_combine64ms1b(register uint64_t v) ++rte_combine64ms1b(uint64_t v) + { + v |= v >> 1; + v |= v >> 2; +diff --git a/dpdk/lib/librte_eal/common/include/rte_service.h b/dpdk/lib/librte_eal/common/include/rte_service.h +index 11f673503b..21002209bc 100644 +--- a/dpdk/lib/librte_eal/common/include/rte_service.h ++++ b/dpdk/lib/librte_eal/common/include/rte_service.h +@@ -104,12 +104,16 @@ int32_t rte_service_probe_capability(uint32_t id, uint32_t capability); + * Each core can be added or removed from running a specific service. This + * function enables or disables *lcore* to run *service_id*. + * +- * If multiple cores are enabled on a service, an atomic is used to ensure that +- * only one cores runs the service at a time. The exception to this is when ++ * If multiple cores are enabled on a service, a lock is used to ensure that ++ * only one core runs the service at a time. The exception to this is when + * a service indicates that it is multi-thread safe by setting the capability + * called RTE_SERVICE_CAP_MT_SAFE. With the multi-thread safe capability set, + * the service function can be run on multiple threads at the same time. + * ++ * If the service is known to be mapped to a single lcore, setting the ++ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve ++ * better performance by avoiding the use of lock. 
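The rte_byteorder.h hunk above fixes a copy-paste in the little-endian branch: the RTE_LE*() macros are identity conversions there, but they cast to the big-endian typedefs, defeating the point of the annotated rte_le16_t/rte_le32_t/rte_le64_t types. Usage is unchanged:

#include <rte_byteorder.h>

/* now correctly typed as rte_le32_t on a little-endian build */
static const rte_le32_t fixed_le_value = RTE_LE32(0x12345678);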
++ * + * @param service_id the service to apply the lcore to + * @param lcore The lcore that will be mapped to service + * @param enable Zero to unmap or disable the core, non-zero to enable +diff --git a/dpdk/lib/librte_eal/common/include/rte_service_component.h b/dpdk/lib/librte_eal/common/include/rte_service_component.h +index c12adbc256..259e3182d8 100644 +--- a/dpdk/lib/librte_eal/common/include/rte_service_component.h ++++ b/dpdk/lib/librte_eal/common/include/rte_service_component.h +@@ -43,7 +43,7 @@ struct rte_service_spec { + /** + * Register a new service. + * +- * A service represents a component that the requires CPU time periodically to ++ * A service represents a component that requires CPU time periodically to + * achieve its purpose. + * + * For example the eventdev SW PMD requires CPU cycles to perform its +@@ -56,6 +56,10 @@ struct rte_service_spec { + * *rte_service_component_runstate_set*, which indicates that the service + * component is ready to be executed. + * ++ * If the service is known to be mapped to a single lcore, setting the ++ * capability of the service to RTE_SERVICE_CAP_MT_SAFE can achieve ++ * better performance. ++ * + * @param spec The specification of the service to register + * @param[out] service_id A pointer to a uint32_t, which will be filled in + * during registration of the service. It is set to the integers +diff --git a/dpdk/lib/librte_eal/common/include/rte_version.h b/dpdk/lib/librte_eal/common/include/rte_version.h +index a1cce4802e..514cfe67b6 100644 +--- a/dpdk/lib/librte_eal/common/include/rte_version.h ++++ b/dpdk/lib/librte_eal/common/include/rte_version.h +@@ -37,7 +37,7 @@ extern "C" { + /** + * Patch level number i.e. the z in yy.mm.z + */ +-#define RTE_VER_MINOR 7 ++#define RTE_VER_MINOR 9 + + /** + * Extra string to be appended to version number +diff --git a/dpdk/lib/librte_eal/common/malloc_elem.c b/dpdk/lib/librte_eal/common/malloc_elem.c +index 24e1eb55f3..69ff063883 100644 +--- a/dpdk/lib/librte_eal/common/malloc_elem.c ++++ b/dpdk/lib/librte_eal/common/malloc_elem.c +@@ -157,7 +157,7 @@ malloc_elem_insert(struct malloc_elem *elem) + next_elem = NULL; + heap->last = elem; + } else { +- /* the new memory is somewhere inbetween start and end */ ++ /* the new memory is somewhere between start and end */ + uint64_t dist_from_start, dist_from_end; + + dist_from_end = RTE_PTR_DIFF(heap->last, elem); +diff --git a/dpdk/lib/librte_eal/common/malloc_heap.c b/dpdk/lib/librte_eal/common/malloc_heap.c +index b8f26f2b37..de3b73e8c5 100644 +--- a/dpdk/lib/librte_eal/common/malloc_heap.c ++++ b/dpdk/lib/librte_eal/common/malloc_heap.c +@@ -238,6 +238,9 @@ heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size, + size = RTE_CACHE_LINE_ROUNDUP(size); + align = RTE_CACHE_LINE_ROUNDUP(align); + ++ /* roundup might cause an overflow */ ++ if (size == 0) ++ return NULL; + elem = find_suitable_element(heap, size, flags, align, bound, contig); + if (elem != NULL) { + elem = malloc_elem_alloc(elem, size, align, bound, contig); +diff --git a/dpdk/lib/librte_eal/common/rte_service.c b/dpdk/lib/librte_eal/common/rte_service.c +index 53dd6a7bbf..607f9a0407 100644 +--- a/dpdk/lib/librte_eal/common/rte_service.c ++++ b/dpdk/lib/librte_eal/common/rte_service.c +@@ -49,6 +49,10 @@ struct rte_service_spec_impl { + uint8_t internal_flags; + + /* per service statistics */ ++ /* Indicates how many cores the service is mapped to run on. ++ * It does not indicate the number of cores the service is running ++ * on currently. 
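The malloc_heap check above guards a subtle wraparound: RTE_CACHE_LINE_ROUNDUP() adds the alignment before dividing, so a request close to SIZE_MAX overflows to 0 and would otherwise look like a trivially satisfiable allocation. A demonstration of the failure mode:

#include <stdint.h>
#include <rte_common.h>

static int
rounded_size_is_valid(size_t size)
{
        size_t rounded = RTE_CACHE_LINE_ROUNDUP(size);

        /* e.g. RTE_CACHE_LINE_ROUNDUP(SIZE_MAX - 1) wraps to 0 */
        return rounded != 0;
}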
++ */ + rte_atomic32_t num_mapped_cores; + uint64_t calls; + uint64_t cycles_spent; +@@ -121,6 +125,9 @@ rte_service_finalize(void) + if (!rte_service_library_initialized) + return; + ++ rte_service_lcore_reset_all(); ++ rte_eal_mp_wait_lcore(); ++ + rte_free(rte_services); + rte_free(lcore_states); + +@@ -332,8 +339,8 @@ rte_service_runstate_get(uint32_t id) + } + + static inline void +-rte_service_runner_do_callback(struct rte_service_spec_impl *s, +- struct core_state *cs, uint32_t service_idx) ++service_runner_do_callback(struct rte_service_spec_impl *s, ++ struct core_state *cs, uint32_t service_idx) + { + void *userdata = s->spec.callback_userdata; + +@@ -352,7 +359,7 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s, + /* Expects the service 's' is valid. */ + static int32_t + service_run(uint32_t i, struct core_state *cs, uint64_t service_mask, +- struct rte_service_spec_impl *s) ++ struct rte_service_spec_impl *s, uint32_t serialize_mt_unsafe) + { + if (!s) + return -EINVAL; +@@ -366,19 +373,14 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask, + + cs->service_active_on_lcore[i] = 1; + +- /* check do we need cmpset, if MT safe or <= 1 core +- * mapped, atomic ops are not required. +- */ +- const int use_atomics = (service_mt_safe(s) == 0) && +- (rte_atomic32_read(&s->num_mapped_cores) > 1); +- if (use_atomics) { ++ if ((service_mt_safe(s) == 0) && (serialize_mt_unsafe == 1)) { + if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1)) + return -EBUSY; + +- rte_service_runner_do_callback(s, cs, i); ++ service_runner_do_callback(s, cs, i); + rte_atomic32_clear(&s->execute_lock); + } else +- rte_service_runner_do_callback(s, cs, i); ++ service_runner_do_callback(s, cs, i); + + return 0; + } +@@ -409,44 +411,34 @@ rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe) + + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + +- /* Atomically add this core to the mapped cores first, then examine if +- * we can run the service. This avoids a race condition between +- * checking the value, and atomically adding to the mapped count. ++ /* Increment num_mapped_cores to reflect that this core is ++ * now mapped capable of running the service. 
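service_run() above now receives serialize_mt_unsafe explicitly and keeps the same try-lock discipline: an MT-unsafe service is guarded by an atomic compare-and-set, and a contending lcore gets -EBUSY instead of blocking. The locking shape, reduced to its core with a stand-in struct:

#include <errno.h>
#include <rte_atomic.h>

struct svc {
        rte_atomic32_t execute_lock;
        void (*cb)(void *arg);
        void *userdata;
};

static int
run_once(struct svc *s, int mt_safe, int serialize_mt_unsafe)
{
        if (!mt_safe && serialize_mt_unsafe) {
                if (!rte_atomic32_cmpset((uint32_t *)&s->execute_lock, 0, 1))
                        return -EBUSY; /* another lcore is running it */
                s->cb(s->userdata);
                rte_atomic32_clear(&s->execute_lock);
        } else {
                s->cb(s->userdata);
        }
        return 0;
}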
+ */ +- if (serialize_mt_unsafe) +- rte_atomic32_inc(&s->num_mapped_cores); ++ rte_atomic32_inc(&s->num_mapped_cores); + +- if (service_mt_safe(s) == 0 && +- rte_atomic32_read(&s->num_mapped_cores) > 1) { +- if (serialize_mt_unsafe) +- rte_atomic32_dec(&s->num_mapped_cores); +- return -EBUSY; +- } +- +- int ret = service_run(id, cs, UINT64_MAX, s); ++ int ret = service_run(id, cs, UINT64_MAX, s, serialize_mt_unsafe); + +- if (serialize_mt_unsafe) +- rte_atomic32_dec(&s->num_mapped_cores); ++ rte_atomic32_dec(&s->num_mapped_cores); + + return ret; + } + + static int32_t +-rte_service_runner_func(void *arg) ++service_runner_func(void *arg) + { + RTE_SET_USED(arg); + uint32_t i; + const int lcore = rte_lcore_id(); + struct core_state *cs = &lcore_states[lcore]; + +- while (lcore_states[lcore].runstate == RUNSTATE_RUNNING) { ++ while (cs->runstate == RUNSTATE_RUNNING) { + const uint64_t service_mask = cs->service_mask; + + for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { + if (!service_valid(i)) + continue; + /* return value ignored as no change to code flow */ +- service_run(i, cs, service_mask, service_get(i)); ++ service_run(i, cs, service_mask, service_get(i), 1); + } + + cs->loops++; +@@ -700,9 +692,9 @@ rte_service_lcore_start(uint32_t lcore) + /* set core to run state first, and then launch otherwise it will + * return immediately as runstate keeps it in the service poll loop + */ +- lcore_states[lcore].runstate = RUNSTATE_RUNNING; ++ cs->runstate = RUNSTATE_RUNNING; + +- int ret = rte_eal_remote_launch(rte_service_runner_func, 0, lcore); ++ int ret = rte_eal_remote_launch(service_runner_func, 0, lcore); + /* returns -EBUSY if the core is already launched, 0 on success */ + return ret; + } +@@ -781,13 +773,9 @@ rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id, + } + + static void +-rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s, +- uint64_t all_cycles, uint32_t reset) ++service_dump_one(FILE *f, struct rte_service_spec_impl *s, uint32_t reset) + { + /* avoid divide by zero */ +- if (all_cycles == 0) +- all_cycles = 1; +- + int calls = 1; + if (s->calls != 0) + calls = s->calls; +@@ -814,7 +802,7 @@ rte_service_attr_reset_all(uint32_t id) + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + + int reset = 1; +- rte_service_dump_one(NULL, s, 0, reset); ++ service_dump_one(NULL, s, reset); + return 0; + } + +@@ -858,21 +846,13 @@ rte_service_dump(FILE *f, uint32_t id) + uint32_t i; + int print_one = (id != UINT32_MAX); + +- uint64_t total_cycles = 0; +- +- for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { +- if (!service_valid(i)) +- continue; +- total_cycles += rte_services[i].cycles_spent; +- } +- + /* print only the specified service */ + if (print_one) { + struct rte_service_spec_impl *s; + SERVICE_VALID_GET_OR_ERR_RET(id, s, -EINVAL); + fprintf(f, "Service %s Summary\n", s->spec.name); + uint32_t reset = 0; +- rte_service_dump_one(f, s, total_cycles, reset); ++ service_dump_one(f, s, reset); + return 0; + } + +@@ -882,7 +862,7 @@ rte_service_dump(FILE *f, uint32_t id) + if (!service_valid(i)) + continue; + uint32_t reset = 0; +- rte_service_dump_one(f, &rte_services[i], total_cycles, reset); ++ service_dump_one(f, &rte_services[i], reset); + } + + fprintf(f, "Service Cores Summary\n"); +diff --git a/dpdk/lib/librte_eal/linuxapp/eal/eal.c b/dpdk/lib/librte_eal/linuxapp/eal/eal.c +index f453337f71..987efb9b24 100644 +--- a/dpdk/lib/librte_eal/linuxapp/eal/eal.c ++++ b/dpdk/lib/librte_eal/linuxapp/eal/eal.c +@@ -944,6 +944,7 @@ rte_eal_init(int argc, char **argv) + static char 
logid[PATH_MAX]; + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; ++ bool phys_addrs; + + /* checks if the machine is adequate */ + if (!rte_cpu_is_supported()) { +@@ -1031,25 +1032,46 @@ rte_eal_init(int argc, char **argv) + return -1; + } + ++ phys_addrs = rte_eal_using_phys_addrs() != 0; ++ + /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */ + if (internal_config.iova_mode == RTE_IOVA_DC) { +- /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */ +- rte_eal_get_configuration()->iova_mode = +- rte_bus_get_iommu_class(); ++ /* autodetect the IOVA mapping mode */ ++ enum rte_iova_mode iova_mode = rte_bus_get_iommu_class(); + ++ if (iova_mode == RTE_IOVA_DC) { ++ iova_mode = phys_addrs ? RTE_IOVA_PA : RTE_IOVA_VA; ++ RTE_LOG(DEBUG, EAL, ++ "Buses did not request a specific IOVA mode, using '%s' based on physical addresses availability.\n", ++ phys_addrs ? "PA" : "VA"); ++ } ++#ifdef RTE_LIBRTE_KNI + /* Workaround for KNI which requires physical address to work */ +- if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA && ++ if (iova_mode == RTE_IOVA_VA && + rte_eal_check_module("rte_kni") == 1) { +- rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA; +- RTE_LOG(WARNING, EAL, +- "Some devices want IOVA as VA but PA will be used because.. " +- "KNI module inserted\n"); ++ if (phys_addrs) { ++ iova_mode = RTE_IOVA_PA; ++ RTE_LOG(WARNING, EAL, "Forcing IOVA as 'PA' because KNI module is loaded\n"); ++ } else { ++ RTE_LOG(DEBUG, EAL, "KNI can not work since physical addresses are unavailable\n"); ++ } + } ++#endif ++ rte_eal_get_configuration()->iova_mode = iova_mode; + } else { + rte_eal_get_configuration()->iova_mode = + internal_config.iova_mode; + } + ++ if (rte_eal_iova_mode() == RTE_IOVA_PA && !phys_addrs) { ++ rte_eal_init_alert("Cannot use IOVA as 'PA' since physical addresses are not available"); ++ rte_errno = EINVAL; ++ return -1; ++ } ++ ++ RTE_LOG(INFO, EAL, "Selected IOVA mode '%s'\n", ++ rte_eal_iova_mode() == RTE_IOVA_PA ? "PA" : "VA"); ++ + if (internal_config.no_hugetlbfs == 0) { + /* rte_config isn't initialized yet */ + ret = internal_config.process_type == RTE_PROC_PRIMARY ? +diff --git a/dpdk/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/dpdk/lib/librte_eal/linuxapp/eal/eal_memalloc.c +index bff7dcd58e..518314d89c 100644 +--- a/dpdk/lib/librte_eal/linuxapp/eal/eal_memalloc.c ++++ b/dpdk/lib/librte_eal/linuxapp/eal/eal_memalloc.c +@@ -806,7 +806,7 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi, + /* erase page data */ + memset(ms->addr, 0, ms->len); + +- if (mmap(ms->addr, ms->len, PROT_READ, ++ if (mmap(ms->addr, ms->len, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) == + MAP_FAILED) { + RTE_LOG(DEBUG, EAL, "couldn't unmap page\n"); +diff --git a/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c b/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c +index ac0424582e..4b82432dee 100644 +--- a/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c ++++ b/dpdk/lib/librte_eal/linuxapp/eal/eal_memory.c +@@ -61,34 +61,10 @@ + * zone as well as a physical contiguous zone. 
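
The reworked IOVA autodetection above boils down to a small decision tree. A condensed sketch, with select_iova_mode(), bus_request, phys_addrs and kni_loaded standing in for rte_bus_get_iommu_class(), rte_eal_using_phys_addrs() and rte_eal_check_module(); the later sanity check that rejects 'PA' when physical addresses are unreadable is left out:

#include <stdbool.h>

enum iova_mode { IOVA_DC, IOVA_PA, IOVA_VA };

static enum iova_mode
select_iova_mode(enum iova_mode bus_request, bool phys_addrs, bool kni_loaded)
{
	enum iova_mode mode = bus_request;

	/* No bus asked for a specific mode: prefer PA when physical
	 * addresses can be read, otherwise fall back to VA. */
	if (mode == IOVA_DC)
		mode = phys_addrs ? IOVA_PA : IOVA_VA;

	/* KNI needs physical addresses, so only downgrade VA to PA
	 * when that is actually possible. */
	if (mode == IOVA_VA && kni_loaded && phys_addrs)
		mode = IOVA_PA;

	return mode;
}
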
+ */ + +-static bool phys_addrs_available = true; ++static int phys_addrs_available = -1; + + #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" + +-static void +-test_phys_addrs_available(void) +-{ +- uint64_t tmp = 0; +- phys_addr_t physaddr; +- +- if (!rte_eal_has_hugepages()) { +- RTE_LOG(ERR, EAL, +- "Started without hugepages support, physical addresses not available\n"); +- phys_addrs_available = false; +- return; +- } +- +- physaddr = rte_mem_virt2phy(&tmp); +- if (physaddr == RTE_BAD_PHYS_ADDR) { +- if (rte_eal_iova_mode() == RTE_IOVA_PA) +- RTE_LOG(ERR, EAL, +- "Cannot obtain physical addresses: %s. " +- "Only vfio will function.\n", +- strerror(errno)); +- phys_addrs_available = false; +- } +-} +- + /* + * Get physical address of any mapped virtual address in the current process. + */ +@@ -101,8 +77,7 @@ rte_mem_virt2phy(const void *virtaddr) + int page_size; + off_t offset; + +- /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ +- if (!phys_addrs_available) ++ if (phys_addrs_available == 0) + return RTE_BAD_IOVA; + + /* standard page size */ +@@ -1332,8 +1307,6 @@ eal_legacy_hugepage_init(void) + int nr_hugefiles, nr_hugepages = 0; + void *addr; + +- test_phys_addrs_available(); +- + memset(used_hp, 0, sizeof(used_hp)); + + /* get pointer to global configuration */ +@@ -1466,7 +1439,7 @@ eal_legacy_hugepage_init(void) + continue; + } + +- if (phys_addrs_available && ++ if (rte_eal_using_phys_addrs() && + rte_eal_iova_mode() != RTE_IOVA_VA) { + /* find physical addresses for each hugepage */ + if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { +@@ -1685,8 +1658,6 @@ eal_hugepage_init(void) + uint64_t memory[RTE_MAX_NUMA_NODES]; + int hp_sz_idx, socket_id; + +- test_phys_addrs_available(); +- + memset(used_hp, 0, sizeof(used_hp)); + + for (hp_sz_idx = 0; +@@ -1812,8 +1783,6 @@ eal_legacy_hugepage_attach(void) + "into secondary processes\n"); + } + +- test_phys_addrs_available(); +- + fd_hugepage = open(eal_hugepage_data_path(), O_RDONLY); + if (fd_hugepage < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", +@@ -1950,6 +1919,15 @@ rte_eal_hugepage_attach(void) + int + rte_eal_using_phys_addrs(void) + { ++ if (phys_addrs_available == -1) { ++ uint64_t tmp = 0; ++ ++ if (rte_eal_has_hugepages() != 0 && ++ rte_mem_virt2phy(&tmp) != RTE_BAD_PHYS_ADDR) ++ phys_addrs_available = 1; ++ else ++ phys_addrs_available = 0; ++ } + return phys_addrs_available; + } + +diff --git a/dpdk/lib/librte_eal/linuxapp/eal/eal_vfio.c b/dpdk/lib/librte_eal/linuxapp/eal/eal_vfio.c +index 48d2abafaa..b4619c2117 100644 +--- a/dpdk/lib/librte_eal/linuxapp/eal/eal_vfio.c ++++ b/dpdk/lib/librte_eal/linuxapp/eal/eal_vfio.c +@@ -378,7 +378,7 @@ vfio_get_group_fd(struct vfio_config *vfio_cfg, + } + + vfio_group_fd = vfio_open_group_fd(iommu_group_num); +- if (vfio_group_fd < 0) { ++ if (vfio_group_fd <= 0) { + RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num); + return -1; + } +@@ -1025,6 +1025,7 @@ vfio_get_default_container_fd(void) + struct rte_mp_reply mp_reply = {0}; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; ++ int container_fd; + + if (default_vfio_cfg->vfio_enabled) + return default_vfio_cfg->vfio_container_fd; +@@ -1047,8 +1048,9 @@ vfio_get_default_container_fd(void) + mp_rep = &mp_reply.msgs[0]; + p = (struct vfio_mp_param *)mp_rep->param; + if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { ++ container_fd = mp_rep->fds[0]; + free(mp_reply.msgs); +- return mp_rep->fds[0]; 
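
The hunk here fixes a use-after-free: mp_rep points into the mp_reply.msgs buffer, so the old code returned mp_rep->fds[0] out of memory it had just freed, while the new code copies the descriptor out first. The same save-before-free pattern in isolation (struct reply and take_fd are illustrative names):

#include <stdlib.h>

struct reply { int fd; };

static int
take_fd(struct reply *msgs)
{
	int fd = msgs[0].fd;	/* copy out while the buffer is alive */

	free(msgs);
	return fd;		/* no access to freed memory */
}
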
++ return container_fd; + } + } + +diff --git a/dpdk/lib/librte_ethdev/ethdev_profile.h b/dpdk/lib/librte_ethdev/ethdev_profile.h +index 65031e6f3f..e5ee4df824 100644 +--- a/dpdk/lib/librte_ethdev/ethdev_profile.h ++++ b/dpdk/lib/librte_ethdev/ethdev_profile.h +@@ -24,4 +24,13 @@ + int + __rte_eth_dev_profile_init(uint16_t port_id, struct rte_eth_dev *dev); + ++#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE ++ ++uint16_t ++profile_hook_rx_burst_cb(uint16_t port_id, uint16_t queue_id, ++ struct rte_mbuf *pkts[], uint16_t nb_pkts, ++ uint16_t max_pkts, void *user_param); ++ ++#endif /* RTE_ETHDEV_PROFILE_WITH_VTUNE */ ++ + #endif +diff --git a/dpdk/lib/librte_ethdev/rte_ethdev.c b/dpdk/lib/librte_ethdev/rte_ethdev.c +index 13866150ef..2929d10c22 100644 +--- a/dpdk/lib/librte_ethdev/rte_ethdev.c ++++ b/dpdk/lib/librte_ethdev/rte_ethdev.c +@@ -2705,7 +2705,7 @@ rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask) + /* save original values in case of failure */ + orig_offloads = dev->data->dev_conf.rxmode.offloads; + +- /*check which option changed by application*/ ++ /* check which option changed by application */ + cur = !!(offload_mask & ETH_VLAN_STRIP_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_STRIP); +@@ -3861,7 +3861,7 @@ rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id, + cb->param = user_param; + + rte_spinlock_lock(&rte_eth_rx_cb_lock); +- /* Add the callbacks at fisrt position*/ ++ /* Add the callbacks at first position */ + cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id]; + rte_smp_wmb(); + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb; +diff --git a/dpdk/lib/librte_eventdev/rte_eventdev.c b/dpdk/lib/librte_eventdev/rte_eventdev.c +index 6396a96490..a2db0fd5fb 100644 +--- a/dpdk/lib/librte_eventdev/rte_eventdev.c ++++ b/dpdk/lib/librte_eventdev/rte_eventdev.c +@@ -13,6 +13,7 @@ + #include <sys/types.h> + #include <sys/queue.h> + ++#include <rte_string_fns.h> + #include <rte_byteorder.h> + #include <rte_log.h> + #include <rte_debug.h> +@@ -1362,15 +1363,17 @@ rte_event_pmd_allocate(const char *name, int socket_id) + + eventdev->data = eventdev_data; + +- snprintf(eventdev->data->name, RTE_EVENTDEV_NAME_MAX_LEN, +- "%s", name); ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + +- eventdev->data->dev_id = dev_id; +- eventdev->data->socket_id = socket_id; +- eventdev->data->dev_started = 0; ++ strlcpy(eventdev->data->name, name, ++ RTE_EVENTDEV_NAME_MAX_LEN); + +- eventdev->attached = RTE_EVENTDEV_ATTACHED; ++ eventdev->data->dev_id = dev_id; ++ eventdev->data->socket_id = socket_id; ++ eventdev->data->dev_started = 0; ++ } + ++ eventdev->attached = RTE_EVENTDEV_ATTACHED; + eventdev_globals.nb_devs++; + } + +diff --git a/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h b/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h +index 8fb61386fd..443cd38c23 100644 +--- a/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h ++++ b/dpdk/lib/librte_eventdev/rte_eventdev_pmd_pci.h +@@ -112,9 +112,11 @@ rte_event_pmd_pci_remove(struct rte_pci_device *pci_dev, + if (eventdev == NULL) + return -ENODEV; + +- ret = rte_event_dev_close(eventdev->data->dev_id); +- if (ret < 0) +- return ret; ++ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { ++ ret = rte_event_dev_close(eventdev->data->dev_id); ++ if (ret < 0) ++ return ret; ++ } + + /* Invoke PMD device un-init function */ + if (devuninit) +diff --git a/dpdk/lib/librte_kvargs/rte_kvargs.c b/dpdk/lib/librte_kvargs/rte_kvargs.c +index f7030c63b7..b6f8a6db87 100644 +--- 
a/dpdk/lib/librte_kvargs/rte_kvargs.c ++++ b/dpdk/lib/librte_kvargs/rte_kvargs.c +@@ -50,6 +50,8 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params) + /* Find the end of the list. */ + while (str[strlen(str) - 1] != ']') { + /* Restore the comma erased by strtok_r(). */ ++ if (ctx1 == NULL || ctx1[0] == '\0') ++ return -1; /* no closing bracket */ + str[strlen(str)] = ','; + /* Parse until next comma. */ + str = strtok_r(NULL, RTE_KVARGS_PAIRS_DELIM, &ctx1); +diff --git a/dpdk/lib/librte_kvargs/rte_kvargs.h b/dpdk/lib/librte_kvargs/rte_kvargs.h +index 1946195de4..eff598e08b 100644 +--- a/dpdk/lib/librte_kvargs/rte_kvargs.h ++++ b/dpdk/lib/librte_kvargs/rte_kvargs.h +@@ -171,7 +171,7 @@ unsigned rte_kvargs_count(const struct rte_kvargs *kvlist, + * 0 if the strings match. + * !0 otherwise or on error. + * +- * Unless strcmp, comparison ordering is not kept. ++ * Unlike strcmp, comparison ordering is not kept. + * In order for rte_kvargs_process to stop processing on match error, + * a negative value is returned even if strcmp had returned a positive one. + */ +diff --git a/dpdk/lib/librte_lpm/rte_lpm6.c b/dpdk/lib/librte_lpm/rte_lpm6.c +index 6212003f4c..5b0db3b549 100644 +--- a/dpdk/lib/librte_lpm/rte_lpm6.c ++++ b/dpdk/lib/librte_lpm/rte_lpm6.c +@@ -725,7 +725,8 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, + tbl8_group_start = tbl8_gindex * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + memset(&lpm->tbl8[tbl8_group_start], 0, +- RTE_LPM6_TBL8_GROUP_NUM_ENTRIES); ++ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * ++ sizeof(struct rte_lpm6_tbl_entry)); + + /* init the new table's header: + * save the reference to the owner table +@@ -824,7 +825,7 @@ VERSION_SYMBOL(rte_lpm6_add, _v20, 2.0); + * + * Returns: + * 0 on success +- * -ENOSPC not enought tbl8 left ++ * -ENOSPC not enough tbl8 left + */ + static int + simulate_add(struct rte_lpm6 *lpm, const uint8_t *masked_ip, uint8_t depth) +@@ -854,7 +855,7 @@ simulate_add(struct rte_lpm6 *lpm, const uint8_t *masked_ip, uint8_t depth) + } + + if (tbl8_available(lpm) < total_need_tbl_nb) +- /* not enought tbl8 to add a rule */ ++ /* not enough tbl8 to add a rule */ + return -ENOSPC; + + return 0; +@@ -1314,7 +1315,7 @@ rule_find_range(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth, + /* minus top level */ + depth -= 24; + +- /* interate through levels (tbl8s) ++ /* iterate through levels (tbl8s) + * until we reach the last one + */ + while (depth > 8) { +diff --git a/dpdk/lib/librte_pci/rte_pci.c b/dpdk/lib/librte_pci/rte_pci.c +index f400178bb6..2c98c3efb5 100644 +--- a/dpdk/lib/librte_pci/rte_pci.c ++++ b/dpdk/lib/librte_pci/rte_pci.c +@@ -20,6 +20,7 @@ + #include <rte_eal.h> + #include <rte_string_fns.h> + #include <rte_common.h> ++#include <rte_debug.h> + + #include "rte_pci.h" + +@@ -34,6 +35,12 @@ get_u8_pciaddr_field(const char *in, void *_u8, char dlm) + if (*in == '\0') + return NULL; + ++ /* PCI field starting with spaces is forbidden. ++ * Negative wrap-around is not reported as an error by strtoul. ++ */ ++ if (*in == ' ' || *in == '-') ++ return NULL; ++ + errno = 0; + val = strtoul(in, &end, 16); + if (errno != 0 || end[0] != dlm || val > UINT8_MAX) { +@@ -69,11 +76,17 @@ pci_dbdf_parse(const char *input, struct rte_pci_addr *dev_addr) + unsigned long val; + char *end; + ++ /* PCI id starting with spaces is forbidden. ++ * Negative wrap-around is not reported as an error by strtoul. 
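
strtoul() accepts an optional sign and silently wraps negative input, so "-1" parses to ULONG_MAX with errno still 0; the explicit ' ' and '-' rejection above is therefore the only reliable guard. A self-contained demonstration (output assumes a 64-bit unsigned long):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	char *end;
	unsigned long val;

	errno = 0;
	val = strtoul("-1", &end, 16);
	/* Prints "val=ffffffffffffffff errno=0": wrapped, no error. */
	printf("val=%lx errno=%d\n", val, errno);
	return 0;
}
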
++ */ ++ if (*in == ' ' || *in == '-') ++ return -EINVAL; ++ + errno = 0; + val = strtoul(in, &end, 16); +- if (errno != 0 || end[0] != ':' || val > UINT16_MAX) ++ if (errno != 0 || end[0] != ':' || val > UINT32_MAX) + return -EINVAL; +- dev_addr->domain = (uint16_t)val; ++ dev_addr->domain = (uint32_t)val; + in = end + 1; + in = get_u8_pciaddr_field(in, &dev_addr->bus, ':'); + if (in == NULL) +diff --git a/dpdk/lib/librte_security/rte_security.c b/dpdk/lib/librte_security/rte_security.c +index a222b33cec..6ff7a9e69b 100644 +--- a/dpdk/lib/librte_security/rte_security.c ++++ b/dpdk/lib/librte_security/rte_security.c +@@ -1,6 +1,7 @@ + /* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 NXP. + * Copyright(c) 2017 Intel Corporation. ++ * Copyright (c) 2020 Samsung Electronics Co., Ltd All Rights Reserved + */ + + #include <rte_malloc.h> +@@ -9,6 +10,19 @@ + #include "rte_security.h" + #include "rte_security_driver.h" + ++/* Macro to check for invalid pointers */ ++#define RTE_PTR_OR_ERR_RET(ptr, retval) do { \ ++ if ((ptr) == NULL) \ ++ return retval; \ ++} while (0) ++ ++/* Macro to check for invalid pointers chains */ ++#define RTE_PTR_CHAIN3_OR_ERR_RET(p1, p2, p3, retval, last_retval) do { \ ++ RTE_PTR_OR_ERR_RET(p1, retval); \ ++ RTE_PTR_OR_ERR_RET(p1->p2, retval); \ ++ RTE_PTR_OR_ERR_RET(p1->p2->p3, last_retval); \ ++} while (0) ++ + struct rte_security_session * + rte_security_session_create(struct rte_security_ctx *instance, + struct rte_security_session_conf *conf, +@@ -16,10 +30,9 @@ rte_security_session_create(struct rte_security_ctx *instance, + { + struct rte_security_session *sess = NULL; + +- if (conf == NULL) +- return NULL; +- +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_create, NULL); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_create, NULL, NULL); ++ RTE_PTR_OR_ERR_RET(conf, NULL); ++ RTE_PTR_OR_ERR_RET(mp, NULL); + + if (rte_mempool_get(mp, (void **)&sess)) + return NULL; +@@ -38,14 +51,19 @@ rte_security_session_update(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_security_session_conf *conf) + { +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_update, -ENOTSUP); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_update, -EINVAL, ++ -ENOTSUP); ++ RTE_PTR_OR_ERR_RET(sess, -EINVAL); ++ RTE_PTR_OR_ERR_RET(conf, -EINVAL); ++ + return instance->ops->session_update(instance->device, sess, conf); + } + + unsigned int + rte_security_session_get_size(struct rte_security_ctx *instance) + { +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_get_size, 0); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_get_size, 0, 0); ++ + return instance->ops->session_get_size(instance->device); + } + +@@ -54,7 +72,11 @@ rte_security_session_stats_get(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_security_stats *stats) + { +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_stats_get, -ENOTSUP); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_stats_get, -EINVAL, ++ -ENOTSUP); ++ /* Parameter sess can be NULL in case of getting global statistics. 
*/ ++ RTE_PTR_OR_ERR_RET(stats, -EINVAL); ++ + return instance->ops->session_stats_get(instance->device, sess, stats); + } + +@@ -64,16 +86,20 @@ rte_security_session_destroy(struct rte_security_ctx *instance, + { + int ret; + +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_destroy, -ENOTSUP); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, session_destroy, -EINVAL, ++ -ENOTSUP); ++ RTE_PTR_OR_ERR_RET(sess, -EINVAL); ++ ++ ret = instance->ops->session_destroy(instance->device, sess); ++ if (ret != 0) ++ return ret; ++ ++ rte_mempool_put(rte_mempool_from_obj(sess), (void *)sess); + + if (instance->sess_cnt) + instance->sess_cnt--; + +- ret = instance->ops->session_destroy(instance->device, sess); +- if (!ret) +- rte_mempool_put(rte_mempool_from_obj(sess), (void *)sess); +- +- return ret; ++ return 0; + } + + int +@@ -81,6 +107,11 @@ rte_security_set_pkt_metadata(struct rte_security_ctx *instance, + struct rte_security_session *sess, + struct rte_mbuf *m, void *params) + { ++#ifdef RTE_DEBUG ++ RTE_PTR_OR_ERR_RET(sess, -EINVAL); ++ RTE_PTR_OR_ERR_RET(instance, -EINVAL); ++ RTE_PTR_OR_ERR_RET(instance->ops, -EINVAL); ++#endif + RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->set_pkt_metadata, -ENOTSUP); + return instance->ops->set_pkt_metadata(instance->device, + sess, m, params); +@@ -91,6 +122,10 @@ rte_security_get_userdata(struct rte_security_ctx *instance, uint64_t md) + { + void *userdata = NULL; + ++#ifdef RTE_DEBUG ++ RTE_PTR_OR_ERR_RET(instance, NULL); ++ RTE_PTR_OR_ERR_RET(instance->ops, NULL); ++#endif + RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->get_userdata, NULL); + if (instance->ops->get_userdata(instance->device, md, &userdata)) + return NULL; +@@ -101,7 +136,8 @@ rte_security_get_userdata(struct rte_security_ctx *instance, uint64_t md) + const struct rte_security_capability * + rte_security_capabilities_get(struct rte_security_ctx *instance) + { +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->capabilities_get, NULL); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, capabilities_get, NULL, NULL); ++ + return instance->ops->capabilities_get(instance->device); + } + +@@ -113,7 +149,9 @@ rte_security_capability_get(struct rte_security_ctx *instance, + const struct rte_security_capability *capability; + uint16_t i = 0; + +- RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->capabilities_get, NULL); ++ RTE_PTR_CHAIN3_OR_ERR_RET(instance, ops, capabilities_get, NULL, NULL); ++ RTE_PTR_OR_ERR_RET(idx, NULL); ++ + capabilities = instance->ops->capabilities_get(instance->device); + + if (capabilities == NULL) +@@ -121,7 +159,7 @@ rte_security_capability_get(struct rte_security_ctx *instance, + + while ((capability = &capabilities[i++])->action + != RTE_SECURITY_ACTION_TYPE_NONE) { +- if (capability->action == idx->action && ++ if (capability->action == idx->action && + capability->protocol == idx->protocol) { + if (idx->protocol == RTE_SECURITY_PROTOCOL_IPSEC) { + if (capability->ipsec.proto == +diff --git a/dpdk/lib/librte_security/rte_security.h b/dpdk/lib/librte_security/rte_security.h +index ad7898c72b..133ce9481c 100644 +--- a/dpdk/lib/librte_security/rte_security.h ++++ b/dpdk/lib/librte_security/rte_security.h +@@ -342,7 +342,7 @@ rte_security_session_create(struct rte_security_ctx *instance, + * @param conf update configuration parameters + * @return + * - On success returns 0 +- * - On failure return errno ++ * - On failure returns a negative errno value. 
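
The reordered rte_security_session_destroy() above releases the session to its mempool only after the driver reports success; a failed destroy must leave the object allocated instead of recycling a half-destroyed session. The shape of the pattern, with driver_destroy() and pool_put() as placeholders for the PMD hook and rte_mempool_put():

int driver_destroy(void *obj);	/* placeholder for the PMD hook */
void pool_put(void *obj);	/* placeholder for rte_mempool_put() */

static int
destroy_then_release(void *obj)
{
	int ret = driver_destroy(obj);	/* may fail, e.g. with -EBUSY */

	if (ret != 0)
		return ret;	/* object stays allocated and usable */

	pool_put(obj);		/* recycle only fully destroyed objects */
	return 0;
}
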
+ */ + int __rte_experimental + rte_security_session_update(struct rte_security_ctx *instance, +@@ -366,12 +366,14 @@ rte_security_session_get_size(struct rte_security_ctx *instance); + * return it to its original mempool. + * + * @param instance security instance +- * @param sess security session to freed ++ * @param sess security session to be freed + * + * @return + * - 0 if successful. +- * - -EINVAL if session is NULL. ++ * - -EINVAL if session or context instance is NULL. + * - -EBUSY if not all device private data has been freed. ++ * - -ENOTSUP if destroying private data is not supported. ++ * - other negative values in case of freeing private data errors. + */ + int + rte_security_session_destroy(struct rte_security_ctx *instance, +diff --git a/dpdk/lib/librte_vhost/iotlb.c b/dpdk/lib/librte_vhost/iotlb.c +index c6354fef7e..3dfde94e80 100644 +--- a/dpdk/lib/librte_vhost/iotlb.c ++++ b/dpdk/lib/librte_vhost/iotlb.c +@@ -336,8 +336,9 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index) + TAILQ_INIT(&vq->iotlb_list); + TAILQ_INIT(&vq->iotlb_pending_list); + +- snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d", +- dev->vid, vq_index); ++ snprintf(pool_name, sizeof(pool_name), "iotlb_%u_%d_%d", ++ getpid(), dev->vid, vq_index); ++ RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB cache name: %s\n", pool_name); + + /* If already created, free it and recreate */ + vq->iotlb_pool = rte_mempool_lookup(pool_name); +diff --git a/dpdk/lib/librte_vhost/rte_vhost.h b/dpdk/lib/librte_vhost/rte_vhost.h +index ce1f12e1d5..d9307987a8 100644 +--- a/dpdk/lib/librte_vhost/rte_vhost.h ++++ b/dpdk/lib/librte_vhost/rte_vhost.h +@@ -64,6 +64,10 @@ extern "C" { + #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8 + #endif + ++#ifndef VHOST_USER_PROTOCOL_F_CONFIG ++#define VHOST_USER_PROTOCOL_F_CONFIG 9 ++#endif ++ + #ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD + #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10 + #endif +@@ -77,6 +81,7 @@ extern "C" { + #define VHOST_USER_F_PROTOCOL_FEATURES 30 + #endif + ++ + /** + * Information relating to memory regions including offsets to + * addresses in QEMUs memory file. +@@ -132,7 +137,15 @@ struct vhost_device_ops { + int (*new_connection)(int vid); + void (*destroy_connection)(int vid); + +- void *reserved[2]; /**< Reserved for future extension */ ++ /** ++ * This callback gets called each time a guest gets notified ++ * about waiting packets. This is the interrupt handling trough ++ * the eventfd_write(callfd), which can be used for counting these ++ * "slow" syscalls. ++ */ ++ void (*guest_notified)(int vid); ++ ++ void *reserved[1]; /**< Reserved for future extension */ + }; + + /** +diff --git a/dpdk/lib/librte_vhost/socket.c b/dpdk/lib/librte_vhost/socket.c +index 75f6703f56..6f7142be23 100644 +--- a/dpdk/lib/librte_vhost/socket.c ++++ b/dpdk/lib/librte_vhost/socket.c +@@ -890,6 +890,10 @@ rte_vhost_driver_register(const char *path, uint64_t flags) + * not compatible with postcopy. 
+ */ + if (vsocket->dequeue_zero_copy) { ++ if ((flags & RTE_VHOST_USER_CLIENT) != 0) ++ RTE_LOG(WARNING, VHOST_CONFIG, ++ "zero copy may be incompatible with vhost client mode\n"); ++ + vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER); + vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER); + +diff --git a/dpdk/lib/librte_vhost/vhost.h b/dpdk/lib/librte_vhost/vhost.h +index 535591927f..158b97375c 100644 +--- a/dpdk/lib/librte_vhost/vhost.h ++++ b/dpdk/lib/librte_vhost/vhost.h +@@ -512,7 +512,6 @@ vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq, + #define PRINT_PACKET(device, addr, size, header) do {} while (0) + #endif + +-extern uint64_t VHOST_FEATURES; + #define MAX_VHOST_DEVICE 1024 + extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; + +@@ -655,13 +654,19 @@ vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq) + + if ((vhost_need_event(vhost_used_event(vq), new, old) && + (vq->callfd >= 0)) || +- unlikely(!signalled_used_valid)) ++ unlikely(!signalled_used_valid)) { + eventfd_write(vq->callfd, (eventfd_t) 1); ++ if (dev->notify_ops->guest_notified) ++ dev->notify_ops->guest_notified(dev->vid); ++ } + } else { + /* Kick the guest if necessary. */ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) +- && (vq->callfd >= 0)) ++ && (vq->callfd >= 0)) { + eventfd_write(vq->callfd, (eventfd_t)1); ++ if (dev->notify_ops->guest_notified) ++ dev->notify_ops->guest_notified(dev->vid); ++ } + } + } + +@@ -712,8 +717,11 @@ vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq) + if (vhost_need_event(off, new, old)) + kick = true; + kick: +- if (kick) ++ if (kick) { + eventfd_write(vq->callfd, (eventfd_t)1); ++ if (dev->notify_ops->guest_notified) ++ dev->notify_ops->guest_notified(dev->vid); ++ } + } + + static __rte_always_inline void +diff --git a/dpdk/lib/librte_vhost/vhost_crypto.c b/dpdk/lib/librte_vhost/vhost_crypto.c +index cf01c7ebe3..dfbac66f05 100644 +--- a/dpdk/lib/librte_vhost/vhost_crypto.c ++++ b/dpdk/lib/librte_vhost/vhost_crypto.c +@@ -40,7 +40,8 @@ + (1 << VIRTIO_RING_F_EVENT_IDX) | \ + (1 << VIRTIO_CRYPTO_SERVICE_CIPHER) | \ + (1 << VIRTIO_CRYPTO_SERVICE_MAC) | \ +- (1 << VIRTIO_NET_F_CTRL_VQ)) ++ (1 << VIRTIO_NET_F_CTRL_VQ) | \ ++ (1 << VHOST_USER_PROTOCOL_F_CONFIG)) + + #define IOVA_TO_VVA(t, r, a, l, p) \ + ((t)(uintptr_t)vhost_iova_to_vva(r->dev, r->vq, a, l, p)) +@@ -236,6 +237,11 @@ transform_cipher_param(struct rte_crypto_sym_xform *xform, + if (unlikely(ret < 0)) + return ret; + ++ if (param->cipher_key_len > VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH) { ++ VC_LOG_DBG("Invalid cipher key length\n"); ++ return -VIRTIO_CRYPTO_BADMSG; ++ } ++ + xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + xform->cipher.key.length = param->cipher_key_len; + if (xform->cipher.key.length > 0) +@@ -286,6 +292,12 @@ transform_chain_param(struct rte_crypto_sym_xform *xforms, + &xform_cipher->cipher.algo); + if (unlikely(ret < 0)) + return ret; ++ ++ if (param->cipher_key_len > VHOST_USER_CRYPTO_MAX_CIPHER_KEY_LENGTH) { ++ VC_LOG_DBG("Invalid cipher key length\n"); ++ return -VIRTIO_CRYPTO_BADMSG; ++ } ++ + xform_cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + xform_cipher->cipher.key.length = param->cipher_key_len; + xform_cipher->cipher.key.data = param->cipher_key_buf; +@@ -300,6 +312,12 @@ transform_chain_param(struct rte_crypto_sym_xform *xforms, + ret = auth_algo_transform(param->hash_algo, &xform_auth->auth.algo); + if (unlikely(ret < 0)) + return ret; ++ ++ if (param->auth_key_len > 
VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH) { ++ VC_LOG_DBG("Invalid auth key length\n"); ++ return -VIRTIO_CRYPTO_BADMSG; ++ } ++ + xform_auth->auth.digest_length = param->digest_len; + xform_auth->auth.key.length = param->auth_key_len; + xform_auth->auth.key.data = param->auth_key_buf; +diff --git a/dpdk/lib/librte_vhost/vhost_user.c b/dpdk/lib/librte_vhost/vhost_user.c +index 2f4bbb342d..4ed75104b9 100644 +--- a/dpdk/lib/librte_vhost/vhost_user.c ++++ b/dpdk/lib/librte_vhost/vhost_user.c +@@ -1595,10 +1595,10 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg, + size = msg->payload.log.mmap_size; + off = msg->payload.log.mmap_offset; + +- /* Don't allow mmap_offset to point outside the mmap region */ +- if (off > size) { ++ /* Check for mmap size and offset overflow. */ ++ if (off >= -size) { + RTE_LOG(ERR, VHOST_CONFIG, +- "log offset %#"PRIx64" exceeds log size %#"PRIx64"\n", ++ "log offset %#"PRIx64" and log size %#"PRIx64" overflow\n", + off, size); + return VH_RESULT_ERR; + } +@@ -2062,7 +2062,7 @@ static int + vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, + struct VhostUserMsg *msg) + { +- uint16_t vring_idx; ++ uint32_t vring_idx; + + switch (msg->request.master) { + case VHOST_USER_SET_VRING_KICK: +@@ -2329,11 +2329,19 @@ static int process_slave_message_reply(struct virtio_net *dev, + if ((msg->flags & VHOST_USER_NEED_REPLY) == 0) + return 0; + +- if (read_vhost_message(dev->slave_req_fd, &msg_reply) < 0) { ++ ret = read_vhost_message(dev->slave_req_fd, &msg_reply); ++ if (ret <= 0) { ++ if (ret < 0) ++ RTE_LOG(INFO, VHOST_CONFIG, ++ "vhost read slave message reply failed\n"); ++ else ++ RTE_LOG(INFO, VHOST_CONFIG, ++ "vhost peer closed\n"); + ret = -1; + goto out; + } + ++ ret = 0; + if (msg_reply.request.slave != msg->request.slave) { + RTE_LOG(ERR, VHOST_CONFIG, + "Received unexpected msg type (%u), expected %u\n", +diff --git a/dpdk/meson.build b/dpdk/meson.build +index 45b5a37f35..82c676c010 100644 +--- a/dpdk/meson.build ++++ b/dpdk/meson.build +@@ -2,7 +2,7 @@ + # Copyright(c) 2017 Intel Corporation + + project('DPDK', 'C', +- version: '18.11.7', ++ version: '18.11.9', + license: 'BSD', + default_options: ['buildtype=release', 'default_library=static'], + meson_version: '>= 0.41' +diff --git a/dpdk/mk/toolchain/gcc/rte.vars.mk b/dpdk/mk/toolchain/gcc/rte.vars.mk +index b852fcfd7e..50f5e6f58d 100644 +--- a/dpdk/mk/toolchain/gcc/rte.vars.mk ++++ b/dpdk/mk/toolchain/gcc/rte.vars.mk +@@ -71,6 +71,11 @@ ifeq ($(shell test $(GCC_VERSION) -lt 47 && echo 1), 1) + WERROR_FLAGS += -Wno-uninitialized + endif + ++ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1) ++# FIXME: Bugzilla 396 ++WERROR_FLAGS += -Wno-zero-length-bounds ++endif ++ + HOST_WERROR_FLAGS := $(WERROR_FLAGS) + + ifeq ($(shell test $(HOST_GCC_VERSION) -gt 70 && echo 1), 1) +diff --git a/dpdk/pkg/dpdk.spec b/dpdk/pkg/dpdk.spec +index d497857178..fee77eabe9 100644 +--- a/dpdk/pkg/dpdk.spec ++++ b/dpdk/pkg/dpdk.spec +@@ -2,7 +2,7 @@ + # Copyright 2014 6WIND S.A. 
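
The vhost-user log-base check above relies on unsigned wrap-around: with uint64_t operands, -size equals 2^64 - size, so "off >= -size" asks whether off + size would pass 2^64 without performing the overflowing addition itself. A compact standalone version:

#include <stdint.h>
#include <stdio.h>

static int
mmap_region_wraps(uint64_t off, uint64_t size)
{
	/* For size != 0 this is exactly "off + size > UINT64_MAX";
	 * size == 0 is also rejected as a degenerate mapping. */
	return off >= -size;
}

int
main(void)
{
	printf("%d\n", mmap_region_wraps(UINT64_MAX, 1));	/* 1: wraps */
	printf("%d\n", mmap_region_wraps(0x1000, 0x1000));	/* 0: fine */
	return 0;
}
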
+ + Name: dpdk +-Version: 18.11.7 ++Version: 18.11.9 + Release: 1 + Packager: packaging@6wind.com + URL: http://dpdk.org +diff --git a/dpdk/test/test-acl/main.c b/dpdk/test/test-acl/main.c +index 648525af56..420a2c9b99 100644 +--- a/dpdk/test/test-acl/main.c ++++ b/dpdk/test/test-acl/main.c +@@ -11,7 +11,7 @@ + #include <rte_lcore.h> + #include <rte_ip.h> + +-#define PRINT_USAGE_START "%s [EAL options]\n" ++#define PRINT_USAGE_START "%s [EAL options] --\n" + + #define RTE_LOGTYPE_TESTACL RTE_LOGTYPE_USER1 + +diff --git a/dpdk/test/test-pipeline/config.c b/dpdk/test/test-pipeline/config.c +index 28ac9fcc0e..33f3f1c827 100644 +--- a/dpdk/test/test-pipeline/config.c ++++ b/dpdk/test/test-pipeline/config.c +@@ -42,8 +42,6 @@ + + #include "main.h" + +-struct app_params app; +- + static const char usage[] = "\n"; + + void +diff --git a/dpdk/test/test/test.h b/dpdk/test/test/test.h +index 7c24432303..705a311068 100644 +--- a/dpdk/test/test/test.h ++++ b/dpdk/test/test/test.h +@@ -22,8 +22,6 @@ + # define TEST_TRACE_FAILURE(_file, _line, _func) + #endif + +-#define RTE_TEST_TRACE_FAILURE TEST_TRACE_FAILURE +- + #include <rte_test.h> + + #define TEST_ASSERT RTE_TEST_ASSERT +diff --git a/dpdk/test/test/test_acl.c b/dpdk/test/test/test_acl.c +index b1f75d1bc7..4fedcf446f 100644 +--- a/dpdk/test/test/test_acl.c ++++ b/dpdk/test/test/test_acl.c +@@ -1394,16 +1394,18 @@ test_invalid_parameters(void) + } else + rte_acl_free(acx); + +- /* invalid NUMA node */ +- memcpy(¶m, &acl_param, sizeof(param)); +- param.socket_id = RTE_MAX_NUMA_NODES + 1; +- +- acx = rte_acl_create(¶m); +- if (acx != NULL) { +- printf("Line %i: ACL context creation with invalid NUMA " +- "should have failed!\n", __LINE__); +- rte_acl_free(acx); +- return -1; ++ if (rte_eal_has_hugepages()) { ++ /* invalid NUMA node */ ++ memcpy(¶m, &acl_param, sizeof(param)); ++ param.socket_id = RTE_MAX_NUMA_NODES + 1; ++ ++ acx = rte_acl_create(¶m); ++ if (acx != NULL) { ++ printf("Line %i: ACL context creation with invalid " ++ "NUMA should have failed!\n", __LINE__); ++ rte_acl_free(acx); ++ return -1; ++ } + } + + /* NULL name */ +diff --git a/dpdk/test/test/test_cryptodev_blockcipher.c b/dpdk/test/test/test_cryptodev_blockcipher.c +index 4f1298ea21..660d5a7682 100644 +--- a/dpdk/test/test/test_cryptodev_blockcipher.c ++++ b/dpdk/test/test/test_cryptodev_blockcipher.c +@@ -85,7 +85,7 @@ test_blockcipher_one_case(const struct blockcipher_test_case *t, + uint64_t feat_flags = dev_info.feature_flags; + uint64_t oop_flag = RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT; + +- if (t->feature_mask && BLOCKCIPHER_TEST_FEATURE_OOP) { ++ if (t->feature_mask & BLOCKCIPHER_TEST_FEATURE_OOP) { + if (!(feat_flags & oop_flag)) { + printf("Device doesn't support out-of-place " + "scatter-gather in input mbuf. 
" +diff --git a/dpdk/test/test/test_flow_classify.c b/dpdk/test/test/test_flow_classify.c +index 90066713ed..b4ae229079 100644 +--- a/dpdk/test/test/test_flow_classify.c ++++ b/dpdk/test/test/test_flow_classify.c +@@ -23,7 +23,7 @@ + + #define FLOW_CLASSIFY_MAX_RULE_NUM 100 + #define MAX_PKT_BURST 32 +-#define NB_SOCKETS 1 ++#define NB_SOCKETS 4 + #define MEMPOOL_CACHE_SIZE 256 + #define MBUF_SIZE 512 + #define NB_MBUF 512 +diff --git a/dpdk/test/test/test_hash.c b/dpdk/test/test/test_hash.c +index fe607fadf2..ea711b1f10 100644 +--- a/dpdk/test/test/test_hash.c ++++ b/dpdk/test/test/test_hash.c +@@ -1046,8 +1046,11 @@ fbk_hash_unit_test(void) + handle = rte_fbk_hash_create(&invalid_params_7); + RETURN_IF_ERROR_FBK(handle != NULL, "fbk hash creation should have failed"); + +- handle = rte_fbk_hash_create(&invalid_params_8); +- RETURN_IF_ERROR_FBK(handle != NULL, "fbk hash creation should have failed"); ++ if (rte_eal_has_hugepages()) { ++ handle = rte_fbk_hash_create(&invalid_params_8); ++ RETURN_IF_ERROR_FBK(handle != NULL, ++ "fbk hash creation should have failed"); ++ } + + handle = rte_fbk_hash_create(&invalid_params_same_name_1); + RETURN_IF_ERROR_FBK(handle == NULL, "fbk hash creation should have succeeded"); +diff --git a/dpdk/test/test/test_kvargs.c b/dpdk/test/test/test_kvargs.c +index a42056f361..2a2dae43a0 100644 +--- a/dpdk/test/test/test_kvargs.c ++++ b/dpdk/test/test/test_kvargs.c +@@ -142,7 +142,7 @@ static int test_valid_kvargs(void) + valid_keys = valid_keys_list; + kvlist = rte_kvargs_parse(args, valid_keys); + if (kvlist == NULL) { +- printf("rte_kvargs_parse() error"); ++ printf("rte_kvargs_parse() error\n"); + goto fail; + } + if (strcmp(kvlist->pairs[0].value, "[0,1]") != 0) { +@@ -157,6 +157,40 @@ static int test_valid_kvargs(void) + } + rte_kvargs_free(kvlist); + ++ /* test using empty string (it is valid) */ ++ args = ""; ++ kvlist = rte_kvargs_parse(args, NULL); ++ if (kvlist == NULL) { ++ printf("rte_kvargs_parse() error\n"); ++ goto fail; ++ } ++ if (rte_kvargs_count(kvlist, NULL) != 0) { ++ printf("invalid count value\n"); ++ goto fail; ++ } ++ rte_kvargs_free(kvlist); ++ ++ /* test using empty elements (it is valid) */ ++ args = "foo=1,,check=value2,,"; ++ kvlist = rte_kvargs_parse(args, NULL); ++ if (kvlist == NULL) { ++ printf("rte_kvargs_parse() error\n"); ++ goto fail; ++ } ++ if (rte_kvargs_count(kvlist, NULL) != 2) { ++ printf("invalid count value\n"); ++ goto fail; ++ } ++ if (rte_kvargs_count(kvlist, "foo") != 1) { ++ printf("invalid count value for 'foo'\n"); ++ goto fail; ++ } ++ if (rte_kvargs_count(kvlist, "check") != 1) { ++ printf("invalid count value for 'check'\n"); ++ goto fail; ++ } ++ rte_kvargs_free(kvlist); ++ + return 0; + + fail: +@@ -179,11 +213,11 @@ static int test_invalid_kvargs(void) + const char *args_list[] = { + "wrong-key=x", /* key not in valid_keys_list */ + "foo=1,foo=", /* empty value */ +- "foo=1,,foo=2", /* empty key/value */ + "foo=1,foo", /* no value */ + "foo=1,=2", /* no key */ + "foo=[1,2", /* no closing bracket in value */ + ",=", /* also test with a smiley */ ++ "foo=[", /* no value in list and no closing bracket */ + NULL }; + const char **args; + const char *valid_keys_list[] = { "foo", "check", NULL }; +@@ -197,8 +231,8 @@ static int test_invalid_kvargs(void) + rte_kvargs_free(kvlist); + goto fail; + } +- return 0; + } ++ return 0; + + fail: + printf("while processing <%s>", *args); +diff --git a/dpdk/test/test/test_malloc.c b/dpdk/test/test/test_malloc.c +index 5e52724194..20788011ad 100644 +--- 
a/dpdk/test/test/test_malloc.c
++++ b/dpdk/test/test/test_malloc.c
+@@ -697,6 +697,18 @@ test_malloc_bad_params(void)
+ if (bad_ptr != NULL)
+ goto err_return;
+
++ /* rte_malloc is expected to return NULL when the size would overflow */
++ align = RTE_CACHE_LINE_SIZE;
++ size = (size_t)-8;
++
++ bad_ptr = rte_malloc(type, size, align);
++ if (bad_ptr != NULL)
++ goto err_return;
++
++ bad_ptr = rte_realloc(NULL, size, align);
++ if (bad_ptr != NULL)
++ goto err_return;
++
+ return 0;
+
+ err_return:
+diff --git a/dpdk/test/test/test_pmd_perf.c b/dpdk/test/test/test_pmd_perf.c
+index ed8524a176..d68f9599c7 100644
+--- a/dpdk/test/test/test_pmd_perf.c
++++ b/dpdk/test/test/test_pmd_perf.c
+@@ -142,7 +142,7 @@ check_all_ports_link_status(uint16_t port_num, uint32_t port_mask)
+ "Port%d Link Up. Speed %u Mbps - %s\n",
+ portid, link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+- ("full-duplex") : ("half-duplex\n"));
++ ("full-duplex") : ("half-duplex"));
+ if (link_mbps == 0)
+ link_mbps = link.link_speed;
+ } else
+diff --git a/dpdk/test/test/test_table_pipeline.c b/dpdk/test/test/test_table_pipeline.c
+index 441338ac01..bc412c3081 100644
+--- a/dpdk/test/test/test_table_pipeline.c
++++ b/dpdk/test/test/test_table_pipeline.c
+@@ -190,11 +190,13 @@ check_pipeline_invalid_params(void)
+ goto fail;
+ }
+
+- p = rte_pipeline_create(&pipeline_params_3);
+- if (p != NULL) {
+- RTE_LOG(INFO, PIPELINE, "%s: Configure pipeline with invalid "
+- "socket\n", __func__);
+- goto fail;
++ if (rte_eal_has_hugepages()) {
++ p = rte_pipeline_create(&pipeline_params_3);
++ if (p != NULL) {
++ RTE_LOG(INFO, PIPELINE, "%s: Configure pipeline with "
++ "invalid socket\n", __func__);
++ goto fail;
++ }
+ }
+
+ /* Check pipeline consistency */
+diff --git a/dpdk/usertools/dpdk-pmdinfo.py b/dpdk/usertools/dpdk-pmdinfo.py
+index 9d5c6369a0..12f20735e0 100755
+--- a/dpdk/usertools/dpdk-pmdinfo.py
++++ b/dpdk/usertools/dpdk-pmdinfo.py
+@@ -561,7 +561,10 @@ def main(stream=None):
+
+ pcifile_default = "./pci.ids" # For unknown OS's assume local file
+ if platform.system() == 'Linux':
+- pcifile_default = "/usr/share/hwdata/pci.ids"
++ # hwdata is the legacy location, misc is supported going forward
++ pcifile_default = "/usr/share/misc/pci.ids"
++ if not os.path.exists(pcifile_default):
++ pcifile_default = "/usr/share/hwdata/pci.ids"
+ elif platform.system() == 'FreeBSD':
+ pcifile_default = "/usr/local/share/pciids/pci.ids"
+ if not os.path.exists(pcifile_default):
+diff --git a/include/linux/automake.mk b/include/linux/automake.mk
+index b464fe0f5b..d757f7be3f 100644
+--- a/include/linux/automake.mk
++++ b/include/linux/automake.mk
+@@ -1,4 +1,5 @@
+ noinst_HEADERS += \
++ include/linux/netlink.h \
+ include/linux/pkt_cls.h \
+ include/linux/tc_act/tc_pedit.h \
+ include/linux/tc_act/tc_tunnel_key.h \
+diff --git a/include/linux/netlink.h b/include/linux/netlink.h
+new file mode 100644
+index 0000000000..cd558fb4c1
+--- /dev/null
++++ b/include/linux/netlink.h
+@@ -0,0 +1,30 @@
++#ifndef __UAPI_LINUX_NETLINK_WRAPPER_H
++#define __UAPI_LINUX_NETLINK_WRAPPER_H 1
++
++#if !defined(__KERNEL__) && !defined(HAVE_NLA_BITFIELD32)
++
++#include <linux/types.h>
++
++/* Generic 32 bitflags attribute content sent to the kernel.
++ *
++ * The value is a bitmap that defines the values being set
++ * The selector is a bitmask that defines which value is legit
++ *
++ * Examples:
++ * value = 0x0, and selector = 0x1
++ * implies we are selecting bit 1 and we want to set its value to 0.
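
Applying such a bitfield32 to an existing flags word is a plain read-modify-write in which only the bits named by the selector change; apply_bitfield32 below is an illustrative helper, not a kernel API:

#include <stdint.h>

static uint32_t
apply_bitfield32(uint32_t flags, uint32_t value, uint32_t selector)
{
	/* Bits outside the selector keep their old state, bits inside
	 * it take their new state from value. */
	return (flags & ~selector) | (value & selector);
}
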
++ * ++ * value = 0x2, and selector = 0x2 ++ * implies we are selecting bit 2 and we want to set its value to 1. ++ * ++ */ ++struct nla_bitfield32 { ++ __u32 value; ++ __u32 selector; ++}; ++ ++#endif /* !__KERNEL__ && !HAVE_NLA_BITFIELD32 */ ++ ++#include_next <linux/netlink.h> ++ ++#endif /* __UAPI_LINUX_NETLINK_WRAPPER_H */ +diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h +index 1384d71f95..c8066ace88 100644 +--- a/include/linux/pkt_cls.h ++++ b/include/linux/pkt_cls.h +@@ -1,7 +1,7 @@ + #ifndef __LINUX_PKT_CLS_WRAPPER_H + #define __LINUX_PKT_CLS_WRAPPER_H 1 + +-#if defined(__KERNEL__) || defined(HAVE_TCA_FLOWER_KEY_ENC_IP_TTL_MASK) ++#if defined(__KERNEL__) || defined(HAVE_TCA_ACT_FLAGS) + #include_next <linux/pkt_cls.h> + #else + +@@ -17,9 +17,14 @@ enum { + TCA_ACT_STATS, + TCA_ACT_PAD, + TCA_ACT_COOKIE, ++ TCA_ACT_FLAGS, + __TCA_ACT_MAX + }; + ++#define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for ++ * actions stats. ++ */ ++ + #define TCA_ACT_MAX __TCA_ACT_MAX + #define TCA_OLD_COMPAT (TCA_ACT_MAX+1) + #define TCA_ACT_MAX_PRIO 32 +@@ -64,7 +69,9 @@ struct tcf_t { + __u64 install; + __u64 lastuse; + __u64 expires; ++#ifdef HAVE_STRUCT_TCF_T_FIRSTUSE + __u64 firstuse; ++#endif + }; + + #define tc_gen \ +diff --git a/include/openvswitch/ofp-actions.h b/include/openvswitch/ofp-actions.h +index 14c5eab74b..436c4aadf5 100644 +--- a/include/openvswitch/ofp-actions.h ++++ b/include/openvswitch/ofp-actions.h +@@ -123,6 +123,8 @@ struct vl_mff_map; + OFPACT(NAT, ofpact_nat, ofpact, "nat") \ + OFPACT(OUTPUT_TRUNC, ofpact_output_trunc,ofpact, "output_trunc") \ + OFPACT(CLONE, ofpact_nest, actions, "clone") \ ++ OFPACT(CHECK_PKT_LARGER, ofpact_check_pkt_larger, ofpact, \ ++ "check_pkt_larger") \ + \ + /* Debugging actions. \ + * \ +@@ -225,6 +227,13 @@ ofpact_last(const struct ofpact *a, const struct ofpact *ofpacts, + return ofpact_next(a) == ofpact_end(ofpacts, ofpact_len); + } + ++static inline size_t ++ofpact_remaining_len(const struct ofpact *a, const struct ofpact *ofpacts, ++ size_t ofpact_len) ++{ ++ return ofpact_len - ((uint8_t *)a - (uint8_t *)ofpacts); ++} ++ + static inline const struct ofpact * + ofpact_find_type_flattened(const struct ofpact *a, enum ofpact_type type, + const struct ofpact * const end) +@@ -620,6 +629,16 @@ struct ofpact_meter { + ); + }; + ++/* OFPACT_CHECK_PKT_LARGER. ++ * ++ * Used for NXAST_CHECK_PKT_LARGER. */ ++struct ofpact_check_pkt_larger { ++ OFPACT_PADDED_MEMBERS( ++ struct ofpact ofpact; ++ struct mf_subfield dst; ++ uint16_t pkt_len; ++ ); ++}; + /* OFPACT_WRITE_ACTIONS, OFPACT_CLONE. + * + * Used for OFPIT11_WRITE_ACTIONS, NXAST_CLONE. */ +diff --git a/include/openvswitch/ofp-packet.h b/include/openvswitch/ofp-packet.h +index 67001cb3f5..77128d829b 100644 +--- a/include/openvswitch/ofp-packet.h ++++ b/include/openvswitch/ofp-packet.h +@@ -140,6 +140,9 @@ struct ofputil_packet_in_private { + /* NXCPT_ACTION_SET. */ + struct ofpact *action_set; + size_t action_set_len; ++ ++ /* NXCPT_ODP_PORT. 
*/ ++ odp_port_t odp_port; + }; + + struct ofpbuf *ofputil_encode_packet_in_private( +diff --git a/include/sparse/automake.mk b/include/sparse/automake.mk +index 985ee6a2fc..445b627dd0 100644 +--- a/include/sparse/automake.mk ++++ b/include/sparse/automake.mk +@@ -1,7 +1,6 @@ + noinst_HEADERS += \ + include/sparse/rte_byteorder.h \ + include/sparse/rte_esp.h \ +- include/sparse/rte_flow.h \ + include/sparse/rte_icmp.h \ + include/sparse/rte_ip.h \ + include/sparse/rte_sctp.h \ +diff --git a/include/sparse/rte_flow.h b/include/sparse/rte_flow.h +deleted file mode 100644 +index a36ab45e7c..0000000000 +--- a/include/sparse/rte_flow.h ++++ /dev/null +@@ -1,1483 +0,0 @@ +-/*- +- * BSD LICENSE +- * +- * Copyright 2016 6WIND S.A. +- * Copyright 2016 Mellanox. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * +- * * Redistributions of source code must retain the above copyright +- * notice, this list of conditions and the following disclaimer. +- * * Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in +- * the documentation and/or other materials provided with the +- * distribution. +- * * Neither the name of 6WIND S.A. nor the names of its +- * contributors may be used to endorse or promote products derived +- * from this software without specific prior written permission. +- * +- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +- */ +- +-#ifndef __CHECKER__ +-#error "Use this header only with sparse. It is not a correct implementation." +-#endif +- +-#ifndef RTE_FLOW_H_ +-#define RTE_FLOW_H_ +- +-/** +- * @file +- * RTE generic flow API +- * +- * This interface provides the ability to program packet matching and +- * associated actions in hardware through flow rules. +- */ +- +-#include <rte_arp.h> +-#include <rte_ether.h> +-#include <rte_icmp.h> +-#include <rte_ip.h> +-#include <rte_sctp.h> +-#include <rte_tcp.h> +-#include <rte_udp.h> +-#include <rte_byteorder.h> +-#include <rte_esp.h> +- +-#ifdef __cplusplus +-extern "C" { +-#endif +- +-/** +- * Flow rule attributes. +- * +- * Priorities are set on two levels: per group and per rule within groups. +- * +- * Lower values denote higher priority, the highest priority for both levels +- * is 0, so that a rule with priority 0 in group 8 is always matched after a +- * rule with priority 8 in group 0. +- * +- * Although optional, applications are encouraged to group similar rules as +- * much as possible to fully take advantage of hardware capabilities +- * (e.g. optimized matching) and work around limitations (e.g. a single +- * pattern type possibly allowed in a given group). 
+- * +- * Group and priority levels are arbitrary and up to the application, they +- * do not need to be contiguous nor start from 0, however the maximum number +- * varies between devices and may be affected by existing flow rules. +- * +- * If a packet is matched by several rules of a given group for a given +- * priority level, the outcome is undefined. It can take any path, may be +- * duplicated or even cause unrecoverable errors. +- * +- * Note that support for more than a single group and priority level is not +- * guaranteed. +- * +- * Flow rules can apply to inbound and/or outbound traffic (ingress/egress). +- * +- * Several pattern items and actions are valid and can be used in both +- * directions. Those valid for only one direction are described as such. +- * +- * At least one direction must be specified. +- * +- * Specifying both directions at once for a given rule is not recommended +- * but may be valid in a few cases (e.g. shared counter). +- */ +-struct rte_flow_attr { +- uint32_t group; /**< Priority group. */ +- uint32_t priority; /**< Priority level within group. */ +- uint32_t ingress:1; /**< Rule applies to ingress traffic. */ +- uint32_t egress:1; /**< Rule applies to egress traffic. */ +- uint32_t reserved:30; /**< Reserved, must be zero. */ +-}; +- +-/** +- * Matching pattern item types. +- * +- * Pattern items fall in two categories: +- * +- * - Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4, +- * IPV6, ICMP, UDP, TCP, SCTP, VXLAN and so on), usually associated with a +- * specification structure. These must be stacked in the same order as the +- * protocol layers to match, starting from the lowest. +- * +- * - Matching meta-data or affecting pattern processing (END, VOID, INVERT, +- * PF, VF, PORT and so on), often without a specification structure. Since +- * they do not match packet contents, these can be specified anywhere +- * within item lists without affecting others. +- * +- * See the description of individual types for more information. Those +- * marked with [META] fall into the second category. +- */ +-enum rte_flow_item_type { +- /** +- * [META] +- * +- * End marker for item lists. Prevents further processing of items, +- * thereby ending the pattern. +- * +- * No associated specification structure. +- */ +- RTE_FLOW_ITEM_TYPE_END, +- +- /** +- * [META] +- * +- * Used as a placeholder for convenience. It is ignored and simply +- * discarded by PMDs. +- * +- * No associated specification structure. +- */ +- RTE_FLOW_ITEM_TYPE_VOID, +- +- /** +- * [META] +- * +- * Inverted matching, i.e. process packets that do not match the +- * pattern. +- * +- * No associated specification structure. +- */ +- RTE_FLOW_ITEM_TYPE_INVERT, +- +- /** +- * Matches any protocol in place of the current layer, a single ANY +- * may also stand for several protocol layers. +- * +- * See struct rte_flow_item_any. +- */ +- RTE_FLOW_ITEM_TYPE_ANY, +- +- /** +- * [META] +- * +- * Matches packets addressed to the physical function of the device. +- * +- * If the underlying device function differs from the one that would +- * normally receive the matched traffic, specifying this item +- * prevents it from reaching that device unless the flow rule +- * contains a PF action. Packets are not duplicated between device +- * instances by default. +- * +- * No associated specification structure. +- */ +- RTE_FLOW_ITEM_TYPE_PF, +- +- /** +- * [META] +- * +- * Matches packets addressed to a virtual function ID of the device. 
+- * +- * If the underlying device function differs from the one that would +- * normally receive the matched traffic, specifying this item +- * prevents it from reaching that device unless the flow rule +- * contains a VF action. Packets are not duplicated between device +- * instances by default. +- * +- * See struct rte_flow_item_vf. +- */ +- RTE_FLOW_ITEM_TYPE_VF, +- +- /** +- * [META] +- * +- * Matches packets coming from the specified physical port of the +- * underlying device. +- * +- * The first PORT item overrides the physical port normally +- * associated with the specified DPDK input port (port_id). This +- * item can be provided several times to match additional physical +- * ports. +- * +- * See struct rte_flow_item_port. +- */ +- RTE_FLOW_ITEM_TYPE_PORT, +- +- /** +- * Matches a byte string of a given length at a given offset. +- * +- * See struct rte_flow_item_raw. +- */ +- RTE_FLOW_ITEM_TYPE_RAW, +- +- /** +- * Matches an Ethernet header. +- * +- * See struct rte_flow_item_eth. +- */ +- RTE_FLOW_ITEM_TYPE_ETH, +- +- /** +- * Matches an 802.1Q/ad VLAN tag. +- * +- * See struct rte_flow_item_vlan. +- */ +- RTE_FLOW_ITEM_TYPE_VLAN, +- +- /** +- * Matches an IPv4 header. +- * +- * See struct rte_flow_item_ipv4. +- */ +- RTE_FLOW_ITEM_TYPE_IPV4, +- +- /** +- * Matches an IPv6 header. +- * +- * See struct rte_flow_item_ipv6. +- */ +- RTE_FLOW_ITEM_TYPE_IPV6, +- +- /** +- * Matches an ICMP header. +- * +- * See struct rte_flow_item_icmp. +- */ +- RTE_FLOW_ITEM_TYPE_ICMP, +- +- /** +- * Matches a UDP header. +- * +- * See struct rte_flow_item_udp. +- */ +- RTE_FLOW_ITEM_TYPE_UDP, +- +- /** +- * Matches a TCP header. +- * +- * See struct rte_flow_item_tcp. +- */ +- RTE_FLOW_ITEM_TYPE_TCP, +- +- /** +- * Matches a SCTP header. +- * +- * See struct rte_flow_item_sctp. +- */ +- RTE_FLOW_ITEM_TYPE_SCTP, +- +- /** +- * Matches a VXLAN header. +- * +- * See struct rte_flow_item_vxlan. +- */ +- RTE_FLOW_ITEM_TYPE_VXLAN, +- +- /** +- * Matches a E_TAG header. +- * +- * See struct rte_flow_item_e_tag. +- */ +- RTE_FLOW_ITEM_TYPE_E_TAG, +- +- /** +- * Matches a NVGRE header. +- * +- * See struct rte_flow_item_nvgre. +- */ +- RTE_FLOW_ITEM_TYPE_NVGRE, +- +- /** +- * Matches a MPLS header. +- * +- * See struct rte_flow_item_mpls. +- */ +- RTE_FLOW_ITEM_TYPE_MPLS, +- +- /** +- * Matches a GRE header. +- * +- * See struct rte_flow_item_gre. +- */ +- RTE_FLOW_ITEM_TYPE_GRE, +- +- /** +- * [META] +- * +- * Fuzzy pattern match, expect faster than default. +- * +- * This is for device that support fuzzy matching option. +- * Usually a fuzzy matching is fast but the cost is accuracy. +- * +- * See struct rte_flow_item_fuzzy. +- */ +- RTE_FLOW_ITEM_TYPE_FUZZY, +- +- /** +- * Matches a GTP header. +- * +- * Configure flow for GTP packets. +- * +- * See struct rte_flow_item_gtp. +- */ +- RTE_FLOW_ITEM_TYPE_GTP, +- +- /** +- * Matches a GTP header. +- * +- * Configure flow for GTP-C packets. +- * +- * See struct rte_flow_item_gtp. +- */ +- RTE_FLOW_ITEM_TYPE_GTPC, +- +- /** +- * Matches a GTP header. +- * +- * Configure flow for GTP-U packets. +- * +- * See struct rte_flow_item_gtp. +- */ +- RTE_FLOW_ITEM_TYPE_GTPU, +- +- /** +- * Matches a ESP header. +- * +- * See struct rte_flow_item_esp. +- */ +- RTE_FLOW_ITEM_TYPE_ESP, +-}; +- +-/** +- * RTE_FLOW_ITEM_TYPE_ANY +- * +- * Matches any protocol in place of the current layer, a single ANY may also +- * stand for several protocol layers. +- * +- * This is usually specified as the first pattern item when looking for a +- * protocol anywhere in a packet. 
+- * +- * A zeroed mask stands for any number of layers. +- */ +-struct rte_flow_item_any { +- uint32_t num; /**< Number of layers covered. */ +-}; +- +-/** Default mask for RTE_FLOW_ITEM_TYPE_ANY. */ +-#ifndef __cplusplus +-static const struct rte_flow_item_any rte_flow_item_any_mask = { +- .num = 0x00000000, +-}; +-#endif +- +-/** +- * RTE_FLOW_ITEM_TYPE_VF +- * +- * Matches packets addressed to a virtual function ID of the device. +- * +- * If the underlying device function differs from the one that would +- * normally receive the matched traffic, specifying this item prevents it +- * from reaching that device unless the flow rule contains a VF +- * action. Packets are not duplicated between device instances by default. +- * +- * - Likely to return an error or never match any traffic if this causes a +- * VF device to match traffic addressed to a different VF. +- * - Can be specified multiple times to match traffic addressed to several +- * VF IDs. +- * - Can be combined with a PF item to match both PF and VF traffic. +- * +- * A zeroed mask can be used to match any VF ID. +- */ +-struct rte_flow_item_vf { +- uint32_t id; /**< Destination VF ID. */ +-}; +- +-/** Default mask for RTE_FLOW_ITEM_TYPE_VF. */ +-#ifndef __cplusplus +-static const struct rte_flow_item_vf rte_flow_item_vf_mask = { +- .id = 0x00000000, +-}; +-#endif +- +-/** +- * RTE_FLOW_ITEM_TYPE_PORT +- * +- * Matches packets coming from the specified physical port of the underlying +- * device. +- * +- * The first PORT item overrides the physical port normally associated with +- * the specified DPDK input port (port_id). This item can be provided +- * several times to match additional physical ports. +- * +- * Note that physical ports are not necessarily tied to DPDK input ports +- * (port_id) when those are not under DPDK control. Possible values are +- * specific to each device, they are not necessarily indexed from zero and +- * may not be contiguous. +- * +- * As a device property, the list of allowed values as well as the value +- * associated with a port_id should be retrieved by other means. +- * +- * A zeroed mask can be used to match any port index. +- */ +-struct rte_flow_item_port { +- uint32_t index; /**< Physical port index. */ +-}; +- +-/** Default mask for RTE_FLOW_ITEM_TYPE_PORT. */ +-#ifndef __cplusplus +-static const struct rte_flow_item_port rte_flow_item_port_mask = { +- .index = 0x00000000, +-}; +-#endif +- +-/** +- * RTE_FLOW_ITEM_TYPE_RAW +- * +- * Matches a byte string of a given length at a given offset. +- * +- * Offset is either absolute (using the start of the packet) or relative to +- * the end of the previous matched item in the stack, in which case negative +- * values are allowed. +- * +- * If search is enabled, offset is used as the starting point. The search +- * area can be delimited by setting limit to a nonzero value, which is the +- * maximum number of bytes after offset where the pattern may start. +- * +- * Matching a zero-length pattern is allowed, doing so resets the relative +- * offset for subsequent items. +- * +- * This type does not support ranges (struct rte_flow_item.last). +- */ +-struct rte_flow_item_raw { +- uint32_t relative:1; /**< Look for pattern after the previous item. */ +- uint32_t search:1; /**< Search pattern from offset (see also limit). */ +- uint32_t reserved:30; /**< Reserved, must be set to zero. */ +- int32_t offset; /**< Absolute or relative offset for pattern. */ +- uint16_t limit; /**< Search area limit for start of pattern. 
*/ +- uint16_t length; /**< Pattern length. */ +- uint8_t pattern[]; /**< Byte string to look for. */ +-}; +- +-/** Default mask for RTE_FLOW_ITEM_TYPE_RAW. */ +-#ifndef __cplusplus +-static const struct rte_flow_item_raw rte_flow_item_raw_mask = { +- .relative = 1, +- .search = 1, +- .reserved = 0x3fffffff, +- .offset = 0xffffffff, +- .limit = 0xffff, +- .length = 0xffff, +-}; +-#endif +- +-/** +- * RTE_FLOW_ITEM_TYPE_ETH +- * +- * Matches an Ethernet header. +- */ +-struct rte_flow_item_eth { +- struct ether_addr dst; /**< Destination MAC. */ +- struct ether_addr src; /**< Source MAC. */ +- rte_be16_t type; /**< EtherType. */ +-}; +- +-/** Default mask for RTE_FLOW_ITEM_TYPE_ETH. */ +-#ifndef __cplusplus +-static const struct rte_flow_item_eth rte_flow_item_eth_mask = { +- .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", +- .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", +- .type = RTE_BE16(0x0000), +-}; +-#endif +- +-/** +- * RTE_FLOW_ITEM_TYPE_VLAN +- * +- * Matches an 802.1Q/ad VLAN tag. +- * +- * This type normally follows either RTE_FLOW_ITEM_TYPE_ETH or +- * RTE_FLOW_ITEM_TYPE_VLAN. +- */ +-struct rte_flow_item_vlan { +- rte_be16_t tpid; /**< Tag protocol identifier. */ +- rte_be16_t tci; /**< Tag control information. */ +-}; +- +-/** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */ +-#ifndef __cplusplus +-static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = { +- .tpid = RTE_BE16(0x0000), +- .tci = RTE_BE16(0xffff), +-}; +-#endif +- +-/** +- * RTE_FLOW_ITEM_TYPE_IPV4 +- * +- * Matches an IPv4 header. +- * +- * Note: IPv4 options are handled by dedicated pattern items. +- */ +-struct rte_flow_item_ipv4 { +- struct ipv4_hdr hdr; /**< IPv4 header definition. */ +-}; +- +-/** Default mask for RTE_FLOW_ITEM_TYPE_IPV4. */ +-#ifndef __cplusplus +-static const struct rte_flow_item_ipv4 rte_flow_item_ipv4_mask = { +- .hdr = { +- .src_addr = RTE_BE32(0xffffffff), +- .dst_addr = RTE_BE32(0xffffffff), +- }, +-}; +-#endif +- +-/** +- * RTE_FLOW_ITEM_TYPE_IPV6. +- * +- * Matches an IPv6 header. +- * +- * Note: IPv6 options are handled by dedicated pattern items. +- */ +-struct rte_flow_item_ipv6 { +- struct ipv6_hdr hdr; /**< IPv6 header definition. */ +-}; +- +-/** Default mask for RTE_FLOW_ITEM_TYPE_IPV6. */ +-#ifndef __cplusplus +-static const struct rte_flow_item_ipv6 rte_flow_item_ipv6_mask = { +- .hdr = { +- .src_addr = +- "\xff\xff\xff\xff\xff\xff\xff\xff" +- "\xff\xff\xff\xff\xff\xff\xff\xff", +- .dst_addr = +- "\xff\xff\xff\xff\xff\xff\xff\xff" +- "\xff\xff\xff\xff\xff\xff\xff\xff", +- }, +-}; +-#endif +- +-/** +- * RTE_FLOW_ITEM_TYPE_ICMP. +- * +- * Matches an ICMP header. +- */ +-struct rte_flow_item_icmp { +- struct icmp_hdr hdr; /**< ICMP header definition. */ +-}; +- +-/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP. */ +-#ifndef __cplusplus +-static const struct rte_flow_item_icmp rte_flow_item_icmp_mask = { +- .hdr = { +- .icmp_type = 0xff, +- .icmp_code = 0xff, +- }, +-}; +-#endif +- +-/** +- * RTE_FLOW_ITEM_TYPE_UDP. +- * +- * Matches a UDP header. +- */ +-struct rte_flow_item_udp { +- struct udp_hdr hdr; /**< UDP header definition. */ +-}; +- +-/** Default mask for RTE_FLOW_ITEM_TYPE_UDP. */ +-#ifndef __cplusplus +-static const struct rte_flow_item_udp rte_flow_item_udp_mask = { +- .hdr = { +- .src_port = RTE_BE16(0xffff), +- .dst_port = RTE_BE16(0xffff), +- }, +-}; +-#endif +- +-/** +- * RTE_FLOW_ITEM_TYPE_TCP. +- * +- * Matches a TCP header. +- */ +-struct rte_flow_item_tcp { +- struct tcp_hdr hdr; /**< TCP header definition. 
*/
+-};
+-
+-/** Default mask for RTE_FLOW_ITEM_TYPE_TCP. */
+-#ifndef __cplusplus
+-static const struct rte_flow_item_tcp rte_flow_item_tcp_mask = {
+- .hdr = {
+- .src_port = RTE_BE16(0xffff),
+- .dst_port = RTE_BE16(0xffff),
+- },
+-};
+-#endif
+-
+-/**
+- * RTE_FLOW_ITEM_TYPE_SCTP.
+- *
+- * Matches an SCTP header.
+- */
+-struct rte_flow_item_sctp {
+- struct sctp_hdr hdr; /**< SCTP header definition. */
+-};
+-
+-/** Default mask for RTE_FLOW_ITEM_TYPE_SCTP. */
+-#ifndef __cplusplus
+-static const struct rte_flow_item_sctp rte_flow_item_sctp_mask = {
+- .hdr = {
+- .src_port = RTE_BE16(0xffff),
+- .dst_port = RTE_BE16(0xffff),
+- },
+-};
+-#endif
+-
+-/**
+- * RTE_FLOW_ITEM_TYPE_VXLAN.
+- *
+- * Matches a VXLAN header (RFC 7348).
+- */
+-struct rte_flow_item_vxlan {
+- uint8_t flags; /**< Normally 0x08 (I flag). */
+- uint8_t rsvd0[3]; /**< Reserved, normally 0x000000. */
+- uint8_t vni[3]; /**< VXLAN identifier. */
+- uint8_t rsvd1; /**< Reserved, normally 0x00. */
+-};
+-
+-/** Default mask for RTE_FLOW_ITEM_TYPE_VXLAN. */
+-#ifndef __cplusplus
+-static const struct rte_flow_item_vxlan rte_flow_item_vxlan_mask = {
+- .vni = "\xff\xff\xff",
+-};
+-#endif
+-
+-/**
+- * RTE_FLOW_ITEM_TYPE_E_TAG.
+- *
+- * Matches an E-Tag header.
+- */
+-struct rte_flow_item_e_tag {
+- rte_be16_t tpid; /**< Tag protocol identifier (0x893F). */
+- /**
+- * E-Tag control information (E-TCI).
+- * E-PCP (3b), E-DEI (1b), ingress E-CID base (12b).
+- */
+- rte_be16_t epcp_edei_in_ecid_b;
+- /** Reserved (2b), GRP (2b), E-CID base (12b). */
+- rte_be16_t rsvd_grp_ecid_b;
+- uint8_t in_ecid_e; /**< Ingress E-CID ext. */
+- uint8_t ecid_e; /**< E-CID ext. */
+-};
+-
+-/** Default mask for RTE_FLOW_ITEM_TYPE_E_TAG. */
+-#ifndef __cplusplus
+-static const struct rte_flow_item_e_tag rte_flow_item_e_tag_mask = {
+- .rsvd_grp_ecid_b = RTE_BE16(0x3fff),
+-};
+-#endif
+-
+-/**
+- * RTE_FLOW_ITEM_TYPE_NVGRE.
+- *
+- * Matches an NVGRE header.
+- */
+-struct rte_flow_item_nvgre {
+- /**
+- * Checksum (1b), undefined (1b), key bit (1b), sequence number (1b),
+- * reserved 0 (9b), version (3b).
+- *
+- * c_k_s_rsvd0_ver must have value 0x2000 according to RFC 7637.
+- */
+- rte_be16_t c_k_s_rsvd0_ver;
+- rte_be16_t protocol; /**< Protocol type (0x6558). */
+- uint8_t tni[3]; /**< Virtual subnet ID. */
+- uint8_t flow_id; /**< Flow ID. */
+-};
+-
+-/** Default mask for RTE_FLOW_ITEM_TYPE_NVGRE. */
+-#ifndef __cplusplus
+-static const struct rte_flow_item_nvgre rte_flow_item_nvgre_mask = {
+- .tni = "\xff\xff\xff",
+-};
+-#endif
+-
+-/**
+- * RTE_FLOW_ITEM_TYPE_MPLS.
+- *
+- * Matches an MPLS header.
+- */
+-struct rte_flow_item_mpls {
+- /**
+- * Label (20b), TC (3b), Bottom of Stack (1b).
+- */
+- uint8_t label_tc_s[3];
+- uint8_t ttl; /**< Time-to-Live. */
+-};
+-
+-/** Default mask for RTE_FLOW_ITEM_TYPE_MPLS. */
+-#ifndef __cplusplus
+-static const struct rte_flow_item_mpls rte_flow_item_mpls_mask = {
+- .label_tc_s = "\xff\xff\xf0",
+-};
+-#endif
+-
+-/**
+- * RTE_FLOW_ITEM_TYPE_GRE.
+- *
+- * Matches a GRE header.
+- */
+-struct rte_flow_item_gre {
+- /**
+- * Checksum (1b), reserved 0 (12b), version (3b).
+- * Refer to RFC 2784.
+- */
+- rte_be16_t c_rsvd0_ver;
+- rte_be16_t protocol; /**< Protocol type. */
+-};
+-
+-/** Default mask for RTE_FLOW_ITEM_TYPE_GRE. */
+-#ifndef __cplusplus
+-static const struct rte_flow_item_gre rte_flow_item_gre_mask = {
+- .protocol = RTE_BE16(0xffff),
+-};
+-#endif
+-
+-/**
+- * RTE_FLOW_ITEM_TYPE_FUZZY
+- *
+- * Fuzzy pattern match, expected to be faster than default matching. 
+- *
+- * This is for devices that support the fuzzy match option.
+- * A fuzzy match is usually fast, but the cost is accuracy;
+- * e.g. a signature match only matches the pattern's hash value, so it is
+- * possible for two different patterns to have the same hash value.
+- *
+- * The matching accuracy level can be configured by the threshold.
+- * The driver can divide the threshold range and map it to the different
+- * accuracy levels that the device supports.
+- *
+- * Threshold 0 means perfect match (no fuzziness), while threshold
+- * 0xffffffff means fuzziest match.
+- */
+-struct rte_flow_item_fuzzy {
+- uint32_t thresh; /**< Accuracy threshold. */
+-};
+-
+-/** Default mask for RTE_FLOW_ITEM_TYPE_FUZZY. */
+-#ifndef __cplusplus
+-static const struct rte_flow_item_fuzzy rte_flow_item_fuzzy_mask = {
+- .thresh = 0xffffffff,
+-};
+-#endif
+-
+-/**
+- * RTE_FLOW_ITEM_TYPE_GTP.
+- *
+- * Matches a GTPv1 header.
+- */
+-struct rte_flow_item_gtp {
+- /**
+- * Version (3b), protocol type (1b), reserved (1b),
+- * Extension header flag (1b),
+- * Sequence number flag (1b),
+- * N-PDU number flag (1b).
+- */
+- uint8_t v_pt_rsv_flags;
+- uint8_t msg_type; /**< Message type. */
+- rte_be16_t msg_len; /**< Message length. */
+- rte_be32_t teid; /**< Tunnel endpoint identifier. */
+-};
+-
+-/** Default mask for RTE_FLOW_ITEM_TYPE_GTP. */
+-#ifndef __cplusplus
+-static const struct rte_flow_item_gtp rte_flow_item_gtp_mask = {
+- .teid = RTE_BE32(0xffffffff),
+-};
+-#endif
+-
+-/**
+- * RTE_FLOW_ITEM_TYPE_ESP
+- *
+- * Matches an ESP header.
+- */
+-struct rte_flow_item_esp {
+- struct esp_hdr hdr; /**< ESP header definition. */
+-};
+-
+-/** Default mask for RTE_FLOW_ITEM_TYPE_ESP. */
+-#ifndef __cplusplus
+-static const struct rte_flow_item_esp rte_flow_item_esp_mask = {
+- .hdr = {
+- .spi = OVS_BE32_MAX,
+- },
+-};
+-#endif
+-
+-/**
+- * Matching pattern item definition.
+- *
+- * A pattern is formed by stacking items starting from the lowest protocol
+- * layer to match. This stacking restriction does not apply to meta items
+- * which can be placed anywhere in the stack without affecting the meaning
+- * of the resulting pattern.
+- *
+- * Patterns are terminated by END items.
+- *
+- * The spec field should be a valid pointer to a structure of the related
+- * item type. It may remain unspecified (NULL) in many cases to request
+- * broad (nonspecific) matching. In such cases, last and mask must also be
+- * set to NULL.
+- *
+- * Optionally, last can point to a structure of the same type to define an
+- * inclusive range. This is mostly supported by integer and address fields
+- * and may cause errors otherwise. Fields that do not support ranges must be
+- * set to 0 or to the same value as the corresponding fields in spec.
+- *
+- * Only the fields defined to nonzero values in the default masks (see
+- * rte_flow_item_{name}_mask constants) are considered relevant by
+- * default. This can be overridden by providing a mask structure of the
+- * same type with applicable bits set to one. It can also be used to
+- * partially filter out specific fields (e.g. as an alternate means to match
+- * ranges of IP addresses).
+- *
+- * Mask is a simple bit-mask applied before interpreting the contents of
+- * spec and last, which may yield unexpected results if not used
+- * carefully. For example, if for an IPv4 address field, spec provides
+- * 10.1.2.3, last provides 10.3.4.5 and mask provides 255.255.0.0, the
+- * effective range becomes 10.1.0.0 to 10.3.255.255. 
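To make the mask semantics above concrete, here is a minimal sketch using the item definitions from this removed header; the addresses mirror the example in the preceding paragraph and are illustrative only.

    /* Sketch: match IPv4 sources in the inclusive range described above.
     * Only src_addr is relevant here; all values are illustrative. */
    struct rte_flow_item_ipv4 ip_spec = {
        .hdr = { .src_addr = RTE_BE32(0x0a010203) }, /* 10.1.2.3 */
    };
    struct rte_flow_item_ipv4 ip_last = {
        .hdr = { .src_addr = RTE_BE32(0x0a030405) }, /* 10.3.4.5 */
    };
    struct rte_flow_item_ipv4 ip_mask = {
        .hdr = { .src_addr = RTE_BE32(0xffff0000) }, /* 255.255.0.0 */
    };
    struct rte_flow_item item = {
        .type = RTE_FLOW_ITEM_TYPE_IPV4,
        .spec = &ip_spec,
        .last = &ip_last,
        .mask = &ip_mask, /* Effective range: 10.1.0.0 to 10.3.255.255. */
    };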
+- */
+-struct rte_flow_item {
+- enum rte_flow_item_type type; /**< Item type. */
+- const void *spec; /**< Pointer to item specification structure. */
+- const void *last; /**< Defines an inclusive range (spec to last). */
+- const void *mask; /**< Bit-mask applied to spec and last. */
+-};
+-
+-/**
+- * Action types.
+- *
+- * Each possible action is represented by a type. Some have associated
+- * configuration structures. Several actions combined in a list can be
+- * assigned to a flow rule. That list is not ordered.
+- *
+- * They fall into three categories:
+- *
+- * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
+- * matched packets from being processed by subsequent flow rules, unless
+- * overridden with PASSTHRU.
+- *
+- * - Non-terminating actions (PASSTHRU, DUP) that leave matched packets up
+- * for additional processing by subsequent flow rules.
+- *
+- * - Other non-terminating meta actions that do not affect the fate of
+- * packets (END, VOID, MARK, FLAG, COUNT).
+- *
+- * When several actions are combined in a flow rule, they should all have
+- * different types (e.g. dropping a packet twice is not possible).
+- *
+- * Only the last action of a given type is taken into account. PMDs still
+- * perform error checking on the entire list.
+- *
+- * Note that PASSTHRU is the only action able to override a terminating
+- * rule.
+- */
+-enum rte_flow_action_type {
+- /**
+- * [META]
+- *
+- * End marker for action lists. Prevents further processing of
+- * actions, thereby ending the list.
+- *
+- * No associated configuration structure.
+- */
+- RTE_FLOW_ACTION_TYPE_END,
+-
+- /**
+- * [META]
+- *
+- * Used as a placeholder for convenience. It is ignored and simply
+- * discarded by PMDs.
+- *
+- * No associated configuration structure.
+- */
+- RTE_FLOW_ACTION_TYPE_VOID,
+-
+- /**
+- * Leaves packets up for additional processing by subsequent flow
+- * rules. This is the default when a rule does not contain a
+- * terminating action, but can be specified to force a rule to
+- * become non-terminating.
+- *
+- * No associated configuration structure.
+- */
+- RTE_FLOW_ACTION_TYPE_PASSTHRU,
+-
+- /**
+- * [META]
+- *
+- * Attaches an integer value to packets and sets PKT_RX_FDIR and
+- * PKT_RX_FDIR_ID mbuf flags.
+- *
+- * See struct rte_flow_action_mark.
+- */
+- RTE_FLOW_ACTION_TYPE_MARK,
+-
+- /**
+- * [META]
+- *
+- * Flags packets. Similar to MARK without a specific value; only
+- * sets the PKT_RX_FDIR mbuf flag.
+- *
+- * No associated configuration structure.
+- */
+- RTE_FLOW_ACTION_TYPE_FLAG,
+-
+- /**
+- * Assigns packets to a given queue index.
+- *
+- * See struct rte_flow_action_queue.
+- */
+- RTE_FLOW_ACTION_TYPE_QUEUE,
+-
+- /**
+- * Drops packets.
+- *
+- * PASSTHRU overrides this action if both are specified.
+- *
+- * No associated configuration structure.
+- */
+- RTE_FLOW_ACTION_TYPE_DROP,
+-
+- /**
+- * [META]
+- *
+- * Enables counters for this rule.
+- *
+- * These counters can be retrieved and reset through rte_flow_query(),
+- * see struct rte_flow_query_count.
+- *
+- * No associated configuration structure.
+- */
+- RTE_FLOW_ACTION_TYPE_COUNT,
+-
+- /**
+- * Duplicates packets to a given queue index.
+- *
+- * This is normally combined with QUEUE; however, when used alone, it
+- * is actually similar to QUEUE + PASSTHRU.
+- *
+- * See struct rte_flow_action_dup. 
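A minimal sketch of a complete action list using the types above and the configuration structures defined further below; the mark ID and queue index are arbitrary example values.

    /* Sketch: tag matched packets with a mark, then send them to queue 3.
     * The list is unordered and must end with RTE_FLOW_ACTION_TYPE_END. */
    struct rte_flow_action_mark mark = { .id = 0x2a };
    struct rte_flow_action_queue queue = { .index = 3 };
    struct rte_flow_action actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
        { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
        { .type = RTE_FLOW_ACTION_TYPE_END, .conf = NULL },
    };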
+- */
+- RTE_FLOW_ACTION_TYPE_DUP,
+-
+- /**
+- * Similar to QUEUE, except RSS is additionally performed on packets
+- * to spread them among several queues according to the provided
+- * parameters.
+- *
+- * See struct rte_flow_action_rss.
+- */
+- RTE_FLOW_ACTION_TYPE_RSS,
+-
+- /**
+- * Redirects packets to the physical function (PF) of the current
+- * device.
+- *
+- * No associated configuration structure.
+- */
+- RTE_FLOW_ACTION_TYPE_PF,
+-
+- /**
+- * Redirects packets to the virtual function (VF) of the current
+- * device with the specified ID.
+- *
+- * See struct rte_flow_action_vf.
+- */
+- RTE_FLOW_ACTION_TYPE_VF,
+-
+- /**
+- * Traffic metering and policing (MTR).
+- *
+- * See struct rte_flow_action_meter.
+- * See file rte_mtr.h for MTR object configuration.
+- */
+- RTE_FLOW_ACTION_TYPE_METER,
+-
+- /**
+- * Redirects packets to the security engine of the current device for
+- * security processing as specified by the security session.
+- *
+- * See struct rte_flow_action_security.
+- */
+- RTE_FLOW_ACTION_TYPE_SECURITY
+-};
+-
+-/**
+- * RTE_FLOW_ACTION_TYPE_MARK
+- *
+- * Attaches an integer value to packets and sets PKT_RX_FDIR and
+- * PKT_RX_FDIR_ID mbuf flags.
+- *
+- * This value is arbitrary and application-defined. Maximum allowed value
+- * depends on the underlying implementation. It is returned in the
+- * hash.fdir.hi mbuf field.
+- */
+-struct rte_flow_action_mark {
+- uint32_t id; /**< Integer value to return with packets. */
+-};
+-
+-/**
+- * RTE_FLOW_ACTION_TYPE_QUEUE
+- *
+- * Assign packets to a given queue index.
+- *
+- * Terminating by default.
+- */
+-struct rte_flow_action_queue {
+- uint16_t index; /**< Queue index to use. */
+-};
+-
+-/**
+- * RTE_FLOW_ACTION_TYPE_COUNT (query)
+- *
+- * Query structure to retrieve and reset flow rule counters.
+- */
+-struct rte_flow_query_count {
+- uint32_t reset:1; /**< Reset counters after query [in]. */
+- uint32_t hits_set:1; /**< hits field is set [out]. */
+- uint32_t bytes_set:1; /**< bytes field is set [out]. */
+- uint32_t reserved:29; /**< Reserved, must be zero [in, out]. */
+- uint64_t hits; /**< Number of hits for this rule [out]. */
+- uint64_t bytes; /**< Number of bytes through this rule [out]. */
+-};
+-
+-/**
+- * RTE_FLOW_ACTION_TYPE_DUP
+- *
+- * Duplicates packets to a given queue index.
+- *
+- * This is normally combined with QUEUE; however, when used alone, it is
+- * actually similar to QUEUE + PASSTHRU.
+- *
+- * Non-terminating by default.
+- */
+-struct rte_flow_action_dup {
+- uint16_t index; /**< Queue index to duplicate packets to. */
+-};
+-
+-/**
+- * RTE_FLOW_ACTION_TYPE_RSS
+- *
+- * Similar to QUEUE, except RSS is additionally performed on packets to
+- * spread them among several queues according to the provided parameters.
+- *
+- * Note: RSS hash result is stored in the hash.rss mbuf field which overlaps
+- * hash.fdir.lo. Since the MARK action sets the hash.fdir.hi field only,
+- * both can be requested simultaneously.
+- *
+- * Terminating by default.
+- */
+-struct rte_flow_action_rss {
+- const struct rte_eth_rss_conf *rss_conf; /**< RSS parameters. */
+- uint16_t num; /**< Number of entries in queue[]. */
+- uint16_t queue[]; /**< Queue indices to use. */
+-};
+-
+-/**
+- * RTE_FLOW_ACTION_TYPE_VF
+- *
+- * Redirects packets to a virtual function (VF) of the current device.
+- *
+- * Packets matched by a VF pattern item can be redirected to their original
+- * VF ID instead of the specified one. 
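Because queue[] above is a flexible array member, the RSS configuration must be allocated with room for the queue list; a minimal sketch follows (the queue count is arbitrary, and a NULL rss_conf is assumed here to request the device's default RSS settings).

    /* Sketch: RSS action spreading matched packets over queues 0..3.
     * malloc() is from <stdlib.h>; this is a fragment, not a full program. */
    uint16_t nb_queues = 4;
    struct rte_flow_action_rss *rss =
        malloc(sizeof *rss + nb_queues * sizeof rss->queue[0]);

    if (rss) {
        rss->rss_conf = NULL; /* Assumption: NULL means default settings. */
        rss->num = nb_queues;
        for (uint16_t i = 0; i < nb_queues; i++) {
            rss->queue[i] = i;
        }
    }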
This parameter may not be available
+- * and is not guaranteed to work properly if the VF part is matched by a
+- * prior flow rule or if packets are not addressed to a VF in the first
+- * place.
+- *
+- * Terminating by default.
+- */
+-struct rte_flow_action_vf {
+- uint32_t original:1; /**< Use original VF ID if possible. */
+- uint32_t reserved:31; /**< Reserved, must be zero. */
+- uint32_t id; /**< VF ID to redirect packets to. */
+-};
+-
+-/**
+- * RTE_FLOW_ACTION_TYPE_METER
+- *
+- * Traffic metering and policing (MTR).
+- *
+- * Packets matched by items of this type can be either dropped or passed to the
+- * next item with their color set by the MTR object.
+- *
+- * Non-terminating by default.
+- */
+-struct rte_flow_action_meter {
+- uint32_t mtr_id; /**< MTR object ID created with rte_mtr_create(). */
+-};
+-
+-/**
+- * RTE_FLOW_ACTION_TYPE_SECURITY
+- *
+- * Perform the security action on flows matched by the pattern items
+- * according to the configuration of the security session.
+- *
+- * This action modifies the payload of matched flows. For INLINE_CRYPTO, the
+- * security protocol headers and IV are fully provided by the application as
+- * specified in the flow pattern. The payload of matching packets is
+- * encrypted on egress, and decrypted and authenticated on ingress.
+- * For INLINE_PROTOCOL, the security protocol is fully offloaded to HW,
+- * providing full encapsulation and decapsulation of packets in security
+- * protocols. The flow pattern specifies both the outer security header fields
+- * and the inner packet fields. The security session specified in the action
+- * must match the pattern parameters.
+- *
+- * The security session specified in the action must be created on the same
+- * port as the flow action that is being specified.
+- *
+- * The ingress/egress flow attribute should match that specified in the
+- * security session if the security session supports the definition of the
+- * direction.
+- *
+- * Multiple flows can be configured to use the same security session.
+- *
+- * Non-terminating by default.
+- */
+-struct rte_flow_action_security {
+- void *security_session; /**< Pointer to security session structure. */
+-};
+-
+-/**
+- * Definition of a single action.
+- *
+- * A list of actions is terminated by an END action.
+- *
+- * For simple actions without a configuration structure, conf remains NULL.
+- */
+-struct rte_flow_action {
+- enum rte_flow_action_type type; /**< Action type. */
+- const void *conf; /**< Pointer to action configuration structure. */
+-};
+-
+-/**
+- * Opaque type returned after successfully creating a flow.
+- *
+- * This handle can be used to manage and query the related flow (e.g. to
+- * destroy it or retrieve counters).
+- */
+-struct rte_flow;
+-
+-/**
+- * Verbose error types.
+- *
+- * Most of them provide the type of the object referenced by struct
+- * rte_flow_error.cause.
+- */
+-enum rte_flow_error_type {
+- RTE_FLOW_ERROR_TYPE_NONE, /**< No error. */
+- RTE_FLOW_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */
+- RTE_FLOW_ERROR_TYPE_HANDLE, /**< Flow rule (handle). */
+- RTE_FLOW_ERROR_TYPE_ATTR_GROUP, /**< Group field. */
+- RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, /**< Priority field. */
+- RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, /**< Ingress field. */
+- RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */
+- RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */
+- RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */
+- RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. 
*/
+- RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */
+- RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */
+-};
+-
+-/**
+- * Verbose error structure definition.
+- *
+- * This object is normally allocated by applications and set by PMDs. The
+- * message points to a constant string which does not need to be freed by
+- * the application; however, its pointer can be considered valid only as long
+- * as its associated DPDK port remains configured. Closing the underlying
+- * device or unloading the PMD invalidates it.
+- *
+- * Both cause and message may be NULL regardless of the error type.
+- */
+-struct rte_flow_error {
+- enum rte_flow_error_type type; /**< Cause field and error types. */
+- const void *cause; /**< Object responsible for the error. */
+- const char *message; /**< Human-readable error message. */
+-};
+-
+-/**
+- * Check whether a flow rule can be created on a given port.
+- *
+- * The flow rule is validated for correctness and whether it could be accepted
+- * by the device given sufficient resources. The rule is checked against the
+- * current device mode and queue configuration. The flow rule may also
+- * optionally be validated against existing flow rules and device resources.
+- * This function has no effect on the target device.
+- *
+- * The returned value is guaranteed to remain valid only as long as no
+- * successful calls to rte_flow_create() or rte_flow_destroy() are made in
+- * the meantime and no device parameters affecting flow rules are modified
+- * in any way, due to possible collisions or resource limitations (although
+- * in such cases EINVAL should not be returned).
+- *
+- * @param port_id
+- * Port identifier of Ethernet device.
+- * @param[in] attr
+- * Flow rule attributes.
+- * @param[in] pattern
+- * Pattern specification (list terminated by the END pattern item).
+- * @param[in] actions
+- * Associated actions (list terminated by the END action).
+- * @param[out] error
+- * Perform verbose error reporting if not NULL. PMDs initialize this
+- * structure in case of error only.
+- *
+- * @return
+- * 0 if flow rule is valid and can be created. A negative errno value
+- * otherwise (rte_errno is also set); the following errors are defined:
+- *
+- * -ENOSYS: underlying device does not support this functionality.
+- *
+- * -EINVAL: unknown or invalid rule specification.
+- *
+- * -ENOTSUP: valid but unsupported rule specification (e.g. partial
+- * bit-masks are unsupported).
+- *
+- * -EEXIST: collision with an existing rule. Only returned if device
+- * supports flow rule collision checking and there was a flow rule
+- * collision. Not receiving this return code is no guarantee that creating
+- * the rule will not fail due to a collision.
+- *
+- * -ENOMEM: not enough memory to execute the function, or if the device
+- * supports resource validation, resource limitation on the device.
+- *
+- * -EBUSY: action cannot be performed due to busy device resources; it may
+- * succeed if the affected queues or even the entire port are in a stopped
+- * state (see rte_eth_dev_rx_queue_stop() and rte_eth_dev_stop()).
+- */
+-int
+-rte_flow_validate(uint16_t port_id,
+- const struct rte_flow_attr *attr,
+- const struct rte_flow_item pattern[],
+- const struct rte_flow_action actions[],
+- struct rte_flow_error *error);
+-
+-/**
+- * Create a flow rule on a given port.
+- *
+- * @param port_id
+- * Port identifier of Ethernet device.
+- * @param[in] attr
+- * Flow rule attributes. 
+- * @param[in] pattern +- * Pattern specification (list terminated by the END pattern item). +- * @param[in] actions +- * Associated actions (list terminated by the END action). +- * @param[out] error +- * Perform verbose error reporting if not NULL. PMDs initialize this +- * structure in case of error only. +- * +- * @return +- * A valid handle in case of success, NULL otherwise and rte_errno is set +- * to the positive version of one of the error codes defined for +- * rte_flow_validate(). +- */ +-struct rte_flow * +-rte_flow_create(uint16_t port_id, +- const struct rte_flow_attr *attr, +- const struct rte_flow_item pattern[], +- const struct rte_flow_action actions[], +- struct rte_flow_error *error); +- +-/** +- * Destroy a flow rule on a given port. +- * +- * Failure to destroy a flow rule handle may occur when other flow rules +- * depend on it, and destroying it would result in an inconsistent state. +- * +- * This function is only guaranteed to succeed if handles are destroyed in +- * reverse order of their creation. +- * +- * @param port_id +- * Port identifier of Ethernet device. +- * @param flow +- * Flow rule handle to destroy. +- * @param[out] error +- * Perform verbose error reporting if not NULL. PMDs initialize this +- * structure in case of error only. +- * +- * @return +- * 0 on success, a negative errno value otherwise and rte_errno is set. +- */ +-int +-rte_flow_destroy(uint16_t port_id, +- struct rte_flow *flow, +- struct rte_flow_error *error); +- +-/** +- * Destroy all flow rules associated with a port. +- * +- * In the unlikely event of failure, handles are still considered destroyed +- * and no longer valid but the port must be assumed to be in an inconsistent +- * state. +- * +- * @param port_id +- * Port identifier of Ethernet device. +- * @param[out] error +- * Perform verbose error reporting if not NULL. PMDs initialize this +- * structure in case of error only. +- * +- * @return +- * 0 on success, a negative errno value otherwise and rte_errno is set. +- */ +-int +-rte_flow_flush(uint16_t port_id, +- struct rte_flow_error *error); +- +-/** +- * Query an existing flow rule. +- * +- * This function allows retrieving flow-specific data such as counters. +- * Data is gathered by special actions which must be present in the flow +- * rule definition. +- * +- * \see RTE_FLOW_ACTION_TYPE_COUNT +- * +- * @param port_id +- * Port identifier of Ethernet device. +- * @param flow +- * Flow rule handle to query. +- * @param action +- * Action type to query. +- * @param[in, out] data +- * Pointer to storage for the associated query data type. +- * @param[out] error +- * Perform verbose error reporting if not NULL. PMDs initialize this +- * structure in case of error only. +- * +- * @return +- * 0 on success, a negative errno value otherwise and rte_errno is set. +- */ +-int +-rte_flow_query(uint16_t port_id, +- struct rte_flow *flow, +- enum rte_flow_action_type action, +- void *data, +- struct rte_flow_error *error); +- +-/** +- * Restrict ingress traffic to the defined flow rules. +- * +- * Isolated mode guarantees that all ingress traffic comes from defined flow +- * rules only (current and future). +- * +- * Besides making ingress more deterministic, it allows PMDs to safely reuse +- * resources otherwise assigned to handle the remaining traffic, such as +- * global RSS configuration settings, VLAN filters, MAC address entries, +- * legacy filter API rules and so on in order to expand the set of possible +- * flow rule types. 
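As a usage illustration for the validation and creation entry points above, a minimal hedged sketch; it assumes 'port_id', 'attr', 'pattern' and 'actions' have already been prepared as described earlier.

    /* Sketch: validate a rule first, then create it, with verbose errors.
     * fprintf() is from <stdio.h>; this is a fragment, not a full program. */
    struct rte_flow_error err = { 0 };
    struct rte_flow *flow = NULL;
    int ret = rte_flow_validate(port_id, &attr, pattern, actions, &err);

    if (!ret) {
        flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
    }
    if (!flow) {
        fprintf(stderr, "flow rule rejected: %s\n",
                err.message ? err.message : "unspecified");
    }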
+- * +- * Calling this function as soon as possible after device initialization, +- * ideally before the first call to rte_eth_dev_configure(), is recommended +- * to avoid possible failures due to conflicting settings. +- * +- * Once effective, leaving isolated mode may not be possible depending on +- * PMD implementation. +- * +- * Additionally, the following functionality has no effect on the underlying +- * port and may return errors such as ENOTSUP ("not supported"): +- * +- * - Toggling promiscuous mode. +- * - Toggling allmulticast mode. +- * - Configuring MAC addresses. +- * - Configuring multicast addresses. +- * - Configuring VLAN filters. +- * - Configuring Rx filters through the legacy API (e.g. FDIR). +- * - Configuring global RSS settings. +- * +- * @param port_id +- * Port identifier of Ethernet device. +- * @param set +- * Nonzero to enter isolated mode, attempt to leave it otherwise. +- * @param[out] error +- * Perform verbose error reporting if not NULL. PMDs initialize this +- * structure in case of error only. +- * +- * @return +- * 0 on success, a negative errno value otherwise and rte_errno is set. +- */ +-int +-rte_flow_isolate(uint16_t port_id, int set, struct rte_flow_error *error); +- +-/** +- * Initialize flow error structure. +- * +- * @param[out] error +- * Pointer to flow error structure (may be NULL). +- * @param code +- * Related error code (rte_errno). +- * @param type +- * Cause field and error types. +- * @param cause +- * Object responsible for the error. +- * @param message +- * Human-readable error message. +- * +- * @return +- * Negative error code (errno value) and rte_errno is set. +- */ +-int +-rte_flow_error_set(struct rte_flow_error *error, +- int code, +- enum rte_flow_error_type type, +- const void *cause, +- const char *message); +- +-/** +- * Generic flow representation. +- * +- * This form is sufficient to describe an rte_flow independently from any +- * PMD implementation and allows for replayability and identification. +- */ +-struct rte_flow_desc { +- size_t size; /**< Allocated space including data[]. */ +- struct rte_flow_attr attr; /**< Attributes. */ +- struct rte_flow_item *items; /**< Items. */ +- struct rte_flow_action *actions; /**< Actions. */ +- uint8_t data[]; /**< Storage for items/actions. */ +-}; +- +-/** +- * Copy an rte_flow rule description. +- * +- * @param[in] fd +- * Flow rule description. +- * @param[in] len +- * Total size of allocated data for the flow description. +- * @param[in] attr +- * Flow rule attributes. +- * @param[in] items +- * Pattern specification (list terminated by the END pattern item). +- * @param[in] actions +- * Associated actions (list terminated by the END action). +- * +- * @return +- * If len is greater or equal to the size of the flow, the total size of the +- * flow description and its data. +- * If len is lower than the size of the flow, the number of bytes that would +- * have been written to desc had it been sufficient. Nothing is written. 
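The return convention above enables the usual two-call sizing pattern; a small sketch (assuming, as the text implies, that nothing is written when len is too small, so a NULL fd is safe on the sizing call):

    /* Sketch: size the description with a first call, then copy for real.
     * malloc() is from <stdlib.h>; this is a fragment. */
    size_t need = rte_flow_copy(NULL, 0, &attr, items, actions);
    struct rte_flow_desc *desc = malloc(need);

    if (desc) {
        rte_flow_copy(desc, need, &attr, items, actions);
    }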
+- */ +-size_t +-rte_flow_copy(struct rte_flow_desc *fd, size_t len, +- const struct rte_flow_attr *attr, +- const struct rte_flow_item *items, +- const struct rte_flow_action *actions); +- +-#ifdef __cplusplus +-} +-#endif +- +-#endif /* RTE_FLOW_H_ */ +diff --git a/lib/automake.mk b/lib/automake.mk +index b1ff495ff1..49fbb1deaf 100644 +--- a/lib/automake.mk ++++ b/lib/automake.mk +@@ -107,6 +107,8 @@ lib_libopenvswitch_la_SOURCES = \ + lib/hmapx.h \ + lib/id-pool.c \ + lib/id-pool.h \ ++ lib/if-notifier-manual.c \ ++ lib/if-notifier.h \ + lib/jhash.c \ + lib/jhash.h \ + lib/json.c \ +@@ -385,7 +387,6 @@ lib_libopenvswitch_la_SOURCES += \ + lib/dpif-netlink-rtnl.c \ + lib/dpif-netlink-rtnl.h \ + lib/if-notifier.c \ +- lib/if-notifier.h \ + lib/netdev-linux.c \ + lib/netdev-linux.h \ + lib/netdev-tc-offloads.c \ +diff --git a/lib/classifier.c b/lib/classifier.c +index edb40c89c6..cb136f6529 100644 +--- a/lib/classifier.c ++++ b/lib/classifier.c +@@ -393,7 +393,9 @@ classifier_set_prefix_fields(struct classifier *cls, + bitmap_set1(fields.bm, trie_fields[i]); + + new_fields[n_tries] = NULL; +- if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) { ++ const struct mf_field *cls_field ++ = ovsrcu_get(struct mf_field *, &cls->tries[n_tries].field); ++ if (n_tries >= cls->n_tries || field != cls_field) { + new_fields[n_tries] = field; + changed = true; + } +@@ -454,7 +456,7 @@ trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field) + } else { + ovsrcu_set_hidden(&trie->root, NULL); + } +- trie->field = field; ++ ovsrcu_set_hidden(&trie->field, CONST_CAST(struct mf_field *, field)); + + /* Add existing rules to the new trie. */ + CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) { +@@ -839,7 +841,6 @@ classifier_remove_assert(struct classifier *cls, + struct trie_ctx { + const struct cls_trie *trie; + bool lookup_done; /* Status of the lookup. */ +- uint8_t be32ofs; /* U32 offset of the field in question. */ + unsigned int maskbits; /* Prefix length needed to avoid false matches. */ + union trie_prefix match_plens; /* Bitmask of prefix lengths with possible + * matches. */ +@@ -849,7 +850,6 @@ static void + trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie) + { + ctx->trie = trie; +- ctx->be32ofs = trie->field->flow_be32ofs; + ctx->lookup_done = false; + } + +@@ -1513,8 +1513,10 @@ insert_subtable(struct classifier *cls, const struct minimask *mask) + *CONST_CAST(uint8_t *, &subtable->n_indices) = index; + + for (i = 0; i < cls->n_tries; i++) { +- subtable->trie_plen[i] = minimask_get_prefix_len(mask, +- cls->tries[i].field); ++ const struct mf_field *field ++ = ovsrcu_get(struct mf_field *, &cls->tries[i].field); ++ subtable->trie_plen[i] ++ = field ? minimask_get_prefix_len(mask, field) : 0; + } + + /* Ports trie. */ +@@ -1568,11 +1570,17 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, + * fields using the prefix tries. The trie checks are done only as + * needed to avoid folding in additional bits to the wildcards mask. */ + for (j = 0; j < n_tries; j++) { +- /* Is the trie field relevant for this subtable, and +- is the trie field within the current range of fields? */ +- if (field_plen[j] && +- flowmap_is_set(&range_map, trie_ctx[j].be32ofs / 2)) { ++ /* Is the trie field relevant for this subtable? 
*/ ++ if (field_plen[j]) { + struct trie_ctx *ctx = &trie_ctx[j]; ++ const struct mf_field *ctx_field ++ = ovsrcu_get(struct mf_field *, &ctx->trie->field); ++ ++ /* Is the trie field within the current range of fields? */ ++ if (!ctx_field ++ || !flowmap_is_set(&range_map, ctx_field->flow_be32ofs / 2)) { ++ continue; ++ } + + /* On-demand trie lookup. */ + if (!ctx->lookup_done) { +@@ -1594,14 +1602,16 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, + * than this subtable would otherwise. */ + if (ctx->maskbits <= field_plen[j]) { + /* Unwildcard the bits and skip the rest. */ +- mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits); ++ mask_set_prefix_bits(wc, ctx_field->flow_be32ofs, ++ ctx->maskbits); + /* Note: Prerequisite already unwildcarded, as the only + * prerequisite of the supported trie lookup fields is + * the ethertype, which is always unwildcarded. */ + return true; + } + /* Can skip if the field is already unwildcarded. */ +- if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) { ++ if (mask_prefix_bits_set(wc, ctx_field->flow_be32ofs, ++ ctx->maskbits)) { + return true; + } + } +@@ -1994,12 +2004,12 @@ static unsigned int + trie_lookup(const struct cls_trie *trie, const struct flow *flow, + union trie_prefix *plens) + { +- const struct mf_field *mf = trie->field; ++ const struct mf_field *mf = ovsrcu_get(struct mf_field *, &trie->field); + + /* Check that current flow matches the prerequisites for the trie + * field. Some match fields are used for multiple purposes, so we + * must check that the trie is relevant for this flow. */ +- if (mf_are_prereqs_ok(mf, flow, NULL)) { ++ if (mf && mf_are_prereqs_ok(mf, flow, NULL)) { + return trie_lookup_value(&trie->root, + &((ovs_be32 *)flow)[mf->flow_be32ofs], + &plens->be32, mf->n_bits); +@@ -2046,8 +2056,9 @@ minimask_get_prefix_len(const struct minimask *minimask, + * happened to be zeros. + */ + static const ovs_be32 * +-minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf) ++minimatch_get_prefix(const struct minimatch *match, rcu_field_ptr *field) + { ++ struct mf_field *mf = ovsrcu_get_protected(struct mf_field *, field); + size_t u64_ofs = mf->flow_be32ofs / 2; + + return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs) +@@ -2061,7 +2072,7 @@ static void + trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen) + { + trie_insert_prefix(&trie->root, +- minimatch_get_prefix(&rule->match, trie->field), mlen); ++ minimatch_get_prefix(&rule->match, &trie->field), mlen); + } + + static void +@@ -2116,7 +2127,7 @@ static void + trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen) + { + trie_remove_prefix(&trie->root, +- minimatch_get_prefix(&rule->match, trie->field), mlen); ++ minimatch_get_prefix(&rule->match, &trie->field), mlen); + } + + /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask +diff --git a/lib/classifier.h b/lib/classifier.h +index d1bd4aa12a..f646a8f742 100644 +--- a/lib/classifier.h ++++ b/lib/classifier.h +@@ -314,13 +314,15 @@ extern "C" { + struct cls_subtable; + struct cls_match; + ++struct mf_field; ++typedef OVSRCU_TYPE(struct mf_field *) rcu_field_ptr; + struct trie_node; + typedef OVSRCU_TYPE(struct trie_node *) rcu_trie_ptr; + + /* Prefix trie for a 'field' */ + struct cls_trie { +- const struct mf_field *field; /* Trie field, or NULL. */ +- rcu_trie_ptr root; /* NULL if none. */ ++ rcu_field_ptr field; /* Trie field, or NULL. 
*/ ++ rcu_trie_ptr root; /* NULL if none. */ + }; + + enum { +diff --git a/lib/conntrack.c b/lib/conntrack.c +index ea9486b735..2d050b627e 100644 +--- a/lib/conntrack.c ++++ b/lib/conntrack.c +@@ -640,7 +640,7 @@ static void + reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) + { + char *tail = dp_packet_tail(pkt); +- char pad = dp_packet_l2_pad_size(pkt); ++ uint8_t pad = dp_packet_l2_pad_size(pkt); + struct conn_key inner_key; + const char *inner_l4 = NULL; + uint16_t orig_l3_ofs = pkt->l3_ofs; +@@ -650,6 +650,8 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) + struct ip_header *nh = dp_packet_l3(pkt); + struct icmp_header *icmp = dp_packet_l4(pkt); + struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1); ++ /* This call is already verified to succeed during the code path from ++ * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */ + extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) - pad, + &inner_l4, false); + pkt->l3_ofs += (char *) inner_l3 - (char *) nh; +@@ -671,6 +673,8 @@ reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn) + struct icmp6_error_header *icmp6 = dp_packet_l4(pkt); + struct ovs_16aligned_ip6_hdr *inner_l3_6 = + (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1); ++ /* This call is already verified to succeed during the code path from ++ * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */ + extract_l3_ipv6(&inner_key, inner_l3_6, + tail - ((char *)inner_l3_6) - pad, + &inner_l4); +@@ -1252,6 +1256,11 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, + conn_key_lookup(&ct->buckets[bucket], ctx, now); + conn = ctx->conn; + ++ /* Reset ct_state whenever entering a new zone. */ ++ if (pkt->md.ct_state && pkt->md.ct_zone != zone) { ++ pkt->md.ct_state = 0; ++ } ++ + /* Delete found entry if in wrong direction. 'force' implies commit. */ + if (OVS_UNLIKELY(force && ctx->reply && conn)) { + struct conn lconn; +@@ -1284,7 +1293,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, + /* It is a race condition where conn has timed out and removed + * between unlock of the rev_conn and lock of the forward conn; + * nothing to do. */ +- pkt->md.ct_state |= CS_TRACKED | CS_INVALID; ++ pkt->md.ct_state |= CS_INVALID; ++ write_ct_md(pkt, zone, NULL, NULL, NULL); + ct_lock_unlock(&ct->buckets[bucket].lock); + return; + } +@@ -1715,9 +1725,10 @@ check_l4_icmp6(const struct conn_key *key, const void *data, size_t size, + } + + static inline bool +-extract_l4_tcp(struct conn_key *key, const void *data, size_t size) ++extract_l4_tcp(struct conn_key *key, const void *data, size_t size, ++ size_t *chk_len) + { +- if (OVS_UNLIKELY(size < TCP_HEADER_LEN)) { ++ if (OVS_UNLIKELY(size < (chk_len ? *chk_len : TCP_HEADER_LEN))) { + return false; + } + +@@ -1730,9 +1741,10 @@ extract_l4_tcp(struct conn_key *key, const void *data, size_t size) + } + + static inline bool +-extract_l4_udp(struct conn_key *key, const void *data, size_t size) ++extract_l4_udp(struct conn_key *key, const void *data, size_t size, ++ size_t *chk_len) + { +- if (OVS_UNLIKELY(size < UDP_HEADER_LEN)) { ++ if (OVS_UNLIKELY(size < (chk_len ? 
*chk_len : UDP_HEADER_LEN))) { + return false; + } + +@@ -1746,7 +1758,7 @@ extract_l4_udp(struct conn_key *key, const void *data, size_t size) + + static inline bool extract_l4(struct conn_key *key, const void *data, + size_t size, bool *related, const void *l3, +- bool validate_checksum); ++ bool validate_checksum, size_t *chk_len); + + static uint8_t + reverse_icmp_type(uint8_t type) +@@ -1778,9 +1790,9 @@ reverse_icmp_type(uint8_t type) + * possible */ + static inline int + extract_l4_icmp(struct conn_key *key, const void *data, size_t size, +- bool *related) ++ bool *related, size_t *chk_len) + { +- if (OVS_UNLIKELY(size < ICMP_HEADER_LEN)) { ++ if (OVS_UNLIKELY(size < (chk_len ? *chk_len : ICMP_HEADER_LEN))) { + return false; + } + +@@ -1831,8 +1843,9 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size, + key->src = inner_key.src; + key->dst = inner_key.dst; + key->nw_proto = inner_key.nw_proto; ++ size_t check_len = ICMP_ERROR_DATA_L4_LEN; + +- ok = extract_l4(key, l4, tail - l4, NULL, l3, false); ++ ok = extract_l4(key, l4, tail - l4, NULL, l3, false, &check_len); + if (ok) { + conn_key_reverse(key); + *related = true; +@@ -1919,7 +1932,7 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, + key->dst = inner_key.dst; + key->nw_proto = inner_key.nw_proto; + +- ok = extract_l4(key, l4, tail - l4, NULL, l3, false); ++ ok = extract_l4(key, l4, tail - l4, NULL, l3, false, NULL); + if (ok) { + conn_key_reverse(key); + *related = true; +@@ -1944,26 +1957,29 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, + * an ICMP or ICMP6 header. + * + * If 'related' is NULL, it means that we're already parsing a header nested +- * in an ICMP error. In this case, we skip checksum and length validation. */ ++ * in an ICMP error. In this case, we skip the checksum and some length ++ * validations. */ + static inline bool + extract_l4(struct conn_key *key, const void *data, size_t size, bool *related, +- const void *l3, bool validate_checksum) ++ const void *l3, bool validate_checksum, size_t *chk_len) + { + if (key->nw_proto == IPPROTO_TCP) { + return (!related || check_l4_tcp(key, data, size, l3, +- validate_checksum)) && extract_l4_tcp(key, data, size); ++ validate_checksum)) ++ && extract_l4_tcp(key, data, size, chk_len); + } else if (key->nw_proto == IPPROTO_UDP) { + return (!related || check_l4_udp(key, data, size, l3, +- validate_checksum)) && extract_l4_udp(key, data, size); ++ validate_checksum)) ++ && extract_l4_udp(key, data, size, chk_len); + } else if (key->dl_type == htons(ETH_TYPE_IP) + && key->nw_proto == IPPROTO_ICMP) { + return (!related || check_l4_icmp(data, size, validate_checksum)) +- && extract_l4_icmp(key, data, size, related); ++ && extract_l4_icmp(key, data, size, related, chk_len); + } else if (key->dl_type == htons(ETH_TYPE_IPV6) + && key->nw_proto == IPPROTO_ICMPV6) { + return (!related || check_l4_icmp6(key, data, size, l3, +- validate_checksum)) && extract_l4_icmp6(key, data, size, +- related); ++ validate_checksum)) ++ && extract_l4_icmp6(key, data, size, related); + } else { + return false; + } +@@ -2042,7 +2058,8 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, + bool hwol_good_l4_csum = dp_packet_l4_checksum_valid(pkt); + /* Validate the checksum only when hwol is not supported. 
*/
+ if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
+- &ctx->icmp_related, l3, !hwol_good_l4_csum)) {
++ &ctx->icmp_related, l3, !hwol_good_l4_csum,
++ NULL)) {
+ ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
+ return true;
+ }
+diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
+index 5dae9e1e76..62f8b1bb6d 100644
+--- a/lib/dpif-netdev.c
++++ b/lib/dpif-netdev.c
+@@ -100,6 +100,17 @@ enum { MAX_METERS = 65536 }; /* Maximum number of meters. */
+ enum { MAX_BANDS = 8 }; /* Maximum number of bands / meter. */
+ enum { N_METER_LOCKS = 64 }; /* Maximum number of meters. */
+
++COVERAGE_DEFINE(datapath_drop_meter);
++COVERAGE_DEFINE(datapath_drop_upcall_error);
++COVERAGE_DEFINE(datapath_drop_lock_error);
++COVERAGE_DEFINE(datapath_drop_userspace_action_error);
++COVERAGE_DEFINE(datapath_drop_tunnel_push_error);
++COVERAGE_DEFINE(datapath_drop_tunnel_pop_error);
++COVERAGE_DEFINE(datapath_drop_recirc_error);
++COVERAGE_DEFINE(datapath_drop_invalid_port);
++COVERAGE_DEFINE(datapath_drop_invalid_tnl_port);
++COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
++
+ /* Protects against changes to 'dp_netdevs'. */
+ static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
+
+@@ -2094,7 +2105,11 @@ dp_netdev_pmd_find_dpcls(struct dp_netdev_pmd_thread *pmd,
+ }
+
+ #define MAX_FLOW_MARK (UINT32_MAX - 1)
+-#define INVALID_FLOW_MARK (UINT32_MAX)
++#define INVALID_FLOW_MARK 0
++/* A zero flow mark is used to tell the HW to remove the mark. A packet
++ * marked with a zero mark is received in SW without a mark at all, so
++ * zero cannot be used as a valid mark.
++ */
+
+ struct megaflow_to_mark_data {
+ const struct cmap_node node;
+@@ -2120,7 +2135,7 @@ flow_mark_alloc(void)
+
+ if (!flow_mark.pool) {
+ /* Haven't initialized yet, do it here */
+- flow_mark.pool = id_pool_create(0, MAX_FLOW_MARK);
++ flow_mark.pool = id_pool_create(1, MAX_FLOW_MARK);
+ }
+
+ if (id_pool_alloc_id(flow_mark.pool, &mark)) {
+@@ -2225,6 +2240,12 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd,
+ struct cmap_node *mark_node = CONST_CAST(struct cmap_node *,
+ &flow->mark_node);
+
++ /* INVALID_FLOW_MARK may mean that the flow has been disassociated or
++ * never associated. */
++ if (OVS_UNLIKELY(mark == INVALID_FLOW_MARK)) {
++ return EINVAL;
++ }
++
+ cmap_remove(&flow_mark.mark_to_flow, mark_node, hash_int(mark, 0));
+ flow->mark = INVALID_FLOW_MARK;
+
+@@ -2374,6 +2395,7 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
+ mark = flow_mark_alloc();
+ if (mark == INVALID_FLOW_MARK) {
+ VLOG_ERR("Failed to allocate flow mark!\n");
++ return -1;
+ }
+ }
+ info.flow_mark = mark;
+@@ -2449,6 +2471,7 @@ dp_netdev_flow_offload_main(void *data OVS_UNUSED)
+ VLOG_DBG("%s to %s netdev flow\n",
+ ret == 0 ? "succeed" : "failed", op);
+ dp_netdev_free_flow_offload(offload);
++ ovsrcu_quiesce();
+ }
+
+ return NULL;
+@@ -5552,6 +5575,14 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
+ /* All packets will hit the meter at the same time. */
+ long_delta_t = now / 1000 - meter->used / 1000; /* msec */
+
++ if (long_delta_t < 0) {
++ /* This condition means that several threads are fighting for the
++ meter lock, and the one that received its packets a bit later
++ wins. Assume that all racing threads received their packets at
++ the same time to avoid overflow. */
++ long_delta_t = 0;
++ }
++
+ /* Make sure delta_t will not be too large, so that bucket will not
+ * wrap around below. 
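The datapath_drop_* counters above all follow the standard OVS coverage pattern, and the same pattern recurs in the later hunks; a minimal sketch (the counter name and helper below are hypothetical, not part of this patch):

    /* Sketch: define a coverage counter once per module, then bump it on
     * every drop path so "ovs-appctl coverage/show" can report it. */
    COVERAGE_DEFINE(example_drop);

    static void
    example_drop_packet(struct dp_packet *packet)
    {
        COVERAGE_INC(example_drop);
        dp_packet_delete(packet);
    }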
*/ + delta_t = (long_delta_t > (long long int)meter->max_delta_t) +@@ -5647,7 +5678,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, + band = &meter->bands[exceeded_band[j]]; + band->packet_count += 1; + band->byte_count += dp_packet_size(packet); +- ++ COVERAGE_INC(datapath_drop_meter); + dp_packet_delete(packet); + } else { + /* Meter accepts packet. */ +@@ -6200,7 +6231,6 @@ dpif_netdev_packet_get_rss_hash(struct dp_packet *packet, + recirc_depth = *recirc_depth_get_unsafe(); + if (OVS_UNLIKELY(recirc_depth)) { + hash = hash_finish(hash, recirc_depth); +- dp_packet_set_rss_hash(packet, hash); + } + return hash; + } +@@ -6403,6 +6433,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd, + + if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) { + dp_packet_delete(packet); ++ COVERAGE_INC(datapath_drop_rx_invalid_packet); + continue; + } + +@@ -6522,6 +6553,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, + put_actions); + if (OVS_UNLIKELY(error && error != ENOSPC)) { + dp_packet_delete(packet); ++ COVERAGE_INC(datapath_drop_upcall_error); + return error; + } + +@@ -6652,6 +6684,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, + DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) { + if (OVS_UNLIKELY(!rules[i])) { + dp_packet_delete(packet); ++ COVERAGE_INC(datapath_drop_lock_error); + upcall_fail_cnt++; + } + } +@@ -6921,6 +6954,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd, + actions->data, actions->size); + } else if (should_steal) { + dp_packet_delete(packet); ++ COVERAGE_INC(datapath_drop_userspace_action_error); + } + } + +@@ -6935,6 +6969,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, + struct dp_netdev *dp = pmd->dp; + int type = nl_attr_type(a); + struct tx_port *p; ++ uint32_t packet_count, packets_dropped; + + switch ((enum ovs_action_attr)type) { + case OVS_ACTION_ATTR_OUTPUT: +@@ -6976,6 +7011,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, + dp_packet_batch_add(&p->output_pkts, packet); + } + return; ++ } else { ++ COVERAGE_ADD(datapath_drop_invalid_port, ++ dp_packet_batch_size(packets_)); + } + break; + +@@ -6988,7 +7026,11 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, + break; + } + dp_packet_batch_apply_cutlen(packets_); +- push_tnl_action(pmd, a, packets_); ++ packet_count = dp_packet_batch_size(packets_); ++ if (push_tnl_action(pmd, a, packets_)) { ++ COVERAGE_ADD(datapath_drop_tunnel_push_error, ++ packet_count); ++ } + return; + + case OVS_ACTION_ATTR_TUNNEL_POP: +@@ -7008,7 +7050,14 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, + + dp_packet_batch_apply_cutlen(packets_); + ++ packet_count = dp_packet_batch_size(packets_); + netdev_pop_header(p->port->netdev, packets_); ++ packets_dropped = ++ packet_count - dp_packet_batch_size(packets_); ++ if (packets_dropped) { ++ COVERAGE_ADD(datapath_drop_tunnel_pop_error, ++ packets_dropped); ++ } + if (dp_packet_batch_is_empty(packets_)) { + return; + } +@@ -7023,6 +7072,11 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, + (*depth)--; + return; + } ++ COVERAGE_ADD(datapath_drop_invalid_tnl_port, ++ dp_packet_batch_size(packets_)); ++ } else { ++ COVERAGE_ADD(datapath_drop_recirc_error, ++ dp_packet_batch_size(packets_)); + } + break; + +@@ -7067,6 +7121,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, + + return; + } ++ COVERAGE_ADD(datapath_drop_lock_error, ++ dp_packet_batch_size(packets_)); + break; + + case OVS_ACTION_ATTR_RECIRC: +@@ -7090,6 +7146,8 
@@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, + return; + } + ++ COVERAGE_ADD(datapath_drop_recirc_error, ++ dp_packet_batch_size(packets_)); + VLOG_WARN("Packet dropped. Max recirculation depth exceeded."); + break; + +@@ -7242,6 +7300,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, + case OVS_ACTION_ATTR_PUSH_NSH: + case OVS_ACTION_ATTR_POP_NSH: + case OVS_ACTION_ATTR_CT_CLEAR: ++ case OVS_ACTION_ATTR_CHECK_PKT_LEN: ++ case OVS_ACTION_ATTR_DROP: + case __OVS_ACTION_ATTR_MAX: + OVS_NOT_REACHED(); + } +diff --git a/lib/dpif-netlink-rtnl.c b/lib/dpif-netlink-rtnl.c +index 2e23a8c14f..582274c467 100644 +--- a/lib/dpif-netlink-rtnl.c ++++ b/lib/dpif-netlink-rtnl.c +@@ -104,7 +104,13 @@ vport_type_to_kind(enum ovs_vport_type type, + case OVS_VPORT_TYPE_IP6ERSPAN: + return "ip6erspan"; + case OVS_VPORT_TYPE_IP6GRE: +- return "ip6gre"; ++ if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L2) { ++ return "ip6gretap"; ++ } else if (tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L3) { ++ return NULL; ++ } else { ++ return NULL; ++ } + case OVS_VPORT_TYPE_NETDEV: + case OVS_VPORT_TYPE_INTERNAL: + case OVS_VPORT_TYPE_LISP: +diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c +index 165b7a93fc..26d09f843f 100644 +--- a/lib/dpif-netlink.c ++++ b/lib/dpif-netlink.c +@@ -456,6 +456,7 @@ vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no, + int error; + + if (dpif->handlers == NULL) { ++ close_nl_sock(socksp); + return 0; + } + +@@ -2037,6 +2038,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) + info.dpif_class = dpif_class; + info.tp_dst_port = dst_port; + info.tunnel_csum_on = csum_on; ++ info.tc_modify_flow_deleted = false; + err = netdev_flow_put(dev, &match, + CONST_CAST(struct nlattr *, put->actions), + put->actions_len, +@@ -2084,7 +2086,11 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) + out: + if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) { + /* Modified rule can't be offloaded, try and delete from HW */ +- int del_err = netdev_flow_del(dev, put->ufid, put->stats); ++ int del_err = 0; ++ ++ if (!info.tc_modify_flow_deleted) { ++ del_err = netdev_flow_del(dev, put->ufid, put->stats); ++ } + + if (!del_err) { + /* Delete from hw success, so old flow was offloaded. +diff --git a/lib/dpif.c b/lib/dpif.c +index 457c9bfb92..c908a1054d 100644 +--- a/lib/dpif.c ++++ b/lib/dpif.c +@@ -1269,6 +1269,8 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_, + case OVS_ACTION_ATTR_POP_NSH: + case OVS_ACTION_ATTR_CT_CLEAR: + case OVS_ACTION_ATTR_UNSPEC: ++ case OVS_ACTION_ATTR_CHECK_PKT_LEN: ++ case OVS_ACTION_ATTR_DROP: + case __OVS_ACTION_ATTR_MAX: + OVS_NOT_REACHED(); + } +@@ -1874,6 +1876,12 @@ dpif_supports_tnl_push_pop(const struct dpif *dpif) + return dpif_is_netdev(dpif); + } + ++bool ++dpif_supports_explicit_drop_action(const struct dpif *dpif) ++{ ++ return dpif_is_netdev(dpif); ++} ++ + /* Meters */ + void + dpif_meter_get_features(const struct dpif *dpif, +diff --git a/lib/dpif.h b/lib/dpif.h +index 475d5a674e..c425042d91 100644 +--- a/lib/dpif.h ++++ b/lib/dpif.h +@@ -888,6 +888,7 @@ int dpif_get_pmds_for_port(const struct dpif * dpif, odp_port_t port_no, + + char *dpif_get_dp_version(const struct dpif *); + bool dpif_supports_tnl_push_pop(const struct dpif *); ++bool dpif_supports_explicit_drop_action(const struct dpif *); + + /* Log functions. 
*/ + struct vlog_module; +diff --git a/lib/flow.c b/lib/flow.c +index 2cf8a020ae..1379efcdf8 100644 +--- a/lib/flow.c ++++ b/lib/flow.c +@@ -685,7 +685,7 @@ ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr *nh, size_t size) + return false; + } + /* Jumbo Payload option not supported yet. */ +- if (OVS_UNLIKELY(size - plen > UINT8_MAX)) { ++ if (OVS_UNLIKELY(size - (plen + IPV6_HEADER_LEN) > UINT8_MAX)) { + return false; + } + +@@ -1013,15 +1013,14 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) + dst->map = mf.map; + } + +-ovs_be16 +-parse_dl_type(const struct eth_header *data_, size_t size) ++static ovs_be16 ++parse_dl_type(const void **datap, size_t *sizep) + { +- const void *data = data_; + union flow_vlan_hdr vlans[FLOW_MAX_VLAN_HEADERS]; + +- parse_vlan(&data, &size, vlans); ++ parse_vlan(datap, sizep, vlans); + +- return parse_ethertype(&data, &size); ++ return parse_ethertype(datap, sizep); + } + + /* Parses and return the TCP flags in 'packet', converted to host byte order. +@@ -1044,11 +1043,11 @@ parse_tcp_flags(struct dp_packet *packet) + + dp_packet_reset_offsets(packet); + +- data_pull(&data, &size, ETH_ADDR_LEN * 2); +- dl_type = parse_ethertype(&data, &size); ++ dl_type = parse_dl_type(&data, &size); + if (OVS_UNLIKELY(eth_type_mpls(dl_type))) { + packet->l2_5_ofs = (char *)data - frame; + } ++ packet->l3_ofs = (char *)data - frame; + if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { + const struct ip_header *nh = data; + int ip_len; +@@ -1058,7 +1057,6 @@ parse_tcp_flags(struct dp_packet *packet) + return 0; + } + dp_packet_set_l2_pad_size(packet, size - tot_len); +- packet->l3_ofs = (uint16_t)((char *)nh - frame); + nw_proto = nh->ip_proto; + nw_frag = ipv4_get_nw_frag(nh); + +@@ -1071,7 +1069,6 @@ parse_tcp_flags(struct dp_packet *packet) + if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) { + return 0; + } +- packet->l3_ofs = (uint16_t)((char *)nh - frame); + data_pull(&data, &size, sizeof *nh); + + plen = ntohs(nh->ip6_plen); /* Never pull padding. */ +diff --git a/lib/flow.h b/lib/flow.h +index 5ebdb1f1da..bee6f85e93 100644 +--- a/lib/flow.h ++++ b/lib/flow.h +@@ -132,7 +132,6 @@ void packet_expand(struct dp_packet *, const struct flow *, size_t size); + + bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto, + uint8_t *nw_frag); +-ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size); + bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key); + uint16_t parse_tcp_flags(struct dp_packet *packet); + +diff --git a/lib/if-notifier-manual.c b/lib/if-notifier-manual.c +new file mode 100644 +index 0000000000..54bbf76fb6 +--- /dev/null ++++ b/lib/if-notifier-manual.c +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2019 Ilya Maximets <i.maximets@ovn.org>. ++ * ++ * Licensed under the Apache License, Version 2.0 (the "License"); ++ * you may not use this file except in compliance with the License. ++ * You may obtain a copy of the License at: ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. 
++ */
++
++#include <config.h>
++#include "openvswitch/compiler.h"
++#include "openvswitch/thread.h"
++#include "openvswitch/util.h"
++#include "if-notifier.h"
++
++/* Implementation of a manual interface notifier.
++ *
++ * Intended for catching interface events that could not be tracked by
++ * OS-specific interface notifiers, e.g. iface updates in netdev-dpdk.
++ * For that purpose 'if_notifier_manual_report()' should be called directly
++ * by the code that is aware of interface changes.
++ *
++ * Thread-safety
++ * =============
++ * This notifier is thread-safe in terms of calling its functions from
++ * different thread contexts; however, if the callback passed to
++ * 'if_notifier_manual_set_cb' is used by some other code (e.g. by OS-specific
++ * notifiers) it must be thread-safe itself.
++ */
++
++static struct ovs_mutex manual_notifier_mutex = OVS_MUTEX_INITIALIZER;
++static if_notify_func *notify OVS_GUARDED_BY(manual_notifier_mutex) = NULL;
++
++void
++if_notifier_manual_set_cb(if_notify_func *cb)
++{
++ ovs_mutex_lock(&manual_notifier_mutex);
++ notify = cb;
++ ovs_mutex_unlock(&manual_notifier_mutex);
++}
++
++void
++if_notifier_manual_report(void)
++{
++ ovs_mutex_lock(&manual_notifier_mutex);
++ if (notify) {
++ notify(NULL);
++ }
++ ovs_mutex_unlock(&manual_notifier_mutex);
++}
+diff --git a/lib/if-notifier.h b/lib/if-notifier.h
+index 259138f70c..dae852e9f0 100644
+--- a/lib/if-notifier.h
++++ b/lib/if-notifier.h
+@@ -27,4 +27,11 @@ void if_notifier_destroy(struct if_notifier *);
+ void if_notifier_run(void);
+ void if_notifier_wait(void);
+
++/* The functions below are reserved for if-notifier-manual, i.e. for manual
++ * notifications from the OVS code.
++ * They must not be implemented by OS-specific notifiers. */
++
++void if_notifier_manual_set_cb(if_notify_func *);
++void if_notifier_manual_report(void);
++
+ #endif /* if-notifier.h */
+diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
+index 4c2c1ba84a..c366cf53eb 100644
+--- a/lib/jsonrpc.c
++++ b/lib/jsonrpc.c
+@@ -43,7 +43,7 @@ struct jsonrpc {
+
+ /* Input. */
+ struct byteq input;
+- uint8_t input_buffer[512];
++ uint8_t input_buffer[4096];
+ struct json_parser *parser;
+
+ /* Output. 
*/ +diff --git a/lib/meta-flow.c b/lib/meta-flow.c +index bc48d63049..a131adc918 100644 +--- a/lib/meta-flow.c ++++ b/lib/meta-flow.c +@@ -2254,12 +2254,6 @@ mf_set(const struct mf_field *mf, + switch (mf->id) { + case MFF_CT_ZONE: + case MFF_CT_NW_PROTO: +- case MFF_CT_NW_SRC: +- case MFF_CT_NW_DST: +- case MFF_CT_IPV6_SRC: +- case MFF_CT_IPV6_DST: +- case MFF_CT_TP_SRC: +- case MFF_CT_TP_DST: + case MFF_RECIRC_ID: + case MFF_PACKET_TYPE: + case MFF_CONJ_ID: +@@ -2375,6 +2369,30 @@ mf_set(const struct mf_field *mf, + ntoh128(mask->be128)); + break; + ++ case MFF_CT_NW_SRC: ++ match_set_ct_nw_src_masked(match, value->be32, mask->be32); ++ break; ++ ++ case MFF_CT_NW_DST: ++ match_set_ct_nw_dst_masked(match, value->be32, mask->be32); ++ break; ++ ++ case MFF_CT_IPV6_SRC: ++ match_set_ct_ipv6_src_masked(match, &value->ipv6, &mask->ipv6); ++ break; ++ ++ case MFF_CT_IPV6_DST: ++ match_set_ct_ipv6_dst_masked(match, &value->ipv6, &mask->ipv6); ++ break; ++ ++ case MFF_CT_TP_SRC: ++ match_set_ct_tp_src_masked(match, value->be16, mask->be16); ++ break; ++ ++ case MFF_CT_TP_DST: ++ match_set_ct_tp_dst_masked(match, value->be16, mask->be16); ++ break; ++ + case MFF_ETH_DST: + match_set_dl_dst_masked(match, value->mac, mask->mac); + break; +diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c +index 20be56bc6c..7d2ac99fba 100644 +--- a/lib/netdev-dpdk.c ++++ b/lib/netdev-dpdk.c +@@ -41,11 +41,13 @@ + #include <rte_flow.h> + + #include "cmap.h" ++#include "coverage.h" + #include "dirs.h" + #include "dp-packet.h" + #include "dpdk.h" + #include "dpif-netdev.h" + #include "fatal-signal.h" ++#include "if-notifier.h" + #include "netdev-provider.h" + #include "netdev-vport.h" + #include "odp-util.h" +@@ -71,6 +73,9 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; + VLOG_DEFINE_THIS_MODULE(netdev_dpdk); + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + ++COVERAGE_DEFINE(vhost_tx_contention); ++COVERAGE_DEFINE(vhost_notification); ++ + #define DPDK_PORT_WATCHDOG_INTERVAL 5 + + #define OVS_CACHE_LINE_SIZE CACHE_LINE_SIZE +@@ -109,34 +114,6 @@ BUILD_ASSERT_DECL(MAX_NB_MBUF % ROUND_DOWN_POW2(MAX_NB_MBUF / MIN_NB_MBUF) + BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF / MIN_NB_MBUF)) + % MP_CACHE_SZ == 0); + +-/* +- * DPDK XSTATS Counter names definition +- */ +-#define XSTAT_RX_64_PACKETS "rx_size_64_packets" +-#define XSTAT_RX_65_TO_127_PACKETS "rx_size_65_to_127_packets" +-#define XSTAT_RX_128_TO_255_PACKETS "rx_size_128_to_255_packets" +-#define XSTAT_RX_256_TO_511_PACKETS "rx_size_256_to_511_packets" +-#define XSTAT_RX_512_TO_1023_PACKETS "rx_size_512_to_1023_packets" +-#define XSTAT_RX_1024_TO_1522_PACKETS "rx_size_1024_to_1522_packets" +-#define XSTAT_RX_1523_TO_MAX_PACKETS "rx_size_1523_to_max_packets" +- +-#define XSTAT_TX_64_PACKETS "tx_size_64_packets" +-#define XSTAT_TX_65_TO_127_PACKETS "tx_size_65_to_127_packets" +-#define XSTAT_TX_128_TO_255_PACKETS "tx_size_128_to_255_packets" +-#define XSTAT_TX_256_TO_511_PACKETS "tx_size_256_to_511_packets" +-#define XSTAT_TX_512_TO_1023_PACKETS "tx_size_512_to_1023_packets" +-#define XSTAT_TX_1024_TO_1522_PACKETS "tx_size_1024_to_1522_packets" +-#define XSTAT_TX_1523_TO_MAX_PACKETS "tx_size_1523_to_max_packets" +- +-#define XSTAT_RX_MULTICAST_PACKETS "rx_multicast_packets" +-#define XSTAT_TX_MULTICAST_PACKETS "tx_multicast_packets" +-#define XSTAT_RX_BROADCAST_PACKETS "rx_broadcast_packets" +-#define XSTAT_TX_BROADCAST_PACKETS "tx_broadcast_packets" +-#define XSTAT_RX_UNDERSIZED_ERRORS "rx_undersized_errors" +-#define 
XSTAT_RX_OVERSIZE_ERRORS "rx_oversize_errors" +-#define XSTAT_RX_FRAGMENTED_ERRORS "rx_fragmented_errors" +-#define XSTAT_RX_JABBER_ERRORS "rx_jabber_errors" +- + #define SOCKET0 0 + + /* Default size of Physical NIC RXQ */ +@@ -157,7 +134,13 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF / MIN_NB_MBUF)) + typedef uint16_t dpdk_port_t; + #define DPDK_PORT_ID_FMT "%"PRIu16 + +-#define VHOST_ENQ_RETRY_NUM 8 ++/* Minimum amount of vhost tx retries, effectively a disable. */ ++#define VHOST_ENQ_RETRY_MIN 0 ++/* Maximum amount of vhost tx retries. */ ++#define VHOST_ENQ_RETRY_MAX 32 ++/* Legacy default value for vhost tx retries. */ ++#define VHOST_ENQ_RETRY_DEF 8 ++ + #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) + + static const struct rte_eth_conf port_conf = { +@@ -195,12 +178,28 @@ struct ufid_to_rte_flow_data { + static int new_device(int vid); + static void destroy_device(int vid); + static int vring_state_changed(int vid, uint16_t queue_id, int enable); ++static void vhost_guest_notified(int vid); + static const struct vhost_device_ops virtio_net_device_ops = + { + .new_device = new_device, + .destroy_device = destroy_device, + .vring_state_changed = vring_state_changed, +- .features_changed = NULL ++ .features_changed = NULL, ++ .guest_notified = vhost_guest_notified, ++}; ++ ++/* Custom software stats for dpdk ports */ ++struct netdev_dpdk_sw_stats { ++ /* No. of retries when unable to transmit. */ ++ uint64_t tx_retries; ++ /* Packet drops when unable to transmit; Probably Tx queue is full. */ ++ uint64_t tx_failure_drops; ++ /* Packet length greater than device MTU. */ ++ uint64_t tx_mtu_exceeded_drops; ++ /* Packet drops in egress policer processing. */ ++ uint64_t tx_qos_drops; ++ /* Packet drops in ingress policer processing. */ ++ uint64_t rx_qos_drops; + }; + + enum { DPDK_RING_SIZE = 256 }; +@@ -391,6 +390,8 @@ struct netdev_dpdk { + bool attached; + /* If true, rte_eth_dev_start() was successfully called */ + bool started; ++ bool reset_needed; ++ /* 1 pad byte here. */ + struct eth_addr hwaddr; + int mtu; + int socket_id; +@@ -413,7 +414,9 @@ struct netdev_dpdk { + + /* True if vHost device is 'up' and has been reconfigured at least once */ + bool vhost_reconfigured; +- /* 3 pad bytes here. */ ++ ++ atomic_uint8_t vhost_tx_retries_max; ++ /* 2 pad bytes here. */ + ); + + PADDED_MEMBERS(CACHE_LINE_SIZE, +@@ -437,9 +440,10 @@ struct netdev_dpdk { + + PADDED_MEMBERS(CACHE_LINE_SIZE, + struct netdev_stats stats; ++ struct netdev_dpdk_sw_stats *sw_stats; + /* Protects stats */ + rte_spinlock_t stats_lock; +- /* 44 pad bytes here. */ ++ /* 36 pad bytes here. 
*/ + ); + + PADDED_MEMBERS(CACHE_LINE_SIZE, +@@ -493,6 +497,8 @@ struct netdev_rxq_dpdk { + static void netdev_dpdk_destruct(struct netdev *netdev); + static void netdev_dpdk_vhost_destruct(struct netdev *netdev); + ++static int netdev_dpdk_get_sw_custom_stats(const struct netdev *, ++ struct netdev_custom_stats *); + static void netdev_dpdk_clear_xstats(struct netdev_dpdk *dev); + + int netdev_dpdk_get_vid(const struct netdev_dpdk *dev); +@@ -1162,6 +1168,8 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no, + ovsrcu_index_init(&dev->vid, -1); + dev->vhost_reconfigured = false; + dev->attached = false; ++ dev->started = false; ++ dev->reset_needed = false; + + ovsrcu_init(&dev->qos_conf, NULL); + +@@ -1194,6 +1202,9 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no, + dev->rte_xstats_ids = NULL; + dev->rte_xstats_ids_size = 0; + ++ dev->sw_stats = xzalloc(sizeof *dev->sw_stats); ++ dev->sw_stats->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX; ++ + return 0; + } + +@@ -1250,6 +1261,8 @@ vhost_common_construct(struct netdev *netdev) + return ENOMEM; + } + ++ atomic_init(&dev->vhost_tx_retries_max, VHOST_ENQ_RETRY_DEF); ++ + return common_construct(netdev, DPDK_ETH_PORT_ID_INVALID, + DPDK_DEV_VHOST, socket_id); + } +@@ -1366,6 +1379,7 @@ common_destruct(struct netdev_dpdk *dev) + ovs_list_remove(&dev->list_node); + free(ovsrcu_get_protected(struct ingress_policer *, + &dev->ingress_policer)); ++ free(dev->sw_stats); + ovs_mutex_destroy(&dev->mutex); + } + +@@ -1739,6 +1753,34 @@ netdev_dpdk_process_devargs(struct netdev_dpdk *dev, + return new_port_id; + } + ++static int ++dpdk_eth_event_callback(dpdk_port_t port_id, enum rte_eth_event_type type, ++ void *param OVS_UNUSED, void *ret_param OVS_UNUSED) ++{ ++ struct netdev_dpdk *dev; ++ ++ switch ((int) type) { ++ case RTE_ETH_EVENT_INTR_RESET: ++ ovs_mutex_lock(&dpdk_mutex); ++ dev = netdev_dpdk_lookup_by_port_id(port_id); ++ if (dev) { ++ ovs_mutex_lock(&dev->mutex); ++ dev->reset_needed = true; ++ netdev_request_reconfigure(&dev->up); ++ VLOG_DBG_RL(&rl, "%s: Device reset requested.", ++ netdev_get_name(&dev->up)); ++ ovs_mutex_unlock(&dev->mutex); ++ } ++ ovs_mutex_unlock(&dpdk_mutex); ++ break; ++ ++ default: ++ /* Ignore all other types. */ ++ break; ++ } ++ return 0; ++} ++ + static void + dpdk_set_rxq_config(struct netdev_dpdk *dev, const struct smap *args) + OVS_REQUIRES(dev->mutex) +@@ -1775,6 +1817,7 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args, + { + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + bool rx_fc_en, tx_fc_en, autoneg, lsc_interrupt_mode; ++ bool flow_control_requested = true; + enum rte_eth_fc_mode fc_mode; + static const enum rte_eth_fc_mode fc_mode_set[2][2] = { + {RTE_FC_NONE, RTE_FC_TX_PAUSE}, +@@ -1797,7 +1840,7 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args, + + new_devargs = smap_get(args, "dpdk-devargs"); + +- if (dev->devargs && strcmp(new_devargs, dev->devargs)) { ++ if (dev->devargs && new_devargs && strcmp(new_devargs, dev->devargs)) { + /* The user requested a new device. If we return error, the caller + * will delete this netdev and try to recreate it. 
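Note that dpdk_eth_event_callback() runs on DPDK's interrupt thread, so it only marks the device and requests a reconfigure; the actual rte_eth_dev_reset() happens later from netdev_dpdk_reconfigure() (see the hunk further down). The underlying pattern, as a self-contained C11 sketch with hypothetical names:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool reset_pending;

    /* Interrupt-thread side: record the event, nothing more. */
    static void
    on_device_event(void)
    {
        atomic_store(&reset_pending, true);
    }

    /* Main-thread side: pick the flag up and do the expensive work. */
    static void
    reconfigure_if_needed(void)
    {
        bool expected = true;
        if (atomic_compare_exchange_strong(&reset_pending, &expected, false)) {
            /* Reset the port, re-create the queues, restart the device. */
        }
    }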
*/ + err = EAGAIN; +@@ -1862,15 +1905,34 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args, + autoneg = smap_get_bool(args, "flow-ctrl-autoneg", false); + + fc_mode = fc_mode_set[tx_fc_en][rx_fc_en]; ++ ++ if (!smap_get(args, "rx-flow-ctrl") && !smap_get(args, "tx-flow-ctrl") ++ && !smap_get(args, "flow-ctrl-autoneg")) { ++ /* FIXME: User didn't ask for flow control configuration. ++ * For now we'll not print a warning if flow control is not ++ * supported by the DPDK port. */ ++ flow_control_requested = false; ++ } ++ ++ /* Get the Flow control configuration. */ ++ err = -rte_eth_dev_flow_ctrl_get(dev->port_id, &dev->fc_conf); ++ if (err) { ++ if (err == ENOTSUP) { ++ if (flow_control_requested) { ++ VLOG_WARN("%s: Flow control is not supported.", ++ netdev_get_name(netdev)); ++ } ++ err = 0; /* Not fatal. */ ++ } else { ++ VLOG_WARN("%s: Cannot get flow control parameters: %s", ++ netdev_get_name(netdev), rte_strerror(err)); ++ } ++ goto out; ++ } ++ + if (dev->fc_conf.mode != fc_mode || autoneg != dev->fc_conf.autoneg) { + dev->fc_conf.mode = fc_mode; + dev->fc_conf.autoneg = autoneg; +- /* Get the Flow control configuration for DPDK-ETH */ +- err = rte_eth_dev_flow_ctrl_get(dev->port_id, &dev->fc_conf); +- if (err) { +- VLOG_WARN("Cannot get flow control parameters on port " +- DPDK_PORT_ID_FMT", err=%d", dev->port_id, err); +- } + dpdk_eth_flow_ctrl_setup(dev); + } + +@@ -1901,6 +1963,7 @@ netdev_dpdk_vhost_client_set_config(struct netdev *netdev, + { + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + const char *path; ++ int max_tx_retries, cur_max_tx_retries; + + ovs_mutex_lock(&dev->mutex); + if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { +@@ -1916,6 +1979,19 @@ netdev_dpdk_vhost_client_set_config(struct netdev *netdev, + netdev_request_reconfigure(netdev); + } + } ++ ++ max_tx_retries = smap_get_int(args, "tx-retries-max", ++ VHOST_ENQ_RETRY_DEF); ++ if (max_tx_retries < VHOST_ENQ_RETRY_MIN ++ || max_tx_retries > VHOST_ENQ_RETRY_MAX) { ++ max_tx_retries = VHOST_ENQ_RETRY_DEF; ++ } ++ atomic_read_relaxed(&dev->vhost_tx_retries_max, &cur_max_tx_retries); ++ if (max_tx_retries != cur_max_tx_retries) { ++ atomic_store_relaxed(&dev->vhost_tx_retries_max, max_tx_retries); ++ VLOG_INFO("Max Tx retries for vhost device '%s' set to %d", ++ netdev_get_name(netdev), max_tx_retries); ++ } + ovs_mutex_unlock(&dev->mutex); + + return 0; +@@ -2115,16 +2191,17 @@ netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats *stats, + } + + static inline void +-netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats, ++netdev_dpdk_vhost_update_rx_counters(struct netdev_dpdk *dev, + struct dp_packet **packets, int count, +- int dropped) ++ int qos_drops) + { +- int i; +- unsigned int packet_size; ++ struct netdev_stats *stats = &dev->stats; + struct dp_packet *packet; ++ unsigned int packet_size; ++ int i; + + stats->rx_packets += count; +- stats->rx_dropped += dropped; ++ stats->rx_dropped += qos_drops; + for (i = 0; i < count; i++) { + packet = packets[i]; + packet_size = dp_packet_size(packet); +@@ -2147,6 +2224,10 @@ netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats, + + stats->rx_bytes += packet_size; + } ++ ++ if (OVS_UNLIKELY(qos_drops)) { ++ dev->sw_stats->rx_qos_drops += qos_drops; ++ } + } + + /* +@@ -2159,7 +2240,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, + struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev); + struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev); + uint16_t nb_rx = 0; +- 
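With the tx-retries-max hunk above, the retry limit becomes configurable per vhost-user client port; 0 effectively disables retries, and out-of-range values silently fall back to the default of 8 (VHOST_ENQ_RETRY_DEF). Assuming the usual OVSDB options column, for example:

    ovs-vsctl set Interface vhost-client-1 options:tx-retries-max=0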
uint16_t dropped = 0; ++ uint16_t qos_drops = 0; + int qid = rxq->queue_id * VIRTIO_QNUM + VIRTIO_TXQ; + int vid = netdev_dpdk_get_vid(dev); + +@@ -2186,16 +2267,16 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, + } + + if (policer) { +- dropped = nb_rx; ++ qos_drops = nb_rx; + nb_rx = ingress_policer_run(policer, + (struct rte_mbuf **) batch->packets, + nb_rx, true); +- dropped -= nb_rx; ++ qos_drops -= nb_rx; + } + + rte_spinlock_lock(&dev->stats_lock); +- netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch->packets, +- nb_rx, dropped); ++ netdev_dpdk_vhost_update_rx_counters(dev, batch->packets, ++ nb_rx, qos_drops); + rte_spinlock_unlock(&dev->stats_lock); + + batch->count = nb_rx; +@@ -2237,6 +2318,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch, + if (OVS_UNLIKELY(dropped)) { + rte_spinlock_lock(&dev->stats_lock); + dev->stats.rx_dropped += dropped; ++ dev->sw_stats->rx_qos_drops += dropped; + rte_spinlock_unlock(&dev->stats_lock); + } + +@@ -2296,13 +2378,17 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, struct rte_mbuf **pkts, + } + + static inline void +-netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats, ++netdev_dpdk_vhost_update_tx_counters(struct netdev_dpdk *dev, + struct dp_packet **packets, + int attempted, +- int dropped) ++ struct netdev_dpdk_sw_stats *sw_stats_add) + { +- int i; ++ int dropped = sw_stats_add->tx_mtu_exceeded_drops + ++ sw_stats_add->tx_qos_drops + ++ sw_stats_add->tx_failure_drops; ++ struct netdev_stats *stats = &dev->stats; + int sent = attempted - dropped; ++ int i; + + stats->tx_packets += sent; + stats->tx_dropped += dropped; +@@ -2310,6 +2396,15 @@ netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats, + for (i = 0; i < sent; i++) { + stats->tx_bytes += dp_packet_size(packets[i]); + } ++ ++ if (OVS_UNLIKELY(dropped || sw_stats_add->tx_retries)) { ++ struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats; ++ ++ sw_stats->tx_retries += sw_stats_add->tx_retries; ++ sw_stats->tx_failure_drops += sw_stats_add->tx_failure_drops; ++ sw_stats->tx_mtu_exceeded_drops += sw_stats_add->tx_mtu_exceeded_drops; ++ sw_stats->tx_qos_drops += sw_stats_add->tx_qos_drops; ++ } + } + + static void +@@ -2318,9 +2413,11 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid, + { + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts; +- unsigned int total_pkts = cnt; +- unsigned int dropped = 0; ++ struct netdev_dpdk_sw_stats sw_stats_add; ++ unsigned int n_packets_to_free = cnt; ++ unsigned int total_packets = cnt; + int i, retries = 0; ++ int max_retries = VHOST_ENQ_RETRY_MIN; + int vid = netdev_dpdk_get_vid(dev); + + qid = dev->tx_q[qid % netdev->n_txq].map; +@@ -2333,12 +2430,20 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid, + goto out; + } + +- rte_spinlock_lock(&dev->tx_q[qid].tx_lock); ++ if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) { ++ COVERAGE_INC(vhost_tx_contention); ++ rte_spinlock_lock(&dev->tx_q[qid].tx_lock); ++ } + + cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt); ++ sw_stats_add.tx_mtu_exceeded_drops = total_packets - cnt; ++ + /* Check has QoS has been configured for the netdev */ ++ sw_stats_add.tx_qos_drops = cnt; + cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true); +- dropped = total_pkts - cnt; ++ sw_stats_add.tx_qos_drops -= cnt; ++ ++ n_packets_to_free = cnt; + + do { + int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ; +@@ -2350,21 +2455,31 @@ 
__netdev_dpdk_vhost_send(struct netdev *netdev, int qid, + cnt -= tx_pkts; + /* Prepare for possible retry.*/ + cur_pkts = &cur_pkts[tx_pkts]; ++ if (OVS_UNLIKELY(cnt && !retries)) { ++ /* ++ * Read max retries as there are packets not sent ++ * and no retries have already occurred. ++ */ ++ atomic_read_relaxed(&dev->vhost_tx_retries_max, &max_retries); ++ } + } else { + /* No packets sent - do not retry.*/ + break; + } +- } while (cnt && (retries++ < VHOST_ENQ_RETRY_NUM)); ++ } while (cnt && (retries++ < max_retries)); + + rte_spinlock_unlock(&dev->tx_q[qid].tx_lock); + ++ sw_stats_add.tx_failure_drops = cnt; ++ sw_stats_add.tx_retries = MIN(retries, max_retries); ++ + rte_spinlock_lock(&dev->stats_lock); +- netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts, total_pkts, +- cnt + dropped); ++ netdev_dpdk_vhost_update_tx_counters(dev, pkts, total_packets, ++ &sw_stats_add); + rte_spinlock_unlock(&dev->stats_lock); + + out: +- for (i = 0; i < total_pkts - dropped; i++) { ++ for (i = 0; i < n_packets_to_free; i++) { + dp_packet_delete(pkts[i]); + } + } +@@ -2383,14 +2498,18 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) + #endif + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + struct rte_mbuf *pkts[PKT_ARRAY_SIZE]; ++ struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats; + uint32_t cnt = batch_cnt; + uint32_t dropped = 0; ++ uint32_t tx_failure = 0; ++ uint32_t mtu_drops = 0; ++ uint32_t qos_drops = 0; + + if (dev->type != DPDK_DEV_VHOST) { + /* Check if QoS has been configured for this netdev. */ + cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets, + batch_cnt, false); +- dropped += batch_cnt - cnt; ++ qos_drops = batch_cnt - cnt; + } + + uint32_t txcnt = 0; +@@ -2403,13 +2522,13 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) + VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d", + size, dev->max_packet_len); + +- dropped++; ++ mtu_drops++; + continue; + } + + pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp); + if (OVS_UNLIKELY(!pkts[txcnt])) { +- dropped += cnt - i; ++ dropped = cnt - i; + break; + } + +@@ -2426,13 +2545,17 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) + __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts, + txcnt); + } else { +- dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt); ++ tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt); + } + } + ++ dropped += qos_drops + mtu_drops + tx_failure; + if (OVS_UNLIKELY(dropped)) { + rte_spinlock_lock(&dev->stats_lock); + dev->stats.tx_dropped += dropped; ++ sw_stats->tx_failure_drops += tx_failure; ++ sw_stats->tx_mtu_exceeded_drops += mtu_drops; ++ sw_stats->tx_qos_drops += qos_drops; + rte_spinlock_unlock(&dev->stats_lock); + } + } +@@ -2473,19 +2596,27 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid, + dpdk_do_tx_copy(netdev, qid, batch); + dp_packet_delete_batch(batch, true); + } else { ++ struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats; + int tx_cnt, dropped; ++ int tx_failure, mtu_drops, qos_drops; + int batch_cnt = dp_packet_batch_size(batch); + struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets; + + tx_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt); ++ mtu_drops = batch_cnt - tx_cnt; ++ qos_drops = tx_cnt; + tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true); +- dropped = batch_cnt - tx_cnt; ++ qos_drops -= tx_cnt; + +- dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt); ++ tx_failure = 
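The vhost and copy tx paths above classify every lost packet into one drop cause (MTU exceeded, QoS policer, or tx failure) and fold the tallies into the shared counters under a single stats_lock acquisition per burst. The shape of that idiom, with a hypothetical helper name:

    /* One lock round-trip per burst instead of one per counter. */
    static void
    fold_tx_drops(struct netdev_dpdk *dev, uint64_t mtu_drops,
                  uint64_t qos_drops, uint64_t failure_drops)
    {
        rte_spinlock_lock(&dev->stats_lock);
        dev->sw_stats->tx_mtu_exceeded_drops += mtu_drops;
        dev->sw_stats->tx_qos_drops += qos_drops;
        dev->sw_stats->tx_failure_drops += failure_drops;
        dev->stats.tx_dropped += mtu_drops + qos_drops + failure_drops;
        rte_spinlock_unlock(&dev->stats_lock);
    }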
netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt); + ++ dropped = tx_failure + mtu_drops + qos_drops; + if (OVS_UNLIKELY(dropped)) { + rte_spinlock_lock(&dev->stats_lock); + dev->stats.tx_dropped += dropped; ++ sw_stats->tx_failure_drops += tx_failure; ++ sw_stats->tx_mtu_exceeded_drops += mtu_drops; ++ sw_stats->tx_qos_drops += qos_drops; + rte_spinlock_unlock(&dev->stats_lock); + } + } +@@ -2620,51 +2751,41 @@ netdev_dpdk_convert_xstats(struct netdev_stats *stats, + const struct rte_eth_xstat_name *names, + const unsigned int size) + { ++/* DPDK XSTATS Counter names definition. */ ++#define DPDK_XSTATS \ ++ DPDK_XSTAT(multicast, "rx_multicast_packets" ) \ ++ DPDK_XSTAT(tx_multicast_packets, "tx_multicast_packets" ) \ ++ DPDK_XSTAT(rx_broadcast_packets, "rx_broadcast_packets" ) \ ++ DPDK_XSTAT(tx_broadcast_packets, "tx_broadcast_packets" ) \ ++ DPDK_XSTAT(rx_undersized_errors, "rx_undersized_errors" ) \ ++ DPDK_XSTAT(rx_oversize_errors, "rx_oversize_errors" ) \ ++ DPDK_XSTAT(rx_fragmented_errors, "rx_fragmented_errors" ) \ ++ DPDK_XSTAT(rx_jabber_errors, "rx_jabber_errors" ) \ ++ DPDK_XSTAT(rx_1_to_64_packets, "rx_size_64_packets" ) \ ++ DPDK_XSTAT(rx_65_to_127_packets, "rx_size_65_to_127_packets" ) \ ++ DPDK_XSTAT(rx_128_to_255_packets, "rx_size_128_to_255_packets" ) \ ++ DPDK_XSTAT(rx_256_to_511_packets, "rx_size_256_to_511_packets" ) \ ++ DPDK_XSTAT(rx_512_to_1023_packets, "rx_size_512_to_1023_packets" ) \ ++ DPDK_XSTAT(rx_1024_to_1522_packets, "rx_size_1024_to_1522_packets" ) \ ++ DPDK_XSTAT(rx_1523_to_max_packets, "rx_size_1523_to_max_packets" ) \ ++ DPDK_XSTAT(tx_1_to_64_packets, "tx_size_64_packets" ) \ ++ DPDK_XSTAT(tx_65_to_127_packets, "tx_size_65_to_127_packets" ) \ ++ DPDK_XSTAT(tx_128_to_255_packets, "tx_size_128_to_255_packets" ) \ ++ DPDK_XSTAT(tx_256_to_511_packets, "tx_size_256_to_511_packets" ) \ ++ DPDK_XSTAT(tx_512_to_1023_packets, "tx_size_512_to_1023_packets" ) \ ++ DPDK_XSTAT(tx_1024_to_1522_packets, "tx_size_1024_to_1522_packets" ) \ ++ DPDK_XSTAT(tx_1523_to_max_packets, "tx_size_1523_to_max_packets" ) ++ + for (unsigned int i = 0; i < size; i++) { +- if (strcmp(XSTAT_RX_64_PACKETS, names[i].name) == 0) { +- stats->rx_1_to_64_packets = xstats[i].value; +- } else if (strcmp(XSTAT_RX_65_TO_127_PACKETS, names[i].name) == 0) { +- stats->rx_65_to_127_packets = xstats[i].value; +- } else if (strcmp(XSTAT_RX_128_TO_255_PACKETS, names[i].name) == 0) { +- stats->rx_128_to_255_packets = xstats[i].value; +- } else if (strcmp(XSTAT_RX_256_TO_511_PACKETS, names[i].name) == 0) { +- stats->rx_256_to_511_packets = xstats[i].value; +- } else if (strcmp(XSTAT_RX_512_TO_1023_PACKETS, names[i].name) == 0) { +- stats->rx_512_to_1023_packets = xstats[i].value; +- } else if (strcmp(XSTAT_RX_1024_TO_1522_PACKETS, names[i].name) == 0) { +- stats->rx_1024_to_1522_packets = xstats[i].value; +- } else if (strcmp(XSTAT_RX_1523_TO_MAX_PACKETS, names[i].name) == 0) { +- stats->rx_1523_to_max_packets = xstats[i].value; +- } else if (strcmp(XSTAT_TX_64_PACKETS, names[i].name) == 0) { +- stats->tx_1_to_64_packets = xstats[i].value; +- } else if (strcmp(XSTAT_TX_65_TO_127_PACKETS, names[i].name) == 0) { +- stats->tx_65_to_127_packets = xstats[i].value; +- } else if (strcmp(XSTAT_TX_128_TO_255_PACKETS, names[i].name) == 0) { +- stats->tx_128_to_255_packets = xstats[i].value; +- } else if (strcmp(XSTAT_TX_256_TO_511_PACKETS, names[i].name) == 0) { +- stats->tx_256_to_511_packets = xstats[i].value; +- } else if (strcmp(XSTAT_TX_512_TO_1023_PACKETS, names[i].name) == 0) { +- stats->tx_512_to_1023_packets 
= xstats[i].value; +- } else if (strcmp(XSTAT_TX_1024_TO_1522_PACKETS, names[i].name) == 0) { +- stats->tx_1024_to_1522_packets = xstats[i].value; +- } else if (strcmp(XSTAT_TX_1523_TO_MAX_PACKETS, names[i].name) == 0) { +- stats->tx_1523_to_max_packets = xstats[i].value; +- } else if (strcmp(XSTAT_RX_MULTICAST_PACKETS, names[i].name) == 0) { +- stats->multicast = xstats[i].value; +- } else if (strcmp(XSTAT_TX_MULTICAST_PACKETS, names[i].name) == 0) { +- stats->tx_multicast_packets = xstats[i].value; +- } else if (strcmp(XSTAT_RX_BROADCAST_PACKETS, names[i].name) == 0) { +- stats->rx_broadcast_packets = xstats[i].value; +- } else if (strcmp(XSTAT_TX_BROADCAST_PACKETS, names[i].name) == 0) { +- stats->tx_broadcast_packets = xstats[i].value; +- } else if (strcmp(XSTAT_RX_UNDERSIZED_ERRORS, names[i].name) == 0) { +- stats->rx_undersized_errors = xstats[i].value; +- } else if (strcmp(XSTAT_RX_FRAGMENTED_ERRORS, names[i].name) == 0) { +- stats->rx_fragmented_errors = xstats[i].value; +- } else if (strcmp(XSTAT_RX_JABBER_ERRORS, names[i].name) == 0) { +- stats->rx_jabber_errors = xstats[i].value; ++#define DPDK_XSTAT(MEMBER, NAME) \ ++ if (strcmp(NAME, names[i].name) == 0) { \ ++ stats->MEMBER = xstats[i].value; \ ++ continue; \ + } ++ DPDK_XSTATS; ++#undef DPDK_XSTAT + } ++#undef DPDK_XSTATS + } + + static int +@@ -2753,7 +2874,9 @@ netdev_dpdk_get_custom_stats(const struct netdev *netdev, + + uint32_t i; + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); +- int rte_xstats_ret; ++ int rte_xstats_ret, sw_stats_size; ++ ++ netdev_dpdk_get_sw_custom_stats(netdev, custom_stats); + + ovs_mutex_lock(&dev->mutex); + +@@ -2768,23 +2891,22 @@ netdev_dpdk_get_custom_stats(const struct netdev *netdev, + if (rte_xstats_ret > 0 && + rte_xstats_ret <= dev->rte_xstats_ids_size) { + +- custom_stats->size = rte_xstats_ret; +- custom_stats->counters = +- (struct netdev_custom_counter *) xcalloc(rte_xstats_ret, +- sizeof(struct netdev_custom_counter)); ++ sw_stats_size = custom_stats->size; ++ custom_stats->size += rte_xstats_ret; ++ custom_stats->counters = xrealloc(custom_stats->counters, ++ custom_stats->size * ++ sizeof *custom_stats->counters); + + for (i = 0; i < rte_xstats_ret; i++) { +- ovs_strlcpy(custom_stats->counters[i].name, ++ ovs_strlcpy(custom_stats->counters[sw_stats_size + i].name, + netdev_dpdk_get_xstat_name(dev, + dev->rte_xstats_ids[i]), + NETDEV_CUSTOM_STATS_NAME_SIZE); +- custom_stats->counters[i].value = values[i]; ++ custom_stats->counters[sw_stats_size + i].value = values[i]; + } + } else { + VLOG_WARN("Cannot get XSTATS values for port: "DPDK_PORT_ID_FMT, + dev->port_id); +- custom_stats->counters = NULL; +- custom_stats->size = 0; + /* Let's clear statistics cache, so it will be + * reconfigured */ + netdev_dpdk_clear_xstats(dev); +@@ -2798,6 +2920,55 @@ netdev_dpdk_get_custom_stats(const struct netdev *netdev, + return 0; + } + ++static int ++netdev_dpdk_get_sw_custom_stats(const struct netdev *netdev, ++ struct netdev_custom_stats *custom_stats) ++{ ++ struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); ++ int i, n; ++ ++#define SW_CSTATS \ ++ SW_CSTAT(tx_retries) \ ++ SW_CSTAT(tx_failure_drops) \ ++ SW_CSTAT(tx_mtu_exceeded_drops) \ ++ SW_CSTAT(tx_qos_drops) \ ++ SW_CSTAT(rx_qos_drops) ++ ++#define SW_CSTAT(NAME) + 1 ++ custom_stats->size = SW_CSTATS; ++#undef SW_CSTAT ++ custom_stats->counters = xcalloc(custom_stats->size, ++ sizeof *custom_stats->counters); ++ ++ ovs_mutex_lock(&dev->mutex); ++ ++ rte_spinlock_lock(&dev->stats_lock); ++ i = 0; ++#define SW_CSTAT(NAME) \ ++ 
custom_stats->counters[i++].value = dev->sw_stats->NAME; ++ SW_CSTATS; ++#undef SW_CSTAT ++ rte_spinlock_unlock(&dev->stats_lock); ++ ++ ovs_mutex_unlock(&dev->mutex); ++ ++ i = 0; ++ n = 0; ++#define SW_CSTAT(NAME) \ ++ if (custom_stats->counters[i].value != UINT64_MAX) { \ ++ ovs_strlcpy(custom_stats->counters[n].name, \ ++ "ovs_"#NAME, NETDEV_CUSTOM_STATS_NAME_SIZE); \ ++ custom_stats->counters[n].value = custom_stats->counters[i].value; \ ++ n++; \ ++ } \ ++ i++; ++ SW_CSTATS; ++#undef SW_CSTAT ++ ++ custom_stats->size = n; ++ return 0; ++} ++ + static int + netdev_dpdk_get_features(const struct netdev *netdev, + enum netdev_features *current, +@@ -3639,6 +3810,12 @@ vring_state_changed(int vid, uint16_t queue_id, int enable) + return 0; + } + ++static ++void vhost_guest_notified(int vid OVS_UNUSED) ++{ ++ COVERAGE_INC(vhost_notification); ++} ++ + /* + * Retrieve the DPDK virtio device ID (vid) associated with a vhostuser + * or vhostuserclient netdev. +@@ -3672,6 +3849,8 @@ netdev_dpdk_class_init(void) + /* This function can be called for different classes. The initialization + * needs to be done only once */ + if (ovsthread_once_start(&once)) { ++ int ret; ++ + ovs_thread_create("dpdk_watchdog", dpdk_watchdog, NULL); + unixctl_command_register("netdev-dpdk/set-admin-state", + "[netdev] up|down", 1, 2, +@@ -3685,6 +3864,14 @@ netdev_dpdk_class_init(void) + "[netdev]", 0, 1, + netdev_dpdk_get_mempool_info, NULL); + ++ ret = rte_eth_dev_callback_register(RTE_ETH_ALL, ++ RTE_ETH_EVENT_INTR_RESET, ++ dpdk_eth_event_callback, NULL); ++ if (ret != 0) { ++ VLOG_ERR("Ethernet device callback register error: %s", ++ rte_strerror(-ret)); ++ } ++ + ovsthread_once_done(&once); + } + +@@ -4046,13 +4233,20 @@ netdev_dpdk_reconfigure(struct netdev *netdev) + && dev->rxq_size == dev->requested_rxq_size + && dev->txq_size == dev->requested_txq_size + && dev->socket_id == dev->requested_socket_id +- && dev->started) { ++ && dev->started && !dev->reset_needed) { + /* Reconfiguration is unnecessary */ + + goto out; + } + +- rte_eth_dev_stop(dev->port_id); ++ if (dev->reset_needed) { ++ rte_eth_dev_reset(dev->port_id); ++ if_notifier_manual_report(); ++ dev->reset_needed = false; ++ } else { ++ rte_eth_dev_stop(dev->port_id); ++ } ++ + dev->started = false; + + err = netdev_dpdk_mempool_configure(dev); +@@ -4580,7 +4774,8 @@ netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, + struct rte_flow_item_eth eth_mask; + memset(ð_spec, 0, sizeof(eth_spec)); + memset(ð_mask, 0, sizeof(eth_mask)); +- if (!eth_addr_is_zero(match->wc.masks.dl_src) || ++ if (match->wc.masks.dl_type || ++ !eth_addr_is_zero(match->wc.masks.dl_src) || + !eth_addr_is_zero(match->wc.masks.dl_dst)) { + rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof(eth_spec.dst)); + rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof(eth_spec.src)); +@@ -4594,15 +4789,6 @@ netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, + + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, + ð_spec, ð_mask); +- } else { +- /* +- * If user specifies a flow (like UDP flow) without L2 patterns, +- * OVS will at least set the dl_type. Normally, it's enough to +- * create an eth pattern just with it. Unluckily, some Intel's +- * NIC (such as XL710) doesn't support that. Below is a workaround, +- * which simply matches any L2 pkts. 
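Two idioms in netdev_dpdk_get_sw_custom_stats() above deserve a note. First, common_construct() initializes tx_retries to UINT64_MAX on non-vhost ports, and the final copy loop uses that value as a sentinel to hide counters that do not apply to the device type. Second, SW_CSTAT is an X-macro: expanding the list once with "+ 1" per entry computes the number of counters at compile time. A self-contained illustration of the counting trick, with hypothetical names:

    #define FRUIT_LIST \
        FRUIT(apple)   \
        FRUIT(pear)    \
        FRUIT(plum)

    #define FRUIT(NAME) + 1
    enum { N_FRUITS = 0 FRUIT_LIST };  /* expands to 0 + 1 + 1 + 1 == 3 */
    #undef FRUIT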
+- */ +- add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL); + } + + /* VLAN */ +@@ -5020,6 +5206,7 @@ static const struct netdev_class dpdk_vhost_class = { + .send = netdev_dpdk_vhost_send, + .get_carrier = netdev_dpdk_vhost_get_carrier, + .get_stats = netdev_dpdk_vhost_get_stats, ++ .get_custom_stats = netdev_dpdk_get_sw_custom_stats, + .get_status = netdev_dpdk_vhost_user_get_status, + .reconfigure = netdev_dpdk_vhost_reconfigure, + .rxq_recv = netdev_dpdk_vhost_rxq_recv +@@ -5034,6 +5221,7 @@ static const struct netdev_class dpdk_vhost_client_class = { + .send = netdev_dpdk_vhost_send, + .get_carrier = netdev_dpdk_vhost_get_carrier, + .get_stats = netdev_dpdk_vhost_get_stats, ++ .get_custom_stats = netdev_dpdk_get_sw_custom_stats, + .get_status = netdev_dpdk_vhost_user_get_status, + .reconfigure = netdev_dpdk_vhost_client_reconfigure, + .rxq_recv = netdev_dpdk_vhost_rxq_recv +diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c +index 4ab645953b..1ff39e5da6 100644 +--- a/lib/netdev-linux.c ++++ b/lib/netdev-linux.c +@@ -708,10 +708,6 @@ netdev_linux_update_lag(struct rtnetlink_change *change) + { + struct linux_lag_slave *lag; + +- if (!rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) { +- return; +- } +- + if (change->slave && netdev_linux_kind_is_lag(change->slave)) { + lag = shash_find_data(&lag_shash, change->ifname); + +@@ -809,8 +805,11 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) + netdev_linux_update(netdev, nsid, &change); + ovs_mutex_unlock(&netdev->mutex); + } +- else if (!netdev_ && change.ifname) { +- /* Netdev is not present in OvS but its master could be. */ ++ ++ if (change.ifname && ++ rtnetlink_type_is_rtnlgrp_link(change.nlmsg_type)) { ++ ++ /* Need to try updating the LAG information. 
*/ + ovs_mutex_lock(&lag_mutex); + netdev_linux_update_lag(&change); + ovs_mutex_unlock(&lag_mutex); +@@ -5753,8 +5752,8 @@ netdev_linux_update_via_netlink(struct netdev_linux *netdev) + + ofpbuf_init(&request, 0); + nl_msg_put_nlmsghdr(&request, +- sizeof(struct ifinfomsg) + NL_ATTR_SIZE(IFNAMSIZ), +- RTM_GETLINK, NLM_F_REQUEST); ++ sizeof(struct ifinfomsg) + NL_ATTR_SIZE(IFNAMSIZ) + ++ NL_A_U32_SIZE, RTM_GETLINK, NLM_F_REQUEST); + ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg)); + + /* The correct identifiers for a Linux device are netnsid and ifindex, +diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c +index d309810e45..b55108a260 100644 +--- a/lib/netdev-tc-offloads.c ++++ b/lib/netdev-tc-offloads.c +@@ -574,7 +574,10 @@ parse_tc_flower_to_match(struct tc_flower *flower, + } + + if (flower->tunnel) { +- match_set_tun_id(match, flower->key.tunnel.id); ++ if (flower->mask.tunnel.id) { ++ match_set_tun_id(match, flower->key.tunnel.id); ++ match->flow.tunnel.flags |= FLOW_TNL_F_KEY; ++ } + if (flower->key.tunnel.ipv4.ipv4_dst) { + match_set_tun_src(match, flower->key.tunnel.ipv4.ipv4_src); + match_set_tun_dst(match, flower->key.tunnel.ipv4.ipv4_dst); +@@ -628,7 +631,9 @@ parse_tc_flower_to_match(struct tc_flower *flower, + size_t tunnel_offset = + nl_msg_start_nested(buf, OVS_KEY_ATTR_TUNNEL); + +- nl_msg_put_be64(buf, OVS_TUNNEL_KEY_ATTR_ID, action->encap.id); ++ if (action->encap.id_present) { ++ nl_msg_put_be64(buf, OVS_TUNNEL_KEY_ATTR_ID, action->encap.id); ++ } + if (action->encap.ipv4.ipv4_src) { + nl_msg_put_be32(buf, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, + action->encap.ipv4.ipv4_src); +@@ -830,11 +835,13 @@ parse_put_flow_set_action(struct tc_flower *flower, struct tc_action *action, + tunnel_len = nl_attr_get_size(set); + + action->type = TC_ACT_ENCAP; ++ action->encap.id_present = false; + flower->action_count++; + NL_ATTR_FOR_EACH_UNSAFE(tun_attr, tun_left, tunnel, tunnel_len) { + switch (nl_attr_type(tun_attr)) { + case OVS_TUNNEL_KEY_ATTR_ID: { + action->encap.id = nl_attr_get_be64(tun_attr); ++ action->encap.id_present = true; + } + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: { +@@ -1099,6 +1106,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, + flower.key.tunnel.tp_dst = tnl->tp_dst; + flower.mask.tunnel.tos = tnl_mask->ip_tos; + flower.mask.tunnel.ttl = tnl_mask->ip_ttl; ++ flower.mask.tunnel.id = (tnl->flags & FLOW_TNL_F_KEY) ? 
tnl_mask->tun_id : 0; + flower_match_to_tun_opt(&flower, tnl, tnl_mask); + flower.tunnel = true; + } +@@ -1113,6 +1121,10 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, + } + mask->mpls_lse[0] = 0; + ++ if (eth_type_vlan(key->vlans[0].tpid)) { ++ flower.key.encap_eth_type[0] = flower.key.eth_type; ++ flower.key.eth_type = key->vlans[0].tpid; ++ } + if (mask->vlans[0].tci) { + ovs_be16 vid_mask = mask->vlans[0].tci & htons(VLAN_VID_MASK); + ovs_be16 pcp_mask = mask->vlans[0].tci & htons(VLAN_PCP_MASK); +@@ -1133,8 +1145,6 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, + VLOG_DBG_RL(&rl, "vlan_prio[0]: %d\n", + flower.key.vlan_prio[0]); + } +- flower.key.encap_eth_type[0] = flower.key.eth_type; +- flower.key.eth_type = key->vlans[0].tpid; + } else if (mask->vlans[0].tci == htons(0xffff) && + ntohs(key->vlans[0].tci) == 0) { + /* exact && no vlan */ +@@ -1144,6 +1154,10 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, + } + } + ++ if (eth_type_vlan(key->vlans[1].tpid)) { ++ flower.key.encap_eth_type[1] = flower.key.encap_eth_type[0]; ++ flower.key.encap_eth_type[0] = key->vlans[1].tpid; ++ } + if (mask->vlans[1].tci) { + ovs_be16 vid_mask = mask->vlans[1].tci & htons(VLAN_VID_MASK); + ovs_be16 pcp_mask = mask->vlans[1].tci & htons(VLAN_PCP_MASK); +@@ -1163,8 +1177,6 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, + flower.mask.vlan_prio[1] = vlan_tci_to_pcp(mask->vlans[1].tci); + VLOG_DBG_RL(&rl, "vlan_prio[1]: %d", flower.key.vlan_prio[1]); + } +- flower.key.encap_eth_type[1] = flower.key.encap_eth_type[0]; +- flower.key.encap_eth_type[0] = key->vlans[1].tpid; + } else if (mask->vlans[1].tci == htons(0xffff) && + ntohs(key->vlans[1].tci) == 0) { + /* exact && no vlan */ +@@ -1256,14 +1268,19 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, + } + } + ++ /* ignore exact match on skb_mark of 0. 
*/ ++ if (mask->pkt_mark == UINT32_MAX && !key->pkt_mark) { ++ mask->pkt_mark = 0; ++ } ++ + err = test_key_and_mask(match); + if (err) { + return err; + } + + NL_ATTR_FOR_EACH(nla, left, actions, actions_len) { +- if (flower.action_count >= TCA_ACT_MAX_PRIO) { +- VLOG_DBG_RL(&rl, "Can only support %d actions", flower.action_count); ++ if (flower.action_count >= TCA_ACT_MAX_NUM) { ++ VLOG_DBG_RL(&rl, "Can only support %d actions", TCA_ACT_MAX_NUM); + return EOPNOTSUPP; + } + action = &flower.actions[flower.action_count]; +@@ -1271,6 +1288,10 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, + odp_port_t port = nl_attr_get_odp_port(nla); + struct netdev *outdev = netdev_ports_get(port, info->dpif_class); + ++ if (!outdev) { ++ VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port); ++ return ENODEV; ++ } + action->ifindex_out = netdev_get_ifindex(outdev); + action->type = TC_ACT_OUTPUT; + flower.action_count++; +@@ -1317,8 +1338,12 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, + block_id = get_block_id_from_netdev(netdev); + handle = get_ufid_tc_mapping(ufid, &prio, NULL); + if (handle && prio) { ++ bool flow_deleted; ++ + VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", handle, prio); +- del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid); ++ flow_deleted = !del_filter_and_ufid_mapping(ifindex, prio, handle, ++ block_id, ufid); ++ info->tc_modify_flow_deleted = flow_deleted; + } + + if (!prio) { +@@ -1524,11 +1549,16 @@ netdev_tc_init_flow_api(struct netdev *netdev) + return -ifindex; + } + ++ block_id = get_block_id_from_netdev(netdev); ++ tc_del_filter(ifindex, 0, 0, block_id); ++ + /* make sure there is no ingress qdisc */ + tc_add_del_ingress_qdisc(ifindex, false, 0); + + if (ovsthread_once_start(&block_once)) { + probe_tc_block_support(ifindex); ++ /* Need to re-fetch block id as it depends on feature availability. */ ++ block_id = get_block_id_from_netdev(netdev); + ovsthread_once_done(&block_once); + } + +@@ -1537,7 +1567,6 @@ netdev_tc_init_flow_api(struct netdev *netdev) + ovsthread_once_done(&multi_mask_once); + } + +- block_id = get_block_id_from_netdev(netdev); + error = tc_add_del_ingress_qdisc(ifindex, true, block_id); + + if (error && error != EEXIST) { +diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c +index 808a43f99d..550fbe4722 100644 +--- a/lib/netdev-vport.c ++++ b/lib/netdev-vport.c +@@ -189,22 +189,34 @@ netdev_vport_alloc(void) + int + netdev_vport_construct(struct netdev *netdev_) + { ++ const struct netdev_class *class = netdev_get_class(netdev_); ++ const char *dpif_port = netdev_vport_class_get_dpif_port(class); + struct netdev_vport *dev = netdev_vport_cast(netdev_); ++ const char *p, *name = netdev_get_name(netdev_); + const char *type = netdev_get_type(netdev_); ++ uint16_t port = 0; + + ovs_mutex_init(&dev->mutex); + eth_addr_random(&dev->etheraddr); + +- /* Add a default destination port for tunnel ports if none specified. */ ++ if (name && dpif_port && (strlen(name) > strlen(dpif_port) + 1) && ++ (!strncmp(name, dpif_port, strlen(dpif_port)))) { ++ p = name + strlen(dpif_port) + 1; ++ port = atoi(p); ++ } ++ ++ /* If a destination port for tunnel ports is specified in the netdev ++ * name, use it instead of the default one. Otherwise, use the default ++ * destination port */ + if (!strcmp(type, "geneve")) { +- dev->tnl_cfg.dst_port = htons(GENEVE_DST_PORT); ++ dev->tnl_cfg.dst_port = port ? 
htons(port) : htons(GENEVE_DST_PORT); + } else if (!strcmp(type, "vxlan")) { +- dev->tnl_cfg.dst_port = htons(VXLAN_DST_PORT); ++ dev->tnl_cfg.dst_port = port ? htons(port) : htons(VXLAN_DST_PORT); + update_vxlan_global_cfg(netdev_, NULL, &dev->tnl_cfg); + } else if (!strcmp(type, "lisp")) { +- dev->tnl_cfg.dst_port = htons(LISP_DST_PORT); ++ dev->tnl_cfg.dst_port = port ? htons(port) : htons(LISP_DST_PORT); + } else if (!strcmp(type, "stt")) { +- dev->tnl_cfg.dst_port = htons(STT_DST_PORT); ++ dev->tnl_cfg.dst_port = port ? htons(port) : htons(STT_DST_PORT); + } + + dev->tnl_cfg.dont_fragment = true; +@@ -1210,7 +1222,8 @@ netdev_vport_tunnel_register(void) + .type = "ip6gre", + .build_header = netdev_gre_build_header, + .push_header = netdev_gre_push_header, +- .pop_header = netdev_gre_pop_header ++ .pop_header = netdev_gre_pop_header, ++ .get_ifindex = NETDEV_VPORT_GET_IFINDEX, + }, + {{NULL, NULL, 0, 0}} + }, +diff --git a/lib/netdev.h b/lib/netdev.h +index d94817fb62..2d2d6f3a60 100644 +--- a/lib/netdev.h ++++ b/lib/netdev.h +@@ -208,6 +208,9 @@ struct offload_info { + * it will be in the pkt meta data. + */ + uint32_t flow_mark; ++ ++ bool tc_modify_flow_deleted; /* Indicate the tc modify flow put success ++ * to delete the original flow. */ + }; + struct dpif_class; + struct netdev_flow_dump; +diff --git a/lib/odp-execute.c b/lib/odp-execute.c +index 3b6890e952..1d33fcbb8d 100644 +--- a/lib/odp-execute.c ++++ b/lib/odp-execute.c +@@ -25,6 +25,7 @@ + #include <stdlib.h> + #include <string.h> + ++#include "coverage.h" + #include "dp-packet.h" + #include "dpif.h" + #include "netlink.h" +@@ -36,6 +37,72 @@ + #include "util.h" + #include "csum.h" + #include "conntrack.h" ++#include "openvswitch/vlog.h" ++ ++VLOG_DEFINE_THIS_MODULE(odp_execute); ++COVERAGE_DEFINE(datapath_drop_sample_error); ++COVERAGE_DEFINE(datapath_drop_nsh_decap_error); ++COVERAGE_DEFINE(drop_action_of_pipeline); ++COVERAGE_DEFINE(drop_action_bridge_not_found); ++COVERAGE_DEFINE(drop_action_recursion_too_deep); ++COVERAGE_DEFINE(drop_action_too_many_resubmit); ++COVERAGE_DEFINE(drop_action_stack_too_deep); ++COVERAGE_DEFINE(drop_action_no_recirculation_context); ++COVERAGE_DEFINE(drop_action_recirculation_conflict); ++COVERAGE_DEFINE(drop_action_too_many_mpls_labels); ++COVERAGE_DEFINE(drop_action_invalid_tunnel_metadata); ++COVERAGE_DEFINE(drop_action_unsupported_packet_type); ++COVERAGE_DEFINE(drop_action_congestion); ++COVERAGE_DEFINE(drop_action_forwarding_disabled); ++ ++static void ++dp_update_drop_action_counter(enum xlate_error drop_reason, ++ int delta) ++{ ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); ++ ++ switch (drop_reason) { ++ case XLATE_OK: ++ COVERAGE_ADD(drop_action_of_pipeline, delta); ++ break; ++ case XLATE_BRIDGE_NOT_FOUND: ++ COVERAGE_ADD(drop_action_bridge_not_found, delta); ++ break; ++ case XLATE_RECURSION_TOO_DEEP: ++ COVERAGE_ADD(drop_action_recursion_too_deep, delta); ++ break; ++ case XLATE_TOO_MANY_RESUBMITS: ++ COVERAGE_ADD(drop_action_too_many_resubmit, delta); ++ break; ++ case XLATE_STACK_TOO_DEEP: ++ COVERAGE_ADD(drop_action_stack_too_deep, delta); ++ break; ++ case XLATE_NO_RECIRCULATION_CONTEXT: ++ COVERAGE_ADD(drop_action_no_recirculation_context, delta); ++ break; ++ case XLATE_RECIRCULATION_CONFLICT: ++ COVERAGE_ADD(drop_action_recirculation_conflict, delta); ++ break; ++ case XLATE_TOO_MANY_MPLS_LABELS: ++ COVERAGE_ADD(drop_action_too_many_mpls_labels, delta); ++ break; ++ case XLATE_INVALID_TUNNEL_METADATA: ++ 
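The netdev_vport_construct() change above derives the tunnel destination port from the netdev name when the name carries one, so a datapath port such as vxlan_sys_4789 keeps UDP port 4789 instead of reverting to the class default. A standalone sketch of the parsing rule (the names are examples):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int
    main(void)
    {
        const char *name = "vxlan_sys_4789"; /* example datapath port name */
        const char *dpif_port = "vxlan_sys"; /* prefix from the class */
        unsigned int port = 0;

        if (strlen(name) > strlen(dpif_port) + 1
            && !strncmp(name, dpif_port, strlen(dpif_port))) {
            port = atoi(name + strlen(dpif_port) + 1); /* skip the '_' */
        }
        printf("dst_port = %u\n", port); /* prints: dst_port = 4789 */
        return 0;
    }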
COVERAGE_ADD(drop_action_invalid_tunnel_metadata, delta);
++        break;
++    case XLATE_UNSUPPORTED_PACKET_TYPE:
++        COVERAGE_ADD(drop_action_unsupported_packet_type, delta);
++        break;
++    case XLATE_CONGESTION_DROP:
++        COVERAGE_ADD(drop_action_congestion, delta);
++        break;
++    case XLATE_FORWARDING_DISABLED:
++        COVERAGE_ADD(drop_action_forwarding_disabled, delta);
++        break;
++    case XLATE_MAX:
++    default:
++        VLOG_ERR_RL(&rl, "Invalid Drop reason type: %d", drop_reason);
++    }
++}
+ 
+ /* Masked copy of an ethernet address. 'src' is already properly masked. */
+ static void
+@@ -589,6 +656,7 @@ odp_execute_sample(void *dp, struct dp_packet *packet, bool steal,
+         case OVS_SAMPLE_ATTR_PROBABILITY:
+             if (random_uint32() >= nl_attr_get_u32(a)) {
+                 if (steal) {
++                    COVERAGE_INC(datapath_drop_sample_error);
+                     dp_packet_delete(packet);
+                 }
+                 return;
+@@ -642,6 +710,49 @@ odp_execute_clone(void *dp, struct dp_packet_batch *batch, bool steal,
+     }
+ }
+ 
++static void
++odp_execute_check_pkt_len(void *dp, struct dp_packet *packet, bool steal,
++                          const struct nlattr *action,
++                          odp_execute_cb dp_execute_action)
++{
++    static const struct nl_policy ovs_cpl_policy[] = {
++        [OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = { .type = NL_A_U16 },
++        [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = { .type = NL_A_NESTED },
++        [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL]
++            = { .type = NL_A_NESTED },
++    };
++    struct nlattr *attrs[ARRAY_SIZE(ovs_cpl_policy)];
++
++    if (!nl_parse_nested(action, ovs_cpl_policy, attrs, ARRAY_SIZE(attrs))) {
++        OVS_NOT_REACHED();
++    }
++
++    const struct nlattr *a;
++    struct dp_packet_batch pb;
++
++    a = attrs[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN];
++    bool is_greater = dp_packet_size(packet) > nl_attr_get_u16(a);
++    if (is_greater) {
++        a = attrs[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER];
++    } else {
++        a = attrs[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL];
++    }
++
++    if (!steal) {
++        /* The 'subactions' may modify the packet, but the modification
++         * should not propagate beyond this action.  Make a copy of the
++         * packet in case we don't own it, so that the 'subactions' are
++         * only applied to the copy.  'odp_execute_actions' will free
++         * the clone. */
++        packet = dp_packet_clone(packet);
++    }
++    /* If nl_attr_get(a) is NULL, the packet will be freed by
++     * odp_execute_actions. */
++    dp_packet_batch_init_packet(&pb, packet);
++    odp_execute_actions(dp, &pb, true, nl_attr_get(a), nl_attr_get_size(a),
++                        dp_execute_action);
++}
++
+ static bool
+ requires_datapath_assistance(const struct nlattr *a)
+ {
+@@ -673,6 +784,8 @@ requires_datapath_assistance(const struct nlattr *a)
+     case OVS_ACTION_ATTR_PUSH_NSH:
+     case OVS_ACTION_ATTR_POP_NSH:
+     case OVS_ACTION_ATTR_CT_CLEAR:
++    case OVS_ACTION_ATTR_CHECK_PKT_LEN:
++    case OVS_ACTION_ATTR_DROP:
+         return false;
+ 
+     case OVS_ACTION_ATTR_UNSPEC:
+@@ -889,6 +1002,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
+                 if (pop_nsh(packet)) {
+                     dp_packet_batch_refill(batch, packet, i);
+                 } else {
++                    COVERAGE_INC(datapath_drop_nsh_decap_error);
+                     dp_packet_delete(packet);
+                 }
+             }
+@@ -900,6 +1014,27 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
+             }
+             break;
+ 
++        case OVS_ACTION_ATTR_CHECK_PKT_LEN:
++            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
++                odp_execute_check_pkt_len(dp, packet, steal && last_action, a,
++                                          dp_execute_action);
++            }
++
++            if (last_action) {
++                /* We do not need to free the packets.
++                 * odp_execute_check_pkt_len() has stolen them. 
*/ ++ return; ++ } ++ break; ++ ++ case OVS_ACTION_ATTR_DROP:{ ++ const enum xlate_error *drop_reason = nl_attr_get(a); ++ ++ dp_update_drop_action_counter(*drop_reason, ++ dp_packet_batch_size(batch)); ++ dp_packet_delete_batch(batch, steal); ++ return; ++ } + case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_TUNNEL_PUSH: + case OVS_ACTION_ATTR_TUNNEL_POP: +diff --git a/lib/odp-util.c b/lib/odp-util.c +index d41c9369f2..9222960f54 100644 +--- a/lib/odp-util.c ++++ b/lib/odp-util.c +@@ -131,6 +131,8 @@ odp_action_len(uint16_t type) + case OVS_ACTION_ATTR_CLONE: return ATTR_LEN_VARIABLE; + case OVS_ACTION_ATTR_PUSH_NSH: return ATTR_LEN_VARIABLE; + case OVS_ACTION_ATTR_POP_NSH: return 0; ++ case OVS_ACTION_ATTR_CHECK_PKT_LEN: return ATTR_LEN_VARIABLE; ++ case OVS_ACTION_ATTR_DROP: return sizeof(uint32_t); + + case OVS_ACTION_ATTR_UNSPEC: + case __OVS_ACTION_ATTR_MAX: +@@ -1042,6 +1044,42 @@ format_odp_set_nsh(struct ds *ds, const struct nlattr *attr) + ds_put_cstr(ds, "))"); + } + ++static void ++format_odp_check_pkt_len_action(struct ds *ds, const struct nlattr *attr, ++ const struct hmap *portno_names OVS_UNUSED) ++{ ++ static const struct nl_policy ovs_cpl_policy[] = { ++ [OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = { .type = NL_A_U16 }, ++ [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = { .type = NL_A_NESTED }, ++ [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] ++ = { .type = NL_A_NESTED }, ++ }; ++ struct nlattr *a[ARRAY_SIZE(ovs_cpl_policy)]; ++ ds_put_cstr(ds, "check_pkt_len"); ++ if (!nl_parse_nested(attr, ovs_cpl_policy, a, ARRAY_SIZE(a))) { ++ ds_put_cstr(ds, "(error)"); ++ return; ++ } ++ ++ if (!a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] || ++ !a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL]) { ++ ds_put_cstr(ds, "(error)"); ++ return; ++ } ++ ++ uint16_t pkt_len = nl_attr_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]); ++ ds_put_format(ds, "(size=%u,gt(", pkt_len); ++ const struct nlattr *acts; ++ acts = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER]; ++ format_odp_actions(ds, nl_attr_get(acts), nl_attr_get_size(acts), ++ portno_names); ++ ++ ds_put_cstr(ds, "),le("); ++ acts = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL]; ++ format_odp_actions(ds, nl_attr_get(acts), nl_attr_get_size(acts), ++ portno_names); ++ ds_put_cstr(ds, "))"); ++} + + static void + format_odp_action(struct ds *ds, const struct nlattr *a, +@@ -1181,6 +1219,12 @@ format_odp_action(struct ds *ds, const struct nlattr *a, + case OVS_ACTION_ATTR_POP_NSH: + ds_put_cstr(ds, "pop_nsh()"); + break; ++ case OVS_ACTION_ATTR_CHECK_PKT_LEN: ++ format_odp_check_pkt_len_action(ds, a, portno_names); ++ break; ++ case OVS_ACTION_ATTR_DROP: ++ ds_put_cstr(ds, "drop"); ++ break; + case OVS_ACTION_ATTR_UNSPEC: + case __OVS_ACTION_ATTR_MAX: + default: +@@ -2397,6 +2441,52 @@ parse_odp_action(const char *s, const struct simap *port_names, + } + } + ++ { ++ uint16_t pkt_len; ++ int n = -1; ++ if (ovs_scan(s, "check_pkt_len(size=%"SCNi16",gt(%n", &pkt_len, &n)) { ++ size_t cpl_ofs, actions_ofs; ++ cpl_ofs = nl_msg_start_nested(actions, ++ OVS_ACTION_ATTR_CHECK_PKT_LEN); ++ nl_msg_put_u16(actions, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, pkt_len); ++ actions_ofs = nl_msg_start_nested( ++ actions, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER); ++ ++ int retval; ++ if (!strncasecmp(s + n, "drop", 4)) { ++ n += 4; ++ } else { ++ retval = parse_action_list(s + n, port_names, actions); ++ if (retval < 0) { ++ return retval; ++ } ++ ++ n += retval; ++ } ++ nl_msg_end_nested(actions, actions_ofs); ++ retval = -1; ++ if (!ovs_scan(s + n, "),le(%n", &retval)) { 
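Taken together, the check_pkt_len formatting and parsing code being added here round-trips datapath actions of the form check_pkt_len(size=N,gt(ACTIONS),le(ACTIONS)), where either branch may be the literal "drop". For instance, a flow that drops oversized packets and forwards everything else to port 2 would carry:

    check_pkt_len(size=1500,gt(drop),le(2))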
++ return -EINVAL; ++ } ++ n += retval; ++ ++ actions_ofs = nl_msg_start_nested( ++ actions, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL); ++ if (!strncasecmp(s + n, "drop", 4)) { ++ n += 4; ++ } else { ++ retval = parse_action_list(s + n, port_names, actions); ++ if (retval < 0) { ++ return retval; ++ } ++ n += retval; ++ } ++ nl_msg_end_nested(actions, actions_ofs); ++ nl_msg_end_nested(actions, cpl_ofs); ++ return s[n + 1] == ')' ? n + 2 : -EINVAL; ++ } ++ } ++ + { + int retval; + +@@ -2433,6 +2523,7 @@ odp_actions_from_string(const char *s, const struct simap *port_names, + size_t old_size; + + if (!strcasecmp(s, "drop")) { ++ nl_msg_put_u32(actions, OVS_ACTION_ATTR_DROP, XLATE_OK); + return 0; + } + +@@ -5757,26 +5848,28 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms, + if (flow->ct_nw_proto) { + if (parms->support.ct_orig_tuple + && flow->dl_type == htons(ETH_TYPE_IP)) { +- struct ovs_key_ct_tuple_ipv4 ct = { +- data->ct_nw_src, +- data->ct_nw_dst, +- data->ct_tp_src, +- data->ct_tp_dst, +- data->ct_nw_proto, +- }; +- nl_msg_put_unspec(buf, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, &ct, +- sizeof ct); ++ struct ovs_key_ct_tuple_ipv4 *ct; ++ ++ /* 'struct ovs_key_ct_tuple_ipv4' has padding, clear it. */ ++ ct = nl_msg_put_unspec_zero(buf, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, ++ sizeof *ct); ++ ct->ipv4_src = data->ct_nw_src; ++ ct->ipv4_dst = data->ct_nw_dst; ++ ct->src_port = data->ct_tp_src; ++ ct->dst_port = data->ct_tp_dst; ++ ct->ipv4_proto = data->ct_nw_proto; + } else if (parms->support.ct_orig_tuple6 + && flow->dl_type == htons(ETH_TYPE_IPV6)) { +- struct ovs_key_ct_tuple_ipv6 ct = { +- data->ct_ipv6_src, +- data->ct_ipv6_dst, +- data->ct_tp_src, +- data->ct_tp_dst, +- data->ct_nw_proto, +- }; +- nl_msg_put_unspec(buf, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, &ct, +- sizeof ct); ++ struct ovs_key_ct_tuple_ipv6 *ct; ++ ++ /* 'struct ovs_key_ct_tuple_ipv6' has padding, clear it. */ ++ ct = nl_msg_put_unspec_zero(buf, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, ++ sizeof *ct); ++ ct->ipv6_src = data->ct_ipv6_src; ++ ct->ipv6_dst = data->ct_ipv6_dst; ++ ct->src_port = data->ct_tp_src; ++ ct->dst_port = data->ct_tp_dst; ++ ct->ipv6_proto = data->ct_nw_proto; + } + } + if (parms->support.recirc) { +@@ -5986,6 +6079,10 @@ odp_key_from_dp_packet(struct ofpbuf *buf, const struct dp_packet *packet) + + nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, md->skb_priority); + ++ if (md->dp_hash) { ++ nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, md->dp_hash); ++ } ++ + if (flow_tnl_dst_is_set(&md->tunnel)) { + tun_key_to_attr(buf, &md->tunnel, &md->tunnel, NULL, NULL); + } +diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c +index 68132c9455..1259c59141 100644 +--- a/lib/ofp-actions.c ++++ b/lib/ofp-actions.c +@@ -355,6 +355,9 @@ enum ofp_raw_action_type { + /* NX1.3+(48): void. */ + NXAST_RAW_DEC_NSH_TTL, + ++ /* NX1.0+(49): struct nx_action_check_pkt_larger, ... VLMFF */ ++ NXAST_RAW_CHECK_PKT_LARGER, ++ + /* ## ------------------ ## */ + /* ## Debugging actions. ## */ + /* ## ------------------ ## */ +@@ -492,6 +495,7 @@ ofpact_next_flattened(const struct ofpact *ofpact) + case OFPACT_ENCAP: + case OFPACT_DECAP: + case OFPACT_DEC_NSH_TTL: ++ case OFPACT_CHECK_PKT_LARGER: + return ofpact_next(ofpact); + + case OFPACT_CLONE: +@@ -7400,6 +7404,124 @@ check_WRITE_METADATA(const struct ofpact_metadata *a OVS_UNUSED, + return 0; + } + ++/* Check packet length action. */ ++ ++struct nx_action_check_pkt_larger { ++ ovs_be16 type; /* OFPAT_VENDOR. */ ++ ovs_be16 len; /* 24. */ ++ ovs_be32 vendor; /* NX_VENDOR_ID. 
*/ ++ ovs_be16 subtype; /* NXAST_OUTPUT_REG. */ ++ ovs_be16 pkt_len; /* Length of the packet to check. */ ++ ovs_be16 offset; /* Result bit offset in destination. */ ++ /* Followed by: ++ * - 'dst', as an OXM/NXM header (either 4 or 8 bytes). ++ * - Enough 0-bytes to pad the action out to 24 bytes. */ ++ uint8_t pad[10]; ++}; ++ ++OFP_ASSERT(sizeof(struct nx_action_check_pkt_larger) == 24); ++ ++static enum ofperr ++decode_NXAST_RAW_CHECK_PKT_LARGER( ++ const struct nx_action_check_pkt_larger *ncpl, ++ enum ofp_version ofp_version OVS_UNUSED, ++ const struct vl_mff_map *vl_mff_map, uint64_t *tlv_bitmap, ++ struct ofpbuf *out) ++{ ++ struct ofpact_check_pkt_larger *check_pkt_larger; ++ enum ofperr error; ++ ++ check_pkt_larger = ofpact_put_CHECK_PKT_LARGER(out); ++ check_pkt_larger->pkt_len = ntohs(ncpl->pkt_len); ++ check_pkt_larger->dst.ofs = ntohs(ncpl->offset); ++ check_pkt_larger->dst.n_bits = 1; ++ ++ struct ofpbuf b = ofpbuf_const_initializer(ncpl, ntohs(ncpl->len)); ++ ofpbuf_pull(&b, OBJECT_OFFSETOF(ncpl, pad)); ++ ++ error = mf_vl_mff_nx_pull_header(&b, vl_mff_map, ++ &check_pkt_larger->dst.field, ++ NULL, tlv_bitmap); ++ if (error) { ++ return error; ++ } ++ ++ if (!is_all_zeros(b.data, b.size)) { ++ return OFPERR_NXBRC_MUST_BE_ZERO; ++ } ++ ++ return mf_check_dst(&check_pkt_larger->dst, NULL); ++} ++ ++static void ++encode_CHECK_PKT_LARGER(const struct ofpact_check_pkt_larger *check_pkt_larger, ++ enum ofp_version ofp_version OVS_UNUSED, ++ struct ofpbuf *out) ++{ ++ struct nx_action_check_pkt_larger *ncpl = put_NXAST_CHECK_PKT_LARGER(out); ++ ncpl->pkt_len = htons(check_pkt_larger->pkt_len); ++ ncpl->offset = htons(check_pkt_larger->dst.ofs); ++ ++ if (check_pkt_larger->dst.field) { ++ size_t size = out->size; ++ out->size = size - sizeof ncpl->pad; ++ nx_put_mff_header(out, check_pkt_larger->dst.field, 0, false); ++ out->size = size; ++ } ++} ++ ++static char * OVS_WARN_UNUSED_RESULT ++parse_CHECK_PKT_LARGER(char *arg, const struct ofpact_parse_params *pp) ++{ ++ char *value; ++ char *delim; ++ char *key; ++ char *error = set_field_split_str(arg, &key, &value, &delim); ++ if (error) { ++ return error; ++ } ++ ++ delim[0] = '\0'; ++ if (value[strlen(value) - 1] == ')') { ++ value[strlen(value) - 1] = '\0'; ++ } ++ struct mf_subfield dst; ++ error = mf_parse_subfield(&dst, key); ++ if (error) { ++ return error; ++ } ++ ++ if (dst.n_bits != 1) { ++ return xstrdup("Only 1-bit destination field is allowed"); ++ } ++ ++ struct ofpact_check_pkt_larger *check_pkt_larger = ++ ofpact_put_CHECK_PKT_LARGER(pp->ofpacts); ++ error = str_to_u16(value, NULL, &check_pkt_larger->pkt_len); ++ if (error) { ++ return error; ++ } ++ check_pkt_larger->dst = dst; ++ return NULL; ++} ++ ++static void ++format_CHECK_PKT_LARGER(const struct ofpact_check_pkt_larger *a, ++ const struct ofpact_format_params *fp) ++{ ++ ds_put_format(fp->s, "%scheck_pkt_larger(%s%"PRIu32")->", ++ colors.param, colors.end, a->pkt_len); ++ mf_format_subfield(&a->dst, fp->s); ++} ++ ++static enum ofperr ++check_CHECK_PKT_LARGER(const struct ofpact_check_pkt_larger *a OVS_UNUSED, ++ const struct ofpact_check_params *cp OVS_UNUSED) ++{ ++ return 0; ++} ++ ++ + /* Goto-Table instruction. 
*/ + + static void +@@ -7686,6 +7808,7 @@ action_set_classify(const struct ofpact *a) + case OFPACT_WRITE_METADATA: + case OFPACT_DEBUG_RECIRC: + case OFPACT_DEBUG_SLOW: ++ case OFPACT_CHECK_PKT_LARGER: + return ACTION_SLOT_INVALID; + + default: +@@ -7885,6 +8008,7 @@ ovs_instruction_type_from_ofpact_type(enum ofpact_type type) + case OFPACT_ENCAP: + case OFPACT_DECAP: + case OFPACT_DEC_NSH_TTL: ++ case OFPACT_CHECK_PKT_LARGER: + default: + return OVSINST_OFPIT11_APPLY_ACTIONS; + } +@@ -8755,6 +8879,7 @@ ofpact_outputs_to_port(const struct ofpact *ofpact, ofp_port_t port) + case OFPACT_ENCAP: + case OFPACT_DECAP: + case OFPACT_DEC_NSH_TTL: ++ case OFPACT_CHECK_PKT_LARGER: + default: + return false; + } +@@ -8991,7 +9116,6 @@ ofpacts_parse__(char *str, const struct ofpact_parse_params *pp, + enum ofpact_type type; + char *error = NULL; + ofp_port_t port; +- + if (ofpact_type_from_name(key, &type)) { + error = ofpact_parse(type, value, pp); + inst = ovs_instruction_type_from_ofpact_type(type); +diff --git a/lib/ofp-packet.c b/lib/ofp-packet.c +index aa3417c9b0..4638d8192c 100644 +--- a/lib/ofp-packet.c ++++ b/lib/ofp-packet.c +@@ -420,6 +420,7 @@ enum nx_continuation_prop_type { + NXCPT_COOKIE, + NXCPT_ACTIONS, + NXCPT_ACTION_SET, ++ NXCPT_ODP_PORT, + }; + + /* Only NXT_PACKET_IN2 (not NXT_RESUME) should include NXCPT_USERDATA, so this +@@ -506,6 +507,10 @@ ofputil_put_packet_in_private(const struct ofputil_packet_in_private *pin, + ofpprop_end(msg, start); + } + ++ if (pin->odp_port) { ++ ofpprop_put_u32(msg, NXCPT_ODP_PORT, odp_to_u32(pin->odp_port)); ++ } ++ + if (msg->size > inner_ofs) { + ofpprop_end(msg, continuation_ofs); + } else { +@@ -876,6 +881,13 @@ ofputil_decode_packet_in_private(const struct ofp_header *oh, bool loose, + error = parse_actions_property(&payload, oh->version, &action_set); + break; + ++ case NXCPT_ODP_PORT: { ++ uint32_t value; ++ error = ofpprop_parse_u32(&payload, &value); ++ pin->odp_port = u32_to_odp(value); ++ break; ++ } ++ + default: + error = OFPPROP_UNKNOWN(loose, "continuation", type); + break; +@@ -1011,6 +1023,11 @@ ofputil_packet_in_private_format(struct ds *s, + ds_put_char(s, '\n'); + } + ++ if (pin->odp_port) { ++ ds_put_format(s, " continuation.odp_port=%"PRIu32, pin->odp_port); ++ ds_put_char(s, '\n'); ++ } ++ + if (verbosity > 0) { + char *packet = ofp_packet_to_string( + public->packet, public->packet_len, +diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c +index a8b5a877c5..a90b926efb 100644 +--- a/lib/ofp-parse.c ++++ b/lib/ofp-parse.c +@@ -335,6 +335,16 @@ ofputil_parse_key_value(char **stringp, char **keyp, char **valuep) + char *value = *stringp; + size_t value_len = parse_value(value, value_delims); + char value_delim = value[value_len]; ++ ++ /* Handle the special case if the value is of the form "(x)->y". ++ * After parsing, 'valuep' will be pointing to - "x)->y". 
++     * */
++    if (key_delim == '(' && value[value_len] == ')' &&
++        value[value_len + 1] == '-' && value[value_len + 2] == '>') {
++        value_delims = ", \t\r\n";
++        value_len += parse_value(&value[value_len], value_delims);
++        value_delim = value[value_len];
++    }
+     value[value_len] = '\0';
+     *stringp += value_len + (value_delim != '\0');
+ 
+diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml
+index fec0b95fe7..758083e771 100644
+--- a/lib/ovs-actions.xml
++++ b/lib/ovs-actions.xml
+@@ -1453,6 +1453,43 @@ $ ovs-ofctl -O OpenFlow10 add-flow br0 actions=mod_nw_src:1.2.3.4
+       </p>
+     </conformance>
+   </action>
++
++  <action name="CHECK_PKT_LARGER">
++    <h2>The <code>check_pkt_larger</code> action</h2>
++    <syntax>
++      <code>check_pkt_larger(<var>pkt_len</var>)-><var>dst</var></code>
++    </syntax>
++
++    <p>
++      Checks if the packet is larger than the specified length in
++      <var>pkt_len</var>.  If so, stores 1 in <var>dst</var>, which should be
++      a 1-bit field; if not, stores 0.
++    </p>
++
++    <p>
++      The packet length to check against the argument <var>pkt_len</var>
++      includes the L2 header and L2 payload of the packet, but not the VLAN
++      tag (if present).
++    </p>
++
++    <p>
++      Examples:
++    </p>
++
++    <ul>
++      <li>
++        <code>check_pkt_larger(1500)->reg0[0]</code>
++      </li>
++
++      <li>
++        <code>check_pkt_larger(8000)->reg9[10]</code>
++      </li>
++    </ul>
++
++    <p>
++      This action was added in Open vSwitch 2.11.90.
++    </p>
++  </action>
+ </group>
+ 
+ <group title="Metadata Actions">
+diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c
+index ebc8120f0f..cde1e925ba 100644
+--- a/lib/ovs-rcu.c
++++ b/lib/ovs-rcu.c
+@@ -30,6 +30,8 @@
+ 
+ VLOG_DEFINE_THIS_MODULE(ovs_rcu);
+ 
++#define MIN_CBS 16
++
+ struct ovsrcu_cb {
+     void (*function)(void *aux);
+     void *aux;
+@@ -37,7 +39,8 @@ struct ovsrcu_cb {
+ 
+ struct ovsrcu_cbset {
+     struct ovs_list list_node;
+-    struct ovsrcu_cb cbs[16];
++    struct ovsrcu_cb *cbs;
++    size_t n_allocated;
+     int n_cbs;
+ };
+ 
+@@ -310,16 +313,19 @@ ovsrcu_postpone__(void (*function)(void *aux), void *aux)
+     cbset = perthread->cbset;
+     if (!cbset) {
+         cbset = perthread->cbset = xmalloc(sizeof *perthread->cbset);
++        cbset->cbs = xmalloc(MIN_CBS * sizeof *cbset->cbs);
++        cbset->n_allocated = MIN_CBS;
+         cbset->n_cbs = 0;
+     }
+ 
++    if (cbset->n_cbs == cbset->n_allocated) {
++        cbset->cbs = x2nrealloc(cbset->cbs, &cbset->n_allocated,
++                                sizeof *cbset->cbs);
++    }
++
+     cb = &cbset->cbs[cbset->n_cbs++];
+     cb->function = function;
+     cb->aux = aux;
+-
+-    if (cbset->n_cbs >= ARRAY_SIZE(cbset->cbs)) {
+-        ovsrcu_flush_cbset(perthread);
+-    }
+ }
+ 
+ static bool
+@@ -341,6 +347,7 @@ ovsrcu_call_postponed(void)
+     for (cb = cbset->cbs; cb < &cbset->cbs[cbset->n_cbs]; cb++) {
+         cb->function(cb->aux);
+     }
++    free(cbset->cbs);
+     free(cbset);
+ }
+ 
+diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
+index 4c9862b88b..ebb5b50f3c 100644
+--- a/lib/ovsdb-idl.c
++++ b/lib/ovsdb-idl.c
+@@ -577,7 +577,6 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db)
+         struct ovsdb_idl_table *table = &db->tables[i];
+         struct ovsdb_idl_row *row, *next_row;
+ 
+-        table->cond_changed = false;
+         if (hmap_is_empty(&table->rows)) {
+             continue;
+         }
+@@ -600,7 +599,6 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db)
+         }
+     }
+ 
+-    db->cond_changed = false;
+     db->cond_seqno = 0;
+     ovsdb_idl_db_track_clear(db);
+ 
+diff --git a/lib/packets.h b/lib/packets.h
+index 8ae054a742..5e438db9dd 100644
+--- a/lib/packets.h
++++ b/lib/packets.h
+@@ -744,6 +744,9 @@ struct icmp_header {
+ };
+ BUILD_ASSERT_DECL(ICMP_HEADER_LEN == sizeof(struct icmp_header));
+ 
++/* ICMPV4 */
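The ovs-rcu hunk above replaces the fixed 16-slot callback array with a heap array grown by x2nrealloc(), so postponing more than 16 callbacks between quiescent states no longer forces an early flush of the set. A minimal sketch of the doubling idiom, assuming OVS's x2nrealloc(p, &n, size) semantics (double 'n', starting from 0 -> 1, and realloc accordingly):

    size_t allocated = 0, n = 0;
    int *items = NULL;

    for (int v = 0; v < 100; v++) {
        if (n == allocated) {
            items = x2nrealloc(items, &allocated, sizeof *items);
        }
        items[n++] = v; /* amortized O(1) append */
    }
    free(items);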
++#define ICMP_ERROR_DATA_L4_LEN 8
++
+ #define IGMP_HEADER_LEN 8
+ struct igmp_header {
+ uint8_t igmp_type;
+diff --git a/lib/pvector.c b/lib/pvector.c
+index aaeee92147..cc527fdc41 100644
+--- a/lib/pvector.c
++++ b/lib/pvector.c
+@@ -33,7 +33,7 @@ pvector_impl_alloc(size_t size)
+ struct pvector_impl *impl;
+
+ impl = xmalloc(sizeof *impl + size * sizeof impl->vector[0]);
+- impl->size = 0;
++ atomic_init(&impl->size, 0);
+ impl->allocated = size;
+
+ return impl;
+@@ -117,18 +117,22 @@ pvector_insert(struct pvector *pvec, void *ptr, int priority)
+ {
+ struct pvector_impl *temp = pvec->temp;
+ struct pvector_impl *old = pvector_impl_get(pvec);
++ size_t size;
+
+ ovs_assert(ptr != NULL);
+
++ /* There is no possible concurrent writer. Insertions must be protected
++ * by a mutex or always be executed from the same thread. */
++ atomic_read_relaxed(&old->size, &size);
++
+ /* Check if can add to the end without reallocation. */
+- if (!temp && old->allocated > old->size &&
+- (!old->size || priority <= old->vector[old->size - 1].priority)) {
+- old->vector[old->size].ptr = ptr;
+- old->vector[old->size].priority = priority;
++ if (!temp && old->allocated > size &&
++ (!size || priority <= old->vector[size - 1].priority)) {
++ old->vector[size].ptr = ptr;
++ old->vector[size].priority = priority;
+ /* Size increment must not be visible to the readers before the new
+ * entry is stored. */
+- atomic_thread_fence(memory_order_release);
+- ++old->size;
++ atomic_store_explicit(&old->size, size + 1, memory_order_release);
+ } else {
+ if (!temp) {
+ temp = pvector_impl_dup(old);
+diff --git a/lib/pvector.h b/lib/pvector.h
+index b175b213d1..3d491ee257 100644
+--- a/lib/pvector.h
++++ b/lib/pvector.h
+@@ -62,8 +62,8 @@ struct pvector_entry {
+ };
+
+ struct pvector_impl {
+- size_t size; /* Number of entries in the vector. */
+- size_t allocated; /* Number of allocated entries. */
++ atomic_size_t size; /* Number of entries in the vector. */
++ size_t allocated; /* Number of allocated entries. */
+ struct pvector_entry vector[];
+ };
+
+@@ -174,12 +174,17 @@ pvector_cursor_init(const struct pvector *pvec,
+ {
+ const struct pvector_impl *impl;
+ struct pvector_cursor cursor;
++ size_t size;
+
+ impl = ovsrcu_get(struct pvector_impl *, &pvec->impl);
+
+- ovs_prefetch_range(impl->vector, impl->size * sizeof impl->vector[0]);
++ /* Use memory_order_acquire to ensure that entry accesses cannot be
++ * reordered to happen before the size is read.
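++ * This acquire read pairs with the memory_order_release store of
++ * 'size' in pvector_insert().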
*/ ++ atomic_read_explicit(&CONST_CAST(struct pvector_impl *, impl)->size, ++ &size, memory_order_acquire); ++ ovs_prefetch_range(impl->vector, size * sizeof impl->vector[0]); + +- cursor.size = impl->size; ++ cursor.size = size; + cursor.vector = impl->vector; + cursor.entry_idx = -1; + +diff --git a/lib/stream-ssl.c b/lib/stream-ssl.c +index 63b141d1ca..343dced587 100644 +--- a/lib/stream-ssl.c ++++ b/lib/stream-ssl.c +@@ -425,6 +425,7 @@ do_ca_cert_bootstrap(struct stream *stream) + static char * + get_peer_common_name(const struct ssl_stream *sslv) + { ++ char *peer_name = NULL; + X509 *peer_cert = SSL_get_peer_certificate(sslv->ssl); + if (!peer_cert) { + return NULL; +@@ -433,18 +434,18 @@ get_peer_common_name(const struct ssl_stream *sslv) + int cn_index = X509_NAME_get_index_by_NID(X509_get_subject_name(peer_cert), + NID_commonName, -1); + if (cn_index < 0) { +- return NULL; ++ goto error; + } + + X509_NAME_ENTRY *cn_entry = X509_NAME_get_entry( + X509_get_subject_name(peer_cert), cn_index); + if (!cn_entry) { +- return NULL; ++ goto error; + } + + ASN1_STRING *cn_data = X509_NAME_ENTRY_get_data(cn_entry); + if (!cn_data) { +- return NULL; ++ goto error; + } + + const char *cn; +@@ -454,7 +455,11 @@ get_peer_common_name(const struct ssl_stream *sslv) + #else + cn = (const char *)ASN1_STRING_get0_data(cn_data); + #endif +- return xstrdup(cn); ++ peer_name = xstrdup(cn); ++ ++error: ++ X509_free(peer_cert); ++ return peer_name; + } + + static int +diff --git a/lib/tc.c b/lib/tc.c +index b19f075f2b..bb2336dab9 100644 +--- a/lib/tc.c ++++ b/lib/tc.c +@@ -571,6 +571,7 @@ nl_parse_flower_tunnel(struct nlattr **attrs, struct tc_flower *flower) + ovs_be32 id = nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_KEY_ID]); + + flower->key.tunnel.id = be32_to_be64(id); ++ flower->mask.tunnel.id = OVS_BE64_MAX; + } + if (attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK]) { + flower->key.tunnel.ipv4.ipv4_src = +@@ -1014,6 +1015,7 @@ nl_parse_act_tunnel_key(struct nlattr *options, struct tc_flower *flower) + action->encap.ipv6.ipv6_dst = nl_attr_get_in6_addr(ipv6_dst); + } + action->encap.id = id ? be32_to_be64(nl_attr_get_be32(id)) : 0; ++ action->encap.id_present = id ? true : false; + action->encap.tp_dst = dst_port ? nl_attr_get_be16(dst_port) : 0; + action->encap.tos = tos ? nl_attr_get_u8(tos) : 0; + action->encap.ttl = ttl ? 
nl_attr_get_u8(ttl) : 0; +@@ -1304,7 +1306,7 @@ static int + nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower) + { + const struct nlattr *actions = attrs[TCA_FLOWER_ACT]; +- static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO + 1] = {}; ++ static struct nl_policy actions_orders_policy[TCA_ACT_MAX_NUM + 1] = {}; + struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)]; + const int max_size = ARRAY_SIZE(actions_orders_policy); + +@@ -1323,8 +1325,8 @@ nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower) + if (actions_orders[i]) { + int err; + +- if (flower->action_count >= TCA_ACT_MAX_PRIO) { +- VLOG_DBG_RL(&error_rl, "Can only support %d actions", flower->action_count); ++ if (flower->action_count >= TCA_ACT_MAX_NUM) { ++ VLOG_DBG_RL(&error_rl, "Can only support %d actions", TCA_ACT_MAX_NUM); + return EOPNOTSUPP; + } + err = nl_parse_single_action(actions_orders[i], flower); +@@ -1631,9 +1633,9 @@ nl_msg_put_act_tunnel_geneve_option(struct ofpbuf *request, + } + + static void +-nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, ovs_be64 id, +- ovs_be32 ipv4_src, ovs_be32 ipv4_dst, +- struct in6_addr *ipv6_src, ++nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, bool id_present, ++ ovs_be64 id, ovs_be32 ipv4_src, ++ ovs_be32 ipv4_dst, struct in6_addr *ipv6_src, + struct in6_addr *ipv6_dst, + ovs_be16 tp_dst, uint8_t tos, uint8_t ttl, + struct tun_metadata tun_metadata, +@@ -1650,7 +1652,9 @@ nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, ovs_be64 id, + nl_msg_put_unspec(request, TCA_TUNNEL_KEY_PARMS, &tun, sizeof tun); + + ovs_be32 id32 = be64_to_be32(id); +- nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_KEY_ID, id32); ++ if (id_present) { ++ nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_KEY_ID, id32); ++ } + if (ipv4_dst) { + nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_SRC, ipv4_src); + nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_DST, ipv4_dst); +@@ -1715,6 +1719,14 @@ nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) { + } + } + ++static inline void ++nl_msg_put_act_flags(struct ofpbuf *request) { ++ struct nla_bitfield32 act_flags = { TCA_ACT_FLAGS_NO_PERCPU_STATS, ++ TCA_ACT_FLAGS_NO_PERCPU_STATS }; ++ ++ nl_msg_put_unspec(request, TCA_ACT_FLAGS, &act_flags, sizeof act_flags); ++} ++ + /* Given flower, a key_to_pedit map entry, calculates the rest, + * where: + * +@@ -1883,6 +1895,7 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) + if (flower->tunnel) { + act_offset = nl_msg_start_nested(request, act_index++); + nl_msg_put_act_tunnel_key_release(request); ++ nl_msg_put_act_flags(request); + nl_msg_end_nested(request, act_offset); + } + +@@ -1900,13 +1913,15 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) + if (flower->csum_update_flags) { + act_offset = nl_msg_start_nested(request, act_index++); + nl_msg_put_act_csum(request, flower->csum_update_flags); ++ nl_msg_put_act_flags(request); + nl_msg_end_nested(request, act_offset); + } + } + break; + case TC_ACT_ENCAP: { + act_offset = nl_msg_start_nested(request, act_index++); +- nl_msg_put_act_tunnel_key_set(request, action->encap.id, ++ nl_msg_put_act_tunnel_key_set(request, action->encap.id_present, ++ action->encap.id, + action->encap.ipv4.ipv4_src, + action->encap.ipv4.ipv4_dst, + &action->encap.ipv6.ipv6_src, +@@ -1916,12 +1931,14 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) + action->encap.ttl, + action->encap.data, + action->encap.no_csum); ++ 
nl_msg_put_act_flags(request); + nl_msg_end_nested(request, act_offset); + } + break; + case TC_ACT_VLAN_POP: { + act_offset = nl_msg_start_nested(request, act_index++); + nl_msg_put_act_pop_vlan(request); ++ nl_msg_put_act_flags(request); + nl_msg_end_nested(request, act_offset); + } + break; +@@ -1931,6 +1948,7 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) + action->vlan.vlan_push_tpid, + action->vlan.vlan_push_id, + action->vlan.vlan_push_prio); ++ nl_msg_put_act_flags(request); + nl_msg_end_nested(request, act_offset); + } + break; +@@ -1950,6 +1968,7 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) + TCA_EGRESS_MIRROR); + } + nl_msg_put_act_cookie(request, &flower->act_cookie); ++ nl_msg_put_act_flags(request); + nl_msg_end_nested(request, act_offset); + } + break; +@@ -1960,6 +1979,7 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) + act_offset = nl_msg_start_nested(request, act_index++); + nl_msg_put_act_drop(request); + nl_msg_put_act_cookie(request, &flower->act_cookie); ++ nl_msg_put_act_flags(request); + nl_msg_end_nested(request, act_offset); + } + nl_msg_end_nested(request, offset); +@@ -2026,6 +2046,7 @@ nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower) + uint8_t ttl = flower->key.tunnel.ttl; + uint8_t tos_mask = flower->mask.tunnel.tos; + uint8_t ttl_mask = flower->mask.tunnel.ttl; ++ ovs_be64 id_mask = flower->mask.tunnel.id; + + if (ipv4_dst) { + nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC, ipv4_src); +@@ -2045,7 +2066,9 @@ nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower) + if (tp_dst) { + nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_DST_PORT, tp_dst); + } +- nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_KEY_ID, id); ++ if (id_mask) { ++ nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_KEY_ID, id); ++ } + nl_msg_put_flower_tunnel_opts(request, TCA_FLOWER_KEY_ENC_OPTS, + flower->key.tunnel.metadata); + nl_msg_put_flower_tunnel_opts(request, TCA_FLOWER_KEY_ENC_OPTS_MASK, +diff --git a/lib/tc.h b/lib/tc.h +index 7196a32d75..c6bcda8efb 100644 +--- a/lib/tc.h ++++ b/lib/tc.h +@@ -147,6 +147,7 @@ struct tc_action { + } vlan; + + struct { ++ bool id_present; + ovs_be64 id; + ovs_be16 tp_src; + ovs_be16 tp_dst; +@@ -174,6 +175,8 @@ enum tc_offloaded_state { + TC_OFFLOADED_STATE_NOT_IN_HW, + }; + ++#define TCA_ACT_MAX_NUM 16 ++ + struct tc_flower { + uint32_t handle; + uint32_t prio; +@@ -182,7 +185,7 @@ struct tc_flower { + struct tc_flower_key mask; + + int action_count; +- struct tc_action actions[TCA_ACT_MAX_PRIO]; ++ struct tc_action actions[TCA_ACT_MAX_NUM]; + + struct ovs_flow_stats stats; + uint64_t lastused; +diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c +index 4029806788..3f7c50dbd6 100644 +--- a/ofproto/ofproto-dpif-ipfix.c ++++ b/ofproto/ofproto-dpif-ipfix.c +@@ -3014,7 +3014,9 @@ dpif_ipfix_read_actions(const struct flow *flow, + case OVS_ACTION_ATTR_POP_ETH: + case OVS_ACTION_ATTR_PUSH_NSH: + case OVS_ACTION_ATTR_POP_NSH: ++ case OVS_ACTION_ATTR_CHECK_PKT_LEN: + case OVS_ACTION_ATTR_UNSPEC: ++ case OVS_ACTION_ATTR_DROP: + case __OVS_ACTION_ATTR_MAX: + default: + break; +diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c +index 7da31753c7..28b04682ea 100644 +--- a/ofproto/ofproto-dpif-sflow.c ++++ b/ofproto/ofproto-dpif-sflow.c +@@ -1026,7 +1026,7 @@ sflow_read_set_action(const struct nlattr *attr, + sflow_actions->tunnel.ip_tos = key->ipv4_tos; + } + if (key->ipv4_ttl) { +- 
sflow_actions->tunnel.ip_tos = key->ipv4_ttl; ++ sflow_actions->tunnel.ip_ttl = key->ipv4_ttl; + } + } + break; +@@ -1222,6 +1222,8 @@ dpif_sflow_read_actions(const struct flow *flow, + case OVS_ACTION_ATTR_PUSH_NSH: + case OVS_ACTION_ATTR_POP_NSH: + case OVS_ACTION_ATTR_UNSPEC: ++ case OVS_ACTION_ATTR_CHECK_PKT_LEN: ++ case OVS_ACTION_ATTR_DROP: + case __OVS_ACTION_ATTR_MAX: + default: + break; +diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c +index 4143bfa29f..ea9efbbae3 100644 +--- a/ofproto/ofproto-dpif-upcall.c ++++ b/ofproto/ofproto-dpif-upcall.c +@@ -1526,6 +1526,7 @@ process_upcall(struct udpif *udpif, struct upcall *upcall, + : NULL), + am->pin.up.action_set_len = state->action_set_len, + am->pin.up.bridge = upcall->ofproto->uuid; ++ am->pin.up.odp_port = upcall->packet->md.in_port.odp_port; + } + + /* We don't want to use the upcall 'flow', since it may be +@@ -1797,7 +1798,7 @@ ukey_create_from_dpif_flow(const struct udpif *udpif, + } + + reval_seq = seq_read(udpif->reval_seq) - 1; /* Ensure revalidation. */ +- ofpbuf_use_const(&actions, &flow->actions, flow->actions_len); ++ ofpbuf_use_const(&actions, flow->actions, flow->actions_len); + *ukey = ukey_create__(flow->key, flow->key_len, + flow->mask, flow->mask_len, flow->ufid_present, + &flow->ufid, flow->pmd_id, &actions, +diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c +index 09ad76538f..3f83acd1a9 100644 +--- a/ofproto/ofproto-dpif-xlate.c ++++ b/ofproto/ofproto-dpif-xlate.c +@@ -444,6 +444,12 @@ const char *xlate_strerror(enum xlate_error error) + return "Invalid tunnel metadata"; + case XLATE_UNSUPPORTED_PACKET_TYPE: + return "Unsupported packet type"; ++ case XLATE_CONGESTION_DROP: ++ return "Congestion Drop"; ++ case XLATE_FORWARDING_DISABLED: ++ return "Forwarding is disabled"; ++ case XLATE_MAX: ++ break; + } + return "Unknown error"; + } +@@ -487,6 +493,7 @@ ctx_cancel_freeze(struct xlate_ctx *ctx) + ctx->recirc_update_dp_hash = false; + ofpbuf_clear(&ctx->frozen_actions); + ctx->frozen_actions.header = NULL; ++ ctx->pause = NULL; + } + } + +@@ -4296,6 +4303,7 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id, + !is_ip_any(&ctx->xin->flow)) { + xlate_report_error(ctx, + "resubmit(ct) with non-tracked or non-IP packet!"); ++ ctx->table_id = old_table_id; + return; + } + tuple_swap(&ctx->xin->flow, ctx->wc); +@@ -5569,6 +5577,7 @@ reversible_actions(const struct ofpact *ofpacts, size_t ofpacts_len) + case OFPACT_UNROLL_XLATE: + case OFPACT_WRITE_ACTIONS: + case OFPACT_WRITE_METADATA: ++ case OFPACT_CHECK_PKT_LARGER: + break; + + case OFPACT_CT: +@@ -5877,6 +5886,7 @@ freeze_unroll_actions(const struct ofpact *a, const struct ofpact *end, + case OFPACT_CT: + case OFPACT_CT_CLEAR: + case OFPACT_NAT: ++ case OFPACT_CHECK_PKT_LARGER: + /* These may not generate PACKET INs. */ + break; + +@@ -5924,6 +5934,12 @@ put_ct_label(const struct flow *flow, struct ofpbuf *odp_actions, + } + } + ++static void ++put_drop_action(struct ofpbuf *odp_actions, enum xlate_error error) ++{ ++ nl_msg_put_u32(odp_actions, OVS_ACTION_ATTR_DROP, error); ++} ++ + static void + put_ct_helper(struct xlate_ctx *ctx, + struct ofpbuf *odp_actions, struct ofpact_conntrack *ofc) +@@ -6069,6 +6085,118 @@ compose_ct_clear_action(struct xlate_ctx *ctx) + } + } + ++/* check_pkt_larger action checks the packet length and stores the ++ * result in the register bit. 
We translate this action to the
++ * datapath action 'check_pkt_len', whose format is:
++ * 'check_pkt_len(pkt_len, ge(actions), le(actions))'.
++ *
++ * We first set the destination register bit to 1 and call
++ * 'do_xlate_actions' for the case where the packet length is greater
++ * than the specified length.
++ *
++ * We then set the destination register bit to 0 and call
++ * 'do_xlate_actions' for the case where the packet length is less than
++ * or equal to the specified length.
++ *
++ * It is possible for freezing to happen in both cases.
++ */
++static void
++xlate_check_pkt_larger(struct xlate_ctx *ctx,
++ struct ofpact_check_pkt_larger *check_pkt_larger,
++ const struct ofpact *remaining_acts,
++ size_t remaining_acts_len)
++{
++ union mf_subvalue value;
++ memset(&value, 0, sizeof value);
++ if (!ctx->xbridge->support.check_pkt_len) {
++ uint8_t is_pkt_larger = 0;
++ if (ctx->xin->packet) {
++ is_pkt_larger =
++ dp_packet_size(ctx->xin->packet) > check_pkt_larger->pkt_len;
++ }
++ value.u8_val = is_pkt_larger;
++ mf_write_subfield_flow(&check_pkt_larger->dst, &value,
++ &ctx->xin->flow);
++ /* If the datapath doesn't support the check_pkt_len action, then
++ * set the SLOW_ACTION flag. If we don't set SLOW_ACTION, we
++ * will push a flow to the datapath based on the packet length
++ * in ctx->xin->packet. For subsequent packets that match the
++ * same flow, the datapath will apply the actions without considering
++ * the packet length. This results in wrong actions being applied.
++ */
++ ctx->xout->slow |= SLOW_ACTION;
++ return;
++ }
++
++ struct ofpbuf old_stack = ctx->stack;
++ union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
++ ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);
++ ofpbuf_put(&ctx->stack, old_stack.data, old_stack.size);
++
++ struct ofpbuf old_action_set = ctx->action_set;
++ uint64_t actset_stub[1024 / 8];
++ ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub);
++ ofpbuf_put(&ctx->action_set, old_action_set.data, old_action_set.size);
++
++ struct flow old_flow = ctx->xin->flow;
++ xlate_commit_actions(ctx);
++ struct flow old_base = ctx->base_flow;
++ bool old_was_mpls = ctx->was_mpls;
++ bool old_conntracked = ctx->conntracked;
++
++ size_t offset = nl_msg_start_nested(ctx->odp_actions,
++ OVS_ACTION_ATTR_CHECK_PKT_LEN);
++ nl_msg_put_u16(ctx->odp_actions, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN,
++ check_pkt_larger->pkt_len);
++ size_t offset_attr = nl_msg_start_nested(
++ ctx->odp_actions, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER);
++ value.u8_val = 1;
++ mf_write_subfield_flow(&check_pkt_larger->dst, &value, &ctx->xin->flow);
++ do_xlate_actions(remaining_acts, remaining_acts_len, ctx, true, false);
++ if (!ctx->freezing) {
++ xlate_action_set(ctx);
++ }
++ if (ctx->freezing) {
++ finish_freezing(ctx);
++ }
++ nl_msg_end_nested(ctx->odp_actions, offset_attr);
++
++ ctx->base_flow = old_base;
++ ctx->was_mpls = old_was_mpls;
++ ctx->conntracked = old_conntracked;
++ ctx->xin->flow = old_flow;
++
++ /* If the flow translation for the IF_GREATER case requires freezing,
++ * then ctx->exit would be true. Reset it to false so that we can
++ * do the flow translation for the 'IF_LESS_EQUAL' case;
++ * finish_freezing() has already undone the changes made for the freeze.
*/
++ ctx->exit = false;
++
++ offset_attr = nl_msg_start_nested(
++ ctx->odp_actions, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL);
++ value.u8_val = 0;
++ mf_write_subfield_flow(&check_pkt_larger->dst, &value, &ctx->xin->flow);
++ do_xlate_actions(remaining_acts, remaining_acts_len, ctx, true, false);
++ if (!ctx->freezing) {
++ xlate_action_set(ctx);
++ }
++ if (ctx->freezing) {
++ finish_freezing(ctx);
++ }
++ nl_msg_end_nested(ctx->odp_actions, offset_attr);
++ nl_msg_end_nested(ctx->odp_actions, offset);
++
++ ofpbuf_uninit(&ctx->action_set);
++ ctx->action_set = old_action_set;
++ ofpbuf_uninit(&ctx->stack);
++ ctx->stack = old_stack;
++ ctx->base_flow = old_base;
++ ctx->was_mpls = old_was_mpls;
++ ctx->conntracked = old_conntracked;
++ ctx->xin->flow = old_flow;
++ ctx->exit = true;
++}
++
+ static void
+ rewrite_flow_encap_ethernet(struct xlate_ctx *ctx,
+ struct flow *flow,
+@@ -6391,6 +6519,7 @@ recirc_for_mpls(const struct ofpact *a, struct xlate_ctx *ctx)
+ case OFPACT_WRITE_ACTIONS:
+ case OFPACT_WRITE_METADATA:
+ case OFPACT_GOTO_TABLE:
++ case OFPACT_CHECK_PKT_LARGER:
+ default:
+ break;
+ }
+@@ -6846,6 +6975,21 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
+ case OFPACT_DEBUG_SLOW:
+ ctx->xout->slow |= SLOW_ACTION;
+ break;
++
++ case OFPACT_CHECK_PKT_LARGER: {
++ if (last) {
++ /* If this is the last action, there is no need to
++ * translate it. */
++ break;
++ }
++ const struct ofpact *remaining_acts = ofpact_next(a);
++ size_t remaining_acts_len = ofpact_remaining_len(remaining_acts,
++ ofpacts,
++ ofpacts_len);
++ xlate_check_pkt_larger(ctx, ofpact_get_CHECK_PKT_LARGER(a),
++ remaining_acts, remaining_acts_len);
++ break;
++ }
+ }
+
+ /* Check if need to store this and the remaining actions for later
+@@ -7386,8 +7530,9 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
+ compose_ipfix_action(&ctx, ODPP_NONE);
+ }
+ size_t sample_actions_len = ctx.odp_actions->size;
++ bool ecn_drop = !tnl_process_ecn(flow);
+
+- if (tnl_process_ecn(flow)
++ if (!ecn_drop
+ && (!in_port || may_receive(in_port, &ctx))) {
+ const struct ofpact *ofpacts;
+ size_t ofpacts_len;
+@@ -7419,6 +7564,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
+ ctx.odp_actions->size = sample_actions_len;
+ ctx_cancel_freeze(&ctx);
+ ofpbuf_clear(&ctx.action_set);
++ ctx.error = XLATE_FORWARDING_DISABLED;
+ }
+
+ if (!ctx.freezing) {
+@@ -7427,6 +7573,8 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
+ if (ctx.freezing) {
+ finish_freezing(&ctx);
+ }
++ } else if (ecn_drop) {
++ ctx.error = XLATE_CONGESTION_DROP;
+ }
+
+ /* Output only fully processed packets. */
+@@ -7526,6 +7674,21 @@ exit:
+ ofpbuf_clear(xin->odp_actions);
+ }
+ }
++
++ /* Install a drop action if the datapath supports explicit drop actions. */
++ if (xin->odp_actions && !xin->odp_actions->size &&
++ ovs_explicit_drop_action_supported(ctx.xbridge->ofproto)) {
++ put_drop_action(xin->odp_actions, ctx.error);
++ }
++
++ /* Since congestion drops and forwarding drops are not really
++ * translation errors, reset the translation error here.
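++ * The explicit drop action emitted above still carries the original
++ * error code, so the reason for the drop is not lost.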
++ */ ++ if (ctx.error == XLATE_CONGESTION_DROP || ++ ctx.error == XLATE_FORWARDING_DISABLED) { ++ ctx.error = XLATE_OK; ++ } ++ + return ctx.error; + } + +diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h +index 0a5a52887b..ded81ca0e9 100644 +--- a/ofproto/ofproto-dpif-xlate.h ++++ b/ofproto/ofproto-dpif-xlate.h +@@ -205,19 +205,6 @@ int xlate_lookup(const struct dpif_backer *, const struct flow *, + struct dpif_sflow **, struct netflow **, + ofp_port_t *ofp_in_port); + +-enum xlate_error { +- XLATE_OK = 0, +- XLATE_BRIDGE_NOT_FOUND, +- XLATE_RECURSION_TOO_DEEP, +- XLATE_TOO_MANY_RESUBMITS, +- XLATE_STACK_TOO_DEEP, +- XLATE_NO_RECIRCULATION_CONTEXT, +- XLATE_RECIRCULATION_CONFLICT, +- XLATE_TOO_MANY_MPLS_LABELS, +- XLATE_INVALID_TUNNEL_METADATA, +- XLATE_UNSUPPORTED_PACKET_TYPE, +-}; +- + const char *xlate_strerror(enum xlate_error error); + + enum xlate_error xlate_actions(struct xlate_in *, struct xlate_out *); +diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c +index 050ff65773..5f2127e280 100644 +--- a/ofproto/ofproto-dpif.c ++++ b/ofproto/ofproto-dpif.c +@@ -827,6 +827,12 @@ ovs_native_tunneling_is_on(struct ofproto_dpif *ofproto) + && atomic_count_get(&ofproto->backer->tnl_count); + } + ++bool ++ovs_explicit_drop_action_supported(struct ofproto_dpif *ofproto) ++{ ++ return ofproto->backer->rt_support.explicit_drop_action; ++} ++ + /* Tests whether 'backer''s datapath supports recirculation. Only newer + * datapaths support OVS_KEY_ATTR_RECIRC_ID in keys. We need to disable some + * features on older datapaths that don't support this feature. +@@ -1290,6 +1296,48 @@ check_ct_clear(struct dpif_backer *backer) + return supported; + } + ++ ++/* Tests whether 'backer''s datapath supports the ++ * OVS_ACTION_ATTR_CHECK_PKT_LEN action. */ ++static bool ++check_check_pkt_len(struct dpif_backer *backer) ++{ ++ struct odputil_keybuf keybuf; ++ struct ofpbuf actions; ++ struct ofpbuf key; ++ struct flow flow; ++ bool supported; ++ ++ struct odp_flow_key_parms odp_parms = { ++ .flow = &flow, ++ .probe = true, ++ }; ++ ++ memset(&flow, 0, sizeof flow); ++ ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); ++ odp_flow_key_from_flow(&odp_parms, &key); ++ ofpbuf_init(&actions, 64); ++ size_t cpl_start; ++ ++ cpl_start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_CHECK_PKT_LEN); ++ nl_msg_put_u16(&actions, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, 100); ++ ++ /* Putting these actions without any data is good enough to check ++ * if check_pkt_len is supported or not. */ ++ nl_msg_put_flag(&actions, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER); ++ nl_msg_put_flag(&actions, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL); ++ ++ nl_msg_end_nested(&actions, cpl_start); ++ ++ supported = dpif_probe_feature(backer->dpif, "check_pkt_len", &key, ++ &actions, NULL); ++ ofpbuf_uninit(&actions); ++ VLOG_INFO("%s: Datapath %s check_pkt_len action", ++ dpif_name(backer->dpif), supported ? "supports" ++ : "does not support"); ++ return supported; ++} ++ + /* Probe the highest dp_hash algorithm supported by the datapath. 
*/ + static size_t + check_max_dp_hash_alg(struct dpif_backer *backer) +@@ -1397,6 +1445,9 @@ check_support(struct dpif_backer *backer) + backer->rt_support.ct_eventmask = check_ct_eventmask(backer); + backer->rt_support.ct_clear = check_ct_clear(backer); + backer->rt_support.max_hash_alg = check_max_dp_hash_alg(backer); ++ backer->rt_support.check_pkt_len = check_check_pkt_len(backer); ++ backer->rt_support.explicit_drop_action = ++ dpif_supports_explicit_drop_action(backer->dpif); + + /* Flow fields. */ + backer->rt_support.odp.ct_state = check_ct_state(backer); +@@ -2228,24 +2279,24 @@ set_lldp(struct ofport *ofport_, + const struct smap *cfg) + { + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); ++ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + int error = 0; + + if (cfg) { + if (!ofport->lldp) { +- struct ofproto_dpif *ofproto; +- +- ofproto = ofproto_dpif_cast(ofport->up.ofproto); + ofproto->backer->need_revalidate = REV_RECONFIGURE; + ofport->lldp = lldp_create(ofport->up.netdev, ofport_->mtu, cfg); + } + + if (!lldp_configure(ofport->lldp, cfg)) { ++ lldp_unref(ofport->lldp); ++ ofport->lldp = NULL; + error = EINVAL; + } +- } +- if (error) { ++ } else if (ofport->lldp) { + lldp_unref(ofport->lldp); + ofport->lldp = NULL; ++ ofproto->backer->need_revalidate = REV_RECONFIGURE; + } + + ofproto_dpif_monitor_port_update(ofport, +@@ -4655,12 +4706,13 @@ ofproto_dpif_xcache_execute(struct ofproto_dpif *ofproto, + } + + static void +-packet_execute(struct ofproto *ofproto_, struct ofproto_packet_out *opo) ++packet_execute_prepare(struct ofproto *ofproto_, ++ struct ofproto_packet_out *opo) + OVS_REQUIRES(ofproto_mutex) + { + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + struct dpif_flow_stats stats; +- struct dpif_execute execute; ++ struct dpif_execute *execute; + + struct ofproto_dpif_packet_out *aux = opo->aux; + ovs_assert(aux); +@@ -4669,22 +4721,40 @@ packet_execute(struct ofproto *ofproto_, struct ofproto_packet_out *opo) + dpif_flow_stats_extract(opo->flow, opo->packet, time_msec(), &stats); + ofproto_dpif_xcache_execute(ofproto, &aux->xcache, &stats); + +- execute.actions = aux->odp_actions.data; +- execute.actions_len = aux->odp_actions.size; ++ execute = xzalloc(sizeof *execute); ++ execute->actions = xmemdup(aux->odp_actions.data, aux->odp_actions.size); ++ execute->actions_len = aux->odp_actions.size; + + pkt_metadata_from_flow(&opo->packet->md, opo->flow); +- execute.packet = opo->packet; +- execute.flow = opo->flow; +- execute.needs_help = aux->needs_help; +- execute.probe = false; +- execute.mtu = 0; ++ execute->packet = opo->packet; ++ execute->flow = opo->flow; ++ execute->needs_help = aux->needs_help; ++ execute->probe = false; ++ execute->mtu = 0; + + /* Fix up in_port. 
*/
+ ofproto_dpif_set_packet_odp_port(ofproto, opo->flow->in_port.ofp_port,
+ opo->packet);
+
+- dpif_execute(ofproto->backer->dpif, &execute);
+ ofproto_dpif_packet_out_delete(aux);
++ opo->aux = execute;
++}
++
++static void
++packet_execute(struct ofproto *ofproto_, struct ofproto_packet_out *opo)
++ OVS_EXCLUDED(ofproto_mutex)
++{
++ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
++ struct dpif_execute *execute = opo->aux;
++
++ if (!execute) {
++ return;
++ }
++
++ dpif_execute(ofproto->backer->dpif, execute);
++
++ free(CONST_CAST(struct nlattr *, execute->actions));
++ free(execute);
+ opo->aux = NULL;
+ }
+
+@@ -5056,9 +5126,7 @@ nxt_resume(struct ofproto *ofproto_,
+ pkt_metadata_from_flow(&packet.md, &pin->base.flow_metadata.flow);
+
+ /* Fix up in_port. */
+- ofproto_dpif_set_packet_odp_port(ofproto,
+- pin->base.flow_metadata.flow.in_port.ofp_port,
+- &packet);
++ packet.md.in_port.odp_port = pin->odp_port;
+
+ struct flow headers;
+ flow_extract(&packet, &headers);
+@@ -6060,6 +6128,7 @@ const struct ofproto_class ofproto_dpif_class = {
+ rule_get_stats,
+ packet_xlate,
+ packet_xlate_revert,
++ packet_execute_prepare,
+ packet_execute,
+ set_frag_handling,
+ nxt_resume,
+diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
+index 1a404c82fa..29373db02a 100644
+--- a/ofproto/ofproto-dpif.h
++++ b/ofproto/ofproto-dpif.h
+@@ -192,7 +192,14 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *,
+ DPIF_SUPPORT_FIELD(bool, ct_clear, "Conntrack clear") \
+ \
+ /* Highest supported dp_hash algorithm. */ \
+- DPIF_SUPPORT_FIELD(size_t, max_hash_alg, "Max dp_hash algorithm")
++ DPIF_SUPPORT_FIELD(size_t, max_hash_alg, "Max dp_hash algorithm") \
++ \
++ /* True if the datapath supports OVS_ACTION_ATTR_CHECK_PKT_LEN. */ \
++ DPIF_SUPPORT_FIELD(bool, check_pkt_len, "Check pkt length action") \
++ \
++ /* True if the datapath supports explicit drop action. */ \
++ DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action")
++
+
+ /* Stores the various features which the corresponding backer supports. */
+ struct dpif_backer_support {
+@@ -361,4 +368,6 @@ int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *, struct match *,
+
+ bool ovs_native_tunneling_is_on(struct ofproto_dpif *);
+
++bool ovs_explicit_drop_action_supported(struct ofproto_dpif *);
++
+ #endif /* ofproto-dpif.h */
+diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
+index 074edfc119..958e0ef1f9 100644
+--- a/ofproto/ofproto-provider.h
++++ b/ofproto/ofproto-provider.h
+@@ -1357,9 +1357,15 @@ struct ofproto_class {
+ * packet_xlate_revert() calls have to be made in reverse order. */
+ void (*packet_xlate_revert)(struct ofproto *, struct ofproto_packet_out *);
+
+- /* Executes the datapath actions, translation side-effects, and stats as
+- * produced by ->packet_xlate(). The caller retains ownership of 'opo'.
+- */
++ /* Applies the translation side-effects and stats produced by
++ * ->packet_xlate() and prepares to execute the datapath actions.
++ * The caller retains ownership of 'opo'. */
++ void (*packet_execute_prepare)(struct ofproto *,
++ struct ofproto_packet_out *opo);
++
++ /* Executes the datapath actions. The caller retains ownership of 'opo'.
++ * Should be called after a successful packet_execute_prepare().
++ * No-op if called after packet_xlate_revert().
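++ * Should be called without 'ofproto_mutex' held, since execution may
++ * trigger upcalls and further flow translation.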
*/
+ void (*packet_execute)(struct ofproto *, struct ofproto_packet_out *opo);
+
+ /* Changes the OpenFlow IP fragment handling policy to 'frag_handling',
+diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
+index 994e89d9ff..1576de4be2 100644
+--- a/ofproto/ofproto.c
++++ b/ofproto/ofproto.c
+@@ -2507,6 +2507,9 @@ ofproto_port_unregister(struct ofproto *ofproto, ofp_port_t ofp_port)
+ {
+ struct ofport *port = ofproto_get_port(ofproto, ofp_port);
+ if (port) {
++ if (port->ofproto->ofproto_class->set_lldp) {
++ port->ofproto->ofproto_class->set_lldp(port, NULL);
++ }
+ if (port->ofproto->ofproto_class->set_stp_port) {
+ port->ofproto->ofproto_class->set_stp_port(port, NULL);
+ }
+@@ -3572,10 +3575,21 @@ ofproto_packet_out_revert(struct ofproto *ofproto,
+ ofproto->ofproto_class->packet_xlate_revert(ofproto, opo);
+ }
+
++static void
++ofproto_packet_out_prepare(struct ofproto *ofproto,
++ struct ofproto_packet_out *opo)
++ OVS_REQUIRES(ofproto_mutex)
++{
++ ofproto->ofproto_class->packet_execute_prepare(ofproto, opo);
++}
++
++/* Executing a packet_out action in the datapath can end up in an upcall,
++ * with subsequent flow translations and possible rule modifications, so
++ * the caller must not hold 'ofproto_mutex'. */
+ static void
+ ofproto_packet_out_finish(struct ofproto *ofproto,
+ struct ofproto_packet_out *opo)
+- OVS_REQUIRES(ofproto_mutex)
++ OVS_EXCLUDED(ofproto_mutex)
+ {
+ ofproto->ofproto_class->packet_execute(ofproto, opo);
+ }
+@@ -3618,10 +3632,13 @@ handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh)
+ opo.version = p->tables_version;
+ error = ofproto_packet_out_start(p, &opo);
+ if (!error) {
+- ofproto_packet_out_finish(p, &opo);
++ ofproto_packet_out_prepare(p, &opo);
+ }
+ ovs_mutex_unlock(&ofproto_mutex);
+
++ if (!error) {
++ ofproto_packet_out_finish(p, &opo);
++ }
+ ofproto_packet_out_uninit(&opo);
+ return error;
+ }
+@@ -8114,7 +8131,7 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
+ } else if (be->type == OFPTYPE_GROUP_MOD) {
+ ofproto_group_mod_finish(ofproto, &be->ogm, &req);
+ } else if (be->type == OFPTYPE_PACKET_OUT) {
+- ofproto_packet_out_finish(ofproto, &be->opo);
++ ofproto_packet_out_prepare(ofproto, &be->opo);
+ }
+ }
+ }
+@@ -8124,6 +8141,13 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
+ ovs_mutex_unlock(&ofproto_mutex);
+ }
+
++ /* Execute the remaining datapath actions. */
++ LIST_FOR_EACH (be, node, &bundle->msg_list) {
++ if (be->type == OFPTYPE_PACKET_OUT) {
++ ofproto_packet_out_finish(ofproto, &be->opo);
++ }
++ }
++
+ /* The bundle is discarded regardless the outcome. */
+ ofp_bundle_remove__(ofconn, bundle);
+ return error;
+diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
+index d1c64d3429..c548faf942 100644
+--- a/ovn/northd/ovn-northd.c
++++ b/ovn/northd/ovn-northd.c
+@@ -1039,7 +1039,14 @@ ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
+
+ for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
+ uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
+- ipam_insert_ip(op->peer->od, ip);
++ /* If the router has the first IP address of the subnet, don't add
++ * it to IPAM: we already added it when we initialized IPAM for
++ * the datapath, so adding it again would just produce a spurious
++ * message about a duplicate IP address.
++ */ ++ if (ip != op->peer->od->ipam_info.start_ipv4) { ++ ipam_insert_ip(op->peer->od, ip); ++ } + } + + destroy_lport_addresses(&lrp_networks); +diff --git a/ovn/utilities/ovn-ctl b/ovn/utilities/ovn-ctl +index f1297a2626..769c09752a 100755 +--- a/ovn/utilities/ovn-ctl ++++ b/ovn/utilities/ovn-ctl +@@ -314,6 +314,15 @@ start_northd () { + if test X"$OVN_NORTHD_LOGFILE" != X; then + set "$@" --log-file=$OVN_NORTHD_LOGFILE + fi ++ if test X"$OVN_NORTHD_SSL_KEY" != X; then ++ set "$@" --private-key=$OVN_NORTHD_SSL_KEY ++ fi ++ if test X"$OVN_NORTHD_SSL_CERT" != X; then ++ set "$@" --certificate=$OVN_NORTHD_SSL_CERT ++ fi ++ if test X"$OVN_NORTHD_SSL_CA_CERT" != X; then ++ set "$@" --ca-cert=$OVN_NORTHD_SSL_CA_CERT ++ fi + + [ "$OVN_USER" != "" ] && set "$@" --user "$OVN_USER" + +@@ -475,6 +484,10 @@ set_defaults () { + OVN_CONTROLLER_SSL_CA_CERT="" + OVN_CONTROLLER_SSL_BOOTSTRAP_CA_CERT="" + ++ OVN_NORTHD_SSL_KEY="" ++ OVN_NORTHD_SSL_CERT="" ++ OVN_NORTHD_SSL_CA_CERT="" ++ + DB_SB_CREATE_INSECURE_REMOTE="no" + DB_NB_CREATE_INSECURE_REMOTE="no" + +@@ -571,6 +584,9 @@ Options: + --ovn-sb-db-ssl-key=KEY OVN Southbound DB SSL private key file + --ovn-sb-db-ssl-cert=CERT OVN Southbound DB SSL certificate file + --ovn-sb-db-ssl-ca-cert=CERT OVN Southbound DB SSL CA certificate file ++ --ovn-northd-ssl-key=KEY OVN Northd SSL private key file ++ --ovn-northd-ssl-cert=CERT OVN Northd SSL certificate file ++ --ovn-northd-ssl-ca-cert=CERT OVN Northd SSL CA certificate file + --ovn-manage-ovsdb=yes|no Whether or not the OVN databases should be + automatically started and stopped along + with ovn-northd. The default is "yes". If +diff --git a/ovsdb/execution.c b/ovsdb/execution.c +index c55a0b7710..fdbb5c399f 100644 +--- a/ovsdb/execution.c ++++ b/ovsdb/execution.c +@@ -696,7 +696,7 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser, + long long int timeout_msec = 0; + size_t i; + +- timeout = ovsdb_parser_member(parser, "timeout", OP_NUMBER | OP_OPTIONAL); ++ timeout = ovsdb_parser_member(parser, "timeout", OP_INTEGER | OP_OPTIONAL); + where = ovsdb_parser_member(parser, "where", OP_ARRAY); + columns_json = ovsdb_parser_member(parser, "columns", + OP_ARRAY | OP_OPTIONAL); +@@ -714,7 +714,7 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser, + } + if (!error) { + if (timeout) { +- timeout_msec = MIN(LLONG_MAX, json_real(timeout)); ++ timeout_msec = json_integer(timeout); + if (timeout_msec < 0) { + error = ovsdb_syntax_error(timeout, NULL, + "timeout must be nonnegative"); +diff --git a/ovsdb/file.c b/ovsdb/file.c +index 8d16b097b7..0af077fcec 100644 +--- a/ovsdb/file.c ++++ b/ovsdb/file.c +@@ -235,10 +235,14 @@ ovsdb_convert_table(struct ovsdb_txn *txn, + continue; + } + ++ ovsdb_datum_destroy(&dst_row->fields[dst_column->index], ++ &dst_column->type); ++ + struct ovsdb_error *error = ovsdb_datum_convert( + &dst_row->fields[dst_column->index], &dst_column->type, + &src_row->fields[src_column->index], &src_column->type); + if (error) { ++ ovsdb_datum_init_empty(&dst_row->fields[dst_column->index]); + ovsdb_row_destroy(dst_row); + return error; + } +diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c +index 7c7a277f0d..fee3e2bcf6 100644 +--- a/ovsdb/jsonrpc-server.c ++++ b/ovsdb/jsonrpc-server.c +@@ -80,6 +80,8 @@ static void ovsdb_jsonrpc_session_unlock_all(struct ovsdb_jsonrpc_session *); + static void ovsdb_jsonrpc_session_unlock__(struct ovsdb_lock_waiter *); + static void ovsdb_jsonrpc_session_send(struct ovsdb_jsonrpc_session *, + struct 
jsonrpc_msg *);
++static void ovsdb_jsonrpc_session_set_readonly_all(
++ struct ovsdb_jsonrpc_remote *remote, bool read_only);
+
+ /* Triggers. */
+ static void ovsdb_jsonrpc_trigger_create(struct ovsdb_jsonrpc_session *,
+@@ -365,10 +367,13 @@ ovsdb_jsonrpc_server_set_read_only(struct ovsdb_jsonrpc_server *svr,
+ {
+ if (svr->read_only != read_only) {
+ svr->read_only = read_only;
+- ovsdb_jsonrpc_server_reconnect(svr, false,
+- xstrdup(read_only
+- ? "making server read-only"
+- : "making server read/write"));
++
++ struct shash_node *node;
++ SHASH_FOR_EACH (node, &svr->remotes) {
++ struct ovsdb_jsonrpc_remote *remote = node->data;
++
++ ovsdb_jsonrpc_session_set_readonly_all(remote, read_only);
++ }
+ }
+ }
+
+@@ -670,6 +675,17 @@ ovsdb_jsonrpc_session_reconnect_all(struct ovsdb_jsonrpc_remote *remote,
+ }
+ }
+
++static void
++ovsdb_jsonrpc_session_set_readonly_all(struct ovsdb_jsonrpc_remote *remote,
++ bool read_only)
++{
++ struct ovsdb_jsonrpc_session *s;
++
++ LIST_FOR_EACH (s, node, &remote->sessions) {
++ s->read_only = read_only;
++ }
++}
++
+ /* Sets the options for all of the JSON-RPC sessions managed by 'remote' to
+ * 'options'.
+ *
+diff --git a/ovsdb/mutation.c b/ovsdb/mutation.c
+index cd20bdb7cb..56edc5f000 100644
+--- a/ovsdb/mutation.c
++++ b/ovsdb/mutation.c
+@@ -147,6 +147,8 @@ ovsdb_mutation_from_json(const struct ovsdb_table_schema *ts,
+ if (error && ovsdb_type_is_map(&m->type)
+ && m->mutator == OVSDB_M_DELETE) {
+ ovsdb_error_destroy(error);
++ ovsdb_base_type_destroy(&m->type.value);
++ m->type.value.enum_ = NULL;
+ m->type.value.type = OVSDB_TYPE_VOID;
+ error = ovsdb_datum_from_json(&m->arg, &m->type, array->elems[2],
+ symtab);
+diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in
+index 9f78e87f65..11403b1009 100644
+--- a/ovsdb/ovsdb-server.1.in
++++ b/ovsdb/ovsdb-server.1.in
+@@ -288,6 +288,11 @@ Switches the server to an active role. The server stops synchronizing
+ its databases with an active server and closes all existing client
+ connections, which requires clients to reconnect.
+ .
++.IP "\fBovsdb\-server/set\-active\-ovsdb\-server\-probe\-interval \fIprobe interval\fR"
++Sets the probe interval (in milliseconds) for the connection to the
++active \fIserver\fR.
++.
++.
+ .IP "\fBovsdb\-server/set\-sync\-exclude\-tables \fIdb\fB:\fItable\fR[\fB,\fIdb\fB:\fItable\fR]..."
+ Sets the \fItable\fR within \fIdb\fR that will be excluded from synchronization.
+ This overrides the \fB\-\-sync\-exclude-tables\fR command-line option.
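
A hypothetical invocation of the command documented above, assuming an
ovsdb-server running as a backup (the argument is in milliseconds):

    ovs-appctl -t ovsdb-server ovsdb-server/set-active-ovsdb-server-probe-interval 30000
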
+diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c +index 65a47a41d3..284ed05519 100644 +--- a/ovsdb/ovsdb-server.c ++++ b/ovsdb/ovsdb-server.c +@@ -86,6 +86,7 @@ static unixctl_cb_func ovsdb_server_set_active_ovsdb_server; + static unixctl_cb_func ovsdb_server_get_active_ovsdb_server; + static unixctl_cb_func ovsdb_server_connect_active_ovsdb_server; + static unixctl_cb_func ovsdb_server_disconnect_active_ovsdb_server; ++static unixctl_cb_func ovsdb_server_set_active_ovsdb_server_probe_interval; + static unixctl_cb_func ovsdb_server_set_sync_exclude_tables; + static unixctl_cb_func ovsdb_server_get_sync_exclude_tables; + static unixctl_cb_func ovsdb_server_get_sync_status; +@@ -97,6 +98,7 @@ struct server_config { + char **sync_from; + char **sync_exclude; + bool *is_backup; ++ int *replication_probe_interval; + struct ovsdb_jsonrpc_server *jsonrpc; + }; + static unixctl_cb_func ovsdb_server_add_remote; +@@ -144,9 +146,10 @@ static void load_config(FILE *config_file, struct sset *remotes, + + static void + ovsdb_replication_init(const char *sync_from, const char *exclude, +- struct shash *all_dbs, const struct uuid *server_uuid) ++ struct shash *all_dbs, const struct uuid *server_uuid, ++ int probe_interval) + { +- replication_init(sync_from, exclude, server_uuid); ++ replication_init(sync_from, exclude, server_uuid, probe_interval); + struct shash_node *node; + SHASH_FOR_EACH (node, all_dbs) { + struct db *db = node->data; +@@ -301,6 +304,7 @@ main(int argc, char *argv[]) + struct server_config server_config; + struct shash all_dbs; + struct shash_node *node, *next; ++ int replication_probe_interval = REPLICATION_DEFAULT_PROBE_INTERVAL; + + ovs_cmdl_proctitle_init(argc, argv); + set_program_name(argv[0]); +@@ -348,6 +352,7 @@ main(int argc, char *argv[]) + server_config.sync_from = &sync_from; + server_config.sync_exclude = &sync_exclude; + server_config.is_backup = &is_backup; ++ server_config.replication_probe_interval = &replication_probe_interval; + + perf_counters_init(); + +@@ -433,6 +438,9 @@ main(int argc, char *argv[]) + unixctl_command_register("ovsdb-server/disconnect-active-ovsdb-server", "", + 0, 0, ovsdb_server_disconnect_active_ovsdb_server, + &server_config); ++ unixctl_command_register( ++ "ovsdb-server/set-active-ovsdb-server-probe-interval", "", 1, 1, ++ ovsdb_server_set_active_ovsdb_server_probe_interval, &server_config); + unixctl_command_register("ovsdb-server/set-sync-exclude-tables", "", + 0, 1, ovsdb_server_set_sync_exclude_tables, + &server_config); +@@ -451,7 +459,8 @@ main(int argc, char *argv[]) + if (is_backup) { + const struct uuid *server_uuid; + server_uuid = ovsdb_jsonrpc_server_get_uuid(jsonrpc); +- ovsdb_replication_init(sync_from, sync_exclude, &all_dbs, server_uuid); ++ ovsdb_replication_init(sync_from, sync_exclude, &all_dbs, server_uuid, ++ replication_probe_interval); + } + + main_loop(&server_config, jsonrpc, &all_dbs, unixctl, &remotes, +@@ -528,7 +537,7 @@ close_db(struct server_config *config, struct db *db, char *comment) + + static struct ovsdb_error * OVS_WARN_UNUSED_RESULT + parse_txn(struct server_config *config, struct db *db, +- struct ovsdb_schema *schema, const struct json *txn_json, ++ const struct ovsdb_schema *schema, const struct json *txn_json, + const struct uuid *txnid) + { + if (schema) { +@@ -552,7 +561,7 @@ parse_txn(struct server_config *config, struct db *db, + ? 
xasprintf("database %s schema changed", db->db->name) + : xasprintf("database %s connected to storage", db->db->name))); + +- ovsdb_replace(db->db, ovsdb_create(schema, NULL)); ++ ovsdb_replace(db->db, ovsdb_create(ovsdb_schema_clone(schema), NULL)); + + /* Force update to schema in _Server database. */ + db->row_uuid = UUID_ZERO; +@@ -600,6 +609,7 @@ read_db(struct server_config *config, struct db *db) + } else { + error = parse_txn(config, db, schema, txn_json, &txnid); + json_destroy(txn_json); ++ ovsdb_schema_destroy(schema); + if (error) { + break; + } +@@ -1307,7 +1317,8 @@ ovsdb_server_connect_active_ovsdb_server(struct unixctl_conn *conn, + const struct uuid *server_uuid; + server_uuid = ovsdb_jsonrpc_server_get_uuid(config->jsonrpc); + ovsdb_replication_init(*config->sync_from, *config->sync_exclude, +- config->all_dbs, server_uuid); ++ config->all_dbs, server_uuid, ++ *config->replication_probe_interval); + if (!*config->is_backup) { + *config->is_backup = true; + save_config(config); +@@ -1330,6 +1341,28 @@ ovsdb_server_disconnect_active_ovsdb_server(struct unixctl_conn *conn, + unixctl_command_reply(conn, NULL); + } + ++static void ++ovsdb_server_set_active_ovsdb_server_probe_interval(struct unixctl_conn *conn, ++ int argc OVS_UNUSED, ++ const char *argv[], ++ void *config_) ++{ ++ struct server_config *config = config_; ++ ++ int probe_interval; ++ if (str_to_int(argv[1], 10, &probe_interval)) { ++ *config->replication_probe_interval = probe_interval; ++ save_config(config); ++ if (*config->is_backup) { ++ replication_set_probe_interval(probe_interval); ++ } ++ unixctl_command_reply(conn, NULL); ++ } else { ++ unixctl_command_reply( ++ conn, "Invalid probe interval, integer value expected"); ++ } ++} ++ + static void + ovsdb_server_set_sync_exclude_tables(struct unixctl_conn *conn, + int argc OVS_UNUSED, +@@ -1347,7 +1380,8 @@ ovsdb_server_set_sync_exclude_tables(struct unixctl_conn *conn, + const struct uuid *server_uuid; + server_uuid = ovsdb_jsonrpc_server_get_uuid(config->jsonrpc); + ovsdb_replication_init(*config->sync_from, *config->sync_exclude, +- config->all_dbs, server_uuid); ++ config->all_dbs, server_uuid, ++ *config->replication_probe_interval); + } + err = set_blacklist_tables(argv[1], false); + } +@@ -1558,7 +1592,8 @@ ovsdb_server_add_database(struct unixctl_conn *conn, int argc OVS_UNUSED, + const struct uuid *server_uuid; + server_uuid = ovsdb_jsonrpc_server_get_uuid(config->jsonrpc); + ovsdb_replication_init(*config->sync_from, *config->sync_exclude, +- config->all_dbs, server_uuid); ++ config->all_dbs, server_uuid, ++ *config->replication_probe_interval); + } + unixctl_command_reply(conn, NULL); + } else { +@@ -1580,7 +1615,8 @@ remove_db(struct server_config *config, struct shash_node *node, char *comment) + const struct uuid *server_uuid; + server_uuid = ovsdb_jsonrpc_server_get_uuid(config->jsonrpc); + ovsdb_replication_init(*config->sync_from, *config->sync_exclude, +- config->all_dbs, server_uuid); ++ config->all_dbs, server_uuid, ++ *config->replication_probe_interval); + } + } + +diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c +index 438f975902..45e656d3d3 100644 +--- a/ovsdb/ovsdb-tool.c ++++ b/ovsdb/ovsdb-tool.c +@@ -559,7 +559,9 @@ do_db_has_magic(struct ovs_cmdl_context *ctx, const char *magic) + + check_ovsdb_error(ovsdb_log_open(filename, OVSDB_MAGIC"|"RAFT_MAGIC, + OVSDB_LOG_READ_ONLY, -1, &log)); +- if (strcmp(ovsdb_log_get_magic(log), magic)) { ++ int cmp = strcmp(ovsdb_log_get_magic(log), magic); ++ ovsdb_log_close(log); ++ if (cmp) { 
+ exit(2); + } + } +diff --git a/ovsdb/raft.c b/ovsdb/raft.c +index 68b527c12a..00bf3687f3 100644 +--- a/ovsdb/raft.c ++++ b/ovsdb/raft.c +@@ -3960,9 +3960,7 @@ raft_handle_execute_command_request__( + cmd->sid = rq->common.sid; + + enum raft_command_status status = cmd->status; +- if (status != RAFT_CMD_INCOMPLETE) { +- raft_command_unref(cmd); +- } ++ raft_command_unref(cmd); + return status; + } + +diff --git a/ovsdb/replication.c b/ovsdb/replication.c +index 752b3c89c5..cbbce64dfb 100644 +--- a/ovsdb/replication.c ++++ b/ovsdb/replication.c +@@ -43,7 +43,7 @@ static struct uuid server_uuid; + static struct jsonrpc_session *session; + static unsigned int session_seqno = UINT_MAX; + +-static struct jsonrpc_msg *create_monitor_request(struct ovsdb *db); ++static struct jsonrpc_msg *create_monitor_request(struct ovsdb_schema *); + static void add_monitored_table(struct ovsdb_table_schema *table, + struct json *monitor_requests); + +@@ -100,21 +100,32 @@ enum ovsdb_replication_state { + static enum ovsdb_replication_state state; + + ++struct replication_db { ++ struct ovsdb *db; ++ bool schema_version_higher; ++ /* Points to the schema received from the active server if ++ * the local db schema version is higher. NULL otherwise. */ ++ struct ovsdb_schema *active_db_schema; ++}; ++ ++static bool is_replication_possible(struct ovsdb_schema *local_db_schema, ++ struct ovsdb_schema *active_db_schema); ++ + /* All DBs known to ovsdb-server. The actual replication dbs are stored + * in 'replication dbs', which is a subset of all dbs and remote dbs whose + * schema matches. */ + static struct shash local_dbs = SHASH_INITIALIZER(&local_dbs); + static struct shash *replication_dbs; + +-static struct shash *replication_db_clone(struct shash *dbs); ++static struct shash *replication_dbs_create(void); + static void replication_dbs_destroy(void); + /* Find 'struct ovsdb' by name within 'replication_dbs' */ +-static struct ovsdb* find_db(const char *db_name); ++static struct replication_db *find_db(const char *db_name); + + + void + replication_init(const char *sync_from_, const char *exclude_tables, +- const struct uuid *server) ++ const struct uuid *server, int probe_interval) + { + free(sync_from); + sync_from = xstrdup(sync_from_); +@@ -132,6 +143,8 @@ replication_init(const char *sync_from_, const char *exclude_tables, + session = jsonrpc_session_open(sync_from, true); + session_seqno = UINT_MAX; + ++ jsonrpc_session_set_probe_interval(session, probe_interval); ++ + /* Keep a copy of local server uuid. */ + server_uuid = *server; + +@@ -152,8 +165,8 @@ send_schema_requests(const struct json *result) + if (name->type == JSON_STRING) { + /* Send one schema request for each remote DB. 
*/ + const char *db_name = json_string(name); +- struct ovsdb *db = find_db(db_name); +- if (db) { ++ struct replication_db *rdb = find_db(db_name); ++ if (rdb) { + struct jsonrpc_msg *request = + jsonrpc_create_request( + "get_schema", +@@ -161,7 +174,7 @@ send_schema_requests(const struct json *result) + json_string_create(db_name)), + NULL); + +- request_ids_add(request->id, db); ++ request_ids_add(request->id, rdb->db); + jsonrpc_session_send(session, request); + } + } +@@ -206,11 +219,11 @@ replication_run(void) + && msg->params->array.n == 2 + && msg->params->array.elems[0]->type == JSON_STRING) { + char *db_name = msg->params->array.elems[0]->string; +- struct ovsdb *db = find_db(db_name); +- if (db) { ++ struct replication_db *rdb = find_db(db_name); ++ if (rdb) { + struct ovsdb_error *error; + error = process_notification(msg->params->array.elems[1], +- db); ++ rdb->db); + if (error) { + ovsdb_error_assert(error); + state = RPL_S_ERR; +@@ -218,6 +231,7 @@ replication_run(void) + } + } + } else if (msg->type == JSONRPC_REPLY) { ++ struct replication_db *rdb; + struct ovsdb *db; + if (!request_ids_lookup_and_free(msg->id, &db)) { + VLOG_WARN("received unexpected reply"); +@@ -256,7 +270,7 @@ replication_run(void) + jsonrpc_session_send(session, request); + + replication_dbs_destroy(); +- replication_dbs = replication_db_clone(&local_dbs); ++ replication_dbs = replication_dbs_create(); + state = RPL_S_DB_REQUESTED; + break; + } +@@ -284,17 +298,37 @@ replication_run(void) + state = RPL_S_ERR; + } + +- if (db != find_db(schema->name)) { ++ rdb = find_db(schema->name); ++ if (!rdb) { + /* Unexpected schema. */ + VLOG_WARN("unexpected schema %s", schema->name); + state = RPL_S_ERR; +- } else if (!ovsdb_schema_equal(schema, db->schema)) { ++ } else if (!ovsdb_schema_equal(schema, rdb->db->schema)) { + /* Schmea version mismatch. */ +- VLOG_INFO("Schema version mismatch, %s not replicated", ++ VLOG_INFO("Schema version mismatch, checking if %s can " ++ "still be replicated or not.", + schema->name); +- shash_find_and_delete(replication_dbs, schema->name); ++ if (is_replication_possible(rdb->db->schema, schema)) { ++ VLOG_INFO("%s can be replicated.", schema->name); ++ rdb->schema_version_higher = true; ++ if (rdb->active_db_schema) { ++ ovsdb_schema_destroy(rdb->active_db_schema); ++ } ++ rdb->active_db_schema = schema; ++ } else { ++ VLOG_INFO("%s cannot be replicated.", schema->name); ++ struct replication_db *r = ++ shash_find_and_delete(replication_dbs, ++ schema->name); ++ if (r->active_db_schema) { ++ ovsdb_schema_destroy(r->active_db_schema); ++ } ++ free(r); ++ ovsdb_schema_destroy(schema); ++ } ++ } else { ++ ovsdb_schema_destroy(schema); + } +- ovsdb_schema_destroy(schema); + + /* After receiving schemas, reset the local databases that + * will be monitored and send out monitor requests for them. */ +@@ -306,11 +340,13 @@ replication_run(void) + state = RPL_S_ERR; + } else { + SHASH_FOR_EACH (node, replication_dbs) { +- db = node->data; ++ rdb = node->data; + struct jsonrpc_msg *request = +- create_monitor_request(db); ++ create_monitor_request( ++ rdb->schema_version_higher ? 
++ rdb->active_db_schema : rdb->db->schema);
+
+- request_ids_add(request->id, db);
++ request_ids_add(request->id, rdb->db);
+ jsonrpc_session_send(session, request);
+ VLOG_DBG("Send monitor requests");
+ state = RPL_S_MONITOR_REQUESTED;
+@@ -509,7 +545,7 @@ replication_destroy(void)
+ shash_destroy(&local_dbs);
+ }
+
+-static struct ovsdb *
++static struct replication_db *
+ find_db(const char *db_name)
+ {
+ return shash_find_data(replication_dbs, db_name);
+@@ -541,11 +577,10 @@ reset_database(struct ovsdb *db)
+ * Caller is responsible for disposing 'request'.
+ */
+ static struct jsonrpc_msg *
+-create_monitor_request(struct ovsdb *db)
++create_monitor_request(struct ovsdb_schema *schema)
+ {
+ struct jsonrpc_msg *request;
+ struct json *monitor;
+- struct ovsdb_schema *schema = db->schema;
+ const char *db_name = schema->name;
+
+ struct json *monitor_request = json_object_create();
+@@ -779,14 +814,18 @@ request_ids_clear(void)
+ }
+
+ static struct shash *
+-replication_db_clone(struct shash *dbs)
++replication_dbs_create(void)
+ {
+ struct shash *new = xmalloc(sizeof *new);
+ shash_init(new);
+
+ struct shash_node *node;
+- SHASH_FOR_EACH (node, dbs) {
+- shash_add(new, node->name, node->data);
++ SHASH_FOR_EACH (node, &local_dbs) {
++ struct replication_db *repl_db = xmalloc(sizeof *repl_db);
++ repl_db->db = node->data;
++ repl_db->schema_version_higher = false;
++ repl_db->active_db_schema = NULL;
++ shash_add(new, node->name, repl_db);
+ }
+
+ return new;
+@@ -795,7 +834,24 @@ replication_db_clone(struct shash *dbs)
+ static void
+ replication_dbs_destroy(void)
+ {
+- shash_destroy(replication_dbs);
++ if (!replication_dbs) {
++ return;
++ }
++
++ struct shash_node *node, *next;
++
++ SHASH_FOR_EACH_SAFE (node, next, replication_dbs) {
++ hmap_remove(&replication_dbs->map, &node->node);
++ struct replication_db *rdb = node->data;
++ if (rdb->active_db_schema) {
++ ovsdb_schema_destroy(rdb->active_db_schema);
++ }
++ free(rdb);
++ free(node->name);
++ free(node);
++ }
++
++ hmap_destroy(&replication_dbs->map);
+ free(replication_dbs);
+ replication_dbs = NULL;
+ }
+@@ -877,6 +933,62 @@ replication_status(void)
+ return ds_steal_cstr(&ds);
+ }
+
++/* Checks if it's possible to replicate to the local db from the active db
++ * schema. Returns true if 'local_db_schema' has all the tables and columns
++ * of 'active_db_schema', false otherwise.
++ */
++static bool
++is_replication_possible(struct ovsdb_schema *local_db_schema,
++ struct ovsdb_schema *active_db_schema)
++{
++ struct shash_node *node;
++ SHASH_FOR_EACH (node, &active_db_schema->tables) {
++ struct ovsdb_table_schema *ldb_table_schema =
++ shash_find_data(&local_db_schema->tables, node->name);
++ if (!ldb_table_schema) {
++ VLOG_INFO("Table %s not present in the local db schema",
++ node->name);
++ return false;
++ }
++
++ /* The local schema table must have all the columns
++ * of the active schema table.
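++ * Column definitions are compared via their JSON form, so the
++ * types and constraints must match exactly as well.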
*/ ++ struct ovsdb_table_schema *adb_table_schema = node->data; ++ struct shash_node *n; ++ SHASH_FOR_EACH (n, &adb_table_schema->columns) { ++ struct ovsdb_column *ldb_col = ++ shash_find_data(&ldb_table_schema->columns, n->name); ++ if (!ldb_col) { ++ VLOG_INFO("Column %s not present in the local " ++ "db schema table %s.", n->name, node->name); ++ return false; ++ } ++ ++ struct json *ldb_col_json = ovsdb_column_to_json(ldb_col); ++ struct json *adb_col_json = ovsdb_column_to_json(n->data); ++ bool cols_equal = json_equal(ldb_col_json, adb_col_json); ++ json_destroy(ldb_col_json); ++ json_destroy(adb_col_json); ++ ++ if (!cols_equal) { ++ VLOG_INFO("Column %s mismatch in local " ++ "db schema table %s.", n->name, node->name); ++ return false; ++ } ++ } ++ } ++ ++ return true; ++} ++ ++void ++replication_set_probe_interval(int probe_interval) ++{ ++ if (session) { ++ jsonrpc_session_set_probe_interval(session, probe_interval); ++ } ++} ++ + void + replication_usage(void) + { +diff --git a/ovsdb/replication.h b/ovsdb/replication.h +index 1f9c32fa74..c45f33e262 100644 +--- a/ovsdb/replication.h ++++ b/ovsdb/replication.h +@@ -44,8 +44,10 @@ struct ovsdb; + * used mainly by uinxctl commands. + */ + ++#define REPLICATION_DEFAULT_PROBE_INTERVAL 60000 ++ + void replication_init(const char *sync_from, const char *exclude_tables, +- const struct uuid *server); ++ const struct uuid *server, int probe_interval); + void replication_run(void); + void replication_wait(void); + void replication_destroy(void); +@@ -54,6 +56,7 @@ void replication_add_local_db(const char *databse, struct ovsdb *db); + bool replication_is_alive(void); + int replication_get_last_error(void); + char *replication_status(void); ++void replication_set_probe_interval(int); + + char *set_blacklist_tables(const char *blacklist, bool dryrun) + OVS_WARN_UNUSED_RESULT; +diff --git a/python/ovs/stream.py b/python/ovs/stream.py +index c15be4b3e5..a98057e42a 100644 +--- a/python/ovs/stream.py ++++ b/python/ovs/stream.py +@@ -825,6 +825,14 @@ class SSLStream(Stream): + except SSL.SysCallError as e: + return -ovs.socket_util.get_exception_errno(e) + ++ def close(self): ++ if self.socket: ++ try: ++ self.socket.shutdown() ++ except SSL.Error: ++ pass ++ return super(SSLStream, self).close() ++ + + if SSL: + # Register SSL only if the OpenSSL module is available +diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in +index 4ece47a54d..0a08929cd8 100644 +--- a/rhel/openvswitch-fedora.spec.in ++++ b/rhel/openvswitch-fedora.spec.in +@@ -380,7 +380,7 @@ fi + %else + # Package install, not upgrade + if [ $1 -eq 1 ]; then +- /bin/systemctl daemon-reload >dev/null || : ++ /bin/systemctl daemon-reload >/dev/null || : + fi + %endif + +diff --git a/rhel/usr_lib_systemd_system_ovs-vswitchd.service.in b/rhel/usr_lib_systemd_system_ovs-vswitchd.service.in +index 82925133dc..ff43dae961 100644 +--- a/rhel/usr_lib_systemd_system_ovs-vswitchd.service.in ++++ b/rhel/usr_lib_systemd_system_ovs-vswitchd.service.in +@@ -14,18 +14,19 @@ Restart=on-failure + Environment=XDG_RUNTIME_DIR=/var/run/openvswitch + EnvironmentFile=/etc/openvswitch/default.conf + EnvironmentFile=-/etc/sysconfig/openvswitch +-EnvironmentFile=-/run/openvswitch/useropts ++EnvironmentFile=-/run/openvswitch.useropts ++LimitSTACK=2M + @begin_dpdk@ + ExecStartPre=-/bin/sh -c '/usr/bin/chown :$${OVS_USER_ID##*:} /dev/hugepages' + ExecStartPre=-/usr/bin/chmod 0775 /dev/hugepages + @end_dpdk@ + ExecStart=/usr/share/openvswitch/scripts/ovs-ctl \ + --no-ovsdb-server 
--no-monitor --system-id=random \ +- ${OVSUSER} \ ++ ${OVS_USER_OPT} \ + start $OPTIONS + ExecStop=/usr/share/openvswitch/scripts/ovs-ctl --no-ovsdb-server stop + ExecReload=/usr/share/openvswitch/scripts/ovs-ctl --no-ovsdb-server \ + --no-monitor --system-id=random \ +- ${OVSUSER} \ ++ ${OVS_USER_OPT} \ + restart $OPTIONS + TimeoutSec=300 +diff --git a/rhel/usr_lib_systemd_system_ovsdb-server.service b/rhel/usr_lib_systemd_system_ovsdb-server.service +index 41ac2dded1..4c170c09b4 100644 +--- a/rhel/usr_lib_systemd_system_ovsdb-server.service ++++ b/rhel/usr_lib_systemd_system_ovsdb-server.service +@@ -11,16 +11,15 @@ PIDFile=/var/run/openvswitch/ovsdb-server.pid + Restart=on-failure + EnvironmentFile=/etc/openvswitch/default.conf + EnvironmentFile=-/etc/sysconfig/openvswitch +-ExecStartPre=/usr/bin/chown ${OVS_USER_ID} /var/run/openvswitch /var/log/openvswitch +-ExecStartPre=/bin/sh -c 'rm -f /run/openvswitch/useropts; if [ "$${OVS_USER_ID/:*/}" != "root" ]; then /usr/bin/echo "OVSUSER=--ovs-user=${OVS_USER_ID}" > /run/openvswitch/useropts; fi' +-EnvironmentFile=-/run/openvswitch/useropts ++ExecStartPre=-/usr/bin/chown ${OVS_USER_ID} /var/run/openvswitch /var/log/openvswitch ++ExecStartPre=/bin/sh -c 'rm -f /run/openvswitch.useropts; /usr/bin/echo "OVS_USER_ID=${OVS_USER_ID}" > /run/openvswitch.useropts' ++ExecStartPre=/bin/sh -c 'if [ "$${OVS_USER_ID/:*/}" != "root" ]; then /usr/bin/echo "OVS_USER_OPT=--ovs-user=${OVS_USER_ID}" >> /run/openvswitch.useropts; fi' ++EnvironmentFile=-/run/openvswitch.useropts + ExecStart=/usr/share/openvswitch/scripts/ovs-ctl \ + --no-ovs-vswitchd --no-monitor --system-id=random \ +- ${OVSUSER} \ ++ ${OVS_USER_OPT} \ + start $OPTIONS + ExecStop=/usr/share/openvswitch/scripts/ovs-ctl --no-ovs-vswitchd stop + ExecReload=/usr/share/openvswitch/scripts/ovs-ctl --no-ovs-vswitchd \ +- ${OVSUSER} \ ++ ${OVS_USER_OPT} \ + --no-monitor restart $OPTIONS +-RuntimeDirectory=openvswitch +-RuntimeDirectoryMode=0755 +diff --git a/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template b/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template +index 9364454020..c467d02db9 100644 +--- a/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template ++++ b/rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template +@@ -21,6 +21,10 @@ + # --ovs-vswitchd-wrapper=valgrind + # --ovsdb-server-wrapper=valgrind + # ++# Specify additional options, for example to start with debug logs: ++# --ovs-vswitchd-options='-vconsole:dbg -vfile:dbg' ++# --ovsdb-server-options='-vconsole:dbg -vfile:dbg' ++# + OPTIONS="" + + # Uncomment and set the OVS User/Group value +diff --git a/tests/automake.mk b/tests/automake.mk +index 92d56b29de..a4da75ebe0 100644 +--- a/tests/automake.mk ++++ b/tests/automake.mk +@@ -108,7 +108,8 @@ TESTSUITE_AT = \ + tests/ovn-controller-vtep.at \ + tests/mcast-snooping.at \ + tests/packet-type-aware.at \ +- tests/nsh.at ++ tests/nsh.at \ ++ tests/drop-stats.at + + EXTRA_DIST += $(FUZZ_REGRESSION_TESTS) + FUZZ_REGRESSION_TESTS = \ +diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at +index 6915d43ba1..9295f5a0ec 100644 +--- a/tests/dpif-netdev.at ++++ b/tests/dpif-netdev.at +@@ -337,6 +337,14 @@ meter:2 flow_count:1 packet_in_count:10 byte_in_count:600 duration:0.0s bands: + 0: packet_count:5 byte_count:300 + ]) + ++ovs-appctl time/warp 5000 ++ ++AT_CHECK([ ++ovs-appctl coverage/show | grep datapath_drop_meter | awk '{ print $6 } END { if (!NR) print "0" }' ++], [0], [dnl ++14 ++]) ++ + AT_CHECK([cat ovs-vswitchd.log | filter_flow_install | 
strip_xout_keep_actions], [0], [dnl
+ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), actions:meter(0),7
+ recirc_id(0),in_port(2),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), actions:8
+diff --git a/tests/drop-stats.at b/tests/drop-stats.at
+new file mode 100644
+index 0000000000..67d1a4aa23
+--- /dev/null
++++ b/tests/drop-stats.at
+@@ -0,0 +1,190 @@
++AT_BANNER([drop-stats])
++
++AT_SETUP([drop-stats - cli tests])
++
++OVS_VSWITCHD_START([dnl
++ set bridge br0 datapath_type=dummy \
++ protocols=OpenFlow10,OpenFlow13,OpenFlow14,OpenFlow15 -- \
++ add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1])
++
++AT_DATA([flows.txt], [dnl
++table=0,in_port=1,actions=drop
++])
++
++AT_CHECK([
++ ovs-ofctl del-flows br0
++ ovs-ofctl -Oopenflow13 add-flows br0 flows.txt
++ ovs-ofctl -Oopenflow13 dump-flows br0 | ofctl_strip | sort | grep actions ], [0], [dnl
++ in_port=1 actions=drop
++])
++
++AT_CHECK([
++ ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=3a:6d:d2:09:9c:ab,dst=1e:2c:e9:2a:66:9e),ipv4(src=192.168.10.10,dst=192.168.10.30,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'
++ ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=3a:6d:d2:09:9c:ab,dst=1e:2c:e9:2a:66:9e),ipv4(src=192.168.10.10,dst=192.168.10.30,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'
++ ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=3a:6d:d2:09:9c:ab,dst=1e:2c:e9:2a:66:9e),ipv4(src=192.168.10.10,dst=192.168.10.30,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'
++], [0], [ignore])
++
++AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/used:[[0-9]].[[0-9]]*s/used:0.0/' | sort], [0], [flow-dump from non-dpdk interfaces:
++recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:2, bytes:212, used:0.0, actions:drop
++])
++
++ovs-appctl time/warp 5000
++
++AT_CHECK([
++ovs-appctl coverage/show | grep drop_action_of_pipeline | awk '{ print $6 } END { if (!NR) print "0" }'
++], [0], [dnl
++3
++])
++
++
++OVS_VSWITCHD_STOP
++AT_CLEANUP
++
++AT_SETUP([drop-stats - pipeline and recursion drops])
++
++OVS_VSWITCHD_START([dnl
++ set bridge br0 datapath_type=dummy \
++ protocols=OpenFlow10,OpenFlow13,OpenFlow14,OpenFlow15 -- \
++ add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1 -- \
++ add-port br0 p2 -- set Interface p2 type=dummy ofport_request=2])
++
++AT_DATA([flows.txt], [dnl
++table=0,in_port=1,actions=drop
++])
++
++AT_CHECK([
++ ovs-ofctl del-flows br0
++ ovs-ofctl -Oopenflow13 add-flows br0 flows.txt
++ ovs-ofctl -Oopenflow13 dump-flows br0 | ofctl_strip | sort | grep actions ], [0], [dnl
++ in_port=1 actions=drop
++])
++
++AT_CHECK([
++ ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=3a:6d:d2:09:9c:ab,dst=1e:2c:e9:2a:66:9e),ipv4(src=192.168.10.10,dst=192.168.10.30,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'
++], [0], [ignore])
++
++ovs-appctl time/warp 5000
++
++AT_CHECK([
++ovs-appctl coverage/show | grep drop_action_of_pipeline | awk '{ print $6 } END { if (!NR) print "0" }'
++], [0], [dnl
++1
++])
++
++
++AT_DATA([flows.txt], [dnl
++table=0, in_port=1, actions=goto_table:1
++table=1, in_port=1, actions=goto_table:2
++table=2, in_port=1, actions=resubmit(,1)
++])
++
++AT_CHECK([
++ ovs-ofctl del-flows br0
++ ovs-ofctl -Oopenflow13 add-flows br0 flows.txt
++ ovs-ofctl -Oopenflow13 dump-flows br0 | ofctl_strip | sort | grep actions ], [0], [ignore])
++
++AT_CHECK([
++ ovs-appctl
netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=3a:6d:d2:09:9c:ab,dst=1e:2c:e9:2a:66:9e),ipv4(src=192.168.10.10,dst=192.168.10.30,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' ++], [0], [ignore]) ++ ++ovs-appctl time/warp 5000 ++ ++AT_CHECK([ ++ovs-appctl coverage/show | grep drop_action_recursion_too_deep | awk '{ print $6 } END { if (!NR) print "0" }' ++], [0], [dnl ++1 ++]) ++ ++ ++OVS_VSWITCHD_STOP(["/|WARN|/d"]) ++AT_CLEANUP ++ ++AT_SETUP([drop-stats - too many resubmit]) ++OVS_VSWITCHD_START ++add_of_ports br0 1 ++(for i in `seq 1 64`; do ++ j=`expr $i + 1` ++ echo "in_port=$i, actions=resubmit:$j, resubmit:$j, local" ++ done ++ echo "in_port=65, actions=local") > flows.txt ++ ++AT_CHECK([ ++ ovs-ofctl del-flows br0 ++ ovs-ofctl -Oopenflow13 add-flows br0 flows.txt ], [0], [ignore]) ++ ++ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x1234)' ++ ++ovs-appctl time/warp 5000 ++ ++AT_CHECK([ ++ovs-appctl coverage/show | grep drop_action_too_many_resubmit | awk '{ print $6 } END { if (!NR) print "0" }' ++], [0], [dnl ++1 ++]) ++ ++OVS_VSWITCHD_STOP(["/|WARN|/d"]) ++AT_CLEANUP ++ ++ ++AT_SETUP([drop-stats - stack too deep]) ++OVS_VSWITCHD_START ++add_of_ports br0 1 ++(for i in `seq 1 12`; do ++ j=`expr $i + 1` ++ echo "in_port=$i, actions=resubmit:$j, resubmit:$j, local" ++ done ++ push="push:NXM_NX_REG0[[]]" ++ echo "in_port=13, actions=$push,$push,$push,$push,$push,$push,$push,$push") > flows ++ ++AT_CHECK([ovs-ofctl add-flows br0 flows]) ++ ++ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x1234)' ++ ++ovs-appctl time/warp 5000 ++ ++AT_CHECK([ ++ovs-appctl coverage/show | grep drop_action_stack_too_deep | awk '{ print $6 } END { if (!NR) print "0" }' ++], [0], [dnl ++1 ++]) ++ ++ ++OVS_VSWITCHD_STOP(["/resubmits yielded over 64 kB of stack/d"]) ++AT_CLEANUP ++ ++AT_SETUP([drop-stats - too many mpls labels]) ++ ++OVS_VSWITCHD_START([dnl ++ set bridge br0 datapath_type=dummy \ ++ protocols=OpenFlow10,OpenFlow13,OpenFlow14,OpenFlow15 -- \ ++ add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1 -- \ ++ add-port br0 p2 -- set Interface p2 type=dummy ofport_request=2]) ++ ++AT_DATA([flows.txt], [dnl ++table=0, in_port=1, actions=push_mpls:0x8847, resubmit:3 ++table=0, in_port=3, actions=push_mpls:0x8847, set_field:10->mpls_label, set_field:15->mpls_label, resubmit:4 ++table=0, in_port=4, actions=push_mpls:0x8847, set_field:11->mpls_label, resubmit:5 ++table=0, in_port=5, actions=push_mpls:0x8847, set_field:12->mpls_label, resubmit:6 ++table=0, in_port=6, actions=push_mpls:0x8847, set_field:13->mpls_label, output:2 ++]) ++ ++AT_CHECK([ ++ ovs-ofctl del-flows br0 ++ ovs-ofctl -Oopenflow13 add-flows br0 flows.txt ++]) ++ ++AT_CHECK([ ++ ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=3a:6d:d2:09:9c:ab,dst=1e:2c:e9:2a:66:9e),ipv4(src=192.168.10.10,dst=192.168.10.30,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' ++], [0], [ignore]) ++ ++ovs-appctl time/warp 5000 ++ ++AT_CHECK([ ++ovs-appctl coverage/show | grep drop_action_too_many_mpls_labels | awk '{ print $6 } END { if (!NR) print "0" }' ++], [0], [dnl ++1 ++]) ++ ++ ++OVS_VSWITCHD_STOP(["/|WARN|/d"]) ++AT_CLEANUP +diff --git a/tests/odp.at b/tests/odp.at +index 86a918e662..a172c49fe1 100644 +--- a/tests/odp.at ++++ b/tests/odp.at +@@ -377,6 +377,10 @@ clone(1) + clone(clone(push_vlan(vid=12,pcp=0),2),1) + 
set(tunnel(tun_id=0x1,dst=1.1.1.1,ttl=64,erspan(ver=1,idx=0x7),flags(df|key))) + set(tunnel(tun_id=0x1,dst=1.1.1.1,ttl=64,erspan(ver=2,dir=1,hwid=0x1),flags(df|key))) ++check_pkt_len(size=200,gt(4),le(5)) ++check_pkt_len(size=200,gt(drop),le(5)) ++check_pkt_len(size=200,gt(ct(nat)),le(drop)) ++check_pkt_len(size=200,gt(set(eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15))),le(set(eth(src=00:01:02:03:04:06,dst=10:11:12:13:14:16)))) + ]) + AT_CHECK_UNQUOTED([ovstest test-odp parse-actions < actions.txt], [0], + [`cat actions.txt` +diff --git a/tests/ofp-actions.at b/tests/ofp-actions.at +index e320a92a8f..0f559d587d 100644 +--- a/tests/ofp-actions.at ++++ b/tests/ofp-actions.at +@@ -310,6 +310,12 @@ ffff 0018 00002320 0025 0000 0005 5000 1122334455 000000 + ffff 0048 00002320 0023 0001 00000000 0000 FF 000000 0000 dnl + ffff 0030 00002320 0024 00 00 0011 000c fe800000 00000000 020c 29ff fe88 a18b fe800000 00000000 020c 29ff fe88 0001 + ++# actions=check_pkt_larger(1500)->NXM_NX_REG0[0] ++ffff 0018 00002320 0031 05dc 000000010004000000000000 ++ ++# actions=check_pkt_larger(1000)->NXM_NX_XXREG1[4] ++ffff 0018 00002320 0031 03e8 00040001e010000000000000 ++ + ]) + sed '/^[[#&]]/d' < test-data > input.txt + sed -n 's/^# //p; /^$/p' < test-data > expout +diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at +index ded2ef0132..b7cb3cf5f8 100644 +--- a/tests/ofproto-dpif.at ++++ b/tests/ofproto-dpif.at +@@ -5434,6 +5434,47 @@ AT_CHECK([test 1 = `$PYTHON "$top_srcdir/utilities/ovs-pcap.in" p2-tx.pcap | wc + OVS_VSWITCHD_STOP + AT_CLEANUP + ++AT_SETUP([ofproto-dpif - continuation with patch port]) ++AT_KEYWORDS([continuations pause resume]) ++OVS_VSWITCHD_START( ++ [add-port br0 p0 -- set Interface p0 type=dummy -- \ ++ add-port br0 patch- -- \ ++ set interface patch- type=patch options:peer=patch+ -- \ ++ add-br br1 -- set bridge br1 datapath-type=dummy fail-mode=secure -- \ ++ add-port br1 patch+ -- set interface patch+ type=patch options:peer=patch- ++]) ++add_of_ports --pcap br1 1 ++ ++flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" ++ ++AT_DATA([flows.txt], [dnl ++table=0, in_port=patch+ icmp action=controller(pause), resubmit(,1) ++table=1, in_port=patch+ icmp action=ct(table=2) ++table=2, in_port=patch+ icmp ct_state=+trk+new action=ct(commit, table=3) ++table=3, in_port=patch+ icmp action=p1 ++]) ++ ++AT_CHECK([ovs-ofctl add-flow br0 action=normal]) ++AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br1 flows.txt]) ++ ++AT_CAPTURE_FILE([ofctl_monitor.log]) ++ovs-ofctl monitor br1 resume --detach --no-chdir --pidfile=ovs-ofctl.pid 2> ofctl_monitor.log ++ ++# Run a packet through the switch. ++AT_CHECK([ovs-appctl netdev-dummy/receive p0 "$flow"], [0], [stdout]) ++ ++# Check flow stats ++AT_CHECK([ovs-ofctl dump-flows br1], [0], [stdout]) ++AT_CHECK([strip_xids < stdout | sed -n 's/duration=[[0-9]]*\.[[0-9]]*s/duration=0.0s/p' | sed -n 's/idle_age=[[0-9]]*/idle_age=0/p' | grep 'table=3' | grep -v 'commit'], [0], [dnl ++ cookie=0x0, duration=0.0s, table=3, n_packets=1, n_bytes=106, idle_age=0, icmp,in_port=1 actions=output:2 ++]) ++ ++# The packet should be received by port 1 ++AT_CHECK([test 1 = `$PYTHON "$top_srcdir/utilities/ovs-pcap.in" p1-tx.pcap | wc -l`]) ++ ++OVS_VSWITCHD_STOP ++AT_CLEANUP ++ + # Check that pause works after the packet is cloned. 
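+ # (controller(pause) freezes the translation state and sends the packet
+ # to the controller as a continuation; "ovs-ofctl monitor ... resume"
+ # replies with NXT_RESUME and processing picks up exactly where it
+ # stopped.)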
+ AT_SETUP([ofproto-dpif - continuation after clone]) + AT_KEYWORDS([continuations clone pause resume]) +@@ -9384,7 +9425,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp= + # are wildcarded. + AT_CHECK([grep '\(modify\)\|\(flow_add\)' ovs-vswitchd.log | strip_ufid ], [0], [dnl + dpif_netdev|DBG|flow_add: recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x1234), actions:100 +-dpif|DBG|dummy@ovs-dummy: put[[modify]] skb_priority(0/0),skb_mark(0/0),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label(0/0),recirc_id(0),dp_hash(0/0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x1234) ++dpif|DBG|dummy@ovs-dummy: put[[modify]] skb_priority(0/0),skb_mark(0/0),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label(0/0),recirc_id(0),dp_hash(0/0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x1234), actions:drop + dpif|DBG|dummy@ovs-dummy: put[[modify]] skb_priority(0/0),skb_mark(0/0),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label(0/0),recirc_id(0),dp_hash(0/0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x1234), actions:100 + dpif_netdev|DBG|flow_add: recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp=7/0x0),encap(eth_type(0x1234)), actions:drop + ]) +@@ -10361,6 +10402,62 @@ udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10. + OVS_VSWITCHD_STOP + AT_CLEANUP + ++AT_SETUP([ofproto-dpif - conntrack - match masked ct fields]) ++OVS_VSWITCHD_START ++ ++add_of_ports br0 1 2 ++ ++AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg vconn:info ofproto_dpif:info]) ++ ++dnl Allow new connections on p1->p2. 
Allow only established connections p2->p1 ++AT_DATA([flows.txt], [dnl ++table=0,arp,action=normal ++table=0,ip,in_port=1,udp,nw_src=10.1.2.1/24,action=ct(commit) ++table=0,ip,in_port=1,udp6,ipv6_dst=2001:db8::1/64,action=ct(commit) ++table=0,ip,in_port=1,udp,tp_src=3/0x1,action=ct(commit) ++table=0,ip,in_port=2,actions=ct(table=1) ++table=0,ip6,in_port=2,actions=ct(table=1) ++table=1,priority=10,udp,ct_state=+trk+rpl,ct_nw_src=10.1.2.1/24,actions=controller ++table=1,priority=10,udp6,ct_state=+trk+rpl,ct_ipv6_dst=2001:db8::1/64,actions=controller ++table=1,priority=10,udp,ct_state=+trk+rpl,ct_tp_src=3/0x1,actions=controller ++table=1,priority=1,action=drop ++]) ++ ++AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) ++ ++AT_CAPTURE_FILE([ofctl_monitor.log]) ++AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl -P nxt_packet_in --detach --no-chdir --pidfile 2> ofctl_monitor.log]) ++ ++dnl Match ct_nw_src=10.1.2.1/24 ++AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.2.100,dst=10.1.2.200,proto=17,tos=0,ttl=64,frag=no),udp(src=6,dst=6)']) ++AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.2.200,dst=10.1.2.100,proto=17,tos=0,ttl=64,frag=no),udp(src=6,dst=6)']) ++ ++dnl Match ct_ipv6_dst=2001:db8::1/64 ++AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=2001:db8::2,label=0,proto=17,tclass=0x70,hlimit=128,frag=no),udp(src=1,dst=2)']) ++AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x86dd),ipv6(src=2001:db8::2,dst=2001:db8::1,label=0,proto=17,tclass=0x70,hlimit=128,frag=no),udp(src=2,dst=1)']) ++ ++dnl Match ct_tp_src=3/0x1 ++AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=1,dst=2)']) ++AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.1.2,dst=10.1.1.1,proto=17,tos=0,ttl=64,frag=no),udp(src=2,dst=1)']) ++ ++OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ++OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) ++ ++dnl Check this output. 
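++dnl The masked ct matches are the point of this test: e.g. ct_tp_src=3/0x1
++dnl keys only on the low bit, so the reply whose original-direction source
++dnl port is 1 (odd, like 3) still hits the priority-10 flow and is sent to
++dnl the controller.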
++AT_CHECK([cat ofctl_monitor.log], [0], [dnl ++NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=106 ct_state=est|rpl|trk,ct_nw_src=10.1.2.100,ct_nw_dst=10.1.2.200,ct_nw_proto=17,ct_tp_src=6,ct_tp_dst=6,ip,in_port=2 (via action) data_len=106 (unbuffered) ++udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.2.200,nw_dst=10.1.2.100,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=6,tp_dst=6 udp_csum:221 ++dnl ++NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=126 ct_state=est|rpl|trk,ct_ipv6_src=2001:db8::1,ct_ipv6_dst=2001:db8::2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,ipv6,in_port=2 (via action) data_len=126 (unbuffered) ++udp6,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,ipv6_src=2001:db8::2,ipv6_dst=2001:db8::1,ipv6_label=0x00000,nw_tos=112,nw_ecn=0,nw_ttl=128,tp_src=2,tp_dst=1 udp_csum:bfe2 ++dnl ++NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=106 ct_state=est|rpl|trk,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,ip,in_port=2 (via action) data_len=106 (unbuffered) ++udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=2,tp_dst=1 udp_csum:553 ++]) ++ ++OVS_VSWITCHD_STOP ++AT_CLEANUP ++ + AT_SETUP([ofproto-dpif - conntrack - ofproto/trace]) + OVS_VSWITCHD_START + +@@ -10503,3 +10600,166 @@ AT_CHECK([grep flow_del ovs-vswitchd.log], [1]) + + OVS_VSWITCHD_STOP + AT_CLEANUP ++ ++AT_SETUP([ofproto-dpif - check_pkt_larger action]) ++OVS_VSWITCHD_START ++add_of_ports br0 1 2 3 4 ++ ++AT_DATA([flows.txt], [dnl ++table=0,in_port=1 actions=check_pkt_larger(200)->NXM_NX_REG0[[0]],resubmit(,1) ++table=1,in_port=1,reg0=0x1/0x1 actions=output:2,resubmit(,2) ++table=1,in_port=1,actions=output:3,resubmit(,2) ++table=2,in_port=1,actions=mod_dl_dst:82:82:82:82:82:82,output:4 ++]) ++ ++AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt]) ++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) ++AT_CHECK([tail -1 stdout], [0], [dnl ++Datapath actions: check_pkt_len(size=200,gt(2,set(eth(dst=82:82:82:82:82:82)),4),le(3,set(eth(dst=82:82:82:82:82:82)),4)) ++]) ++ ++dnl Test flow xlate check_pkt_large clone action without using datapath check_pkt_len action. ++AT_CHECK([ovs-appctl dpif/set-dp-features br0 check_pkt_len false], [0], [ignore]) ++ ++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) ++AT_CHECK([tail -3 stdout], [0], [dnl ++Datapath actions: 3,set(eth(dst=82:82:82:82:82:82)),4 ++This flow is handled by the userspace slow path because it: ++ - Uses action(s) not supported by datapath. 
++]) ++ ++dnl Enable datapath check_pkt_len action ++AT_CHECK([ovs-appctl dpif/set-dp-features br0 check_pkt_len true], [0], [ignore]) ++ ++ovs-ofctl del-flows br0 ++ ++AT_DATA([flows.txt], [dnl ++table=0,in_port=1 actions=check_pkt_larger(200)->NXM_NX_REG0[[0]],resubmit(,1) ++table=1,in_port=1,priority=200,reg0=0x1/0x1 actions=output:2 ++]) ++ ++AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt]) ++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) ++AT_CHECK([tail -1 stdout], [0], [dnl ++Datapath actions: check_pkt_len(size=200,gt(2),le(drop)) ++]) ++ ++ovs-ofctl del-flows br0 ++AT_DATA([flows.txt], [dnl ++table=0,in_port=1 actions=check_pkt_larger(200)->NXM_NX_REG0[[0]] ++]) ++ ++AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt]) ++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) ++AT_CHECK([tail -1 stdout], [0], [dnl ++Datapath actions: check_pkt_len(size=200,gt(drop),le(drop)) ++]) ++ ++ovs-ofctl del-flows br0 ++AT_DATA([flows.txt], [dnl ++table=0,in_port=1 actions=check_pkt_larger(200)->NXM_NX_REG0[[0]],resubmit(,1) ++table=1,in_port=1,priority=200,reg0=0x1/0x1,ip actions=clone(set_field:192.168.3.3->ip_src),clone(set_field:192.168.4.4->ip_dst,output:2),clone(mod_dl_src:80:81:81:81:81:81,set_field:192.168.5.5->ip_dst,output:3),output:4 ++table=1,in_port=1,priority=0,ip actions=clone(set_field:192.168.3.3->ip_src),clone(set_field:192.168.4.4->ip_dst,output:2),clone(ct(commit),output:3),output:4 ++]) ++ ++AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt]) ++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) ++AT_CHECK([tail -1 stdout], [0], [dnl ++Datapath actions: check_pkt_len(size=200,gt(set(ipv4(src=10.10.10.2,dst=192.168.4.4)),2,set(eth(src=80:81:81:81:81:81)),set(ipv4(src=10.10.10.2,dst=192.168.5.5)),3,set(eth(src=50:54:00:00:00:09)),set(ipv4(src=10.10.10.2,dst=10.10.10.1)),4),le(set(ipv4(src=10.10.10.2,dst=192.168.4.4)),2,set(ipv4(src=10.10.10.2,dst=10.10.10.1)),clone(ct(commit),3),4)) ++]) ++ ++AT_DATA([flows.txt], [dnl ++table=0,priority=0 actions=check_pkt_larger(200)->NXM_NX_REG0[[0]],resubmit(,1) ++table=1,in_port=1,priority=200,reg0=0x1/0x1,ip actions=clone(set_field:192.168.3.3->ip_src, resubmit(,0)) ++table=1,in_port=1,priority=0,ip actions=clone(set_field:192.168.3.4->ip_src, resubmit(,0)) ++]) ++ ++AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt]) ++ovs-ofctl dump-flows br0 ++ ++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) ++AT_CHECK([tail -3 stdout], [0], [dnl ++Megaflow: recirc_id=0,eth,ip,reg0=0/0x1,in_port=1,nw_src=10.10.10.2,nw_frag=no ++Datapath actions: drop ++Translation failed (Recursion too deep), packet is dropped. 
++]) ++ ++ovs-ofctl del-flows br0 ++AT_DATA([flows.txt], [dnl ++table=0,priority=0 actions=check_pkt_larger(200)->NXM_NX_REG0[[0]],resubmit(,1) ++table=1,ip,nw_src=192.168.3.3 actions=output:3 ++table=1,ip,nw_src=192.168.3.4 actions=output:4 ++table=1,reg0=0x1/0x1,ip actions=clone(set_field:192.168.3.3->ip_src, resubmit(,0)) ++table=1,ip actions=clone(set_field:192.168.3.4->ip_src, resubmit(,0)) ++]) ++ ++AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt]) ++ovs-ofctl dump-flows br0 ++ ++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) ++AT_CHECK([tail -1 stdout], [0], [dnl ++Datapath actions: check_pkt_len(size=200,gt(set(ipv4(src=192.168.3.3)),check_pkt_len(size=200,gt(3),le(3))),le(set(ipv4(src=192.168.3.4)),check_pkt_len(size=200,gt(4),le(4)))) ++]) ++ ++ovs-ofctl del-flows br0 ++AT_DATA([flows.txt], [dnl ++table=0,in_port=1 actions=check_pkt_larger(200)->NXM_NX_REG0[[0]],resubmit(,1) ++table=1,in_port=1,reg0=0x1/0x1 actions=mod_dl_dst:82:82:82:82:82:82,controller(),resubmit(,2) ++table=1,in_port=1 actions=resubmit(,2) ++table=2,ip,dl_dst=82:82:82:82:82:82 actions=ct(table=3) ++table=2,ip,dl_dst=50:54:00:00:00:0a actions=ct(table=3) ++table=3,ip,reg0=0x1/0x1 actions=output:2 ++table=3,ip actions=output:4 ++]) ++ ++AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt]) ++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) ++AT_CHECK([cat stdout | grep Datapath -B1], [0], [dnl ++Megaflow: recirc_id=0,eth,ip,in_port=1,dl_dst=50:54:00:00:00:0a,nw_frag=no ++Datapath actions: check_pkt_len(size=200,gt(set(eth(dst=82:82:82:82:82:82)),userspace(pid=0,controller(reason=1,dont_send=1,continuation=0,recirc_id=1,rule_cookie=0,controller_id=0,max_len=65535)),ct,recirc(0x2)),le(ct,recirc(0x3))) ++-- ++Megaflow: recirc_id=0x2,eth,ip,in_port=1,nw_frag=no ++Datapath actions: 2 ++-- ++Megaflow: recirc_id=0x3,eth,ip,in_port=1,nw_frag=no ++Datapath actions: 4 ++]) ++ ++OVS_VSWITCHD_STOP ++AT_CLEANUP ++ ++AT_SETUP([ofproto-dpif - check_pkt_larger with continuation and ct]) ++OVS_VSWITCHD_START ++add_of_ports --pcap br0 `seq 1 4` ++ ++AT_CAPTURE_FILE([ofctl_monitor0.log]) ++AT_CHECK([ovs-ofctl monitor br0 resume --detach --no-chdir --pidfile=ovs-ofctl0.pid 2> ofctl_monitor0.log]) ++ ++AT_DATA([flows.txt], [dnl ++table=0,in_port=1 actions=check_pkt_larger(150)->NXM_NX_REG0[[0]],resubmit(,1) ++table=1,ip,reg0=0x1/0x1 actions=mod_dl_dst:82:82:82:82:82:82,controller(pause),resubmit(,2) ++table=1,ip,reg0=0 actions=mod_dl_dst:83:83:83:83:83:83,controller(pause),resubmit(,2) ++table=2,ip,dl_dst=82:82:82:82:82:82 actions=ct(table=3) ++table=2,ip,dl_dst=83:83:83:83:83:83 actions=ct(table=3) ++table=3,ip,reg0=0x1/0x1 actions=ct(commit),output:2 ++table=3,ip actions=ct(commit),output:4 ++]) ++ ++AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt]) ++ ++flow="in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)" ++ ++AT_CHECK([ovs-appctl netdev-dummy/receive p1 "$flow"], [0], [stdout]) ++ ++OVS_WAIT_UNTIL([test 1 = `ovs-ofctl parse-pcap p4-tx.pcap \ ++| grep dl_dst=83:83:83:83:83:83 | wc -l`]) ++AT_CHECK([test 0 = 
`ovs-ofctl parse-pcap p2-tx.pcap | wc -l`]) ++ ++AT_CHECK([ovs-appctl netdev-dummy/receive p1 "$flow" --len 200], [0], [stdout]) ++ ++OVS_WAIT_UNTIL([test 1 = `ovs-ofctl parse-pcap p2-tx.pcap \ ++| grep dl_dst=82:82:82:82:82:82 | wc -l`]) ++AT_CHECK([test 1 = `ovs-ofctl parse-pcap p2-tx.pcap | wc -l`]) ++ ++OVS_VSWITCHD_STOP ++AT_CLEANUP +diff --git a/tests/ovn.at b/tests/ovn.at +index 4b14720f32..291330b3b2 100644 +--- a/tests/ovn.at ++++ b/tests/ovn.at +@@ -12090,3 +12090,24 @@ ovn-nbctl list logical_switch_port + ovn-nbctl list logical_router_port + + AT_CLEANUP ++ ++# Run ovn-nbctl in daemon mode, change to a backup database and verify that ++# an insert operation is not allowed. ++AT_SETUP([ovn -- can't write to a backup database server instance]) ++ovn_start ++on_exit 'kill $(cat ovn-nbctl.pid)' ++export OVN_NB_DAEMON=$(ovn-nbctl --pidfile --detach) ++ ++AT_CHECK([ovn-nbctl ls-add sw0]) ++as ovn-nb ++AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/sync-status | grep active | wc -l], [0], [1 ++]) ++ovs-appctl -t ovsdb-server ovsdb-server/set-active-ovsdb-server tcp:192.0.2.2:6641 ++ovs-appctl -t ovsdb-server ovsdb-server/connect-active-ovsdb-server ++AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/sync-status | grep -c backup], [0], [1 ++]) ++AT_CHECK([ovn-nbctl ls-add sw1], [1], [ignore], ++[ovn-nbctl: transaction error: {"details":"insert operation not allowed when database server is in read only mode","error":"not allowed"} ++]) ++ ++AT_CLEANUP +diff --git a/tests/ovsdb-replication.at b/tests/ovsdb-replication.at +index f81381bdb3..82c4160529 100644 +--- a/tests/ovsdb-replication.at ++++ b/tests/ovsdb-replication.at +@@ -19,6 +19,29 @@ replication_schema () { + } + EOF + } ++replication_schema_v2 () { ++ cat <<'EOF' ++ {"name": "mydb", ++ "tables": { ++ "a": { ++ "columns": { ++ "number": {"type": "integer"}, ++ "name": {"type": "string"}}, ++ "indexes": [["number"]]}, ++ "b": { ++ "columns": { ++ "number": {"type": "integer"}, ++ "name": {"type": "string"}, ++ "foo" : {"type": "string"}}, ++ "indexes": [["number"]]}, ++ "c": { ++ "columns": { ++ "number": {"type": "integer"}, ++ "name": {"type": "string"}}, ++ "indexes": [["number"]]}} ++ } ++EOF ++} + ] + m4_divert_pop([PREPARE_TESTS]) + +diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at +index 81f03d28b0..b128c4f0fb 100644 +--- a/tests/ovsdb-server.at ++++ b/tests/ovsdb-server.at +@@ -1903,3 +1903,112 @@ AT_CHECK([uuidfilt output], [0], [[[{"details":"insert operation not allowed whe + ], [ignore]) + OVSDB_SERVER_SHUTDOWN + AT_CLEANUP ++ ++AT_SETUP([ovsdb-server replication with schema mismatch]) ++AT_KEYWORDS([ovsdb server replication]) ++replication_schema > subset_schema ++replication_schema_v2 > superset_schema ++ ++AT_CHECK([ovsdb-tool create db1 subset_schema], [0], [stdout], [ignore]) ++AT_CHECK([ovsdb-tool create db2 superset_schema], [0], [stdout], [ignore]) ++ ++dnl Add some data to both DBs ++AT_CHECK([ovsdb-tool transact db1 \ ++'[["mydb", ++ {"op": "insert", ++ "table": "a", ++ "row": {"number": 9, "name": "nine"}}]]'], [0], [ignore], [ignore]) ++ ++AT_CHECK([ovsdb-tool transact db2 \ ++'[["mydb", ++ {"op": "insert", ++ "table": "a", ++ "row": {"number": 10, "name": "ten"}}]]'], [0], [ignore], [ignore]) ++ ++dnl Start both 'db1' and 'db2'. ++AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --active ], [0], [ignore], [ignore]) ++on_exit 'test ! 
-e pid || kill `cat pid`'
++
++
++AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile="`pwd`"/pid2 --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 db2], [0], [ignore], [ignore])
++on_exit 'test ! -e pid2 || kill `cat pid2`'
++
++OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/sync-status |grep active])
++OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/sync-status |grep active])
++
++AT_CHECK([ovsdb-client dump unix:db.sock a number name], 0, [dnl
++a table
++name number
++---- ------
++nine 9
++])
++
++AT_CHECK([ovsdb-client dump unix:db2.sock a number name], 0, [dnl
++a table
++name number
++---- ------
++ten 10
++])
++
++# Replicate db1 from db2. It should fail since db2's schema
++# doesn't match db1's and has additional tables/columns.
++AT_CHECK([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/set-active-ovsdb-server unix:db2.sock])
++AT_CHECK([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/connect-active-ovsdb-server])
++
++OVS_WAIT_UNTIL(
++ [test 1 = `cat ovsdb-server1.log | grep "Schema version mismatch, checking if mydb can still be replicated or not" | wc -l`]
++)
++
++OVS_WAIT_UNTIL(
++ [test 1 = `cat ovsdb-server1.log | grep "mydb cannot be replicated" | wc -l`]
++)
++
++OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/sync-status |grep active])
++
++# Replicate db2 from db1. This should be successful.
++AT_CHECK([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/disconnect-active-ovsdb-server])
++AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/set-active-ovsdb-server unix:db.sock])
++AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/connect-active-ovsdb-server])
++
++OVS_WAIT_UNTIL(
++ [test 1 = `cat ovsdb-server2.log | grep "Schema version mismatch, checking if mydb can still be replicated or not" | wc -l`]
++)
++
++OVS_WAIT_UNTIL(
++ [test 1 = `cat ovsdb-server2.log | grep "mydb can be replicated" | wc -l`]
++)
++
++OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/sync-status |grep replicating])
++
++AT_CHECK([ovsdb-client dump unix:db.sock a number name], 0, [dnl
++a table
++name number
++---- ------
++nine 9
++])
++
++AT_CHECK([ovsdb-client dump unix:db2.sock a number name], 0, [dnl
++a table
++name number
++---- ------
++nine 9
++])
++
++AT_CHECK([ovsdb-client transact unix:db.sock \
++'[["mydb",
++ {"op": "insert",
++ "table": "a",
++ "row": {"number": 6, "name": "six"}}]]'], [0], [ignore], [ignore])
++
++OVS_WAIT_UNTIL([test 1 = `ovsdb-client dump unix:db2.sock a number name | grep six | wc -l`])
++
++AT_CHECK([
++ ovsdb-client dump unix:db2.sock a number name], 0, [dnl
++a table
++name number
++---- ------
++nine 9
++six 6
++])
++
++AT_CLEANUP
+diff --git a/tests/system-traffic.at b/tests/system-traffic.at
+index e34f7a4fbd..be702ae597 100644
+--- a/tests/system-traffic.at
++++ b/tests/system-traffic.at
+@@ -340,6 +340,46 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI
+ OVS_TRAFFIC_VSWITCHD_STOP
+ AT_CLEANUP
+
++AT_SETUP([datapath - ping over ip6gre L2 tunnel])
++OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
++OVS_CHECK_GRE()
++OVS_CHECK_ERSPAN()
++
++OVS_TRAFFIC_VSWITCHD_START()
++ADD_BR([br-underlay])
++
++AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
++AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"])
++
++ADD_NAMESPACES(at_ns0)
++
++dnl Set up underlay link from host into the namespace using veth pair.
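++dnl The underlay runs IPv6 (fc00:100::/96) while the overlay stays IPv4
++dnl (10.1.1.0/24): OVS terminates one end of the L2 GRE tunnel and a native
++dnl Linux ip6gretap device inside the namespace terminates the other.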
++ADD_VETH(p0, at_ns0, br-underlay, "fc00:100::1/96", [], [], nodad) ++AT_CHECK([ip addr add dev br-underlay "fc00:100::100/96" nodad]) ++AT_CHECK([ip link set dev br-underlay up]) ++ ++dnl Set up tunnel endpoints on OVS outside the namespace and with a native ++dnl linux device inside the namespace. ++ADD_OVS_TUNNEL6([ip6gre], [br0], [at_gre0], [fc00:100::1], [10.1.1.100/24], ++ [options:packet_type=legacy_l2]) ++ADD_NATIVE_TUNNEL6([ip6gretap], [ns_gretap0], [at_ns0], [fc00:100::100], ++ [10.1.1.1/24], [local fc00:100::1]) ++ ++OVS_WAIT_UNTIL([ip netns exec at_ns0 ping6 -c 2 fc00:100::100]) ++ ++dnl First, check the underlay ++NS_CHECK_EXEC([at_ns0], [ping6 -q -c 3 -i 0.3 -w 2 fc00:100::100 | FORMAT_PING], [0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++dnl Okay, now check the overlay with different packet sizes ++NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++OVS_TRAFFIC_VSWITCHD_STOP ++AT_CLEANUP ++ ++ + AT_SETUP([datapath - ping over erspan v1 tunnel]) + OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) + OVS_CHECK_GRE() +diff --git a/tests/system-userspace-packet-type-aware.at b/tests/system-userspace-packet-type-aware.at +index 24a7698ab4..c2246316de 100644 +--- a/tests/system-userspace-packet-type-aware.at ++++ b/tests/system-userspace-packet-type-aware.at +@@ -252,39 +252,39 @@ AT_CHECK([ + + ### Verify datapath configuration + AT_CHECK([ +- ovs-appctl dpif/show | grep -v hit | sed 's/\t/ /g' ++ ovs-appctl dpif/show | grep -v hit + ], [0], [dnl +- br-in1: +- br-in1 65534/2: (tap) +- gre12 1020/14: (gre: remote_ip=10.0.0.2) +- gre12_l3 1021/14: (gre: packet_type=legacy_l3, remote_ip=10.0.0.2) +- gre13 1030/14: (gre: remote_ip=10.0.0.3) +- ovs-n1 10/15: (system) +- br-in2: +- br-in2 65534/3: (tap) +- gre21 2010/14: (gre: packet_type=ptap, remote_ip=20.0.0.1) +- gre23 2030/14: (gre: packet_type=ptap, remote_ip=20.0.0.3) +- ovs-n2 20/16: (system) +- br-in3: +- br-in3 65534/4: (tap) +- gre31 3010/14: (gre: remote_ip=30.0.0.1) +- gre32 3020/14: (gre: remote_ip=30.0.0.2) +- gre32_l3 3021/14: (gre: packet_type=legacy_l3, remote_ip=30.0.0.2) +- ovs-n3 30/17: (system) +- br-p1: +- br-p1 65534/5: (tap) +- p1-0 2/8: (system) +- br-p2: +- br-p2 65534/6: (tap) +- p2-0 2/9: (system) +- br-p3: +- br-p3 65534/7: (tap) +- p3-0 2/10: (system) +- br0: +- br0 65534/1: (tap) +- p0-1 10/11: (system) +- p0-2 20/12: (system) +- p0-3 30/13: (system) ++ br-in1: ++ br-in1 65534/2: (tap) ++ gre12 1020/14: (gre: remote_ip=10.0.0.2) ++ gre12_l3 1021/14: (gre: packet_type=legacy_l3, remote_ip=10.0.0.2) ++ gre13 1030/14: (gre: remote_ip=10.0.0.3) ++ ovs-n1 10/15: (system) ++ br-in2: ++ br-in2 65534/3: (tap) ++ gre21 2010/14: (gre: packet_type=ptap, remote_ip=20.0.0.1) ++ gre23 2030/14: (gre: packet_type=ptap, remote_ip=20.0.0.3) ++ ovs-n2 20/16: (system) ++ br-in3: ++ br-in3 65534/4: (tap) ++ gre31 3010/14: (gre: remote_ip=30.0.0.1) ++ gre32 3020/14: (gre: remote_ip=30.0.0.2) ++ gre32_l3 3021/14: (gre: packet_type=legacy_l3, remote_ip=30.0.0.2) ++ ovs-n3 30/17: (system) ++ br-p1: ++ br-p1 65534/5: (tap) ++ p1-0 2/8: (system) ++ br-p2: ++ br-p2 65534/6: (tap) ++ p2-0 2/9: (system) ++ br-p3: ++ br-p3 65534/7: (tap) ++ p3-0 2/10: (system) ++ br0: ++ br0 65534/1: (tap) ++ p0-1 10/11: (system) ++ p0-2 20/12: (system) ++ p0-3 30/13: (system) + ]) + + ### Test L3 forwarding flows +diff --git a/tests/test-classifier.c b/tests/test-classifier.c +index 6d53d016de..2d98fad485 100644 +--- 
a/tests/test-classifier.c ++++ b/tests/test-classifier.c +@@ -512,8 +512,9 @@ verify_tries(struct classifier *cls) + int i; + + for (i = 0; i < cls->n_tries; i++) { +- n_rules += trie_verify(&cls->tries[i].root, 0, +- cls->tries[i].field->n_bits); ++ const struct mf_field * cls_field ++ = ovsrcu_get(struct mf_field *, &cls->tries[i].field); ++ n_rules += trie_verify(&cls->tries[i].root, 0, cls_field->n_bits); + } + assert(n_rules <= cls->n_rules); + } +diff --git a/tests/testsuite.at b/tests/testsuite.at +index b840dbfa70..922ba48fdf 100644 +--- a/tests/testsuite.at ++++ b/tests/testsuite.at +@@ -82,3 +82,4 @@ m4_include([tests/ovn-controller-vtep.at]) + m4_include([tests/mcast-snooping.at]) + m4_include([tests/packet-type-aware.at]) + m4_include([tests/nsh.at]) ++m4_include([tests/drop-stats.at]) +diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at +index cbdd5a32f7..59723e63b8 100644 +--- a/tests/tunnel-push-pop-ipv6.at ++++ b/tests/tunnel-push-pop-ipv6.at +@@ -1,5 +1,74 @@ + AT_BANNER([tunnel_push_pop_ipv6]) + ++AT_SETUP([tunnel_push_pop_ipv6 - ip6gre]) ++ ++OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00]) ++AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy], [0]) ++AT_CHECK([ovs-vsctl add-port int-br t2 -- set Interface t2 type=ip6gre \ ++ options:remote_ip=2001:cafe::92 ofport_request=2\ ++ options:packet_type=legacy_l2 ++ ], [0]) ++ ++AT_CHECK([ovs-appctl dpif/show], [0], [dnl ++dummy@ovs-dummy: hit:0 missed:0 ++ br0: ++ br0 65534/100: (dummy-internal) ++ p0 1/1: (dummy) ++ int-br: ++ int-br 65534/2: (dummy-internal) ++ t2 2/6: (ip6gre: remote_ip=2001:cafe::92) ++]) ++ ++dnl First setup dummy interface IP address, then add the route ++dnl so that tnl-port table can get valid IP address for the device. ++AT_CHECK([ovs-appctl netdev-dummy/ip6addr br0 2001:cafe::88/24], [0], [OK ++]) ++AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 1.1.2.88/24], [0], [OK ++]) ++AT_CHECK([ovs-appctl ovs/route/add 2001:cafe::92/24 br0], [0], [OK ++]) ++ ++AT_CHECK([ovs-ofctl add-flow br0 action=normal]) ++ ++dnl Check Neighbour discovery. 
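++dnl Sending a packet towards the tunnel before the peer's link address is
++dnl known should make OVS emit an ICMPv6 neighbour solicitation for
++dnl 2001:cafe::92 on p0; the pcap checks below look for that frame.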
++AT_CHECK([ovs-vsctl -- set Interface p0 options:pcap=p0.pcap]) ++ ++AT_CHECK([ovs-appctl netdev-dummy/receive int-br 'in_port(2),eth(src=aa:55:aa:55:00:00,dst=f8:bc:12:ff:ff:ff),eth_type(0x0800),ipv4(src=1.1.3.92,dst=1.1.3.88,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)']) ++AT_CHECK([ovs-pcap p0.pcap > p0.pcap.txt 2>&1]) ++ ++AT_CHECK([cat p0.pcap.txt | grep 92aa55aa55000086dd6000000000203aff2001cafe | uniq], [0], [dnl ++3333ff000092aa55aa55000086dd6000000000203aff2001cafe000000000000000000000088ff0200000000000000000001ff00009287004d48000000002001cafe0000000000000000000000920101aa55aa550000 ++]) ++ ++dnl ++AT_CHECK([ovs-appctl netdev-dummy/receive p0 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:cafe::92,dst=2001:cafe::88,label=0,proto=58,tclass=0,hlimit=255,frag=no),icmpv6(type=136,code=0),nd(target=2001:cafe::92,sll=00:00:00:00:00:00,tll=f8:bc:12:44:34:b6)']) ++ ++AT_CHECK([ovs-appctl tnl/arp/show | tail -n+3 | sort], [0], [dnl ++2001:cafe::92 f8:bc:12:44:34:b6 br0 ++]) ++ ++AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl ++Listening ports: ++ip6gre_sys (6) ref_cnt=1 ++]) ++ ++dnl Check IPv6 GRE tunnel pop ++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:cafe::92,dst=2001:cafe::88,label=0,proto=47,tclass=0x0,hlimit=64)'], [0], [stdout]) ++AT_CHECK([tail -1 stdout], [0], ++ [Datapath actions: tnl_pop(6) ++]) ++ ++dnl Check IPv6 GRE tunnel push ++AT_CHECK([ovs-ofctl add-flow int-br action=2]) ++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:01),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout]) ++ ++AT_CHECK([tail -1 stdout], [0], ++ [Datapath actions: clone(tnl_push(tnl_port(6),header(size=58,type=109,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=47,tclass=0x0,hlimit=64),gre((flags=0x0,proto=0x6558))),out_port(100)),1) ++]) ++ ++OVS_VSWITCHD_STOP ++AT_CLEANUP ++ + AT_SETUP([tunnel_push_pop_ipv6 - ip6erspan]) + + OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00]) +diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at +index f7172433ee..6581c5ea09 100644 +--- a/tests/tunnel-push-pop.at ++++ b/tests/tunnel-push-pop.at +@@ -447,6 +447,27 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 7'], [0], [dnl + port 7: rx pkts=3, bytes=252, drop=?, errs=?, frame=?, over=?, crc=? 
+ ]) + ++AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba600101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) ++ ++ovs-appctl time/warp 5000 ++ ++AT_CHECK([ ++ovs-appctl coverage/show | grep datapath_drop_tunnel_pop_error | awk '{ print $6 } END { if (!NR) print "0" }' ++], [0], [dnl ++1 ++]) ++ ++AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004503007079464000402fba600101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) ++ ++ovs-appctl time/warp 5000 ++ ++AT_CHECK([ ++ovs-appctl coverage/show | grep drop_action_congestion | awk '{ print $6 } END { if (!NR) print "0" }' ++], [0], [dnl ++1 ++]) ++ ++ + dnl Check GREL3 only accepts non-fragmented packets? + AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007e79464000402fba550101025c0101025820000800000001c8fe71d883724fbeb6f4e1494a080045000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) + +@@ -455,7 +476,7 @@ ovs-appctl time/warp 1000 + + AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port [[37]]' | sort], [0], [dnl + port 3: rx pkts=3, bytes=294, drop=?, errs=?, frame=?, over=?, crc=? +- port 7: rx pkts=4, bytes=350, drop=?, errs=?, frame=?, over=?, crc=? ++ port 7: rx pkts=5, bytes=434, drop=?, errs=?, frame=?, over=?, crc=? + ]) + + dnl Check decapsulation of Geneve packet with options +@@ -478,7 +499,7 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 5'], [0], [dnl + port 5: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=? 
+ ]) + AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl +-tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=3,rule_cookie=0,controller_id=0,max_len=65535)) ++tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=2,rule_cookie=0,controller_id=0,max_len=65535)) + ]) + + ovs-appctl time/warp 10000 +@@ -510,7 +531,8 @@ AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl + Listening ports: + ]) + +-OVS_VSWITCHD_STOP ++OVS_VSWITCHD_STOP(["/dropping tunnel packet marked ECN CE but is not ECN capable/d ++/ip packet has invalid checksum/d"]) + AT_CLEANUP + + AT_SETUP([tunnel_push_pop - packet_out]) +diff --git a/tests/tunnel.at b/tests/tunnel.at +index 417343e2c5..4f7c13e7c5 100644 +--- a/tests/tunnel.at ++++ b/tests/tunnel.at +@@ -102,8 +102,9 @@ Datapath actions: set(ipv4(tos=0x3/0x3)),2 + + dnl Tunnel CE and encapsulated packet Non-ECT + AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +-AT_CHECK([tail -2 stdout], [0], +- [Megaflow: recirc_id=0,eth,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=2.2.2.2,tun_tos=3,tun_flags=-df-csum-key,in_port=1,nw_ecn=0,nw_frag=no ++AT_CHECK([tail -3 stdout], [0], ++ [Final flow: unchanged ++Megaflow: recirc_id=0,eth,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=2.2.2.2,tun_tos=3,tun_flags=-df-csum-key,in_port=1,nw_ecn=0,nw_frag=no + Datapath actions: drop + ]) + OVS_VSWITCHD_STOP(["/dropping tunnel packet marked ECN CE but is not ECN capable/d"]) +@@ -193,6 +194,17 @@ AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00: + AT_CHECK([tail -1 stdout], [0], + [Datapath actions: set(tunnel(tun_id=0x5,src=2.2.2.2,dst=1.1.1.1,ttl=64,flags(df|key))),set(skb_mark(0x2)),1 + ]) ++ ++AT_CHECK([ovs-appctl netdev-dummy/receive p2 'aa55aa550001f8bc124434b6080045000054ba20000040018486010103580101037001004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) ++ ++ovs-appctl time/warp 5000 ++ ++AT_CHECK([ ++ovs-appctl coverage/show | grep datapath_drop_invalid_port | awk '{ print $6 } END { if (!NR) print "0" }' ++], [0], [dnl ++1 ++]) ++ + OVS_VSWITCHD_STOP + AT_CLEANUP + +diff --git a/utilities/automake.mk b/utilities/automake.mk +index a5bb27e2b9..d8ae93ab87 100644 +--- a/utilities/automake.mk ++++ b/utilities/automake.mk +@@ -150,6 +150,7 @@ endif + + FLAKE8_PYFILES += utilities/ovs-pcap.in \ + utilities/checkpatch.py utilities/ovs-dev.py \ ++ utilities/ovs-check-dead-ifs.in \ + utilities/ovs-tcpdump.in \ + utilities/ovs-pipegen.py + +diff --git a/utilities/ovs-check-dead-ifs.in b/utilities/ovs-check-dead-ifs.in +index ac54f6c9ce..73e4fd9e10 100755 +--- a/utilities/ovs-check-dead-ifs.in ++++ b/utilities/ovs-check-dead-ifs.in +@@ -37,7 +37,7 @@ for ifname in 
os.listdir("/sys/class/net"): + except IOError: + pass + except ValueError: +- print "%s: unexpected format\n" % fn ++ print("%s: unexpected format\n" % fn) + + # Get inodes for all packet sockets whose ifindexes don't exist. + invalid_inodes = set() +@@ -95,8 +95,8 @@ for pid in os.listdir("/proc"): + bad_pids.add(pid) + + if bad_pids: +- print """ ++ print(""" + The following processes are listening for packets to arrive on network devices +-that no longer exist. You may want to restart them.""" ++that no longer exist. You may want to restart them.""") + sys.stdout.flush() +- os.execvp("ps", ["ps"] + ["%s" % pid for pid in bad_pids]) ++ os.execvp("ps", ["ps"] + ["%s" % pspid for pspid in bad_pids]) +diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in +index 9c2a092ea7..8c5cd70327 100644 +--- a/utilities/ovs-ctl.in ++++ b/utilities/ovs-ctl.in +@@ -144,6 +144,7 @@ do_start_ovsdb () { + set "$@" --certificate=db:Open_vSwitch,SSL,certificate + set "$@" --bootstrap-ca-cert=db:Open_vSwitch,SSL,ca_cert + [ "$OVS_USER" != "" ] && set "$@" --user "$OVS_USER" ++ [ "$OVSDB_SERVER_OPTIONS" != "" ] && set "$@" $OVSDB_SERVER_OPTIONS + + start_daemon "$OVSDB_SERVER_PRIORITY" "$OVSDB_SERVER_WRAPPER" "$@" \ + || return 1 +@@ -213,6 +214,7 @@ do_start_forwarding () { + set "$@" --no-self-confinement + fi + [ "$OVS_USER" != "" ] && set "$@" --user "$OVS_USER" ++ [ "$OVS_VSWITCHD_OPTIONS" != "" ] &&set "$@" $OVS_VSWITCHD_OPTIONS + + start_daemon "$OVS_VSWITCHD_PRIORITY" "$OVS_VSWITCHD_WRAPPER" "$@" || + return 1 +@@ -326,6 +328,8 @@ set_defaults () { + OVS_VSWITCHD_PRIORITY=-10 + OVSDB_SERVER_WRAPPER= + OVS_VSWITCHD_WRAPPER= ++ OVSDB_SERVER_OPTIONS= ++ OVS_VSWITCHD_OPTIONS= + + DB_FILE=$dbdir/conf.db + DB_SOCK=$rundir/db.sock +diff --git a/utilities/ovs-tcpundump.in b/utilities/ovs-tcpundump.in +index c298700624..b729276a99 100755 +--- a/utilities/ovs-tcpundump.in ++++ b/utilities/ovs-tcpundump.in +@@ -47,6 +47,7 @@ if __name__ == "__main__": + usage() + elif key in ['-V', '--version']: + print("ovs-tcpundump (Open vSwitch) @VERSION@") ++ sys.exit(0) + else: + sys.exit(0) + +@@ -56,7 +57,7 @@ if __name__ == "__main__": + sys.exit(1) + + packet = '' +- regex = re.compile(r'^\s+0x([0-9a-fA-F]+): ((?: [0-9a-fA-F]{4})+)') ++ regex = re.compile(r'^\s+0x([0-9a-fA-F]+): ((?: [0-9a-fA-F]{2,4})+)') + while True: + line = sys.stdin.readline() + if line == "": +diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c +index 83708ee51c..15118af775 100644 +--- a/vswitchd/bridge.c ++++ b/vswitchd/bridge.c +@@ -494,6 +494,7 @@ bridge_init(const char *remote) + ifaces_changed = seq_create(); + last_ifaces_changed = seq_read(ifaces_changed); + ifnotifier = if_notifier_create(if_change_cb, NULL); ++ if_notifier_manual_set_cb(if_change_cb); + } + + void +@@ -501,6 +502,7 @@ bridge_exit(bool delete_datapath) + { + struct bridge *br, *next_br; + ++ if_notifier_manual_set_cb(NULL); + if_notifier_destroy(ifnotifier); + seq_destroy(ifaces_changed); + HMAP_FOR_EACH_SAFE (br, next_br, node, &all_bridges) { +@@ -1802,8 +1804,13 @@ iface_do_create(const struct bridge *br, + *ofp_portp = iface_pick_ofport(iface_cfg); + error = ofproto_port_add(br->ofproto, netdev, ofp_portp); + if (error) { +- VLOG_WARN_BUF(errp, "could not add network device %s to ofproto (%s)", +- iface_cfg->name, ovs_strerror(error)); ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); ++ ++ *errp = xasprintf("could not add network device %s to ofproto (%s)", ++ iface_cfg->name, ovs_strerror(error)); ++ if (!VLOG_DROP_WARN(&rl)) { ++ VLOG_WARN("%s", 
*errp);
++ }
+ goto error;
+ }
+
+diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
+index 1db4e86946..573fe4e4af 100644
+--- a/vswitchd/vswitch.xml
++++ b/vswitchd/vswitch.xml
+@@ -2535,10 +2535,16 @@
+
+ <dt><code>gre</code></dt>
+ <dd>
+- Generic Routing Encapsulation (GRE) over IPv4/IPv6 tunnel,
++ Generic Routing Encapsulation (GRE) over IPv4 tunnel,
+ configurable to encapsulate layer 2 or layer 3 traffic.
+ </dd>
+
++ <dt><code>ip6gre</code></dt>
++ <dd>
++ Generic Routing Encapsulation (GRE) over IPv6 tunnel,
++ encapsulating layer 2 traffic.
++ </dd>
++
+ <dt><code>vxlan</code></dt>
+ <dd>
+ <p>
+@@ -2597,8 +2603,8 @@
+ <group title="Tunnel Options">
+ <p>
+ These options apply to interfaces with <ref column="type"/> of
+- <code>geneve</code>, <code>gre</code>, <code>vxlan</code>,
+- <code>lisp</code> and <code>stt</code>.
++ <code>geneve</code>, <code>gre</code>, <code>ip6gre</code>,
++ <code>vxlan</code>, <code>lisp</code> and <code>stt</code>.
+ </p>
+
+ <p>
+@@ -2895,10 +2901,10 @@
+ </column>
+ </group>
+
+- <group title="Tunnel Options: gre, geneve, and vxlan">
++ <group title="Tunnel Options: gre, ip6gre, geneve, and vxlan">
+ <p>
+- <code>gre</code>, <code>geneve</code>, and
+- <code>vxlan</code> interfaces support these options.
++ <code>gre</code>, <code>ip6gre</code>, <code>geneve</code>,
++ and <code>vxlan</code> interfaces support these options.
+ </p>
+
+ <column name="options" key="csum" type='{"type": "boolean"}'>
+@@ -2912,8 +2918,9 @@
+ <p>
+ When using the upstream Linux kernel module, computation of
+ checksums for <code>geneve</code> and <code>vxlan</code> requires
+- Linux kernel version 4.0 or higher. <code>gre</code> supports
+- checksums for all versions of Open vSwitch that support GRE.
++ Linux kernel version 4.0 or higher. <code>gre</code> and
++ <code>ip6gre</code> support checksums for all versions of
++ Open vSwitch that support GRE.
+ The out of tree kernel module distributed as part of OVS
+ can compute all tunnel checksums on any kernel version that it
+ is compatible with.
+@@ -2925,10 +2932,11 @@
+ <group title="Tunnel Options: IPsec">
+ <p>
+ Setting any of these options enables IPsec support for a given
+- tunnel. <code>gre</code>, <code>geneve</code>, <code>vxlan</code>,
+- and <code>stt</code> interfaces support these options. See the
+- <code>IPsec</code> section in the <ref table="Open_vSwitch"/> table
+- for a description of each mode.
++ tunnel. <code>gre</code>, <code>ip6gre</code>,
++ <code>geneve</code>, <code>vxlan</code> and <code>stt</code>
++ interfaces support these options. See the <code>IPsec</code>
++ section in the <ref table="Open_vSwitch"/> table for a description
++ of each mode.
+ </p>
+ <column name="options" key="psk" type='{"type": "string"}'>
+ <p>
+@@ -3100,6 +3108,18 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
+ </p>
+ </column>
+
++ <column name="options" key="tx-retries-max"
++ type='{"type": "integer", "minInteger": 0, "maxInteger": 32}'>
++ <p>
++ The value specifies the maximum number of vhost tx retries that can
++ be made while trying to send a batch of packets to an interface.
++ Only supported by dpdkvhostuserclient interfaces.
++ </p>
++ <p>
++ Default value is 8.
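++ <!-- Retries are attempted only while the guest's virtio ring has no
++ free descriptors, so raising this limit trades extra PMD cycles for
++ fewer drops when a guest drains its ring slowly. -->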
++ </p> ++ </column> ++ + <column name="options" key="n_rxq_desc" + type='{"type": "integer", "minInteger": 1, "maxInteger": 4096}'> + <p> diff --git a/SOURCES/ppc_64-power8-linuxapp-gcc-config b/SOURCES/ppc_64-power8-linuxapp-gcc-config new file mode 100644 index 0000000..a2964ff --- /dev/null +++ b/SOURCES/ppc_64-power8-linuxapp-gcc-config @@ -0,0 +1,549 @@ +# -*- cfg-sha: 64cb6ddcd2aa862a6cc9bcb3de422441794ff18e6301fc5091cc89ae53d2cf28 +# BSD LICENSE +# Copyright (C) IBM Corporation 2014. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of IBM Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2016 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2017 Intel Corporation +# RTE_EXEC_ENV values are the directories in mk/exec-env/ +CONFIG_RTE_EXEC_ENV="linuxapp" +# RTE_ARCH values are architecture we compile for. directories in mk/arch/ +CONFIG_RTE_ARCH="ppc_64" +# machine can define specific variables or action for a specific board +# RTE_MACHINE values are architecture we compile for. directories in mk/machine/ +CONFIG_RTE_MACHINE="power8" +# The compiler we use. +# RTE_TOOLCHAIN values are architecture we compile for. directories in mk/toolchain/ +CONFIG_RTE_TOOLCHAIN="gcc" +# Use intrinsics or assembly code for key routines +CONFIG_RTE_FORCE_INTRINSICS=n +# Machine forces strict alignment constraints. 
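+# (POWER8 handles unaligned accesses in hardware, which is presumably why
+# strict alignment is left disabled for this target.)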
+CONFIG_RTE_ARCH_STRICT_ALIGN=n +# Compile to share library +CONFIG_RTE_BUILD_SHARED_LIB=n +# Use newest code breaking previous ABI +CONFIG_RTE_NEXT_ABI=n +# Major ABI to overwrite library specific LIBABIVER +CONFIG_RTE_MAJOR_ABI= +# Machine's cache line size +CONFIG_RTE_CACHE_LINE_SIZE=128 +# Memory model +CONFIG_RTE_USE_C11_MEM_MODEL=n +# Compile Environment Abstraction Layer +CONFIG_RTE_LIBRTE_EAL=y +CONFIG_RTE_MAX_LCORE=256 +CONFIG_RTE_MAX_NUMA_NODES=32 +CONFIG_RTE_MAX_HEAPS=32 +CONFIG_RTE_MAX_MEMSEG_LISTS=64 +# each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages +# or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is smaller +CONFIG_RTE_MAX_MEMSEG_PER_LIST=8192 +CONFIG_RTE_MAX_MEM_MB_PER_LIST=32768 +# a "type" is a combination of page size and NUMA node. total number of memseg +# lists per type will be limited to either RTE_MAX_MEMSEG_PER_TYPE pages (split +# over multiple lists of RTE_MAX_MEMSEG_PER_LIST pages), or +# RTE_MAX_MEM_MB_PER_TYPE megabytes of memory (split over multiple lists of +# RTE_MAX_MEM_MB_PER_LIST), whichever is smaller +CONFIG_RTE_MAX_MEMSEG_PER_TYPE=32768 +CONFIG_RTE_MAX_MEM_MB_PER_TYPE=131072 +# global maximum usable amount of VA, in megabytes +CONFIG_RTE_MAX_MEM_MB=524288 +CONFIG_RTE_MAX_MEMZONE=2560 +CONFIG_RTE_MAX_TAILQ=32 +CONFIG_RTE_ENABLE_ASSERT=n +CONFIG_RTE_LOG_DP_LEVEL=RTE_LOG_INFO +CONFIG_RTE_LOG_HISTORY=256 +CONFIG_RTE_BACKTRACE=y +CONFIG_RTE_LIBEAL_USE_HPET=n +CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n +CONFIG_RTE_EAL_IGB_UIO=n +CONFIG_RTE_EAL_VFIO=y +CONFIG_RTE_MAX_VFIO_GROUPS=64 +CONFIG_RTE_MAX_VFIO_CONTAINERS=64 +CONFIG_RTE_MALLOC_DEBUG=n +CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=y +CONFIG_RTE_USE_LIBBSD=n +# Recognize/ignore architecture we compile for. AVX/AVX512 CPU flags for performance/power testing. +# AVX512 is marked as experimental for now, will enable it after enough +# field test and possible optimization. +CONFIG_RTE_ENABLE_AVX=y +CONFIG_RTE_ENABLE_AVX512=n +# Default driver path (or "" to disable) +CONFIG_RTE_EAL_PMD_PATH="" +# Compile Environment Abstraction Layer to support Vmware TSC map +CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=n +# Compile architecture we compile for. PCI library +CONFIG_RTE_LIBRTE_PCI=y +# Compile architecture we compile for. argument parser library +CONFIG_RTE_LIBRTE_KVARGS=y +# Compile generic ethernet library +CONFIG_RTE_LIBRTE_ETHER=y +CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n +CONFIG_RTE_MAX_ETHPORTS=128 +CONFIG_RTE_MAX_QUEUES_PER_PORT=1024 +CONFIG_RTE_LIBRTE_IEEE1588=n +CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 +CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y +CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE=n +# Turn off Tx preparation stage +# Warning: rte_eth_tx_prepare() can be safely disabled only if using a +# driver which do not implement any Tx preparation. +CONFIG_RTE_ETHDEV_TX_PREPARE_NOOP=n +# Common libraries, before Bus/PMDs +CONFIG_RTE_LIBRTE_COMMON_DPAAX=n +# Compile architecture we compile for. Intel FPGA bus +CONFIG_RTE_LIBRTE_IFPGA_BUS=n +# Compile PCI bus driver +CONFIG_RTE_LIBRTE_PCI_BUS=y +# Compile architecture we compile for. 
vdev bus +CONFIG_RTE_LIBRTE_VDEV_BUS=y +# Compile ARK PMD +CONFIG_RTE_LIBRTE_ARK_PMD=n +CONFIG_RTE_LIBRTE_ARK_PAD_TX=y +CONFIG_RTE_LIBRTE_ARK_DEBUG_RX=n +CONFIG_RTE_LIBRTE_ARK_DEBUG_TX=n +CONFIG_RTE_LIBRTE_ARK_DEBUG_STATS=n +CONFIG_RTE_LIBRTE_ARK_DEBUG_TRACE=n +# Compile Aquantia Atlantic PMD driver +CONFIG_RTE_LIBRTE_ATLANTIC_PMD=n +# Compile AMD PMD +CONFIG_RTE_LIBRTE_AXGBE_PMD=n +CONFIG_RTE_LIBRTE_AXGBE_PMD_DEBUG=n +# Compile burst-oriented Broadcom PMD driver +CONFIG_RTE_LIBRTE_BNX2X_PMD=n +CONFIG_RTE_LIBRTE_BNX2X_DEBUG_RX=n +CONFIG_RTE_LIBRTE_BNX2X_DEBUG_TX=n +CONFIG_RTE_LIBRTE_BNX2X_MF_SUPPORT=n +CONFIG_RTE_LIBRTE_BNX2X_DEBUG_PERIODIC=n +# Compile burst-oriented Broadcom BNXT PMD driver +CONFIG_RTE_LIBRTE_BNXT_PMD=n +# Compile burst-oriented Chelsio Terminator (CXGBE) PMD +CONFIG_RTE_LIBRTE_CXGBE_PMD=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_REG=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_MBOX=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_TX=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_RX=n +CONFIG_RTE_LIBRTE_CXGBE_TPUT=y +# NXP DPAA Bus +CONFIG_RTE_LIBRTE_DPAA_BUS=n +CONFIG_RTE_LIBRTE_DPAA_MEMPOOL=n +CONFIG_RTE_LIBRTE_DPAA_PMD=n +CONFIG_RTE_LIBRTE_DPAA_HWDEBUG=n +# Compile NXP DPAA2 FSL-MC Bus +CONFIG_RTE_LIBRTE_FSLMC_BUS=n +# Compile Support Libraries for NXP DPAA2 +CONFIG_RTE_LIBRTE_DPAA2_MEMPOOL=n +CONFIG_RTE_LIBRTE_DPAA2_USE_PHYS_IOVA=y +# Compile burst-oriented NXP DPAA2 PMD driver +CONFIG_RTE_LIBRTE_DPAA2_PMD=n +CONFIG_RTE_LIBRTE_DPAA2_DEBUG_DRIVER=n +# Compile NXP ENETC PMD Driver +CONFIG_RTE_LIBRTE_ENETC_PMD=n +# Compile burst-oriented Amazon ENA PMD driver +CONFIG_RTE_LIBRTE_ENA_PMD=n +CONFIG_RTE_LIBRTE_ENA_DEBUG_RX=n +CONFIG_RTE_LIBRTE_ENA_DEBUG_TX=n +CONFIG_RTE_LIBRTE_ENA_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_ENA_COM_DEBUG=n +# Compile burst-oriented Cisco ENIC PMD driver +CONFIG_RTE_LIBRTE_ENIC_PMD=n +# Compile burst-oriented IGB & EM PMD drivers +CONFIG_RTE_LIBRTE_EM_PMD=n +CONFIG_RTE_LIBRTE_IGB_PMD=n +CONFIG_RTE_LIBRTE_E1000_DEBUG_RX=n +CONFIG_RTE_LIBRTE_E1000_DEBUG_TX=n +CONFIG_RTE_LIBRTE_E1000_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_E1000_PF_DISABLE_STRIP_CRC=n +# Compile burst-oriented IXGBE PMD driver +CONFIG_RTE_LIBRTE_IXGBE_PMD=n +CONFIG_RTE_LIBRTE_IXGBE_DEBUG_RX=n +CONFIG_RTE_LIBRTE_IXGBE_DEBUG_TX=n +CONFIG_RTE_LIBRTE_IXGBE_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC=n +CONFIG_RTE_IXGBE_INC_VECTOR=y +CONFIG_RTE_LIBRTE_IXGBE_BYPASS=n +# Compile burst-oriented I40E PMD driver +CONFIG_RTE_LIBRTE_I40E_PMD=y +CONFIG_RTE_LIBRTE_I40E_DEBUG_RX=n +CONFIG_RTE_LIBRTE_I40E_DEBUG_TX=n +CONFIG_RTE_LIBRTE_I40E_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC=y +CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y +CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n +CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64 +CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4 +# Compile burst-oriented FM10K PMD +CONFIG_RTE_LIBRTE_FM10K_PMD=n +CONFIG_RTE_LIBRTE_FM10K_DEBUG_RX=n +CONFIG_RTE_LIBRTE_FM10K_DEBUG_TX=n +CONFIG_RTE_LIBRTE_FM10K_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE=y +CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y +# Compile burst-oriented AVF PMD driver +CONFIG_RTE_LIBRTE_AVF_PMD=n +CONFIG_RTE_LIBRTE_AVF_INC_VECTOR=y +CONFIG_RTE_LIBRTE_AVF_DEBUG_TX=n +CONFIG_RTE_LIBRTE_AVF_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_AVF_DEBUG_RX=n +CONFIG_RTE_LIBRTE_AVF_16BYTE_RX_DESC=n +# Compile burst-oriented Mellanox ConnectX-3 (MLX4) PMD +CONFIG_RTE_LIBRTE_MLX4_PMD=n +CONFIG_RTE_LIBRTE_MLX4_DEBUG=n +CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS=n +# Compile burst-oriented Mellanox ConnectX-4, ConnectX-5 & Bluefield +# (MLX5) PMD 
+CONFIG_RTE_LIBRTE_MLX5_PMD=n +CONFIG_RTE_LIBRTE_MLX5_DEBUG=n +CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS=n +# Compile burst-oriented Netronome NFP PMD driver +CONFIG_RTE_LIBRTE_NFP_PMD=n +CONFIG_RTE_LIBRTE_NFP_DEBUG_TX=n +CONFIG_RTE_LIBRTE_NFP_DEBUG_RX=n +# QLogic 10G/25G/40G/50G/100G PMD +CONFIG_RTE_LIBRTE_QEDE_PMD=n +CONFIG_RTE_LIBRTE_QEDE_DEBUG_TX=n +CONFIG_RTE_LIBRTE_QEDE_DEBUG_RX=n +#Provides abs path/name of architecture we compile for. firmware file. +#Empty string denotes driver will use default firmware +CONFIG_RTE_LIBRTE_QEDE_FW="" +# Compile burst-oriented Solarflare libefx-based PMD +CONFIG_RTE_LIBRTE_SFC_EFX_PMD=n +CONFIG_RTE_LIBRTE_SFC_EFX_DEBUG=n +# Compile software PMD backed by SZEDATA2 device +CONFIG_RTE_LIBRTE_PMD_SZEDATA2=n +# Compile burst-oriented Cavium Thunderx NICVF PMD driver +CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD=n +CONFIG_RTE_LIBRTE_THUNDERX_NICVF_DEBUG_RX=n +CONFIG_RTE_LIBRTE_THUNDERX_NICVF_DEBUG_TX=n +# Compile burst-oriented Cavium LiquidIO PMD driver +CONFIG_RTE_LIBRTE_LIO_PMD=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_RX=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_TX=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_MBOX=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_REGS=n +# Compile burst-oriented Cavium OCTEONTX network PMD driver +CONFIG_RTE_LIBRTE_OCTEONTX_PMD=n +# Compile WRS accelerated virtual port (AVP) guest PMD driver +CONFIG_RTE_LIBRTE_AVP_PMD=n +CONFIG_RTE_LIBRTE_AVP_DEBUG_RX=n +CONFIG_RTE_LIBRTE_AVP_DEBUG_TX=n +CONFIG_RTE_LIBRTE_AVP_DEBUG_BUFFERS=n +# Compile burst-oriented VIRTIO PMD driver +CONFIG_RTE_LIBRTE_VIRTIO_PMD=y +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n +# Compile virtio device emulation inside virtio PMD driver +CONFIG_RTE_VIRTIO_USER=n +# Compile burst-oriented VMXNET3 PMD driver +CONFIG_RTE_LIBRTE_VMXNET3_PMD=n +CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_RX=n +CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX=n +CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX_FREE=n +# Compile software PMD backed by AF_PACKET sockets (Linux only) +CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n +# Compile link bonding PMD library +CONFIG_RTE_LIBRTE_PMD_BOND=n +CONFIG_RTE_LIBRTE_BOND_DEBUG_ALB=n +CONFIG_RTE_LIBRTE_BOND_DEBUG_ALB_L1=n +# Compile fail-safe PMD +CONFIG_RTE_LIBRTE_PMD_FAILSAFE=y +# Compile Marvell PMD driver +CONFIG_RTE_LIBRTE_MVPP2_PMD=n +# Compile Marvell MVNETA PMD driver +CONFIG_RTE_LIBRTE_MVNETA_PMD=n +# Compile support for VMBus library +CONFIG_RTE_LIBRTE_VMBUS=n +# Compile native PMD for Hyper-V/Azure +CONFIG_RTE_LIBRTE_NETVSC_PMD=n +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_RX=n +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_TX=n +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_DUMP=n +# Compile virtual device driver for NetVSC on Hyper-V/Azure +CONFIG_RTE_LIBRTE_VDEV_NETVSC_PMD=n +# Compile null PMD +CONFIG_RTE_LIBRTE_PMD_NULL=n +# Compile software PMD backed by PCAP files +CONFIG_RTE_LIBRTE_PMD_PCAP=n +# Compile example software rings based PMD +CONFIG_RTE_LIBRTE_PMD_RING=y +CONFIG_RTE_PMD_RING_MAX_RX_RINGS=16 +CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16 +# Compile SOFTNIC PMD +CONFIG_RTE_LIBRTE_PMD_SOFTNIC=n +# Compile architecture we compile for. TAP PMD +# It is enabled by default for Linux only. 
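+# (Editor's gloss, not from the original config: DPDK's tap PMD,
+# librte_pmd_tap, attaches to Linux tap/tun devices rather than physical
+# NICs, which is why it can stay enabled in an otherwise pared-down build.)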
+CONFIG_RTE_LIBRTE_PMD_TAP=y +# Do prefetch of packet data within PMD driver receive function +CONFIG_RTE_PMD_PACKET_PREFETCH=y +# Compile generic wireless base band device library +# EXPERIMENTAL: API may change without prior notice +CONFIG_RTE_LIBRTE_BBDEV=n +CONFIG_RTE_BBDEV_MAX_DEVS=128 +CONFIG_RTE_BBDEV_OFFLOAD_COST=n +# Compile PMD for NULL bbdev device +CONFIG_RTE_LIBRTE_PMD_BBDEV_NULL=n +# Compile PMD for turbo software bbdev device +CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW=n +# Compile generic crypto device library +CONFIG_RTE_LIBRTE_CRYPTODEV=n +CONFIG_RTE_CRYPTO_MAX_DEVS=64 +# Compile PMD for ARMv8 Crypto device +CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO=n +CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO_DEBUG=n +# Compile NXP CAAM JR crypto Driver +CONFIG_RTE_LIBRTE_PMD_CAAM_JR=n +CONFIG_RTE_LIBRTE_PMD_CAAM_JR_BE=n +# Compile NXP DPAA2 crypto sec driver for CAAM HW +CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC=n +# NXP DPAA caam - crypto driver +CONFIG_RTE_LIBRTE_PMD_DPAA_SEC=n +CONFIG_RTE_LIBRTE_DPAA_MAX_CRYPTODEV=4 +# Compile PMD for Cavium OCTEON TX crypto device +CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO=n +# Compile PMD for QuickAssist based devices - see docs for details +CONFIG_RTE_LIBRTE_PMD_QAT=n +CONFIG_RTE_LIBRTE_PMD_QAT_SYM=n +# Max. number of QuickAssist devices, which can be detected and attached +CONFIG_RTE_PMD_QAT_MAX_PCI_DEVICES=48 +CONFIG_RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS=16 +CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE=65536 +# Compile PMD for virtio crypto devices +CONFIG_RTE_LIBRTE_PMD_VIRTIO_CRYPTO=n +# Number of maximum virtio crypto devices +CONFIG_RTE_MAX_VIRTIO_CRYPTO=32 +# Compile PMD for AESNI backed device +CONFIG_RTE_LIBRTE_PMD_AESNI_MB=n +# Compile PMD for Software backed device +CONFIG_RTE_LIBRTE_PMD_OPENSSL=n +# Compile PMD for AESNI GCM device +CONFIG_RTE_LIBRTE_PMD_AESNI_GCM=n +# Compile PMD for SNOW 3G device +CONFIG_RTE_LIBRTE_PMD_SNOW3G=n +CONFIG_RTE_LIBRTE_PMD_SNOW3G_DEBUG=n +# Compile PMD for KASUMI device +CONFIG_RTE_LIBRTE_PMD_KASUMI=n +# Compile PMD for ZUC device +CONFIG_RTE_LIBRTE_PMD_ZUC=n +# Compile PMD for Crypto Scheduler device +CONFIG_RTE_LIBRTE_PMD_CRYPTO_SCHEDULER=n +# Compile PMD for NULL Crypto device +CONFIG_RTE_LIBRTE_PMD_NULL_CRYPTO=n +# Compile PMD for AMD CCP crypto device +CONFIG_RTE_LIBRTE_PMD_CCP=n +# Compile PMD for Marvell Crypto device +CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO=n +# Compile generic security library +CONFIG_RTE_LIBRTE_SECURITY=n +# Compile generic compression device library +CONFIG_RTE_LIBRTE_COMPRESSDEV=n +CONFIG_RTE_COMPRESS_MAX_DEVS=64 +# Compile compressdev unit test +CONFIG_RTE_COMPRESSDEV_TEST=n +# Compile PMD for Octeontx ZIPVF compression device +CONFIG_RTE_LIBRTE_PMD_OCTEONTX_ZIPVF=n +# Compile PMD for ISA-L compression device +CONFIG_RTE_LIBRTE_PMD_ISAL=n +# Compile PMD for ZLIB compression device +CONFIG_RTE_LIBRTE_PMD_ZLIB=n +# Compile generic event device library +CONFIG_RTE_LIBRTE_EVENTDEV=n +CONFIG_RTE_LIBRTE_EVENTDEV_DEBUG=n +CONFIG_RTE_EVENT_MAX_DEVS=16 +CONFIG_RTE_EVENT_MAX_QUEUES_PER_DEV=64 +CONFIG_RTE_EVENT_TIMER_ADAPTER_NUM_MAX=32 +CONFIG_RTE_EVENT_ETH_INTR_RING_SIZE=1024 +CONFIG_RTE_EVENT_CRYPTO_ADAPTER_MAX_INSTANCE=32 +CONFIG_RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE=32 +# Compile PMD for skeleton event device +CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV=n +CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV_DEBUG=n +# Compile PMD for software event device +CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV=n +# Compile PMD for distributed software event device +CONFIG_RTE_LIBRTE_PMD_DSW_EVENTDEV=n +# Compile PMD for octeontx sso event device 
+CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF=n +# Compile PMD for OPDL event device +CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV=n +# Compile PMD for NXP DPAA event device +CONFIG_RTE_LIBRTE_PMD_DPAA_EVENTDEV=n +# Compile PMD for NXP DPAA2 event device +CONFIG_RTE_LIBRTE_PMD_DPAA2_EVENTDEV=n +# Compile raw device support +# EXPERIMENTAL: API may change without prior notice +CONFIG_RTE_LIBRTE_RAWDEV=n +CONFIG_RTE_RAWDEV_MAX_DEVS=10 +CONFIG_RTE_LIBRTE_PMD_SKELETON_RAWDEV=n +# Compile PMD for NXP DPAA2 CMDIF raw device +CONFIG_RTE_LIBRTE_PMD_DPAA2_CMDIF_RAWDEV=n +# Compile PMD for NXP DPAA2 QDMA raw device +CONFIG_RTE_LIBRTE_PMD_DPAA2_QDMA_RAWDEV=n +# Compile PMD for Intel FPGA raw device +CONFIG_RTE_LIBRTE_PMD_IFPGA_RAWDEV=n +# Compile librte_ring +CONFIG_RTE_LIBRTE_RING=y +# Compile librte_mempool +CONFIG_RTE_LIBRTE_MEMPOOL=y +CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE=512 +CONFIG_RTE_LIBRTE_MEMPOOL_DEBUG=n +# Compile Mempool drivers +CONFIG_RTE_DRIVER_MEMPOOL_BUCKET=y +CONFIG_RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB=64 +CONFIG_RTE_DRIVER_MEMPOOL_RING=y +CONFIG_RTE_DRIVER_MEMPOOL_STACK=y +# Compile PMD for octeontx fpa mempool device +CONFIG_RTE_LIBRTE_OCTEONTX_MEMPOOL=n +# Compile librte_mbuf +CONFIG_RTE_LIBRTE_MBUF=y +CONFIG_RTE_LIBRTE_MBUF_DEBUG=n +CONFIG_RTE_MBUF_DEFAULT_MEMPOOL_OPS="ring_mp_mc" +CONFIG_RTE_MBUF_REFCNT_ATOMIC=y +CONFIG_RTE_PKTMBUF_HEADROOM=128 +# Compile librte_timer +CONFIG_RTE_LIBRTE_TIMER=n +CONFIG_RTE_LIBRTE_TIMER_DEBUG=n +# Compile librte_cfgfile +CONFIG_RTE_LIBRTE_CFGFILE=n +# Compile librte_cmdline +CONFIG_RTE_LIBRTE_CMDLINE=y +CONFIG_RTE_LIBRTE_CMDLINE_DEBUG=n +# Compile librte_hash +CONFIG_RTE_LIBRTE_HASH=y +CONFIG_RTE_LIBRTE_HASH_DEBUG=n +# Compile librte_efd +CONFIG_RTE_LIBRTE_EFD=n +# Compile librte_member +CONFIG_RTE_LIBRTE_MEMBER=y +# Compile librte_jobstats +CONFIG_RTE_LIBRTE_JOBSTATS=n +# Compile architecture we compile for. device metrics library +CONFIG_RTE_LIBRTE_METRICS=y +# Compile architecture we compile for. bitrate statistics library +CONFIG_RTE_LIBRTE_BITRATE=y +# Compile architecture we compile for. latency statistics library +CONFIG_RTE_LIBRTE_LATENCY_STATS=y +# Compile librte_telemetry +CONFIG_RTE_LIBRTE_TELEMETRY=n +# Compile librte_lpm +CONFIG_RTE_LIBRTE_LPM=n +CONFIG_RTE_LIBRTE_LPM_DEBUG=n +# Compile librte_acl +CONFIG_RTE_LIBRTE_ACL=n +CONFIG_RTE_LIBRTE_ACL_DEBUG=n +# Compile librte_power +CONFIG_RTE_LIBRTE_POWER=n +CONFIG_RTE_LIBRTE_POWER_DEBUG=n +CONFIG_RTE_MAX_LCORE_FREQS=64 +# Compile librte_net +CONFIG_RTE_LIBRTE_NET=y +# Compile librte_ip_frag +CONFIG_RTE_LIBRTE_IP_FRAG=y +CONFIG_RTE_LIBRTE_IP_FRAG_DEBUG=n +CONFIG_RTE_LIBRTE_IP_FRAG_MAX_FRAG=4 +CONFIG_RTE_LIBRTE_IP_FRAG_TBL_STAT=n +# Compile GRO library +CONFIG_RTE_LIBRTE_GRO=y +# Compile GSO library +CONFIG_RTE_LIBRTE_GSO=y +# Compile librte_meter +CONFIG_RTE_LIBRTE_METER=y +# Compile librte_classify +CONFIG_RTE_LIBRTE_FLOW_CLASSIFY=n +# Compile librte_sched +CONFIG_RTE_LIBRTE_SCHED=n +CONFIG_RTE_SCHED_DEBUG=n +CONFIG_RTE_SCHED_RED=n +CONFIG_RTE_SCHED_COLLECT_STATS=n +CONFIG_RTE_SCHED_SUBPORT_TC_OV=n +CONFIG_RTE_SCHED_PORT_N_GRINDERS=8 +CONFIG_RTE_SCHED_VECTOR=n +# Compile architecture we compile for. distributor library +CONFIG_RTE_LIBRTE_DISTRIBUTOR=n +# Compile architecture we compile for. 
reorder library +CONFIG_RTE_LIBRTE_REORDER=n +# Compile librte_port +CONFIG_RTE_LIBRTE_PORT=n +CONFIG_RTE_PORT_STATS_COLLECT=n +CONFIG_RTE_PORT_PCAP=n +# Compile librte_table +CONFIG_RTE_LIBRTE_TABLE=n +CONFIG_RTE_TABLE_STATS_COLLECT=n +# Compile librte_pipeline +CONFIG_RTE_LIBRTE_PIPELINE=n +CONFIG_RTE_PIPELINE_STATS_COLLECT=n +# Compile librte_kni +CONFIG_RTE_LIBRTE_KNI=n +CONFIG_RTE_LIBRTE_PMD_KNI=n +CONFIG_RTE_KNI_KMOD=n +CONFIG_RTE_KNI_KMOD_ETHTOOL=n +CONFIG_RTE_KNI_PREEMPT_DEFAULT=y +# Compile architecture we compile for. pdump library +CONFIG_RTE_LIBRTE_PDUMP=y +# Compile vhost user library +CONFIG_RTE_LIBRTE_VHOST=y +CONFIG_RTE_LIBRTE_VHOST_NUMA=y +CONFIG_RTE_LIBRTE_VHOST_DEBUG=n +# Compile vhost PMD +# To compile, CONFIG_RTE_LIBRTE_VHOST should be enabled. +CONFIG_RTE_LIBRTE_PMD_VHOST=y +# Compile IFC driver +# To compile, CONFIG_RTE_LIBRTE_VHOST and CONFIG_RTE_EAL_VFIO +# should be enabled. +CONFIG_RTE_LIBRTE_IFC_PMD=n +# Compile librte_bpf +CONFIG_RTE_LIBRTE_BPF=n +# allow load BPF from ELF files (requires libelf) +CONFIG_RTE_LIBRTE_BPF_ELF=n +# Compile architecture we compile for. test application +CONFIG_RTE_APP_TEST=y +CONFIG_RTE_APP_TEST_RESOURCE_TAR=n +# Compile architecture we compile for. procinfo application +CONFIG_RTE_PROC_INFO=y +# Compile architecture we compile for. PMD test application +CONFIG_RTE_TEST_PMD=n +CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n +CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n +# Compile architecture we compile for. bbdev test application +CONFIG_RTE_TEST_BBDEV=n +# Compile architecture we compile for. crypto performance application +CONFIG_RTE_APP_CRYPTO_PERF=n +# Compile architecture we compile for. eventdev application +CONFIG_RTE_APP_EVENTDEV=n +CONFIG_RTE_EXEC_ENV_LINUXAPP=y +CONFIG_RTE_LIBRTE_VHOST_POSTCOPY=n +# Common libraries, before Bus/PMDs +# NXP DPAA BUS and drivers +# NXP FSLMC BUS and DPAA2 drivers +# NXP ENETC PMD Driver +CONFIG_RTE_ARCH_PPC_64=y +CONFIG_RTE_ARCH_64=y +CONFIG_RTE_TOOLCHAIN_GCC=y +# Note: Power doesn't have this support +# Note: Initially, all of architecture we compile for. PMD drivers compilation are turned off on Power +# Will turn on them only after architecture we compile for. successful testing on Power +CONFIG_RTE_LIBRTE_PMD_XENVIRT=n diff --git a/SOURCES/set_config.sh b/SOURCES/set_config.sh new file mode 100755 index 0000000..002386b --- /dev/null +++ b/SOURCES/set_config.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright (C) 2017, Red Hat, Inc. +# +# set_config.sh will copy a configuration from $1 to $2, in the process +# checking that the sha header for $1 matches the header in $2 + +source configlib.sh + +if (( $# < 2 )); then + echo "$0: source dest [comment-marker]" + exit 1 +fi + +if [ ! -f "$1" ]; then + echo "Source file $1 must exist." + exit 1 +fi +src_file=$1 +shift + +if [ ! -f "$1" ]; then + echo "Dest file $1 must exist." + exit 1 +fi +dst_file=$1 +shift + +comment_sep=${1:-#} + +export LANG=en_US.utf8 + +DEST_FILE_SHA="" +SRC_FILE_SHA="" + +calc_sha DEST_FILE_SHA "$dst_file" "$comment_sep" || echo "Failed to calc sha" +retr_sha SRC_FILE_SHA "$src_file" "$comment_sep" || echo "Failed to retrieve sha" + +if [ "$DEST_FILE_SHA" != "$SRC_FILE_SHA" ]; then + echo "ERROR: The requisite starting sha from $dst_file does not match the" + echo " specified sha in $src_file." + echo "[ $DEST_FILE_SHA ] vs [ $SRC_FILE_SHA ]" + exit 1 +fi + +mv "$dst_file" "$dst_file".OLD +cp "$src_file" "$dst_file" +echo "copied 1 config file." 
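+# Editor's usage sketch (mirroring how the spec's %build step invokes this
+# script; the target name is illustrative):
+#   ./set_config.sh x86_64-native-linuxapp-gcc-config \
+#       x86_64-native-linuxapp-gcc/.config
+# calc_sha/retr_sha come from configlib.sh (Source500, not shown in this
+# patch); inferred from their use above, they compute a file's cfg-sha and
+# read its recorded "cfg-sha:" header, respectively.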
+exit 0 diff --git a/SOURCES/x86_64-native-linuxapp-gcc-config b/SOURCES/x86_64-native-linuxapp-gcc-config new file mode 100644 index 0000000..e09318b --- /dev/null +++ b/SOURCES/x86_64-native-linuxapp-gcc-config @@ -0,0 +1,524 @@ +# -*- cfg-sha: f06f2b5234bcb38cbde09c4732b27af067c30d7f3aff6966eb2c81501add74bb +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2014 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2016 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2017 Intel Corporation +# RTE_EXEC_ENV values are the directories in mk/exec-env/ +CONFIG_RTE_EXEC_ENV="linuxapp" +# RTE_ARCH values are architecture we compile for. directories in mk/arch/ +CONFIG_RTE_ARCH="x86_64" +# machine can define specific variables or action for a specific board +# RTE_MACHINE values are architecture we compile for. directories in mk/machine/ +CONFIG_RTE_MACHINE="default" +# The compiler we use. +# RTE_TOOLCHAIN values are architecture we compile for. directories in mk/toolchain/ +CONFIG_RTE_TOOLCHAIN="gcc" +# Use intrinsics or assembly code for key routines +CONFIG_RTE_FORCE_INTRINSICS=n +# Machine forces strict alignment constraints. +CONFIG_RTE_ARCH_STRICT_ALIGN=n +# Compile to share library +CONFIG_RTE_BUILD_SHARED_LIB=n +# Use newest code breaking previous ABI +CONFIG_RTE_NEXT_ABI=n +# Major ABI to overwrite library specific LIBABIVER +CONFIG_RTE_MAJOR_ABI= +# Machine's cache line size +CONFIG_RTE_CACHE_LINE_SIZE=64 +# Memory model +CONFIG_RTE_USE_C11_MEM_MODEL=n +# Compile Environment Abstraction Layer +CONFIG_RTE_LIBRTE_EAL=y +CONFIG_RTE_MAX_LCORE=128 +CONFIG_RTE_MAX_NUMA_NODES=8 +CONFIG_RTE_MAX_HEAPS=32 +CONFIG_RTE_MAX_MEMSEG_LISTS=64 +# each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages +# or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is smaller +CONFIG_RTE_MAX_MEMSEG_PER_LIST=8192 +CONFIG_RTE_MAX_MEM_MB_PER_LIST=32768 +# a "type" is a combination of page size and NUMA node. total number of memseg +# lists per type will be limited to either RTE_MAX_MEMSEG_PER_TYPE pages (split +# over multiple lists of RTE_MAX_MEMSEG_PER_LIST pages), or +# RTE_MAX_MEM_MB_PER_TYPE megabytes of memory (split over multiple lists of +# RTE_MAX_MEM_MB_PER_LIST), whichever is smaller +CONFIG_RTE_MAX_MEMSEG_PER_TYPE=32768 +CONFIG_RTE_MAX_MEM_MB_PER_TYPE=131072 +# global maximum usable amount of VA, in megabytes +CONFIG_RTE_MAX_MEM_MB=524288 +CONFIG_RTE_MAX_MEMZONE=2560 +CONFIG_RTE_MAX_TAILQ=32 +CONFIG_RTE_ENABLE_ASSERT=n +CONFIG_RTE_LOG_DP_LEVEL=RTE_LOG_INFO +CONFIG_RTE_LOG_HISTORY=256 +CONFIG_RTE_BACKTRACE=y +CONFIG_RTE_LIBEAL_USE_HPET=n +CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n +CONFIG_RTE_EAL_IGB_UIO=n +CONFIG_RTE_EAL_VFIO=y +CONFIG_RTE_MAX_VFIO_GROUPS=64 +CONFIG_RTE_MAX_VFIO_CONTAINERS=64 +CONFIG_RTE_MALLOC_DEBUG=n +CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=y +CONFIG_RTE_USE_LIBBSD=n +# Recognize/ignore architecture we compile for. AVX/AVX512 CPU flags for performance/power testing. +# AVX512 is marked as experimental for now, will enable it after enough +# field test and possible optimization. +CONFIG_RTE_ENABLE_AVX=y +CONFIG_RTE_ENABLE_AVX512=n +# Default driver path (or "" to disable) +CONFIG_RTE_EAL_PMD_PATH="" +# Compile Environment Abstraction Layer to support Vmware TSC map +CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=y +# Compile architecture we compile for. PCI library +CONFIG_RTE_LIBRTE_PCI=y +# Compile architecture we compile for. 
argument parser library +CONFIG_RTE_LIBRTE_KVARGS=y +# Compile generic ethernet library +CONFIG_RTE_LIBRTE_ETHER=y +CONFIG_RTE_LIBRTE_ETHDEV_DEBUG=n +CONFIG_RTE_MAX_ETHPORTS=128 +CONFIG_RTE_MAX_QUEUES_PER_PORT=1024 +CONFIG_RTE_LIBRTE_IEEE1588=n +CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 +CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y +CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE=n +# Turn off Tx preparation stage +# Warning: rte_eth_tx_prepare() can be safely disabled only if using a +# driver which do not implement any Tx preparation. +CONFIG_RTE_ETHDEV_TX_PREPARE_NOOP=n +# Common libraries, before Bus/PMDs +CONFIG_RTE_LIBRTE_COMMON_DPAAX=n +# Compile architecture we compile for. Intel FPGA bus +CONFIG_RTE_LIBRTE_IFPGA_BUS=n +# Compile PCI bus driver +CONFIG_RTE_LIBRTE_PCI_BUS=y +# Compile architecture we compile for. vdev bus +CONFIG_RTE_LIBRTE_VDEV_BUS=y +# Compile ARK PMD +CONFIG_RTE_LIBRTE_ARK_PMD=n +CONFIG_RTE_LIBRTE_ARK_PAD_TX=y +CONFIG_RTE_LIBRTE_ARK_DEBUG_RX=n +CONFIG_RTE_LIBRTE_ARK_DEBUG_TX=n +CONFIG_RTE_LIBRTE_ARK_DEBUG_STATS=n +CONFIG_RTE_LIBRTE_ARK_DEBUG_TRACE=n +# Compile Aquantia Atlantic PMD driver +CONFIG_RTE_LIBRTE_ATLANTIC_PMD=n +# Compile AMD PMD +CONFIG_RTE_LIBRTE_AXGBE_PMD=n +CONFIG_RTE_LIBRTE_AXGBE_PMD_DEBUG=n +# Compile burst-oriented Broadcom PMD driver +CONFIG_RTE_LIBRTE_BNX2X_PMD=n +CONFIG_RTE_LIBRTE_BNX2X_DEBUG_RX=n +CONFIG_RTE_LIBRTE_BNX2X_DEBUG_TX=n +CONFIG_RTE_LIBRTE_BNX2X_MF_SUPPORT=n +CONFIG_RTE_LIBRTE_BNX2X_DEBUG_PERIODIC=n +# Compile burst-oriented Broadcom BNXT PMD driver +CONFIG_RTE_LIBRTE_BNXT_PMD=y +# Compile burst-oriented Chelsio Terminator (CXGBE) PMD +CONFIG_RTE_LIBRTE_CXGBE_PMD=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_REG=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_MBOX=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_TX=n +CONFIG_RTE_LIBRTE_CXGBE_DEBUG_RX=n +CONFIG_RTE_LIBRTE_CXGBE_TPUT=y +# NXP DPAA Bus +CONFIG_RTE_LIBRTE_DPAA_BUS=n +CONFIG_RTE_LIBRTE_DPAA_MEMPOOL=n +CONFIG_RTE_LIBRTE_DPAA_PMD=n +CONFIG_RTE_LIBRTE_DPAA_HWDEBUG=n +# Compile NXP DPAA2 FSL-MC Bus +CONFIG_RTE_LIBRTE_FSLMC_BUS=n +# Compile Support Libraries for NXP DPAA2 +CONFIG_RTE_LIBRTE_DPAA2_MEMPOOL=n +CONFIG_RTE_LIBRTE_DPAA2_USE_PHYS_IOVA=y +# Compile burst-oriented NXP DPAA2 PMD driver +CONFIG_RTE_LIBRTE_DPAA2_PMD=n +CONFIG_RTE_LIBRTE_DPAA2_DEBUG_DRIVER=n +# Compile NXP ENETC PMD Driver +CONFIG_RTE_LIBRTE_ENETC_PMD=n +# Compile burst-oriented Amazon ENA PMD driver +CONFIG_RTE_LIBRTE_ENA_PMD=n +CONFIG_RTE_LIBRTE_ENA_DEBUG_RX=n +CONFIG_RTE_LIBRTE_ENA_DEBUG_TX=n +CONFIG_RTE_LIBRTE_ENA_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_ENA_COM_DEBUG=n +# Compile burst-oriented Cisco ENIC PMD driver +CONFIG_RTE_LIBRTE_ENIC_PMD=y +# Compile burst-oriented IGB & EM PMD drivers +CONFIG_RTE_LIBRTE_EM_PMD=n +CONFIG_RTE_LIBRTE_IGB_PMD=y +CONFIG_RTE_LIBRTE_E1000_DEBUG_RX=n +CONFIG_RTE_LIBRTE_E1000_DEBUG_TX=n +CONFIG_RTE_LIBRTE_E1000_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_E1000_PF_DISABLE_STRIP_CRC=n +# Compile burst-oriented IXGBE PMD driver +CONFIG_RTE_LIBRTE_IXGBE_PMD=y +CONFIG_RTE_LIBRTE_IXGBE_DEBUG_RX=n +CONFIG_RTE_LIBRTE_IXGBE_DEBUG_TX=n +CONFIG_RTE_LIBRTE_IXGBE_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC=n +CONFIG_RTE_IXGBE_INC_VECTOR=y +CONFIG_RTE_LIBRTE_IXGBE_BYPASS=n +# Compile burst-oriented I40E PMD driver +CONFIG_RTE_LIBRTE_I40E_PMD=y +CONFIG_RTE_LIBRTE_I40E_DEBUG_RX=n +CONFIG_RTE_LIBRTE_I40E_DEBUG_TX=n +CONFIG_RTE_LIBRTE_I40E_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC=y +CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y +CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n 
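+# (Editor's note, not from the original config: 16-byte RX descriptors
+# halve the descriptor-ring footprint versus the 32-byte default but expose
+# fewer offload fields, hence the conservative "n" above.)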
+CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64 +CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4 +# Compile burst-oriented FM10K PMD +CONFIG_RTE_LIBRTE_FM10K_PMD=n +CONFIG_RTE_LIBRTE_FM10K_DEBUG_RX=n +CONFIG_RTE_LIBRTE_FM10K_DEBUG_TX=n +CONFIG_RTE_LIBRTE_FM10K_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE=y +CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y +# Compile burst-oriented AVF PMD driver +CONFIG_RTE_LIBRTE_AVF_PMD=n +CONFIG_RTE_LIBRTE_AVF_INC_VECTOR=y +CONFIG_RTE_LIBRTE_AVF_DEBUG_TX=n +CONFIG_RTE_LIBRTE_AVF_DEBUG_TX_FREE=n +CONFIG_RTE_LIBRTE_AVF_DEBUG_RX=n +CONFIG_RTE_LIBRTE_AVF_16BYTE_RX_DESC=n +# Compile burst-oriented Mellanox ConnectX-3 (MLX4) PMD +CONFIG_RTE_LIBRTE_MLX4_PMD=y +CONFIG_RTE_LIBRTE_MLX4_DEBUG=n +CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS=y +# Compile burst-oriented Mellanox ConnectX-4, ConnectX-5 & Bluefield +# (MLX5) PMD +CONFIG_RTE_LIBRTE_MLX5_PMD=y +CONFIG_RTE_LIBRTE_MLX5_DEBUG=n +CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS=y +# Compile burst-oriented Netronome NFP PMD driver +CONFIG_RTE_LIBRTE_NFP_PMD=y +CONFIG_RTE_LIBRTE_NFP_DEBUG_TX=n +CONFIG_RTE_LIBRTE_NFP_DEBUG_RX=n +# QLogic 10G/25G/40G/50G/100G PMD +CONFIG_RTE_LIBRTE_QEDE_PMD=y +CONFIG_RTE_LIBRTE_QEDE_DEBUG_TX=n +CONFIG_RTE_LIBRTE_QEDE_DEBUG_RX=n +#Provides abs path/name of architecture we compile for. firmware file. +#Empty string denotes driver will use default firmware +CONFIG_RTE_LIBRTE_QEDE_FW="" +# Compile burst-oriented Solarflare libefx-based PMD +CONFIG_RTE_LIBRTE_SFC_EFX_PMD=n +CONFIG_RTE_LIBRTE_SFC_EFX_DEBUG=n +# Compile software PMD backed by SZEDATA2 device +CONFIG_RTE_LIBRTE_PMD_SZEDATA2=n +# Compile burst-oriented Cavium Thunderx NICVF PMD driver +CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD=n +CONFIG_RTE_LIBRTE_THUNDERX_NICVF_DEBUG_RX=n +CONFIG_RTE_LIBRTE_THUNDERX_NICVF_DEBUG_TX=n +# Compile burst-oriented Cavium LiquidIO PMD driver +CONFIG_RTE_LIBRTE_LIO_PMD=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_RX=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_TX=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_MBOX=n +CONFIG_RTE_LIBRTE_LIO_DEBUG_REGS=n +# Compile burst-oriented Cavium OCTEONTX network PMD driver +CONFIG_RTE_LIBRTE_OCTEONTX_PMD=n +# Compile WRS accelerated virtual port (AVP) guest PMD driver +CONFIG_RTE_LIBRTE_AVP_PMD=n +CONFIG_RTE_LIBRTE_AVP_DEBUG_RX=n +CONFIG_RTE_LIBRTE_AVP_DEBUG_TX=n +CONFIG_RTE_LIBRTE_AVP_DEBUG_BUFFERS=n +# Compile burst-oriented VIRTIO PMD driver +CONFIG_RTE_LIBRTE_VIRTIO_PMD=y +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n +CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n +# Compile virtio device emulation inside virtio PMD driver +CONFIG_RTE_VIRTIO_USER=n +# Compile burst-oriented VMXNET3 PMD driver +CONFIG_RTE_LIBRTE_VMXNET3_PMD=n +CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_RX=n +CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX=n +CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX_FREE=n +# Compile software PMD backed by AF_PACKET sockets (Linux only) +CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n +# Compile link bonding PMD library +CONFIG_RTE_LIBRTE_PMD_BOND=n +CONFIG_RTE_LIBRTE_BOND_DEBUG_ALB=n +CONFIG_RTE_LIBRTE_BOND_DEBUG_ALB_L1=n +# Compile fail-safe PMD +CONFIG_RTE_LIBRTE_PMD_FAILSAFE=y +# Compile Marvell PMD driver +CONFIG_RTE_LIBRTE_MVPP2_PMD=n +# Compile Marvell MVNETA PMD driver +CONFIG_RTE_LIBRTE_MVNETA_PMD=n +# Compile support for VMBus library +CONFIG_RTE_LIBRTE_VMBUS=y +# Compile native PMD for Hyper-V/Azure +CONFIG_RTE_LIBRTE_NETVSC_PMD=y +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_RX=n +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_TX=n +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_DUMP=n +# Compile virtual device driver for NetVSC on Hyper-V/Azure +CONFIG_RTE_LIBRTE_VDEV_NETVSC_PMD=y +# 
Compile null PMD +CONFIG_RTE_LIBRTE_PMD_NULL=n +# Compile software PMD backed by PCAP files +CONFIG_RTE_LIBRTE_PMD_PCAP=n +# Compile example software rings based PMD +CONFIG_RTE_LIBRTE_PMD_RING=y +CONFIG_RTE_PMD_RING_MAX_RX_RINGS=16 +CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16 +# Compile SOFTNIC PMD +CONFIG_RTE_LIBRTE_PMD_SOFTNIC=n +# Compile architecture we compile for. TAP PMD +# It is enabled by default for Linux only. +CONFIG_RTE_LIBRTE_PMD_TAP=y +# Do prefetch of packet data within PMD driver receive function +CONFIG_RTE_PMD_PACKET_PREFETCH=y +# Compile generic wireless base band device library +# EXPERIMENTAL: API may change without prior notice +CONFIG_RTE_LIBRTE_BBDEV=n +CONFIG_RTE_BBDEV_MAX_DEVS=128 +CONFIG_RTE_BBDEV_OFFLOAD_COST=n +# Compile PMD for NULL bbdev device +CONFIG_RTE_LIBRTE_PMD_BBDEV_NULL=n +# Compile PMD for turbo software bbdev device +CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW=n +# Compile generic crypto device library +CONFIG_RTE_LIBRTE_CRYPTODEV=n +CONFIG_RTE_CRYPTO_MAX_DEVS=64 +# Compile PMD for ARMv8 Crypto device +CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO=n +CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO_DEBUG=n +# Compile NXP CAAM JR crypto Driver +CONFIG_RTE_LIBRTE_PMD_CAAM_JR=n +CONFIG_RTE_LIBRTE_PMD_CAAM_JR_BE=n +# Compile NXP DPAA2 crypto sec driver for CAAM HW +CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC=n +# NXP DPAA caam - crypto driver +CONFIG_RTE_LIBRTE_PMD_DPAA_SEC=n +CONFIG_RTE_LIBRTE_DPAA_MAX_CRYPTODEV=4 +# Compile PMD for Cavium OCTEON TX crypto device +CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO=n +# Compile PMD for QuickAssist based devices - see docs for details +CONFIG_RTE_LIBRTE_PMD_QAT=n +CONFIG_RTE_LIBRTE_PMD_QAT_SYM=n +# Max. number of QuickAssist devices, which can be detected and attached +CONFIG_RTE_PMD_QAT_MAX_PCI_DEVICES=48 +CONFIG_RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS=16 +CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE=65536 +# Compile PMD for virtio crypto devices +CONFIG_RTE_LIBRTE_PMD_VIRTIO_CRYPTO=n +# Number of maximum virtio crypto devices +CONFIG_RTE_MAX_VIRTIO_CRYPTO=32 +# Compile PMD for AESNI backed device +CONFIG_RTE_LIBRTE_PMD_AESNI_MB=n +# Compile PMD for Software backed device +CONFIG_RTE_LIBRTE_PMD_OPENSSL=n +# Compile PMD for AESNI GCM device +CONFIG_RTE_LIBRTE_PMD_AESNI_GCM=n +# Compile PMD for SNOW 3G device +CONFIG_RTE_LIBRTE_PMD_SNOW3G=n +CONFIG_RTE_LIBRTE_PMD_SNOW3G_DEBUG=n +# Compile PMD for KASUMI device +CONFIG_RTE_LIBRTE_PMD_KASUMI=n +# Compile PMD for ZUC device +CONFIG_RTE_LIBRTE_PMD_ZUC=n +# Compile PMD for Crypto Scheduler device +CONFIG_RTE_LIBRTE_PMD_CRYPTO_SCHEDULER=n +# Compile PMD for NULL Crypto device +CONFIG_RTE_LIBRTE_PMD_NULL_CRYPTO=n +# Compile PMD for AMD CCP crypto device +CONFIG_RTE_LIBRTE_PMD_CCP=n +# Compile PMD for Marvell Crypto device +CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO=n +# Compile generic security library +CONFIG_RTE_LIBRTE_SECURITY=n +# Compile generic compression device library +CONFIG_RTE_LIBRTE_COMPRESSDEV=n +CONFIG_RTE_COMPRESS_MAX_DEVS=64 +# Compile compressdev unit test +CONFIG_RTE_COMPRESSDEV_TEST=n +# Compile PMD for Octeontx ZIPVF compression device +CONFIG_RTE_LIBRTE_PMD_OCTEONTX_ZIPVF=n +# Compile PMD for ISA-L compression device +CONFIG_RTE_LIBRTE_PMD_ISAL=n +# Compile PMD for ZLIB compression device +CONFIG_RTE_LIBRTE_PMD_ZLIB=n +# Compile generic event device library +CONFIG_RTE_LIBRTE_EVENTDEV=n +CONFIG_RTE_LIBRTE_EVENTDEV_DEBUG=n +CONFIG_RTE_EVENT_MAX_DEVS=16 +CONFIG_RTE_EVENT_MAX_QUEUES_PER_DEV=64 +CONFIG_RTE_EVENT_TIMER_ADAPTER_NUM_MAX=32 +CONFIG_RTE_EVENT_ETH_INTR_RING_SIZE=1024 +CONFIG_RTE_EVENT_CRYPTO_ADAPTER_MAX_INSTANCE=32 
+CONFIG_RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE=32 +# Compile PMD for skeleton event device +CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV=n +CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV_DEBUG=n +# Compile PMD for software event device +CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV=n +# Compile PMD for distributed software event device +CONFIG_RTE_LIBRTE_PMD_DSW_EVENTDEV=n +# Compile PMD for octeontx sso event device +CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF=n +# Compile PMD for OPDL event device +CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV=n +# Compile PMD for NXP DPAA event device +CONFIG_RTE_LIBRTE_PMD_DPAA_EVENTDEV=n +# Compile PMD for NXP DPAA2 event device +CONFIG_RTE_LIBRTE_PMD_DPAA2_EVENTDEV=n +# Compile raw device support +# EXPERIMENTAL: API may change without prior notice +CONFIG_RTE_LIBRTE_RAWDEV=n +CONFIG_RTE_RAWDEV_MAX_DEVS=10 +CONFIG_RTE_LIBRTE_PMD_SKELETON_RAWDEV=n +# Compile PMD for NXP DPAA2 CMDIF raw device +CONFIG_RTE_LIBRTE_PMD_DPAA2_CMDIF_RAWDEV=n +# Compile PMD for NXP DPAA2 QDMA raw device +CONFIG_RTE_LIBRTE_PMD_DPAA2_QDMA_RAWDEV=n +# Compile PMD for Intel FPGA raw device +CONFIG_RTE_LIBRTE_PMD_IFPGA_RAWDEV=n +# Compile librte_ring +CONFIG_RTE_LIBRTE_RING=y +# Compile librte_mempool +CONFIG_RTE_LIBRTE_MEMPOOL=y +CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE=512 +CONFIG_RTE_LIBRTE_MEMPOOL_DEBUG=n +# Compile Mempool drivers +CONFIG_RTE_DRIVER_MEMPOOL_BUCKET=y +CONFIG_RTE_DRIVER_MEMPOOL_BUCKET_SIZE_KB=64 +CONFIG_RTE_DRIVER_MEMPOOL_RING=y +CONFIG_RTE_DRIVER_MEMPOOL_STACK=y +# Compile PMD for octeontx fpa mempool device +CONFIG_RTE_LIBRTE_OCTEONTX_MEMPOOL=n +# Compile librte_mbuf +CONFIG_RTE_LIBRTE_MBUF=y +CONFIG_RTE_LIBRTE_MBUF_DEBUG=n +CONFIG_RTE_MBUF_DEFAULT_MEMPOOL_OPS="ring_mp_mc" +CONFIG_RTE_MBUF_REFCNT_ATOMIC=y +CONFIG_RTE_PKTMBUF_HEADROOM=128 +# Compile librte_timer +CONFIG_RTE_LIBRTE_TIMER=n +CONFIG_RTE_LIBRTE_TIMER_DEBUG=n +# Compile librte_cfgfile +CONFIG_RTE_LIBRTE_CFGFILE=n +# Compile librte_cmdline +CONFIG_RTE_LIBRTE_CMDLINE=y +CONFIG_RTE_LIBRTE_CMDLINE_DEBUG=n +# Compile librte_hash +CONFIG_RTE_LIBRTE_HASH=y +CONFIG_RTE_LIBRTE_HASH_DEBUG=n +# Compile librte_efd +CONFIG_RTE_LIBRTE_EFD=n +# Compile librte_member +CONFIG_RTE_LIBRTE_MEMBER=y +# Compile librte_jobstats +CONFIG_RTE_LIBRTE_JOBSTATS=n +# Compile architecture we compile for. device metrics library +CONFIG_RTE_LIBRTE_METRICS=y +# Compile architecture we compile for. bitrate statistics library +CONFIG_RTE_LIBRTE_BITRATE=y +# Compile architecture we compile for. latency statistics library +CONFIG_RTE_LIBRTE_LATENCY_STATS=y +# Compile librte_telemetry +CONFIG_RTE_LIBRTE_TELEMETRY=n +# Compile librte_lpm +CONFIG_RTE_LIBRTE_LPM=n +CONFIG_RTE_LIBRTE_LPM_DEBUG=n +# Compile librte_acl +CONFIG_RTE_LIBRTE_ACL=n +CONFIG_RTE_LIBRTE_ACL_DEBUG=n +# Compile librte_power +CONFIG_RTE_LIBRTE_POWER=n +CONFIG_RTE_LIBRTE_POWER_DEBUG=n +CONFIG_RTE_MAX_LCORE_FREQS=64 +# Compile librte_net +CONFIG_RTE_LIBRTE_NET=y +# Compile librte_ip_frag +CONFIG_RTE_LIBRTE_IP_FRAG=y +CONFIG_RTE_LIBRTE_IP_FRAG_DEBUG=n +CONFIG_RTE_LIBRTE_IP_FRAG_MAX_FRAG=4 +CONFIG_RTE_LIBRTE_IP_FRAG_TBL_STAT=n +# Compile GRO library +CONFIG_RTE_LIBRTE_GRO=y +# Compile GSO library +CONFIG_RTE_LIBRTE_GSO=y +# Compile librte_meter +CONFIG_RTE_LIBRTE_METER=y +# Compile librte_classify +CONFIG_RTE_LIBRTE_FLOW_CLASSIFY=n +# Compile librte_sched +CONFIG_RTE_LIBRTE_SCHED=n +CONFIG_RTE_SCHED_DEBUG=n +CONFIG_RTE_SCHED_RED=n +CONFIG_RTE_SCHED_COLLECT_STATS=n +CONFIG_RTE_SCHED_SUBPORT_TC_OV=n +CONFIG_RTE_SCHED_PORT_N_GRINDERS=8 +CONFIG_RTE_SCHED_VECTOR=n +# Compile architecture we compile for. 
distributor library +CONFIG_RTE_LIBRTE_DISTRIBUTOR=n +# Compile architecture we compile for. reorder library +CONFIG_RTE_LIBRTE_REORDER=n +# Compile librte_port +CONFIG_RTE_LIBRTE_PORT=n +CONFIG_RTE_PORT_STATS_COLLECT=n +CONFIG_RTE_PORT_PCAP=n +# Compile librte_table +CONFIG_RTE_LIBRTE_TABLE=n +CONFIG_RTE_TABLE_STATS_COLLECT=n +# Compile librte_pipeline +CONFIG_RTE_LIBRTE_PIPELINE=n +CONFIG_RTE_PIPELINE_STATS_COLLECT=n +# Compile librte_kni +CONFIG_RTE_LIBRTE_KNI=n +CONFIG_RTE_LIBRTE_PMD_KNI=n +CONFIG_RTE_KNI_KMOD=n +CONFIG_RTE_KNI_KMOD_ETHTOOL=n +CONFIG_RTE_KNI_PREEMPT_DEFAULT=y +# Compile architecture we compile for. pdump library +CONFIG_RTE_LIBRTE_PDUMP=y +# Compile vhost user library +CONFIG_RTE_LIBRTE_VHOST=y +CONFIG_RTE_LIBRTE_VHOST_NUMA=y +CONFIG_RTE_LIBRTE_VHOST_DEBUG=n +# Compile vhost PMD +# To compile, CONFIG_RTE_LIBRTE_VHOST should be enabled. +CONFIG_RTE_LIBRTE_PMD_VHOST=y +# Compile IFC driver +# To compile, CONFIG_RTE_LIBRTE_VHOST and CONFIG_RTE_EAL_VFIO +# should be enabled. +CONFIG_RTE_LIBRTE_IFC_PMD=n +# Compile librte_bpf +CONFIG_RTE_LIBRTE_BPF=n +# allow load BPF from ELF files (requires libelf) +CONFIG_RTE_LIBRTE_BPF_ELF=n +# Compile architecture we compile for. test application +CONFIG_RTE_APP_TEST=y +CONFIG_RTE_APP_TEST_RESOURCE_TAR=n +# Compile architecture we compile for. procinfo application +CONFIG_RTE_PROC_INFO=y +# Compile architecture we compile for. PMD test application +CONFIG_RTE_TEST_PMD=n +CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n +CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n +# Compile architecture we compile for. bbdev test application +CONFIG_RTE_TEST_BBDEV=n +# Compile architecture we compile for. crypto performance application +CONFIG_RTE_APP_CRYPTO_PERF=n +# Compile architecture we compile for. eventdev application +CONFIG_RTE_APP_EVENTDEV=n +CONFIG_RTE_EXEC_ENV_LINUXAPP=y +CONFIG_RTE_LIBRTE_VHOST_POSTCOPY=n +# Common libraries, before Bus/PMDs +# NXP DPAA BUS and drivers +# NXP FSLMC BUS and DPAA2 drivers +# NXP ENETC PMD Driver +CONFIG_RTE_ARCH_X86_64=y +CONFIG_RTE_ARCH_X86=y +CONFIG_RTE_ARCH_64=y +CONFIG_RTE_TOOLCHAIN_GCC=y +CONFIG_RTE_LIBRTE_PMD_XENVIRT=n diff --git a/SPECS/openvswitch2.11.spec b/SPECS/openvswitch2.11.spec new file mode 100644 index 0000000..f78ea89 --- /dev/null +++ b/SPECS/openvswitch2.11.spec @@ -0,0 +1,1736 @@ +# Copyright (C) 2009, 2010, 2013, 2014 Nicira Networks, Inc. +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. This file is offered as-is, +# without warranty of any kind. +# +# If tests have to be skipped while building, specify the '--without check' +# option. For example: +# rpmbuild -bb --without check rhel/openvswitch-fedora.spec + +# This defines the base package name's version. + +%define pkgname openvswitch2.11 + +# Enable PIE, bz#955181 +%global _hardened_build 1 + +# RHEL-7 doesn't define _rundir macro yet +# Fedora 15 onwards uses /run as _rundir +%if 0%{!?_rundir:1} +%define _rundir /run +%endif + +# FIXME Test "STP - flush the fdb and mdb when topology changed" fails on s390x +# FIXME 2 tests fails on ppc64le. They will be hopefully fixed before official 2.11 +%ifarch %{ix86} x86_64 aarch64 +%bcond_without check +%else +%bcond_with check +%endif +# option to run kernel datapath tests, requires building as root! 
+%bcond_with check_datapath_kernel
+# option to build with libcap-ng, needed for running OVS as regular user
+%bcond_without libcapng
+
+# Build python2 (that provides python) and python3 subpackages on Fedora
+# Build only python3 (that provides python) subpackage on RHEL8
+# Build only python subpackage on RHEL7
+%if 0%{?rhel} > 7 || 0%{?fedora}
+# Use Python3
+%global _py python3
+%global _py2 python2
+%global with_python3 1
+%if 0%{?fedora}
+%global with_python2 1
+%else
+%global with_python2 0
+%endif
+# On RHEL8 Sphinx is included in buildroot
+%global external_sphinx 1
+%else
+# Use Python2
+%global _py python
+%global _py2 python
+%global with_python2 1
+%global with_python3 0
+# Don't use external sphinx (RHV doesn't have optional repositories enabled)
+%global external_sphinx 0
+%endif
+
+Name: %{pkgname}
+Summary: Open vSwitch
+Group: System Environment/Daemons daemon/database/utilities
+URL: http://www.openvswitch.org/
+Version: 2.11.3
+Release: 60%{?dist}
+
+# Nearly all of openvswitch is ASL 2.0. The bugtool is LGPLv2+, and the
+# lib/sflow*.[ch] files are SISSL
+# datapath/ is GPLv2 (although not built into any of the binary packages)
+License: ASL 2.0 and LGPLv2+ and SISSL
+
+%define dpdkver 18.11.7
+%define dpdkdir dpdk
+%define dpdksver %(echo %{dpdkver} | cut -d. -f-2)
+# NOTE: DPDK does not currently build for s390x
+# DPDK on aarch64 is not stable enough to be enabled in FDP
+%define dpdkarches x86_64 ppc64le
+
+Source: https://github.com/openvswitch/ovs/archive/v%{version}.tar.gz#/openvswitch-%{version}.tar.gz
+Source10: https://fast.dpdk.org/rel/dpdk-%{dpdkver}.tar.xz
+
+%define docutilsver 0.12
+%define pygmentsver 1.4
+%define sphinxver 1.1.3
+Source100: https://pypi.io/packages/source/d/docutils/docutils-%{docutilsver}.tar.gz
+Source101: https://pypi.io/packages/source/P/Pygments/Pygments-%{pygmentsver}.tar.gz
+Source102: https://pypi.io/packages/source/S/Sphinx/Sphinx-%{sphinxver}.tar.gz
+
+Source500: configlib.sh
+Source502: set_config.sh
+
+# Important: source503 is used as the actual copy file
+# @TODO: this causes a warning - fix it?
+Source504: arm64-armv8a-linuxapp-gcc-config
+Source505: ppc_64-power8-linuxapp-gcc-config
+Source506: x86_64-native-linuxapp-gcc-config
+
+Patch: openvswitch-%{version}.patch
+
+# The DPDK is designed to optimize throughput of network traffic using, among
+# other techniques, carefully crafted assembly instructions. As such it
+# needs extensive work to port it to other architectures.
+ExclusiveArch: x86_64 aarch64 ppc64le s390x
+
+# Do not enable this otherwise YUM will break on any upgrade.
+# Provides: openvswitch +Conflicts: openvswitch < 2.11 +Conflicts: openvswitch-dpdk < 2.11 + +# dpdk_mach_arch maps between rpm and dpdk arch name, often same as _target_cpu +# dpdk_mach_tmpl is the config template dpdk_mach name, often "native" +# dpdk_mach is the actual dpdk_mach name used in the dpdk make system +%ifarch x86_64 +%define dpdk_mach_arch x86_64 +%define dpdk_mach_tmpl native +%define dpdk_mach default +%endif +%ifarch aarch64 +%define dpdk_mach_arch arm64 +%define dpdk_mach_tmpl armv8a +%define dpdk_mach armv8a +%endif +%ifarch ppc64le +%define dpdk_mach_arch ppc_64 +%define dpdk_mach_tmpl power8 +%define dpdk_mach power8 +%endif + +%define dpdktarget %{dpdk_mach_arch}-%{dpdk_mach_tmpl}-linuxapp-gcc + +# FIXME Sphinx is used to generate some manpages, unfortunately, on RHEL, it's +# in the -optional repository and so we can't require it directly since RHV +# doesn't have the -optional repository enabled and so TPS fails +%if %{external_sphinx} +BuildRequires: %{_py}-sphinx +%else +# Sphinx dependencies +BuildRequires: %{_py}-devel +BuildRequires: %{_py}-setuptools +#BuildRequires: %{_py}-docutils +BuildRequires: %{_py}-jinja2 +BuildRequires: %{_py}-nose +#BuildRequires: %{_py}-pygments +# docutils dependencies +BuildRequires: %{_py}-imaging +# pygments dependencies +BuildRequires: %{_py}-nose +%endif + +BuildRequires: gcc gcc-c++ make +BuildRequires: autoconf automake libtool +BuildRequires: systemd-units openssl openssl-devel +%if %{with_python3} +BuildRequires: python3-devel python3-six python3-setuptools +%endif +%if %{with_python2} +BuildRequires: %{_py2}-devel %{_py2}-six %{_py2}-setuptools +%endif +BuildRequires: desktop-file-utils +BuildRequires: groff-base graphviz +BuildRequires: unbound-devel +# make check dependencies +BuildRequires: procps-ng +%if %{with_python2} +BuildRequires: pyOpenSSL +%else +BuildRequires: python3-pyOpenSSL +%endif +%if %{with check_datapath_kernel} +BuildRequires: nmap-ncat +# would be useful but not available in RHEL or EPEL +#BuildRequires: pyftpdlib +%endif + +%if %{with libcapng} +BuildRequires: libcap-ng libcap-ng-devel +%endif + +%ifarch %{dpdkarches} +# DPDK driver dependencies +BuildRequires: zlib-devel numactl-devel +%ifarch x86_64 +BuildRequires: rdma-core-devel >= 15 libmnl-devel +%global __requires_exclude_from ^%{_libdir}/openvswitch/librte_pmd_mlx[45]_glue\.so.*$ +%endif + +# Required by packaging policy for the bundled DPDK +Provides: bundled(dpdk) = %{dpdkver} +%endif + +Requires: openssl iproute module-init-tools +#Upstream kernel commit 4f647e0a3c37b8d5086214128614a136064110c3 +#Requires: kernel >= 3.15.0-0 +Requires: openvswitch-selinux-extra-policy + +Requires(pre): shadow-utils +Requires(post): /bin/sed +Requires(post): /usr/sbin/usermod +Requires(post): /usr/sbin/groupadd +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units +Obsoletes: openvswitch-controller <= 0:2.1.0-1 + +%description +Open vSwitch provides standard network bridging functions and +support for the OpenFlow protocol for remote per-flow control of +traffic. 
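+# (Editor's worked example of the dpdk_mach macros above: %{dpdktarget}
+# expands to "x86_64-native-linuxapp-gcc" on x86_64,
+# "arm64-armv8a-linuxapp-gcc" on aarch64, and
+# "ppc_64-power8-linuxapp-gcc" on ppc64le -- matching the three config
+# files shipped as Source504-506.)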
+
+%if %{with_python2}
+%package -n %{_py2}-%{pkgname}
+Summary: Open vSwitch %{_py2} bindings
+License: ASL 2.0
+Requires: %{_py2} %{_py2}-six
+Requires: %{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release}
+%if "%{_py2}" == "python2"
+Obsoletes: python-%{pkgname} < 2.6.1-2
+Provides: python-%{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release}
+%endif
+
+%description -n %{_py2}-%{pkgname}
+Python bindings for the Open vSwitch database
+%endif
+
+%if %{with_python3}
+%package -n python3-%{pkgname}
+Summary: Open vSwitch python3 bindings
+License: ASL 2.0
+Requires: python3 python3-six
+Requires: %{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release}
+%if ! %{with_python2}
+Obsoletes: python-%{pkgname} < 2.10.0-6
+Provides: python-%{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release}
+%endif
+
+%description -n python3-%{pkgname}
+Python bindings for the Open vSwitch database
+%endif
+
+%package test
+Summary: Open vSwitch testing utilities
+License: ASL 2.0
+BuildArch: noarch
+%if %{with_python2}
+Requires: %{_py2}-%{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release}
+Requires: %{_py2} %{_py2}-twisted%{?rhel:-web}
+Requires: %{_py2}-netifaces
+%else
+Requires: python3-%{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release}
+Requires: python3-netifaces
+%endif
+Requires: tcpdump
+
+%description test
+Utilities that are useful to diagnose performance and connectivity
+issues in an Open vSwitch setup.
+
+%package devel
+Summary: Open vSwitch OpenFlow development package (library, headers)
+License: ASL 2.0
+Requires: %{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release}
+
+%description devel
+This provides the shared library, libopenvswitch.so, and the openvswitch
+header files needed to build an external application.
+
+%if 0%{?rhel} > 7 || 0%{?fedora} > 28
+%package -n network-scripts-%{name}
+Summary: Open vSwitch legacy network service support
+License: ASL 2.0
+Requires: network-scripts
+Supplements: (%{name} and network-scripts)
+
+%description -n network-scripts-%{name}
+This provides the ifup and ifdown scripts for use with the legacy network
+service.
+%endif
+
+
+%prep
+%setup -q -n ovs-%{version} -a 10
+%if ! %{external_sphinx}
+%setup -n ovs-%{version} -q -D -T -a 100 -a 101 -a 102
+%endif
+
+mv dpdk-*/ %{dpdkdir}/
+
+%patch -p1
+
+%build
+# Build Sphinx on RHEL
+%if ! %{external_sphinx}
+export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}%{_builddir}/pytmp/lib/python"
+for x in docutils-%{docutilsver} Pygments-%{pygmentsver} Sphinx-%{sphinxver}; do
+ pushd "$x"
+ %{_py} setup.py install --home %{_builddir}/pytmp
+ popd
+done
+
+export PATH="$PATH:%{_builddir}/pytmp/bin"
+%endif
+
+./boot.sh
+
+%ifarch %{dpdkarches} # build dpdk
+# Let's build DPDK first
+cd %{dpdkdir}
+
+# In case dpdk-devel is installed
+unset RTE_SDK RTE_INCLUDE RTE_TARGET
+
+# Avoid appending second -Wall to everything, it breaks upstream warning
+# disablers in makefiles. Strip explicit -march= from optflags since they
+# will only guarantee build failures, DPDK is picky with that.
+# Note: _hardening_ldflags has to go on the extra cflags line because dpdk is
+# astoundingly convoluted in how it processes its linker flags.
Fixing it in
+# dpdk is the preferred solution, but adjusting to allow a gcc option in the
+# ldflags, even when gcc is used as the linker, requires large tree-wide changes
+touch obj.o
+gcc -### obj.o 2>&1 | awk '/.*collect2.*/ { print $0}' | sed -e 's/\S*\.res\S*//g' -e 's/-z \S*//g' -e 's/[^ ]*\.o//g' -e 's/ /\n/g' | sort -u > ./noopts.txt
+gcc -### $RPM_LD_FLAGS obj.o 2>&1 | awk '/.*collect2.*/ {print $0}' | sed -e 's/\S*\.res\S*//g' -e 's/-z \S*//g' -e 's/[^ ]*\.o//g' -e 's/ /\n/g' | sort -u > ./opts.txt
+EXTRA_RPM_LDFLAGS=$(comm -13 ./noopts.txt ./opts.txt)
+rm -f obj.o
+
+export EXTRA_CFLAGS="$(echo %{optflags} | sed -e 's:-Wall::g' -e 's:-march=[[:alnum:]]* ::g') -Wformat -fPIC %{_hardening_ldflags}"
+export EXTRA_LDFLAGS=$(echo %{__global_ldflags} | sed -e's/-Wl,//g' -e's/-spec.*//')
+export HOST_EXTRA_CFLAGS="$EXTRA_CFLAGS $EXTRA_RPM_LDFLAGS"
+export EXTRA_HOST_LDFLAGS="$EXTRA_RPM_LDFLAGS $(echo %{__global_ldflags} | sed -e's/-spec.*//')"
+
+# DPDK defaults to using builder-specific compiler flags. However,
+# the config has been changed by specifying CONFIG_RTE_MACHINE=default
+# in order to build for a more generic host. NOTE: It is possible that
+# the compiler flags used still won't work for all Fedora-supported
+# dpdk_machs, but runtime checks in DPDK will catch those situations.
+
+make V=1 O=%{dpdktarget} T=%{dpdktarget} %{?_smp_mflags} config
+
+cp -f %{SOURCE500} %{SOURCE502} "%{_sourcedir}/%{dpdktarget}-config" .
+%{SOURCE502} %{dpdktarget}-config "%{dpdktarget}/.config"
+
+make V=1 O=%{dpdktarget} %{?_smp_mflags}
+
+# Generate a list of supported drivers, it's hard to tell otherwise.
+cat << EOF > README.DPDK-PMDS
+DPDK drivers included in this package:
+
+EOF
+
+for f in $(ls %{dpdk_mach_arch}-%{dpdk_mach_tmpl}-linuxapp-gcc/lib/lib*_pmd_*); do
+ basename ${f} | cut -c12- | cut -d. -f1 | tr [:lower:] [:upper:]
+done >> README.DPDK-PMDS
+
+cat << EOF >> README.DPDK-PMDS
+
+For further information about the drivers, see
+http://dpdk.org/doc/guides-%{dpdksver}/nics/index.html
+EOF
+
+cd -
+%endif # build dpdk
+
+# And now for OVS...
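+# (Editor's note: two build trees follow -- "build-shared" supplies the
+# shared libraries that %install packages via install-libLTLIBRARIES,
+# while "build-static" links the daemons, against DPDK on %{dpdkarches},
+# and provides everything else that gets installed.)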
+mkdir build-shared build-static +pushd build-shared +ln -s ../configure +%configure \ +%if %{with libcapng} + --enable-libcapng \ +%else + --disable-libcapng \ +%endif + --disable-static \ + --enable-shared \ + --enable-ssl \ + --with-pkidir=%{_sharedstatedir}/openvswitch/pki +make %{?_smp_mflags} +popd +pushd build-static +ln -s ../configure +%ifarch %{dpdkarches} +LDFLAGS="%{__global_ldflags} -Wl,-rpath,%{_libdir}/openvswitch" \ +%endif +%configure \ +%if %{with libcapng} + --enable-libcapng \ +%else + --disable-libcapng \ +%endif + --enable-ssl \ +%ifarch %{dpdkarches} + --with-dpdk=$(pwd)/../%{dpdkdir}/%{dpdktarget} \ +%endif + --with-pkidir=%{_sharedstatedir}/openvswitch/pki +make %{?_smp_mflags} +popd + +/usr/bin/%{_py} build-aux/dpdkstrip.py \ + --dpdk \ + < rhel/usr_lib_systemd_system_ovs-vswitchd.service.in \ + > rhel/usr_lib_systemd_system_ovs-vswitchd.service + +%install +rm -rf $RPM_BUILD_ROOT +make -C build-shared install-libLTLIBRARIES DESTDIR=$RPM_BUILD_ROOT +make -C build-static install DESTDIR=$RPM_BUILD_ROOT + +install -d -m 0755 $RPM_BUILD_ROOT%{_rundir}/openvswitch +install -d -m 0750 $RPM_BUILD_ROOT%{_localstatedir}/log/openvswitch +install -d -m 0755 $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch + +install -p -D -m 0644 rhel/usr_lib_udev_rules.d_91-vfio.rules \ + $RPM_BUILD_ROOT%{_udevrulesdir}/91-vfio.rules + +install -p -D -m 0644 \ + rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template \ + $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/openvswitch + +for service in openvswitch ovsdb-server ovs-vswitchd \ + ovs-delete-transient-ports; do + install -p -D -m 0644 \ + rhel/usr_lib_systemd_system_${service}.service \ + $RPM_BUILD_ROOT%{_unitdir}/${service}.service +done + + +install -m 0755 rhel/etc_init.d_openvswitch \ + $RPM_BUILD_ROOT%{_datadir}/openvswitch/scripts/openvswitch.init + +install -p -D -m 0644 rhel/etc_openvswitch_default.conf \ + $RPM_BUILD_ROOT/%{_sysconfdir}/openvswitch/default.conf + +install -p -D -m 0644 rhel/etc_logrotate.d_openvswitch \ + $RPM_BUILD_ROOT/%{_sysconfdir}/logrotate.d/openvswitch + +install -m 0644 vswitchd/vswitch.ovsschema \ + $RPM_BUILD_ROOT/%{_datadir}/openvswitch/vswitch.ovsschema + +install -d -m 0755 $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/network-scripts/ +install -p -m 0755 rhel/etc_sysconfig_network-scripts_ifdown-ovs \ + $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs +install -p -m 0755 rhel/etc_sysconfig_network-scripts_ifup-ovs \ + $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/network-scripts/ifup-ovs + +%if %{with_python2} +install -d -m 0755 $RPM_BUILD_ROOT%{python_sitelib} +cp -a $RPM_BUILD_ROOT/%{_datadir}/openvswitch/python/ovstest \ + $RPM_BUILD_ROOT%{python_sitelib} +%else +install -d -m 0755 $RPM_BUILD_ROOT%{python3_sitelib} +cp -a $RPM_BUILD_ROOT/%{_datadir}/openvswitch/python/ovstest \ + $RPM_BUILD_ROOT%{python3_sitelib} +%endif + +# Build the JSON C extension for the Python lib (#1417738) +pushd python +%if %{with_python2} +( +export CPPFLAGS="-I ../include -I ../build-shared/include" +export LDFLAGS="%{__global_ldflags} -L $RPM_BUILD_ROOT%{_libdir}" +%py2_build +%py2_install +[ -f "$RPM_BUILD_ROOT/%{python2_sitearch}/ovs/_json.so" ] +) +%endif +%if %{with_python3} +( +export CPPFLAGS="-I ../include -I ../build-shared/include" +export LDFLAGS="%{__global_ldflags} -L $RPM_BUILD_ROOT%{_libdir}" +%py3_build +%py3_install +[ -f "$RPM_BUILD_ROOT/%{python3_sitearch}/ovs/_json.cpython-%{python3_version_nodots}m-%{_arch}-%{_target_os}%{?_gnu}.so" ] +) +%endif +popd + +rm -rf 
$RPM_BUILD_ROOT/%{_datadir}/openvswitch/python/
+
+install -d -m 0755 $RPM_BUILD_ROOT/%{_sharedstatedir}/openvswitch
+
+install -d -m 0755 $RPM_BUILD_ROOT%{_prefix}/lib/firewalld/services/
+
+install -p -D -m 0755 \
+ rhel/usr_share_openvswitch_scripts_ovs-systemd-reload \
+ $RPM_BUILD_ROOT%{_datadir}/openvswitch/scripts/ovs-systemd-reload
+
+touch $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch/conf.db
+touch $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch/system-id.conf
+
+%ifarch x86_64
+install -d -m 0755 $RPM_BUILD_ROOT%{_libdir}/openvswitch
+install -p -m 0755 %{dpdkdir}/%{dpdktarget}/lib/librte_pmd_mlx{4,5}_glue.so.* \
+ $RPM_BUILD_ROOT%{_libdir}/openvswitch/
+%endif
+# remove unpackaged files
+rm -f $RPM_BUILD_ROOT/%{_bindir}/ovs-benchmark \
+ $RPM_BUILD_ROOT/%{_bindir}/ovs-docker \
+ $RPM_BUILD_ROOT/%{_bindir}/ovs-parse-backtrace \
+ $RPM_BUILD_ROOT/%{_bindir}/ovs-testcontroller \
+ $RPM_BUILD_ROOT/%{_datadir}/openvswitch/scripts/ovs-monitor-ipsec \
+ $RPM_BUILD_ROOT/%{_sbindir}/ovs-vlan-bug-workaround \
+ $RPM_BUILD_ROOT/%{_mandir}/man1/ovs-benchmark.1* \
+ $RPM_BUILD_ROOT/%{_mandir}/man8/ovs-testcontroller.* \
+ $RPM_BUILD_ROOT/%{_mandir}/man8/ovs-vlan-bug-workaround.8*
+
+
+# remove unpackaged ovn files
+rm -f $RPM_BUILD_ROOT%{_bindir}/ovn*
+rm -f $RPM_BUILD_ROOT%{_mandir}/man1/ovn*
+rm -f $RPM_BUILD_ROOT%{_mandir}/man5/ovn*
+rm -f $RPM_BUILD_ROOT%{_mandir}/man7/ovn*
+rm -f $RPM_BUILD_ROOT%{_mandir}/man8/ovn*
+rm -f $RPM_BUILD_ROOT%{_datadir}/openvswitch/ovn*
+rm -f $RPM_BUILD_ROOT%{_datadir}/openvswitch/scripts/ovn*
+rm -f $RPM_BUILD_ROOT%{_includedir}/ovn/*
+
+%check
+ export MLX4_GLUE_PATH=$(pwd)/%{dpdkdir}/%{dpdktarget}/lib
+ export MLX5_GLUE_PATH=$(pwd)/%{dpdkdir}/%{dpdktarget}/lib
+%if %{with check}
+ pushd build-static
+ touch resolv.conf
+ export OVS_RESOLV_CONF=$(pwd)/resolv.conf
+ if make check TESTSUITEFLAGS='%{_smp_mflags}' ||
+ make check TESTSUITEFLAGS='--recheck'; then :;
+ else
+ cat tests/testsuite.log
+ exit 1
+ fi
+ popd
+%endif
+%if %{with check_datapath_kernel}
+ pushd build-static
+ if make check-kernel RECHECK=yes; then :;
+ else
+ cat tests/system-kmod-testsuite.log
+ exit 1
+ fi
+ popd
+%endif
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%preun
+%if 0%{?systemd_preun:1}
+ %systemd_preun openvswitch.service
+%else
+ if [ $1 -eq 0 ] ; then
+ # Package removal, not upgrade
+ /bin/systemctl --no-reload disable openvswitch.service >/dev/null 2>&1 || :
+ /bin/systemctl stop openvswitch.service >/dev/null 2>&1 || :
+ fi
+%endif
+
+%pre
+getent group openvswitch >/dev/null || groupadd -r openvswitch
+getent passwd openvswitch >/dev/null || \
+ useradd -r -g openvswitch -d / -s /sbin/nologin \
+ -c "Open vSwitch Daemons" openvswitch
+
+%ifarch %{dpdkarches}
+ getent group hugetlbfs >/dev/null || groupadd hugetlbfs
+ usermod -a -G hugetlbfs openvswitch
+%endif
+exit 0
+
+%post
+if [ $1 -eq 1 ]; then
+ sed -i 's:^#OVS_USER_ID=:OVS_USER_ID=:' /etc/sysconfig/openvswitch
+
+%ifarch %{dpdkarches}
+ sed -i \
+ 's@OVS_USER_ID="openvswitch:openvswitch"@OVS_USER_ID="openvswitch:hugetlbfs"@'\
+ /etc/sysconfig/openvswitch
+%endif
+fi
+chown -R openvswitch:openvswitch /etc/openvswitch
+
+%if 0%{?systemd_post:1}
+ %systemd_post openvswitch.service
+%else
+ # Package install, not upgrade
+ if [ $1 -eq 1 ]; then
+ /bin/systemctl daemon-reload >/dev/null || :
+ fi
+%endif
+
+%postun
+%if 0%{?systemd_postun:1}
+ %systemd_postun openvswitch.service
+%else
+ /bin/systemctl daemon-reload >/dev/null 2>&1 || :
+%endif
+
+%triggerun -- openvswitch < 2.5.0-22.git20160727%{?dist}
+# old rpm versions restart
+
+%if %{with_python2}
+%files -n %{_py2}-%{pkgname}
+%{python2_sitearch}/ovs
+%{python2_sitearch}/ovs-*.egg-info
+%doc LICENSE
+%endif
+
+%if %{with_python3}
+%files -n python3-%{pkgname}
+%{python3_sitearch}/ovs
+%{python3_sitearch}/ovs-*.egg-info
+%doc LICENSE
+%endif
+
+%files test
+%{_bindir}/ovs-pcap
+%{_bindir}/ovs-tcpdump
+%{_bindir}/ovs-tcpundump
+%{_mandir}/man1/ovs-pcap.1*
+%{_mandir}/man8/ovs-tcpdump.8*
+%{_mandir}/man1/ovs-tcpundump.1*
+%if %{with_python2}
+%{_bindir}/ovs-test
+%{_bindir}/ovs-vlan-test
+%{_bindir}/ovs-l3ping
+%{_mandir}/man8/ovs-test.8*
+%{_mandir}/man8/ovs-vlan-test.8*
+%{_mandir}/man8/ovs-l3ping.8*
+%{python_sitelib}/ovstest
+%else
+%exclude %{_mandir}/man8/ovs-test.8*
+%exclude %{_mandir}/man8/ovs-vlan-test.8*
+%exclude %{_mandir}/man8/ovs-l3ping.8*
+%{python3_sitelib}/ovstest
+%endif
+
+%files devel
+%{_libdir}/*.so
+%{_libdir}/pkgconfig/*.pc
+%{_includedir}/openvswitch/*
+%{_includedir}/openflow/*
+%exclude %{_libdir}/*.a
+%exclude %{_libdir}/*.la
+
+%if 0%{?rhel} > 7 || 0%{?fedora} > 28
+%files -n network-scripts-%{name}
+%{_sysconfdir}/sysconfig/network-scripts/ifup-ovs
+%{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs
+%endif
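+
+# The main package ghosts its runtime-generated files: conf.db and
+# system-id.conf below are created on first service start rather than shipped
+# in the payload, and %%verify(not ...) keeps "rpm -V" from flagging
+# attributes that legitimately change at runtime. An illustrative check (the
+# package NEVR is only an example):
+#
+#   rpm -qlp openvswitch-2.11.3-60.el8.x86_64.rpm | grep conf.db  # listed (ghost)
+#   rpm -V openvswitch                                            # db changes not reported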
+
+%files
+%defattr(-,openvswitch,openvswitch)
+%dir %{_sysconfdir}/openvswitch
+%{_sysconfdir}/openvswitch/default.conf
+%config %ghost %verify(not owner group md5 size mtime) %{_sysconfdir}/openvswitch/conf.db
+%ghost %attr(0600,-,-) %verify(not owner group md5 size mtime) %{_sysconfdir}/openvswitch/.conf.db.~lock~
+%config %ghost %{_sysconfdir}/openvswitch/system-id.conf
+%defattr(-,root,root)
+%config(noreplace) %verify(not md5 size mtime) %{_sysconfdir}/sysconfig/openvswitch
+%{_sysconfdir}/bash_completion.d/ovs-appctl-bashcomp.bash
+%{_sysconfdir}/bash_completion.d/ovs-vsctl-bashcomp.bash
+%config(noreplace) %{_sysconfdir}/logrotate.d/openvswitch
+%{_unitdir}/openvswitch.service
+%{_unitdir}/ovsdb-server.service
+%{_unitdir}/ovs-vswitchd.service
+%{_unitdir}/ovs-delete-transient-ports.service
+%{_datadir}/openvswitch/scripts/openvswitch.init
+%{_datadir}/openvswitch/scripts/ovs-check-dead-ifs
+%{_datadir}/openvswitch/scripts/ovs-lib
+%{_datadir}/openvswitch/scripts/ovs-save
+%{_datadir}/openvswitch/scripts/ovs-vtep
+%{_datadir}/openvswitch/scripts/ovs-ctl
+%{_datadir}/openvswitch/scripts/ovs-kmod-ctl
+%{_datadir}/openvswitch/scripts/ovs-systemd-reload
+%config %{_datadir}/openvswitch/vswitch.ovsschema
+%config %{_datadir}/openvswitch/vtep.ovsschema
+%{_bindir}/ovs-appctl
+%{_bindir}/ovs-dpctl
+%{_bindir}/ovs-ofctl
+%{_bindir}/ovs-vsctl
+%{_bindir}/ovsdb-client
+%{_bindir}/ovsdb-tool
+%{_bindir}/ovs-pki
+%{_bindir}/vtep-ctl
+%{_libdir}/*.so.*
+%ifarch x86_64
+%dir %{_libdir}/openvswitch
+%{_libdir}/openvswitch/librte_pmd_mlx4_glue.so.*
+%{_libdir}/openvswitch/librte_pmd_mlx5_glue.so.*
+%endif
+%{_sbindir}/ovs-vswitchd
+%{_sbindir}/ovsdb-server
+%{_mandir}/man1/ovsdb-client.1*
+%{_mandir}/man1/ovsdb-server.1*
+%{_mandir}/man1/ovsdb-tool.1*
+%{_mandir}/man5/ovsdb.5*
+%{_mandir}/man5/ovsdb-server.5.*
+%{_mandir}/man5/ovs-vswitchd.conf.db.5*
+%{_mandir}/man5/vtep.5*
+%{_mandir}/man7/ovsdb-server.7*
+%{_mandir}/man7/ovsdb.7*
+%{_mandir}/man7/ovs-actions.7*
+%{_mandir}/man7/ovs-fields.7*
+%{_mandir}/man8/vtep-ctl.8*
+%{_mandir}/man8/ovs-appctl.8*
+%{_mandir}/man8/ovs-ctl.8*
+%{_mandir}/man8/ovs-dpctl.8*
+%{_mandir}/man8/ovs-kmod-ctl.8.*
+%{_mandir}/man8/ovs-ofctl.8*
+%{_mandir}/man8/ovs-pki.8*
+%{_mandir}/man8/ovs-vsctl.8*
+%{_mandir}/man8/ovs-vswitchd.8*
+%{_mandir}/man8/ovs-parse-backtrace.8*
+%{_udevrulesdir}/91-vfio.rules
+%doc LICENSE NOTICE README.rst NEWS rhel/README.RHEL.rst
+%ifarch %{dpdkarches}
+%doc %{dpdkdir}/README.DPDK-PMDS
+%endif
+/var/lib/openvswitch
+%attr(750,openvswitch,openvswitch) %verify(not owner group) /var/log/openvswitch
+%ghost %attr(755,root,root) %verify(not owner group) %{_rundir}/openvswitch
+%if %{with_python2}
+%{_datadir}/openvswitch/bugtool-plugins/
+%{_datadir}/openvswitch/scripts/ovs-bugtool-*
+%{_bindir}/ovs-dpctl-top
+%{_sbindir}/ovs-bugtool
+%{_mandir}/man8/ovs-dpctl-top.8*
+%{_mandir}/man8/ovs-bugtool.8*
+%else
+%exclude %{_mandir}/man8/ovs-dpctl-top.8*
+%endif
+%if (0%{?rhel} && 0%{?rhel} <= 7) || (0%{?fedora} && 0%{?fedora} < 29)
+%{_sysconfdir}/sysconfig/network-scripts/ifup-ovs
+%{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs
+%endif
+
+
+%changelog
+* Fri Jul 17 2020 Flavio Leitner <fbl@redhat.com> - 2.11.3-60
+- Merge branch 'fast-datapath-rhel-7' into fast-datapath-rhel-8
+ [a2d9792f8cea55348a9f263c4f891298ffcb2462]
+
+* Thu Jul 16 2020 Flavio Leitner <fbl@redhat.com> - 2.11.3-59
+- Merge branch 'fast-datapath-rhel-7' into fast-datapath-rhel-8
+ [c9f7a9e2d37b09a1f154fe30b50260255cce4595]
+
+* Wed Jul 15 2020 Flavio Leitner <fbl@redhat.com> - 2.11.3-58
+- spec: Fix configure to use dpdkdir without version.
+ [583acc91dd782f1e73cc20a27b7cbd8bb5a7bc98]
+
+* Mon Jul 13 2020 Flavio Leitner <fbl@redhat.com> - 2.11.3-57
+- redhat: Rename OVSCI job name.
+ [cbcaa831188b77f253f718203dc743904538464a] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-56 +- This is fast-datapath-rhel-8 + [98f312f126a245f2609a8dcea9604e09832181f0] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-55 +- bus/pci: fix VF memory access (#1851170) + [fa4d90db57191665037114e4098f3d1f6b6ea9c7] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-54 +- vhost: fix vring index check (#1831391) + [8e33084d85d80cea72d02de0abf36c142dcefa2a] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-53 +- vhost: check log mmap offset and size overflow (#1831391) + [753ae0cf66553e8fd71b8e76642900d9fb62c406] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-52 +- vhost: add device op when notification to guest is sent (#1726579) + [92715cf99cbebdb6d13e223872cdd44f822a4ebe] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-51 +- net/i40e: re-program promiscuous mode on VF interface (#1733402) + [0fe1f42b5f3bc0b714f063d57cc79215459d28dc] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-50 +- bus/pci: always check IOMMU capabilities (#1711739) + [0815c39d39c0b34dd7456bde23077e1f25250dec] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-49 +- eal: fix IOVA mode selection as VA for PCI drivers (#1711739) + [11fbef3c85f71b257dc37dd9b570025ad4a24dfa] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-48 +- bus/pci: consider only usable devices for IOVA mode (#1711739) + [69f5cb4c56c59505c76d4599cb0117b9fd6bfc11] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-47 +- eal: compute IOVA mode based on PA availability (#1711739) + [d5e1d2fa507875898bae71762c84c4f1d63ed972] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-46 +- netdev-linux: Update LAG in all cases. (#1812892) + [276351180996d21a96b6539671e4eed4e636f65d] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-45 +- netdev-offload-tc: Re-fetch block ID after probing. 
(#1812892) + [83cebd3221538df693d7170c3a17ed9a381911c6] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-44 +- netdev-offload-tc: Flush rules on ingress block when init tc flow api (#1812892) + [e5d7d5ec243b68d65383ca5075d7128f13e8aebc] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-43 +- netdev-vport: Use the dst_port in tunnel netdev name (#1727599) + [f4a6fb757441ee0ba5bf808a18cd8bf7a65a9124] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-42 +- lib/tc: Fix flow dump for tunnel id equal zero (#1732305) + [765ba1d1c0898446d3c05d9c7d3e92134647787a] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-41 +- lib/tc: Support optional tunnel id (#1732305) + [42f09fe96f8664a4165261c935d0a4117f0675d1] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-40 +- tc: Set 'no_percpu' flag for compatible actions (#1780690) + [42f07f6bd81f65f52b84bb7a0011c5bb21af71ce] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-39 +- rhel: let *-ctl handle runtime directory (#1785586) + [c3763ec916aef757d113a73fb402cf89753e92a7] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-38 +- rhel: set useropts optional for ovsdb-server (#1785586) + [77bed8f0e4c0a3b7396a219d4680d585e88caf95] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-37 +- rhel: run ovn with the same user as ovs (#1785586) + [8f5f39b4afcfcfc8f29e79db138629630909352a] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-36 +- rhel: secure openvswitch useropts (#1785586) + [71154ad26f1c22aacc60ab0a1ea335b7b2a6588a] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-35 +- userspace: Improved packet drop statistics. (#1726568) + [a6b7a37be86d9fe990e4511f56b99d23d14f763d] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-34 +- netdev-dpdk: Fix sw stats perf drop. (#1790841) + [54f4571750280654fa05705b2d4657823dffbf64] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-33 +- netdev-dpdk: Detailed packet drop statistics. (#1790841) + [1e1b33541a3a832e32d7515b660f2939b251718a] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-32 +- netdev-dpdk: Reuse vhost function for dpdk ETH custom stats. (#1790841) + [e0d00f70c5154535a86295ea58f6ef726e478fc8] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-31 +- netdev-dpdk: Refactor vhost custom stats for extensibility. (#1790841) + [b084d7a5c2644ac5e6ec667c80ae9c39b3f22350] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-30 +- netdev-dpdk: Fix not reporting rx_oversize_errors in stats. (#1790841) + [26017f85c82ba01a1e884a031605095b4f64ee69] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-29 +- ovsdb replication: Provide option to configure probe interval. (#1788800) + [e8a669ead72973ced8bb15d9a18e25b323f05ab0] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-28 +- netdev-dpdk: Add coverage counter to count vhost IRQs. (#1726579) + [3c3997eb0aa9693f89a6a3083b6fa12772d522dd] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-27 +- netdev-dpdk: add support for the RTE_ETH_EVENT_INTR_RESET event. (#1719644) + [ca1a1a8e1c6ec2b44744876b26630448022b95e9] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-26 +- bridge: Allow manual notifications about interfaces' updates. 
(#1719644) + [f58b68088819d4ec8b7bd3a1821929f5fea3170d] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-25 +- Shutdown SSL connection before closing socket (#1780745) + [aa97017175536816f70d111647b5dc9bedd824ff] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-24 +- flake8: also check the ovs-check-dead-ifs script (#1751161) + [ecd3a1b407816c629c17f410f95eab868ab68257] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-23 +- ovs-check-dead-ifs: unshadow pid variable (#1751161) + [a086e7618191f0efc75746c1fe6d4481a397f2ac] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-22 +- ovs-check-dead-ifs: python3 print format (#1751161) + [d61553f744b42dc05186910be30171ed1f8425e3] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-21 +- ovs-tcpundump: exit when getting version (#1764127) + [ea9923af222ed5bf398846b553d7b7fe54e10bd6] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-20 +- ovs-tcpundump: allow multiple packet lengths (#1764125) + [ac3b7794054e2b15b22855930b23ede24b5d5835] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-19 +- jsonrpc: increase input buffer size from 512 to 4096 (#1776883) + [9c93db837390817b3bae8b2104bec5becbd946cf] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-18 +- netdev-dpdk: Track vhost tx contention. (#1740144) + [31112a95027735528554c91953de89175f94e191] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-17 +- ovsdb-server: Allow replication from older schema version servers. (#1766586) + [cb53fe2282c1c260cb7cc98c9d21e0573b304283] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-16 +- ovsdb-server: Don't drop all connections on read/write status change. (#1761572) + [5a0a77328bcab168ad04fba006158f2c2884befb] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-15 +- ofproto-dpif: Fix continuation with patch port (#1761461) + [069d4bd4378e02bd61121f32fb2bc18ac316f358] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-14 +- vswitch: ratelimit the device add log (#1737146) + [052e541d4580fe49d3461c3045755374a0726dd5] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-13 +- netdev-dpdk: Enable tx-retries-max config. (#1747531) + [734086f5d4608b7cdf03a5d0a182245354e1f6eb] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-12 +- netdev-dpdk: Add custom stat for vhost tx retries. (#1747531) + [0c238ac414e750fad80ec810ff42395df6c2e540] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-11 +- doc: Move vhost tx retry info to separate section. (#1747531) + [91d9e4d92b9efe06dccbf22f42faf1ae183a96e9] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-10 +- netdev-vport: Make ip6gre netdev type to use TC rules (#1725623) + [d3315b8035a875e9e3b425d72a97191fbcb7e065] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-9 +- tunnel: Add layer 2 IPv6 GRE encapsulation support. 
(#1725623) + [0c20e7e83ddb50dbb6e0c37f986216e3953ea12e] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-8 +- ovsdb-server: drop all connections on read/write status change (#1720947) + [0f0be40ee08c15a114029a5c0e046dc58d38fb09] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-7 +- netdev-tc-offloads: Support match on priority tags (#1725623) + [895735b3827e2afdd7c968d965e9f4fd9b0e1278] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-6 +- rhel: limit stack size to 2M. (#1720315) + [79c6209e71801b94396ce4833cff99a2c0969e30] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-5 +- Add a new OVS action check_pkt_larger (#1702564) + [c899ac57880e4446a00d83a590a5eb60fc081fdc] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-4 +- netlink linux: account for the netnsid netlink attr. (#1692812) + [ce14b518b702c2401a9a291a0afd654de5cd44a5] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-3 +- rhel: Add an example to specify custom options (#1687775) + [a7dd6b6eb5e2dfe15d9387f83b614c8661b18bdd] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-2 +- ovs-ctl: Permit to specify additional options (#1687775) + [b8a874b82e423a87965503da2384c45e84b6509a] + +* Fri Jul 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.3-1 +- Merge commit 'a4efc599e0244e43fd417b2fb38b7f120eb1ebd4' into fast-datapath-rhel-7 + [8da1428afe7a47d5fe02d396ede18d7ecfb60128] + +* Thu Jun 25 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-56.20200327gita4efc59 +- Backport "bus/pci: fix VF memory access" (#1851170) + +* Wed May 27 2020 Aaron Conole <aconole@redhat.com> - 2.11.0-55.20200327gita4efc59 +- Backport the upstreammed fixes for HWOL (#1812892) + +* Mon May 11 2020 Maxime Coquelin <maxime.coquelin@redhat.com> - 2.11.0-54.20200327gita4efc59 +- Backport fixes for CVE-2020-10722 & CVE-2020-10723 (#1831391 & #1831394) + +* Tue Apr 21 2020 Aaron Conole <aconole@redhat.com> - 2.11.0-53.20200327gita4efc59 +- Backport HWOL fixes for ingress qdisc (#1812892) + +* Fri Apr 17 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-52.20200327gita4efc59 +- Update to DPDK 18.11.7 (#1822653) + +* Thu Apr 09 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-51.20200327gita4efc59 +- Rebase to last branch-2.11 commit and DPDK 18.11.6 (#1822653) + +* Wed Mar 11 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-50 +- Backport "vhost: fix packed virtqueue ready condition" (#1793068) + +* Tue Mar 10 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-49 +- Revert Backport "ovs-tc: support OvS internal port offload" and deps (#1737982) +- Revert Backport "netdev-tc-offloads: Use correct hook qdisc at init tc flow" (#1737982) + +* Tue Feb 25 2020 Maxime Coquelin <maxime.coquelin@redhat.com> - 2.11.0-48 +- Backport "vhost: fix vring memory partially mapped" (#1798996) +- Backport "vhost: protect log address translation in IOTLB update" (#1798996) + +* Thu Jan 16 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-47 +- Backport "netdev-vport: Use the dst_port in tunnel netdev name" (#1727599) + +* Thu Jan 16 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-46 +- Backport "dpif-netlink: Allow offloading of flows with dl_type 0x1234." 
(#1722337) + +* Thu Jan 16 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-45 +- Backport "lib/tc: Support optional tunnel id" (#1732305) + Backport "lib/tc: Fix flow dump for tunnel id equal zero" (#1732305) + +* Wed Jan 15 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-44 +- Backport "tc: implement support for action flags" (#1780690) + +* Wed Jan 15 2020 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-43 +- Backport "rhel: secure openvswitch useropts" (#1785586) +- Backport "rhel: run ovn with the same user as ovs" (#1785586) +- Backport "rhel: set useropts optional for ovsdb-server" (#1785586) +- Backport "rhel: let *-ctl handle runtime directory" (#1785586) + +* Tue Jan 14 2020 Eelco Chaudron <echaudro@redhat.com> - 2.11.0-42 +- Backport "userspace: Improved packet drop statistics" (#1726568) + +* Tue Jan 14 2020 Kevin Traynor <ktraynor@redhat.com> - 2.11.0-41 +- Detailed packet drop statistics and related patches (#1790841) +- Backport "netdev-dpdk: Fix not reporting rx_oversize_errors in stats." (#1790841) +- Backport "netdev-dpdk: Refactor vhost custom stats for extensibility." (#1790841) +- Backport "netdev-dpdk: Reuse vhost function for dpdk ETH custom stats." (#1790841) +- Backport "netdev-dpdk: Detailed packet drop statistics." (#1790841) +- Backport "netdev-dpdk: Fix sw stats perf drop." (#1790841) + +* Fri Jan 10 2020 Adrián Moreno <amorenoz@redhat.com> - 2.11.0-40 +- Backport "vhost: fix virtqueue not accessible" (#1792399) +- Backport "vhost: prevent zero copy mode if IOMMU is on" (#1792399) +- Backport "vhost: convert buffer addresses to GPA for logging" (#1792399) +- Backport "vhost: translate incoming log address to GPA" (#1792399) +- Backport "vhost: fix vring address handling during live migration" (#1792399) +- Backport "vhost: add external message handling to the API" (#1792399) + +* Wed Jan 08 2020 Numan Siddique <nusiddiq@redhat.com> - 2.11.0-39 +- Backport "ovsdb replication: Provide option to configure probe interval" (#1788800) + +* Tue Jan 07 2020 David Marchand <david.marchand@redhat.com> - 2.11.0-38 +- Backport DPDK interrupt fixes for qede (#1738789) + +* Mon Dec 23 2019 Eelco Chaudron <echaudro@redhat.com> - 2.11.0-37 +- Backport "vhost: add device op when notification to guest is sent" (#1726579) +- Backport "netdev-dpdk: Add coverage counter to count vhost IRQs" (#1726579) + +* Mon Dec 23 2019 Eelco Chaudron <echaudro@redhat.com> - 2.11.0-36 +- Backport "net/i40e: downgrade error log" (#1719644) +- Backport "net/i40e: re-program promiscuous mode on VF interface" (#1733402) +- Backport "bridge: Allow manual notifications about interfaces' updates" (#1719644) +- Backport "netdev-dpdk: add support for the RTE_ETH_EVENT_INTR_RESET" (#1719644) + +* Tue Dec 10 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-35 +- Fix librte_pmd_mlx{4,5}_glue.so error in Execshield part of RPMDiff + by backporting the DPDK flags from dpdk spec file. 
+ +* Fri Dec 06 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-34 +- Backport "Shutdown SSL connection before closing socket" (#1780745) + +* Thu Dec 05 2019 Aaron Conole <aconole@redhat.com> - 2.11.0-33 +- Backport "ovs-check-dead-ifs: python3 print format" (#1751161) +- Backport "ovs-check-dead-ifs: unshadow pid variable" (#1751161) +- Backport "flake8: also check the ovs-check-dead-ifs script" (#1751161) + +* Thu Dec 05 2019 Aaron Conole <aconole@redhat.com> - 2.11.0-32 +- Backport "ovs-tcpundump: exit when getting version" (#1764127) + +* Thu Dec 05 2019 Aaron Conole <aconole@redhat.com> - 2.11.0-31 +- Backport "ovs-tcpundump: allow multiple packet lengths" (#1764125) + +* Tue Dec 03 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-30 +- Rebase internal DPDK to 18.11.5 (#1760246) (CVE-2019-14818) + +* Tue Nov 26 2019 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.11.0-29 +- Backport "jsonrpc: increase input buffer size from 512 to 4096" (#1776883) + +* Tue Nov 12 2019 David Marchand <david.marchand@redhat.com> - 2.11.0-28 +- Backport "netdev-dpdk: Track vhost tx contention." (#1740144) + +* Tue Oct 29 2019 Numan Siddique <nusiddiq@redhat.com> - 2.11.0-27 +- Backport "ovsdb-server: Allow replication from older schema version servers" (#1766586) + +* Mon Oct 14 2019 Numan Siddique <nusiddiq@redhat.com> - 2.11.0-26 +- Backport "ovsdb-server: Don't drop all connections on read/write status change" (#1761572) + +* Mon Oct 14 2019 Dumitru Ceara <dceara@redhat.com> - 2.11.0-25 +- Backport "ofproto-dpif: Fix continuation with patch port" (#1761461) + +* Mon Oct 07 2019 Aaron Conole <aconole@redhat.com> - 2.11.0-24 +- Backport "vswitch: ratelimit the device add log" (#1737146) + +* Fri Sep 13 2019 Kevin Traynor <ktraynor@redhat.com> - 2.11.0-23 +- Backport "Add custom stat for vhost tx retries." (#1747531) +- Backport "Enable tx-retries-max config." (#1747531) + +* Tue Sep 03 2019 Flavio Leitner <fbl@redhat.com> - 2.11.0-22 +- tnl-neigh: Use outgoing ofproto version (#1685642) + +* Tue Aug 27 2019 Flavio Leitner <fbl@redhat.com> - 2.11.0-21 +- Bump release + +* Tue Aug 06 2019 David Marchand <david.marchand@redhat.com> - 2.11.0-20 +- Renumbered dpdk patches +- Backport IOVA fixes (#1711739) + +* Tue Jul 23 2019 Numan Siddique <nusiddiq@redhat.com> - 2.11.0-19 +- Backport "ovsdb-server: drop all connections on read/write status change" (#1720947) + +* Tue Jul 16 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-18 +- Increase CONFIG_RTE_MAX_ETHPORTS to 128 (#1730421) + +* Tue Jul 16 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-17 +- Backport "tunnel: Add layer 2 IPv6 GRE encapsulation support." and + "netdev-vport: Make ip6gre netdev type to use TC rules" (#1725623) + +* Fri Jul 12 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-16 +- Rebase internal DPDK to 18.11.2 (#1713698) + +* Tue Jul 09 2019 David Marchand <david.marchand@redhat.com> - 2.11.0-15 +- Backport "net/i40e: fix dropped packets statistics name" (#1728610) + +* Tue Jul 02 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-14 +- Backport "netdev-tc-offloads: Use correct hook qdisc at init tc flow" (#1721219) + +* Fri Jun 21 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-13 +- Backport "netdev-tc-offloads: Support match on priority tags" (#1722249) + +* Thu Jun 13 2019 Maxime Coquelin <maxime.coquelin@redhat.com> - 2.11.0-12 +- Backport Vhost performance regression fixes (#1672538) + +* Thu Jun 13 2019 Flavio Leitner <fbl@redhat.com> - 2.11.0-11 +- Backport "rhel: limit stack size to 2M." 
(#1720315) + +* Thu May 16 2019 Pablo Cascón <pablo.cascon@redhat.com> - 2.11.0-10 +- Backport "ovs-tc: support OvS internal port offload" and deps (#1702334) + +* Wed Apr 24 2019 Numan Siddique <nusiddiq@redhat.com> - 2.11.0-9 +- Backport "[OVN] Fragmentation support - check_pkt_larger action" (#1702564) + +* Thu Apr 11 2019 Kevin Traynor <ktraynor@redhat.com> - 2.11.0-8 +- Backport "net/qede: support IOVA VA mode" (#1684605) + +* Wed Apr 10 2019 David Marchand <david.marchand@redhat.com> - 2.11.0-7 +- Backport cpu affinity fixes (#1687320) + +* Tue Apr 09 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-6 +- Add missing dependencies for ovs-tcpdump (#1697978) + +* Tue Mar 26 2019 Flavio Leitner <fbl@redhat.com> - 2.11.0-5 +- fixed netlink msg corruption when updating netdev. (#1692812) + +* Tue Mar 12 2019 Davide Caratti <dcaratti@redhat.com> - 2.11.0-4 +- Backport "net/bnxt: support IOVA VA mode" (#1645523) + +* Tue Mar 12 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-3 +- Backport "ovs-ctl: Permit to specify additional options" (#1687775) +- Remove useless -fPIC from DPDK + +* Fri Mar 01 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-2 +- Backport "rhel: Use PIDFile on forking systemd service files" (#1684477) + +* Thu Feb 28 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-1 +- Update to official 2.11 release + +* Thu Jan 31 2019 Open vSwitch Bot <null@redhat.com> - 2.11.0-0.20190129gitd3a10db +- Snapshot of branch-2.11 d3a10db4fd38 + +* Sun Jan 27 2019 Open vSwitch Bot <null@redhat.com> - 2.11.0-0.20190126gitd4ff5b2 +- Snapshot of branch-2.11 d4ff5b2be7fc + +* Mon Jan 14 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.11.0-0.20190114gitadb3f0b +- Update to a snapshot of OVS 2.11 from master + +* Mon Jan 7 2019 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-42 +- Backport "OVN: add static IP support to IPAM" (#1664028) + +* Thu Jan 03 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-41 +- Backport some patches to improve offload indications (#1655990) + +* Wed Jan 02 2019 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-40 +- Add "Requires: openvswitch = %%{version}-%%{release}" to python-openvswitch2.10 (#1662944) + +* Wed Jan 2 2019 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-39 +- Backport "OVN: add mac address only support to IPAM/MACAM" (#1662905) + +* Thu Dec 20 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-38 +- Backport "ovn-controller: Inject GARPs to logical switch pipeline to update neighbors" (#1643902) + +* Tue Dec 18 2018 David Marchand <david.marchand@redhat.com> - 2.10.0-37 +- Backport 'ovs-ctl: fix system-id.conf owner' (#1659391) +- Do not check /var/log/openvswitch owner/group (#1659391) + +* Tue Dec 18 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-36 +- Backport "ovn: Fix the invalid eth.dst and ip6.dst set by nd_ns action for certain cases." 
(#1656018)
+
+* Mon Dec 10 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-35
+- Backport "dpif-netdev: Add vlan to mask for flow_put operation" (#1649516)
+
+* Tue Nov 27 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-34
+- Backport "ovn: Avoid tunneling for VLAN packets redirected to a gateway chassis" (#1561880)
+
+* Fri Nov 23 2018 Eelco Chaudron <echaudro@redhat.com> - 2.10.0-33
+- Backport "mem: fix memory initialization time" (#1647498)
+
+* Thu Nov 22 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-32
+- Backport "tests: Use the default key length when generating RSA keys"
+
+* Wed Nov 14 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-31
+- Backport "net/qede: fix crash when configure fails" (#1648183)
+
+* Tue Nov 13 2018 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-30
+- Backport 'pinctrl: Fix dp_packet structure leak' and 'pinctrl: Fix crash on
+  buffered packets hmap double remove'. Moreover align 'ovn -- 3 HVs, 3 LS, 3
+  lports/LS, 1 LR' test to upstream one (#1649008)
+
+* Tue Nov 13 2018 Eelco Chaudron <echaudro@redhat.com> - 2.10.0-29
+- Backport "netdev-dpdk: Bring link down when NETDEV_UP is not set" (#1645288)
+
+* Fri Nov 09 2018 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-28
+- OVN: configure L2 address according to the used IP address (#1648272)
+
+* Thu Nov 08 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-27
+- Backport "bond: Honor updelay and downdelay when LACP is in use" (#1646923)
+
+* Thu Nov 08 2018 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-26
+- OVN: introduce mac_prefix support to IPAM (#1647750)
+
+* Tue Nov 06 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-25
+- Backport "ofproto-dpif-xlate: Avoid deadlock on multicast snooping recursion" (#1643065)
+
+* Tue Nov 06 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-24
+- Re-enable "make check"
+
+* Fri Nov 02 2018 Kevin Traynor <ktraynor@redhat.com> - 2.10.0-23
+- Update to DPDK 17.11.4 (#1566069)
+
+* Thu Oct 25 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-22
+- Ship statically linked OVS binaries (#1643478)
+
+* Tue Oct 23 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-21
+- Backport connmgr: Fix vswitchd abort when a port is added and the controller is down (#1637926)
+
+* Mon Oct 22 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-20
+- Backport "ovn: Add DHCP support for option 252" (#1641740)
+
+* Wed Oct 17 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-19
+- Backport "net/i40e: fix VLAN offload setting issue" (#1637893)
+
+* Wed Oct 17 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-18
+- Backport "Python: Make Row's __getattr__ less error prone" (#1639963)
+
+* Fri Oct 12 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-17
+- OVN: ovn-ctl: Fix the wrong pidfile argument passed to ovsdb-servers (#1636714)
+
+* Fri Oct 12 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-16
+- OVN: Support processing DHCPv6 information request message type (#1636874)
+
+* Fri Oct 12 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-15
+- OVN: Fix IPv6 DAD failure for container ports (#1616129)
+
+* Thu Oct 11 2018 Numan Siddique <nusiddiq@redhat.com> - 2.10.0-14
+- OVN: Fix the issue in IPv6 Neigh Solicitation responder for router IPs (#1567735)
+
+* Tue Oct 09 2018 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-13
+- OVN: add buffering support for ip packets (#1637466)
+
+* Mon Oct 08 2018 Matteo Croce <mcroce@redhat.com> - 2.10.0-12
+- Fix null pointer (#1634015)
+
+* Tue Oct 02 2018 Lorenzo
Bianconi <lorenzo.bianconi@redhat.com> - 2.10.0-11 +- OVN: add CT_LB action to ovn-trace (#1635344) + +* Mon Oct 01 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-10 +- Backport NFP PMD's non-root related commits for > 1TB of RAM (#1634820): + - net/nfp: support IOVA VA mode + - bus/pci: forbid IOVA mode if IOMMU address width too small + - net/nfp: check hugepages IOVAs based on DMA mask + - mem: use address hint for mapping hugepages + - bus/pci: use IOVAs check when setting IOVA mode + - mem: add function for checking memsegs IOVAs addresses + - mem: fix max DMA maskbit size + +* Thu Sep 27 2018 Matteo Croce <mcroce@redhat.com> - 2.10.0-9 +- Backport "Remove support for multiple queues per port" (#1634015) + +* Wed Sep 26 2018 Matteo Croce <mcroce@redhat.com> - 2.10.0-8 +- Backport EMC reorder fix (#1565205) + +* Wed Sep 26 2018 Matteo Croce <mcroce@redhat.com> - 2.10.0-7 +- Backport per-port socket netlink creation with EPOLLEXCLUSIVE (#1634015) + +* Fri Sep 21 2018 Kevin Traynor <ktraynor@redhat.com> - 2.10.0-6 +- Backport roundrobin rxq to pmd assignment (#1631797) + +* Fri Sep 14 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-5 +- Backport "ovs-save: Don't always include the default flow during restore" (#1628905) + +* Thu Sep 13 2018 Flavio Leitner <fbl@redhat.com> - 2.10.0-4 +- applied Fix translation of groups with no buckets (#1626488) + +* Thu Sep 13 2018 Flavio Leitner <fbl@redhat.com> - 2.10.0-3 +- Removed provides and obsoletes for openvswitch-dpdk (#1628603) + +* Tue Sep 11 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.10.0-2 +- Backported "net/mlx{4,5}: avoid stripping the glue library" (#1627700) + +* Tue Aug 21 2018 Flavio Leitner <fbl@redhat.com> - 2.10-1 +- Updated with 2.10.0 official tarball (#1618551) + +* Fri Aug 17 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 +- Sync'ed with fd-next (4452afaa58) +- vhost: flush IOTLB cache on new mem table handling (#1609643) +- OVN: introduce ovs-appctl command to monitor HVs sb (#1593804) + +* Thu Aug 16 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 +- Snapshot of branch-2.10 6bced903bb50 + +* Fri Aug 10 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 +- Snapshot of branch-2.10 58a7ce60b9f7 + +* Wed Aug 08 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 +- Snapshot of branch-2.10 faf64fb8861f + +* Tue Aug 07 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 +- Snapshot of branch master 7a78d1c1ad73 + +* Tue Jul 31 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 +- Sync'ed spec file with fd-next-57 (shared linking). + (DPDK patches not included) +- Fixed package dependencies (#1610603) + +* Fri Jul 27 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 +- Snapshot of branch master b1ca64f020f7 + +* Fri Jul 27 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 +- Replace macro %%{name} with 'openvswitch'. + +* Tue Jul 24 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 +- Snapshot of branch master 1ac690899592 + +* Tue Jul 24 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 +- Versioned conflict to be less than 2.10. + +* Thu Jul 19 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0 +- Snapshot of branch master 3c921cc2b6b7 + +* Wed Jul 18 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0 +- Fixed unbound requires and buildrequires. 
+
+* Tue Jul 10 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0
+- Snapshot of branch master 93c0ef12039c
+
+* Tue Jul 03 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0
+- Snapshot of branch master 79d0dfa4e99a
+
+* Wed Jun 27 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0
+- Snapshot of branch master e46148133067
+
+* Wed Jun 27 2018 Open vSwitch Bot <null@redhat.com> - 2.10-0
+- Snapshot of branch master 61677bf976e9
+
+* Tue Jun 26 2018 Flavio Leitner <fbl@redhat.com> - 2.10-0
+- snapshot of branch master
+
+* Mon Jun 11 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-47
+- Backport "net/mlx5: fix memory region cache lookup" (#1581230)
+- Backport "net/mlx5: fix memory region boundary checks" (#1581230)
+
+* Mon Jun 11 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-46
+- Backport "net/qede: fix memory alloc for multiple port reconfig" (#1589866)
+
+* Thu Jun 07 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-45
+- Backport "net/qede: fix unicast filter routine return code" (#1578590)
+
+* Thu Jun 07 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-44
+- Backport "net/qede: fix L2-handles used for RSS hash update" (#1578981)
+
+* Tue May 29 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-43
+- Backport "net/nfp: fix lock file usage" (#1583670)
+
+* Mon May 28 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-42
+- Backport "net/nfp: configure default RSS reta table" (#1583161)
+
+* Mon May 28 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-41
+- Backport "netdev-dpdk: don't enable scatter for jumbo RX support for nfp" (#1578324)
+
+* Mon May 28 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-40
+- Backport "ovn pacemaker: Fix promotion issue when the master node is reset" (#1579025)
+
+* Thu May 24 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-39
+- Backport spec file modifications from "rhel: Use openvswitch user/group for
+  the log directory"
+
+* Wed May 23 2018 Maxime Coquelin <maxime.coquelin@redhat.com> - 2.9.0-38
+- Backport "vhost: improve dirty pages logging performance" (#1552465)
+
+* Wed May 16 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-37
+- Backport "ovn: Set proper Neighbour Adv flag when replying for NS request for
+  router IP" (#1567735)
+
+* Mon May 14 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-36
+- Enable QEDE PMDs (only on x86_64) (#1578003)
+
+* Thu May 10 2018 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> - 2.9.0-35
+- ovn-nbctl: Show gw chassis in decreasing prio order (#1576725)
+
+* Wed May 09 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-34
+- Fix hugetlbfs group when DPDK is enabled
+
+* Wed May 09 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-33
+- Backport "eal: abstract away the auxiliary vector" (#1560728)
+- Re-enable DPDK on ppc64le
+
+* Wed May 09 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-32
+- Require the selinux policy module (#1555440)
+
+* Tue May 08 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-31
+- Backport fix QEDE PMD (#1494616)
+
+* Tue May 08 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-30
+- Backport "net/nfp: fix mbufs releasing when stop or close" (#1575067)
+
+* Sun May 06 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-29
+- Backport net/mlx4: fix broadcast Rx (#1568908)
+
+* Fri May 04 2018 Kevin Traynor <ktraynor@redhat.com> - 2.9.0-28
+- Backport mempool use after free fix and debug (#1575016)
+
+* Fri May 04 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-27
+- Fix the email address in the changelog.
+
+* Wed May 02 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-26
+- Backport fix for missing user during install/upgrade (#1559374)
+
+* Mon Apr 30 2018 Jakub Sitnicki <jkbs@redhat.com> - 2.9.0-25
+- Backport fix for Unicode encoding in Python IDL (#1547065)
+
+* Thu Apr 26 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-24
+- Backport the Cisco enic patches
+
+* Thu Apr 26 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-23
+- Backport a fix for "Offload of Fragment Matching in OvS Userspace" (#1559111)
+
+* Thu Apr 26 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-22
+- Backport "ovn-controller: Handle Port_Binding's "requested-chassis" option" (#1559222)
+
+* Thu Apr 26 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-21
+- Backport "python: avoid useless JSON conversion to enhance performance" (#1551016)
+
+* Thu Apr 26 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-20
+- Backport "ovn: Set router lifetime value for IPv6 periodic RA" (#1567735)
+- Remove useless libpcap-devel dependency
+
+* Mon Apr 23 2018 Kevin Traynor <ktraynor@redhat.com> - 2.9.0-19
+- Backport DPDK CVE-2018-1059 (#1544298)
+
+* Fri Apr 20 2018 Davide Caratti <dcaratti@redhat.com> - 2.9.0-18
+- Backport fix for PMD segfault when BNXT receives tunneled traffic (#1567634)
+
+* Mon Apr 16 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-17
+- Backport patches to make NFP detect the correct firmware (#1566712)
+- Backport "rhel: Fix literal dollar sign usage in systemd service files"
+
+* Fri Mar 30 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-16
+- Backport "rhel: don't drop capabilities when running as root"
+- Change owner of /etc/openvswitch during upgrade
+
+* Tue Mar 27 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-14
+- Disable DPDK on ppc64le
+
+* Sun Mar 25 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-13
+- Disable DPDK on aarch64
+
+* Thu Mar 22 2018 Flavio Leitner <fbl@redhat.com> - 2.9.0-12
+- fixes i40e link status timeout through direct register access (#1559612)
+
+* Thu Mar 22 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-11
+- Enable BNXT, MLX4, MLX5 and NFP (aligned from FDB)
+
+* Thu Mar 22 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-10
+- Backport "Offload of Fragment Matching in OvS Userspace" (#1559111)
+
+* Thu Mar 15 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-9
+- Avoid unpacking openvswitch twice and overwriting all the patched files
+  Fixes 2.9.0-4
+
+* Thu Mar 08 2018 Eric Garver <egarver@redhat.com> - 2.9.0-8
+- Backport "ofproto-dpif-xlate: translate action_set in clone action" (#1544892)
+
+* Thu Mar 08 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-7
+- Backport "ovn: Calculate UDP checksum for DNS over IPv6" (#1553023)
+
+* Tue Mar 06 2018 Aaron Conole <aconole@redhat.com> - 2.9.0-6
+- Require the latest rhel selinux policy (#1549673)
+
+* Fri Mar 02 2018 Matteo Croce <mcroce@redhat.com> - 2.9.0-5
+- Backport vhost patches (#1541881)
+
+* Fri Mar 02 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-4
+- Don't require python-sphinx directly, but build it, since python-sphinx is in
+  the optional repository that is not available on RHEV and the TPS test fails.
+
+* Tue Feb 20 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-3
+- Don't verify the user and group of /etc/openvswitch and /etc/sysconfig/openvswitch
+  This is needed since we cannot change the user and group if you upgrade from
+  an old version that still uses root:root.
+
+* Tue Feb 20 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.9.0-1
+- Update to OVS 2.9.0 + DPDK 17.11 (#1475436)
+- Backport of ofproto-dpif: Delete system tunnel interface when remove ovs bridge (#1505776)
+- Backport DPDK patches from FDB (vhost user async fix and enic fixes)
+- Backport 94cd8383e297 and 951d79e638ec to fix permissions (#1489465)
+- Use a static configuration file for DPDK
+
+* Fri Jan 12 2018 Timothy Redaelli <tredaelli@redhat.com> - 2.7.3-3.git20180112
+- Rebase to latest OVS branch-2.7 fixes + DPDK 16.11.4 (#1533872)
+
+* Wed Oct 18 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.3-2.git20171010
+- Remove ovs-test and ovs-vlan-test from openvswitch-test package
+- Add an option to enable openvswitch-ovn-docker package (disabled by default)
+
+* Tue Oct 10 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.3-1.git20171010
+- Update to OVS 2.7.3 + branch-2.7 bugfixes (#1502742)
+
+* Mon Sep 18 2017 Kevin Traynor <ktraynor@redhat.com> - 2.7.2-10.git20170914
+- Backport of fix for i40e flow control get (#1491791)
+
+* Thu Sep 14 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.2-9.git20170914
+- Rebase to latest OVS branch fixes + DPDK 16.11.3
+
+* Wed Sep 06 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.2-8.git20170719
+- Backport of enic driver crash fix to dpdk-16.11 (#1489010)
+
+* Tue Aug 22 2017 Aaron Conole <aconole@redhat.com> - 2.7.2-7.git20170719
+- Re-enable Cisco enic PMD (#1482675)
+
+* Tue Aug 22 2017 Aaron Conole <aconole@redhat.com> - 2.7.2-6.git20170719
+- Update based on multi-arch
+
+* Tue Aug 22 2017 Aaron Conole <aconole@redhat.com> - 2.7.2-5.git20170719
+- Disable unsupported PMDs (#1482675)
+- software and hardware PMDs audited by the team
+
+* Thu Aug 03 2017 John W. Linville <linville@redhat.com> - 2.7.2-4.git20170719
+- Backport mmap fix for memory initialization on ppc64le to dpdk-16.11
+
+* Thu Aug 03 2017 John W. Linville <linville@redhat.com> - 2.7.2-3.git20170719
+- Backport support for vfio-pci based PMD in ppc64le to dpdk-16.11
+
+* Thu Aug 03 2017 John W. Linville <linville@redhat.com> - 2.7.2-2.git20170719
+- Backport support for Intel XL710 (i40e) pmd in ppc64le to dpdk-16.11
+
+* Wed Jul 19 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.2-1.git20170719
+- Update to OVS 2.7.2 + branch-2.7 bugfixes (#1472854)
+- Add a symlink of the OCF script in the OCF resources folder (#1472729)
+
+* Mon Jul 10 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.7.1-1.git20170710
+- Align to FDB openvswitch-2.7.1-1.git20170710.el7fdb (#1459286)
+
+* Wed Jun 07 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-20.git20161206
+- backport "mcast-snooping: Avoid segfault for vswitchd" (#1456356)
+- backport "mcast-snooping: Flush ports mdb when VLAN cfg changed." (#1456358)
+
+* Sun May 21 2017 Lance Richardson <lrichard@redhat.com> - 2.6.1-19.git20161206
+- backport patch to not automatically restart ovn svcs after upgrade (#1438901)
+
+* Tue May 09 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-18.git20161206
+- rconn: Avoid abort for ill-behaved remote (#1449109)
+
+* Fri May 05 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-17.git20161206
+- Fix race in "PMD - change numa node" test (#1447714)
+- Report only un-deleted groups in group stats replies.
(#1447724) +- Workaround some races in "ofproto - asynchronous message control" tests (#1448536) + +* Mon Apr 10 2017 Eric Garver <egarver@redhat.com> - 2.6.1-16.git20161206 +- Fix an issue using set_field action on nw_ecn (#1410715) + +* Fri Mar 31 2017 Kevin Traynor <ktraynor@redhat.com> - 2.6.1-15.git20161206 +- backport patch to fix uni-dir vhost perf drop (#1414919) + +* Wed Mar 29 2017 Lance Richardson <lrichard@redhat.com> - 2.6.1-14.git20161206 +- backport patch to correct port number in firewalld service file (#1390938) + +* Fri Mar 10 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-13.git20161206 +- backport patch to enable/disable libcap-ng support (--with libcapng) + +* Thu Mar 09 2017 Aaron Conole <aconole@redhat.com> - 2.6.1-12.git20161206 +- Fix an MTU issue with ovs mirror ports (#1426342) + +* Wed Mar 08 2017 Lance Richardson <lrichard@redhat.com> - 2.6.1-11.git20161206 +- update spec file to install firewalld service files (#1390938) + +* Thu Feb 16 2017 Aaron Conole <aconole@redhat.com> - 2.6.1-10.git20161206 +- vhostuser client mode support for ifup/ifdown (#1418957) + +* Thu Feb 16 2017 Lance Richardson <lrichard@redhat.com> - 2.6.1-9.git20161206 +- OVN-DHCP is not sending DHCP responses after a MAC change in north db (#1418261) + +* Thu Feb 16 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-8.git20161206 +- systemd service starts too fast (#1422227) + +* Fri Feb 10 2017 Lance Richardson <lrichard@redhat.com> - 2.6.1-7.git20161206 +- iptables should be easily configurable for OVN hosts and OVN central server (#1390938) + +* Thu Feb 09 2017 Aaron Conole <aconole@redhat.com> - 2.6.1-6.git20161206 +- ovn: IPAM has no reply to DHCP request for renewal (#1415449) + +* Tue Feb 07 2017 Timothy Redaelli <tredaelli@redhat.com> - 2.6.1-5.git20161206 +- ovn-controller: Provide the option to set Encap.options:csum (#1418742) + +* Mon Feb 06 2017 Flavio Leitner <fbl@redhat.com> 2.5.0-23.git20160727 +- fixed broken service after a package upgrade (#1403958) + +* Wed Dec 21 2016 Lance Richardson <lrichard@redhat.com> 2.6.1-3.git20161206 +- ovsdb-idlc: Initialize nonnull string columns for inserted rows. 
(#1405094) + +* Fri Dec 09 2016 Lance Richardson <lrichard@redhat.com> 2.6.1-2.git20161206 +- OVN: Support IPAM with externally specified MAC (#1368043) + +* Tue Dec 06 2016 Kevin Traynor <ktraynor@redhat.com> 2.6.1-1.git20161206 +- Update to OVS 2.6.1 + branch-2.6 bugfixes (#1335865) +- Update to use DPDK 16.11 (#1335865) +- Enable OVN + +* Tue Nov 22 2016 Flavio Leitner <fbl@redhat.com> 2.5.0-22.git20160727 +- ifnotifier: do not wake up when there is no db connection (#1397504) + +* Tue Nov 22 2016 Flavio Leitner <fbl@redhat.com> 2.5.0-21.git20160727 +- Use instant sending instead of queue (#1397481) + +* Mon Nov 21 2016 Flavio Leitner <fbl@redhat.com> 2.5.0-20.git20160727 +- dpdk vhost: workaround stale vring base (#1376217) + +* Thu Oct 20 2016 Aaron Conole <aconole@redhat.com> - 2.5.0-19.git20160727 +- Applied tnl fix (#1346232) + +* Tue Oct 18 2016 Aaron Conole <aconole@redhat.com> - 2.5.0-18.git20160727 +- Applied the systemd backports + +* Tue Oct 18 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-17.git20160727 +- Fixed OVS to not require SSSE3 if DPDK is not used (#1378501) + +* Tue Oct 18 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-16.git20160727 +- Fixed a typo (#1385096) + +* Tue Oct 18 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-15.git20160727 +- Do not restart the service after a package upgrade (#1385096) + +* Mon Sep 26 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-14.git20160727 +- Permit running just the kernel datapath tests (#1375660) + +* Wed Sep 14 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-13.git20160727 +- Obsolete openvswitch-dpdk < 2.6.0 to provide migration path +- Add spec option to run kernel datapath tests (#1375660) + +* Fri Sep 09 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-12.git20160727 +- Backport ovs-tcpdump support (#1335560) +- Add ovs-pcap, ovs-tcpdump and ovs-tcpundump to -test package + +* Thu Sep 08 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-11.git20160727 +- Add openvswitch-dpdk provide for testing and depending on dpdk-enablement +- Disable bnx2x driver, it's not stable +- Build dpdk with -Wno-error to permit for newer compilers +- Drop subpkgs conditional from spec, its not useful anymore + +* Fri Aug 26 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-10.git20160727 +- Fix adding ukeys for same flow by different pmds (#1364898) + +* Thu Jul 28 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-9.git20160727 +- Fixed ifup-ovs to support DPDK Bond (#1360426) + +* Thu Jul 28 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-8.git20160727 +- Fixed ifup-ovs to delete the ports first (#1359890) + +* Wed Jul 27 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-7.git20160727 +- pull bugfixes from upstream 2.5 branch (#1360431) + +* Tue Jul 26 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-6.git20160628 +- Removed redundant provides for openvswitch +- Added epoch to the provides for -static package + +* Thu Jul 21 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-5.git20160628 +- Renamed to openvswitch (dpdk enabled) +- Enabled sub-packages +- Removed conflicts to openvswitch +- Increased epoch to give this package preference over stable + +* Tue Jun 28 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-4.git20160628 +- pull bugfixes from upstream 2.5 branch (#1346313) + +* Wed Apr 27 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-4 +- Enable DPDK bnx2x driver (#1330589) +- Add README.DPDK-PMDS document listing drivers included in this package + +* Thu Mar 17 2016 Flavio Leitner <fbl@redhat.com> - 2.5.0-3 +- Run testsuite by default on x86 
arches (#1318786)
+  (this syncs the spec with the non-dpdk version, though the testsuite
+  was already enabled here)
+
+* Thu Mar 17 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-2
+- eliminate debuginfo-artifacts (#1281913)
+
+* Thu Mar 17 2016 Panu Matilainen <pmatilai@redhat.com> - 2.5.0-1
+- Update OVS to 2.5.0 and bundled DPDK to 2.2.0 (#1317889)
+
+* Mon Nov 23 2015 Panu Matilainen <pmatilai@redhat.com>
+- Provide openvswitch ver-rel (#1281894)
+
+* Thu Aug 13 2015 Flavio Leitner <fbl@redhat.com>
+- ExclusiveArch to x86_64 (dpdk)
+- Provides bundled(dpdk)
+- Re-enable testsuite
+
+* Fri Aug 07 2015 Panu Matilainen <pmatilai@redhat.com>
+- Enable building from pre-release snapshots, update to pre 2.4 version
+- Bundle a minimal, private build of DPDK 2.0 and link statically
+- Rename package to openvswitch-dpdk, conflict with regular openvswitch
+- Disable all sub-packages
+
+* Wed Jan 12 2011 Ralf Spenneberg <ralf@os-s.net>
+- First build on F14