diff --git a/SOURCES/openvswitch-3.2.0.patch b/SOURCES/openvswitch-3.2.0.patch index c88769e..67e6ee4 100644 --- a/SOURCES/openvswitch-3.2.0.patch +++ b/SOURCES/openvswitch-3.2.0.patch @@ -28,7 +28,7 @@ index 48931fa085..d8a9722809 100644 memory: 4G diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml -index 47d239f108..b50c42de6f 100644 +index 47d239f108..8d4815b362 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -8,16 +8,16 @@ jobs: @@ -118,7 +118,22 @@ index 47d239f108..b50c42de6f 100644 with: path: dpdk-dir key: ${{ needs.build-dpdk.outputs.dpdk_key }} -@@ -200,9 +206,9 @@ jobs: +@@ -191,6 +197,14 @@ jobs: + if: matrix.m32 != '' + run: sudo apt install -y gcc-multilib + ++ - name: Reduce ASLR entropy ++ if: matrix.asan != '' || matrix.ubsan != '' ++ # Asan in llvm 14 provided in ubuntu-22.04 is incompatible with ++ # high-entropy ASLR configured in much newer kernels that GitHub ++ # runners are using leading to random crashes: ++ # https://github.com/actions/runner-images/issues/9491 ++ run: sudo sysctl -w vm.mmap_rnd_bits=28 ++ + - name: prepare + run: ./.ci/linux-prepare.sh + +@@ -200,9 +214,9 @@ jobs: - name: copy logs on failure if: failure() || cancelled() run: | @@ -130,7 +145,7 @@ index 47d239f108..b50c42de6f 100644 # So, we're just archiving everything here to avoid any issues. 
mkdir logs cp config.log ./logs/ -@@ -211,7 +217,7 @@ jobs: +@@ -211,7 +225,7 @@ jobs: - name: upload logs on failure if: failure() || cancelled() @@ -139,7 +154,7 @@ index 47d239f108..b50c42de6f 100644 with: name: logs-linux-${{ join(matrix.*, '-') }} path: logs.tgz -@@ -230,13 +236,13 @@ jobs: +@@ -230,13 +244,13 @@ jobs: steps: - name: checkout @@ -155,7 +170,7 @@ index 47d239f108..b50c42de6f 100644 with: python-version: '3.9' - name: install dependencies -@@ -247,7 +253,7 @@ jobs: +@@ -247,7 +261,7 @@ jobs: run: ./.ci/osx-build.sh - name: upload logs on failure if: failure() @@ -164,7 +179,7 @@ index 47d239f108..b50c42de6f 100644 with: name: logs-osx-clang---disable-ssl path: config.log -@@ -271,7 +277,7 @@ jobs: +@@ -271,7 +285,7 @@ jobs: steps: - name: checkout @@ -173,7 +188,7 @@ index 47d239f108..b50c42de6f 100644 - name: update PATH run: | -@@ -293,7 +299,7 @@ jobs: +@@ -293,7 +307,7 @@ jobs: run: ./.ci/linux-build.sh - name: upload deb packages @@ -182,7 +197,7 @@ index 47d239f108..b50c42de6f 100644 with: name: deb-packages-${{ matrix.dpdk }}-dpdk path: '/home/runner/work/ovs/*.deb' -@@ -301,7 +307,7 @@ jobs: +@@ -301,7 +315,7 @@ jobs: build-linux-rpm: name: linux rpm fedora runs-on: ubuntu-latest @@ -191,7 +206,7 @@ index 47d239f108..b50c42de6f 100644 timeout-minutes: 30 strategy: -@@ -309,7 +315,7 @@ jobs: +@@ -309,7 +323,7 @@ jobs: steps: - name: checkout @@ -200,7 +215,7 @@ index 47d239f108..b50c42de6f 100644 - name: install dependencies run: | dnf install -y rpm-build dnf-plugins-core -@@ -328,7 +334,7 @@ jobs: +@@ -328,7 +342,7 @@ jobs: run: dnf install -y rpm/rpmbuild/RPMS/*/*.rpm - name: upload rpm packages @@ -240,6 +255,18 @@ index 0000000000..7d505150ec +python: + install: + - requirements: Documentation/requirements.txt +diff --git a/AUTHORS.rst b/AUTHORS.rst +index 9186e1ad22..f58bf1c853 100644 +--- a/AUTHORS.rst ++++ b/AUTHORS.rst +@@ -578,6 +578,7 @@ David Evans davidjoshuaevans@gmail.com + David Palma palma@onesource.pt + David van 
Moolenbroek dvmoolenbroek@aimvalley.nl + Derek Cormier derek.cormier@lab.ntt.co.jp ++Derrick Lim derrick.lim@rakuten.com + Dhaval Badiani dbadiani@vmware.com + DK Moon + Ding Zhi zhi.ding@6wind.com diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index e6bda14e7b..f47d408369 100644 --- a/Documentation/faq/releases.rst @@ -1518,6 +1545,977 @@ index 9ccafd6d47..a2506da5ff 100644 * * To use it, insert the following code to where backtrace is * desired: +diff --git a/lib/bfd.c b/lib/bfd.c +index 9698576d07..b8149e7897 100644 +--- a/lib/bfd.c ++++ b/lib/bfd.c +@@ -586,7 +586,6 @@ bfd_put_packet(struct bfd *bfd, struct dp_packet *p, + { + long long int min_tx, min_rx; + struct udp_header *udp; +- struct eth_header *eth; + struct ip_header *ip; + struct msg *msg; + +@@ -605,15 +604,13 @@ bfd_put_packet(struct bfd *bfd, struct dp_packet *p, + * set. */ + ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL)); + +- dp_packet_reserve(p, 2); /* Properly align after the ethernet header. */ +- eth = dp_packet_put_uninit(p, sizeof *eth); +- eth->eth_src = eth_addr_is_zero(bfd->local_eth_src) +- ? eth_src : bfd->local_eth_src; +- eth->eth_dst = eth_addr_is_zero(bfd->local_eth_dst) +- ? eth_addr_bfd : bfd->local_eth_dst; +- eth->eth_type = htons(ETH_TYPE_IP); ++ ip = eth_compose(p, ++ eth_addr_is_zero(bfd->local_eth_dst) ++ ? eth_addr_bfd : bfd->local_eth_dst, ++ eth_addr_is_zero(bfd->local_eth_src) ++ ? eth_src : bfd->local_eth_src, ++ ETH_TYPE_IP, sizeof *ip + sizeof *udp + sizeof *msg); + +- ip = dp_packet_put_zeros(p, sizeof *ip); + ip->ip_ihl_ver = IP_IHL_VER(5, 4); + ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg); + ip->ip_ttl = MAXTTL; +@@ -621,15 +618,17 @@ bfd_put_packet(struct bfd *bfd, struct dp_packet *p, + ip->ip_proto = IPPROTO_UDP; + put_16aligned_be32(&ip->ip_src, bfd->ip_src); + put_16aligned_be32(&ip->ip_dst, bfd->ip_dst); +- /* Checksum has already been zeroed by put_zeros call. 
*/ ++ /* Checksum has already been zeroed by eth_compose call. */ + ip->ip_csum = csum(ip, sizeof *ip); ++ dp_packet_set_l4(p, ip + 1); + +- udp = dp_packet_put_zeros(p, sizeof *udp); ++ udp = dp_packet_l4(p); + udp->udp_src = htons(bfd->udp_src); + udp->udp_dst = htons(BFD_DEST_PORT); + udp->udp_len = htons(sizeof *udp + sizeof *msg); ++ /* Checksum already zero from eth_compose. */ + +- msg = dp_packet_put_uninit(p, sizeof *msg); ++ msg = (struct msg *)(udp + 1); + msg->vers_diag = (BFD_VERSION << 5) | bfd->diag; + msg->flags = (bfd->state & STATE_MASK) | bfd->flags; + +@@ -1131,10 +1130,11 @@ bfd_set_state(struct bfd *bfd, enum state state, enum diag diag) + if (!VLOG_DROP_INFO(&rl)) { + struct ds ds = DS_EMPTY_INITIALIZER; + +- ds_put_format(&ds, "%s: BFD state change: %s->%s" +- " \"%s\"->\"%s\".\n", ++ ds_put_format(&ds, "%s: BFD state change: (bfd.SessionState: %s," ++ " bfd.LocalDiag: \"%s\") -> (bfd.SessionState: %s," ++ " bfd.LocalDiag: \"%s\")\n", + bfd->name, bfd_state_str(bfd->state), +- bfd_state_str(state), bfd_diag_str(bfd->diag), ++ bfd_diag_str(bfd->diag), bfd_state_str(state), + bfd_diag_str(diag)); + bfd_put_details(&ds, bfd); + VLOG_INFO("%s", ds_cstr(&ds)); +diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h +index bb326868e9..3fd5fccd3e 100644 +--- a/lib/conntrack-private.h ++++ b/lib/conntrack-private.h +@@ -49,6 +49,12 @@ struct ct_endpoint { + * hashing in ct_endpoint_hash_add(). */ + BUILD_ASSERT_DECL(sizeof(struct ct_endpoint) == sizeof(union ct_addr) + 4); + ++enum key_dir { ++ CT_DIR_FWD = 0, ++ CT_DIR_REV, ++ CT_DIRS, ++}; ++ + /* Changes to this structure need to be reflected in conn_key_hash() + * and conn_key_cmp(). 
*/ + struct conn_key { +@@ -112,20 +118,18 @@ enum ct_timeout { + + #define N_EXP_LISTS 100 + +-enum OVS_PACKED_ENUM ct_conn_type { +- CT_CONN_TYPE_DEFAULT, +- CT_CONN_TYPE_UN_NAT, ++struct conn_key_node { ++ enum key_dir dir; ++ struct conn_key key; ++ struct cmap_node cm_node; + }; + + struct conn { + /* Immutable data. */ +- struct conn_key key; +- struct conn_key rev_key; ++ struct conn_key_node key_node[CT_DIRS]; + struct conn_key parent_key; /* Only used for orig_tuple support. */ +- struct cmap_node cm_node; + uint16_t nat_action; + char *alg; +- struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */ + atomic_flag reclaimed; /* False during the lifetime of the connection, + * True as soon as a thread has started freeing + * its memory. */ +@@ -150,7 +154,6 @@ struct conn { + + /* Immutable data. */ + bool alg_related; /* True if alg data connection. */ +- enum ct_conn_type conn_type; + + uint32_t tp_id; /* Timeout policy ID. */ + }; +diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c +index 89cb2704a6..2149fdc73a 100644 +--- a/lib/conntrack-tp.c ++++ b/lib/conntrack-tp.c +@@ -253,7 +253,8 @@ conn_update_expiration(struct conntrack *ct, struct conn *conn, + } + VLOG_DBG_RL(&rl, "Update timeout %s zone=%u with policy id=%d " + "val=%u sec.", +- ct_timeout_str[tm], conn->key.zone, conn->tp_id, val); ++ ct_timeout_str[tm], conn->key_node[CT_DIR_FWD].key.zone, ++ conn->tp_id, val); + + atomic_store_relaxed(&conn->expiration, now + val * 1000); + } +@@ -273,7 +274,8 @@ conn_init_expiration(struct conntrack *ct, struct conn *conn, + } + + VLOG_DBG_RL(&rl, "Init timeout %s zone=%u with policy id=%d val=%u sec.", +- ct_timeout_str[tm], conn->key.zone, conn->tp_id, val); ++ ct_timeout_str[tm], conn->key_node[CT_DIR_FWD].key.zone, ++ conn->tp_id, val); + + conn->expiration = now + val * 1000; + } +diff --git a/lib/conntrack.c b/lib/conntrack.c +index 5f1176d333..592bbaa3e1 100644 +--- a/lib/conntrack.c ++++ b/lib/conntrack.c +@@ -103,7 +103,7 @@ static 
enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn, + struct conn_lookup_ctx *ctx, + long long now); + static long long int conn_expiration(const struct conn *); +-static bool conn_expired(struct conn *, long long now); ++static bool conn_expired(const struct conn *, long long now); + static void conn_expire_push_front(struct conntrack *ct, struct conn *conn); + static void set_mark(struct dp_packet *, struct conn *, + uint32_t val, uint32_t mask); +@@ -113,8 +113,7 @@ static void set_label(struct dp_packet *, struct conn *, + static void *clean_thread_main(void *f_); + + static bool +-nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn, +- struct conn *nat_conn, ++nat_get_unique_tuple(struct conntrack *ct, struct conn *conn, + const struct nat_action_info_t *nat_info); + + static uint8_t +@@ -208,7 +207,7 @@ static alg_helper alg_helpers[] = { + #define ALG_WC_SRC_PORT 0 + + /* If the total number of connections goes above this value, no new connections +- * are accepted; this is for CT_CONN_TYPE_DEFAULT connections. */ ++ * are accepted. */ + #define DEFAULT_N_CONN_LIMIT 3000000 + + /* Does a member by member comparison of two conn_keys; this +@@ -234,61 +233,6 @@ conn_key_cmp(const struct conn_key *key1, const struct conn_key *key2) + return 1; + } + +-static void +-ct_print_conn_info(const struct conn *c, const char *log_msg, +- enum vlog_level vll, bool force, bool rl_on) +-{ +-#define CT_VLOG(RL_ON, LEVEL, ...) 
\ +- do { \ +- if (RL_ON) { \ +- static struct vlog_rate_limit rl_ = VLOG_RATE_LIMIT_INIT(5, 5); \ +- vlog_rate_limit(&this_module, LEVEL, &rl_, __VA_ARGS__); \ +- } else { \ +- vlog(&this_module, LEVEL, __VA_ARGS__); \ +- } \ +- } while (0) +- +- if (OVS_UNLIKELY(force || vlog_is_enabled(&this_module, vll))) { +- if (c->key.dl_type == htons(ETH_TYPE_IP)) { +- CT_VLOG(rl_on, vll, "%s: src ip "IP_FMT" dst ip "IP_FMT" rev src " +- "ip "IP_FMT" rev dst ip "IP_FMT" src/dst ports " +- "%"PRIu16"/%"PRIu16" rev src/dst ports " +- "%"PRIu16"/%"PRIu16" zone/rev zone " +- "%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto " +- "%"PRIu8"/%"PRIu8, log_msg, +- IP_ARGS(c->key.src.addr.ipv4), +- IP_ARGS(c->key.dst.addr.ipv4), +- IP_ARGS(c->rev_key.src.addr.ipv4), +- IP_ARGS(c->rev_key.dst.addr.ipv4), +- ntohs(c->key.src.port), ntohs(c->key.dst.port), +- ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port), +- c->key.zone, c->rev_key.zone, c->key.nw_proto, +- c->rev_key.nw_proto); +- } else { +- char ip6_s[INET6_ADDRSTRLEN]; +- inet_ntop(AF_INET6, &c->key.src.addr.ipv6, ip6_s, sizeof ip6_s); +- char ip6_d[INET6_ADDRSTRLEN]; +- inet_ntop(AF_INET6, &c->key.dst.addr.ipv6, ip6_d, sizeof ip6_d); +- char ip6_rs[INET6_ADDRSTRLEN]; +- inet_ntop(AF_INET6, &c->rev_key.src.addr.ipv6, ip6_rs, +- sizeof ip6_rs); +- char ip6_rd[INET6_ADDRSTRLEN]; +- inet_ntop(AF_INET6, &c->rev_key.dst.addr.ipv6, ip6_rd, +- sizeof ip6_rd); +- +- CT_VLOG(rl_on, vll, "%s: src ip %s dst ip %s rev src ip %s" +- " rev dst ip %s src/dst ports %"PRIu16"/%"PRIu16 +- " rev src/dst ports %"PRIu16"/%"PRIu16" zone/rev zone " +- "%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto " +- "%"PRIu8"/%"PRIu8, log_msg, ip6_s, ip6_d, ip6_rs, +- ip6_rd, ntohs(c->key.src.port), ntohs(c->key.dst.port), +- ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port), +- c->key.zone, c->rev_key.zone, c->key.nw_proto, +- c->rev_key.nw_proto); +- } +- } +-} +- + /* Initializes the connection tracker 'ct'. 
The caller is responsible for + * calling 'conntrack_destroy()', when the instance is not needed anymore */ + struct conntrack * +@@ -477,28 +421,27 @@ conn_clean__(struct conntrack *ct, struct conn *conn) + uint32_t hash; + + if (conn->alg) { +- expectation_clean(ct, &conn->key); ++ expectation_clean(ct, &conn->key_node[CT_DIR_FWD].key); + } + +- hash = conn_key_hash(&conn->key, ct->hash_basis); +- cmap_remove(&ct->conns, &conn->cm_node, hash); ++ hash = conn_key_hash(&conn->key_node[CT_DIR_FWD].key, ct->hash_basis); ++ cmap_remove(&ct->conns, &conn->key_node[CT_DIR_FWD].cm_node, hash); + +- if (conn->nat_conn) { +- hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis); +- cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash); ++ if (conn->nat_action) { ++ hash = conn_key_hash(&conn->key_node[CT_DIR_REV].key, ++ ct->hash_basis); ++ cmap_remove(&ct->conns, &conn->key_node[CT_DIR_REV].cm_node, hash); + } + + rculist_remove(&conn->node); + } + +-/* Must be called with 'conn' of 'conn_type' CT_CONN_TYPE_DEFAULT. Also +- * removes the associated nat 'conn' from the lookup datastructures. */ ++/* Also removes the associated nat 'conn' from the lookup ++ datastructures. 
*/ + static void + conn_clean(struct conntrack *ct, struct conn *conn) + OVS_EXCLUDED(conn->lock, ct->ct_lock) + { +- ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT); +- + if (atomic_flag_test_and_set(&conn->reclaimed)) { + return; + } +@@ -585,34 +528,39 @@ conn_key_lookup(struct conntrack *ct, const struct conn_key *key, + uint32_t hash, long long now, struct conn **conn_out, + bool *reply) + { +- struct conn *conn; ++ struct conn_key_node *keyn; ++ struct conn *conn = NULL; + bool found = false; + +- CMAP_FOR_EACH_WITH_HASH (conn, cm_node, hash, &ct->conns) { ++ CMAP_FOR_EACH_WITH_HASH (keyn, cm_node, hash, &ct->conns) { ++ if (keyn->dir == CT_DIR_FWD) { ++ conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]); ++ } else { ++ conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_REV]); ++ } ++ + if (conn_expired(conn, now)) { + continue; + } +- if (!conn_key_cmp(&conn->key, key)) { +- found = true; +- if (reply) { +- *reply = false; +- } +- break; +- } +- if (!conn_key_cmp(&conn->rev_key, key)) { +- found = true; +- if (reply) { +- *reply = true; ++ ++ for (int i = CT_DIR_FWD; i < CT_DIRS; i++) { ++ if (!conn_key_cmp(&conn->key_node[i].key, key)) { ++ found = true; ++ if (reply) { ++ *reply = (i == CT_DIR_REV); ++ } ++ goto out_found; + } +- break; + } + } + ++out_found: + if (found && conn_out) { + *conn_out = conn; + } else if (conn_out) { + *conn_out = NULL; + } ++ + return found; + } + +@@ -646,7 +594,7 @@ write_ct_md(struct dp_packet *pkt, uint16_t zone, const struct conn *conn, + if (conn->alg_related) { + key = &conn->parent_key; + } else { +- key = &conn->key; ++ key = &conn->key_node[CT_DIR_FWD].key; + } + } else if (alg_exp) { + pkt->md.ct_mark = alg_exp->parent_mark; +@@ -877,7 +825,8 @@ nat_inner_packet(struct dp_packet *pkt, struct conn_key *key, + static void + nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool related) + { +- struct conn_key *key = reply ? &conn->key : &conn->rev_key; ++ enum key_dir dir = reply ? 
CT_DIR_FWD : CT_DIR_REV; ++ struct conn_key *key = &conn->key_node[dir].key; + uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action) + : conn->nat_action; + +@@ -911,7 +860,7 @@ conn_seq_skew_set(struct conntrack *ct, const struct conn *conn_in, + { + struct conn *conn; + +- conn_lookup(ct, &conn_in->key, now, &conn, NULL); ++ conn_lookup(ct, &conn_in->key_node[CT_DIR_FWD].key, now, &conn, NULL); + if (conn && seq_skew) { + conn->seq_skew = seq_skew; + conn->seq_skew_dir = seq_skew_dir; +@@ -947,7 +896,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, + OVS_REQUIRES(ct->ct_lock) + { + struct conn *nc = NULL; +- struct conn *nat_conn = NULL; + + if (!valid_new(pkt, &ctx->key)) { + pkt->md.ct_state = CS_INVALID; +@@ -961,6 +909,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, + } + + if (commit) { ++ struct conn_key_node *fwd_key_node, *rev_key_node; + struct zone_limit *zl = zone_limit_lookup_or_default(ct, + ctx->key.zone); + if (zl && atomic_count_get(&zl->czl.count) >= zl->czl.limit) { +@@ -975,9 +924,12 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, + } + + nc = new_conn(ct, pkt, &ctx->key, now, tp_id); +- memcpy(&nc->key, &ctx->key, sizeof nc->key); +- memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key); +- conn_key_reverse(&nc->rev_key); ++ fwd_key_node = &nc->key_node[CT_DIR_FWD]; ++ rev_key_node = &nc->key_node[CT_DIR_REV]; ++ memcpy(&fwd_key_node->key, &ctx->key, sizeof fwd_key_node->key); ++ memcpy(&rev_key_node->key, &fwd_key_node->key, ++ sizeof rev_key_node->key); ++ conn_key_reverse(&rev_key_node->key); + + if (ct_verify_helper(helper, ct_alg_ctl)) { + nc->alg = nullable_xstrdup(helper); +@@ -992,46 +944,33 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, + + if (nat_action_info) { + nc->nat_action = nat_action_info->nat_action; +- nat_conn = xzalloc(sizeof *nat_conn); + + if (alg_exp) { + if (alg_exp->nat_rpl_dst) { +- nc->rev_key.dst.addr = alg_exp->alg_nat_repl_addr; ++ 
rev_key_node->key.dst.addr = alg_exp->alg_nat_repl_addr; + nc->nat_action = NAT_ACTION_SRC; + } else { +- nc->rev_key.src.addr = alg_exp->alg_nat_repl_addr; ++ rev_key_node->key.src.addr = alg_exp->alg_nat_repl_addr; + nc->nat_action = NAT_ACTION_DST; + } + } else { +- memcpy(nat_conn, nc, sizeof *nat_conn); +- bool nat_res = nat_get_unique_tuple(ct, nc, nat_conn, +- nat_action_info); +- ++ bool nat_res = nat_get_unique_tuple(ct, nc, nat_action_info); + if (!nat_res) { + goto nat_res_exhaustion; + } +- +- /* Update nc with nat adjustments made to nat_conn by +- * nat_get_unique_tuple(). */ +- memcpy(nc, nat_conn, sizeof *nc); + } + + nat_packet(pkt, nc, false, ctx->icmp_related); +- memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key); +- memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key); +- nat_conn->conn_type = CT_CONN_TYPE_UN_NAT; +- nat_conn->nat_action = 0; +- nat_conn->alg = NULL; +- nat_conn->nat_conn = NULL; +- uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->hash_basis); +- cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash); ++ uint32_t rev_hash = conn_key_hash(&rev_key_node->key, ++ ct->hash_basis); ++ cmap_insert(&ct->conns, &rev_key_node->cm_node, rev_hash); + } + +- nc->nat_conn = nat_conn; + ovs_mutex_init_adaptive(&nc->lock); +- nc->conn_type = CT_CONN_TYPE_DEFAULT; + atomic_flag_clear(&nc->reclaimed); +- cmap_insert(&ct->conns, &nc->cm_node, ctx->hash); ++ fwd_key_node->dir = CT_DIR_FWD; ++ rev_key_node->dir = CT_DIR_REV; ++ cmap_insert(&ct->conns, &fwd_key_node->cm_node, ctx->hash); + conn_expire_push_front(ct, nc); + atomic_count_inc(&ct->n_conn); + ctx->conn = nc; /* For completeness. */ +@@ -1052,7 +991,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, + * firewall rules or a separate firewall. Also using zone partitioning + * can limit DoS impact. 
*/ + nat_res_exhaustion: +- free(nat_conn); + delete_conn__(nc); + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + VLOG_WARN_RL(&rl, "Unable to NAT due to tuple space exhaustion - " +@@ -1065,7 +1003,6 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt, + struct conn_lookup_ctx *ctx, struct conn *conn, + long long now) + { +- ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT); + bool create_new_conn = false; + + if (ctx->icmp_related) { +@@ -1092,7 +1029,8 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt, + pkt->md.ct_state = CS_INVALID; + break; + case CT_UPDATE_NEW: +- if (conn_lookup(ct, &conn->key, now, NULL, NULL)) { ++ if (conn_lookup(ct, &conn->key_node[CT_DIR_FWD].key, ++ now, NULL, NULL)) { + conn_force_expire(conn); + } + create_new_conn = true; +@@ -1268,8 +1206,10 @@ initial_conn_lookup(struct conntrack *ct, struct conn_lookup_ctx *ctx, + + if (natted) { + if (OVS_LIKELY(ctx->conn)) { ++ enum key_dir dir; + ctx->reply = !ctx->reply; +- ctx->key = ctx->reply ? ctx->conn->rev_key : ctx->conn->key; ++ dir = ctx->reply ? CT_DIR_REV : CT_DIR_FWD; ++ ctx->key = ctx->conn->key_node[dir].key; + ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); + } else { + /* A lookup failure does not necessarily imply that an +@@ -1302,31 +1242,13 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, + + /* Delete found entry if in wrong direction. 'force' implies commit. */ + if (OVS_UNLIKELY(force && ctx->reply && conn)) { +- if (conn_lookup(ct, &conn->key, now, NULL, NULL)) { ++ if (conn_lookup(ct, &conn->key_node[CT_DIR_FWD].key, ++ now, NULL, NULL)) { + conn_force_expire(conn); + } + conn = NULL; + } + +- if (OVS_LIKELY(conn)) { +- if (conn->conn_type == CT_CONN_TYPE_UN_NAT) { +- +- ctx->reply = true; +- struct conn *rev_conn = conn; /* Save for debugging. 
*/ +- uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis); +- conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply); +- +- if (!conn) { +- pkt->md.ct_state |= CS_INVALID; +- write_ct_md(pkt, zone, NULL, NULL, NULL); +- char *log_msg = xasprintf("Missing parent conn %p", rev_conn); +- ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true); +- free(log_msg); +- return; +- } +- } +- } +- + enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, tp_src, tp_dst, + helper); + +@@ -1419,8 +1341,9 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, + struct conn *conn = packet->md.conn; + if (OVS_UNLIKELY(packet->md.ct_state == CS_INVALID)) { + write_ct_md(packet, zone, NULL, NULL, NULL); +- } else if (conn && conn->key.zone == zone && !force +- && !get_alg_ctl_type(packet, tp_src, tp_dst, helper)) { ++ } else if (conn && ++ conn->key_node[CT_DIR_FWD].key.zone == zone && !force && ++ !get_alg_ctl_type(packet, tp_src, tp_dst, helper)) { + process_one_fast(zone, setmark, setlabel, nat_action_info, + conn, packet); + } else if (OVS_UNLIKELY(!conn_key_extract(ct, packet, dl_type, &ctx, +@@ -2269,7 +2192,7 @@ nat_ipv6_addr_increment(struct in6_addr *ipv6, uint32_t increment) + } + + static uint32_t +-nat_range_hash(const struct conn *conn, uint32_t basis, ++nat_range_hash(const struct conn_key *key, uint32_t basis, + const struct nat_action_info_t *nat_info) + { + uint32_t hash = basis; +@@ -2279,11 +2202,11 @@ nat_range_hash(const struct conn *conn, uint32_t basis, + hash = hash_add(hash, + ((uint32_t) nat_info->max_port << 16) + | nat_info->min_port); +- hash = ct_endpoint_hash_add(hash, &conn->key.src); +- hash = ct_endpoint_hash_add(hash, &conn->key.dst); +- hash = hash_add(hash, (OVS_FORCE uint32_t) conn->key.dl_type); +- hash = hash_add(hash, conn->key.nw_proto); +- hash = hash_add(hash, conn->key.zone); ++ hash = ct_endpoint_hash_add(hash, &key->src); ++ hash = ct_endpoint_hash_add(hash, &key->dst); ++ hash = 
hash_add(hash, (OVS_FORCE uint32_t) key->dl_type); ++ hash = hash_add(hash, key->nw_proto); ++ hash = hash_add(hash, key->zone); + + /* The purpose of the second parameter is to distinguish hashes of data of + * different length; our data always has the same length so there is no +@@ -2357,7 +2280,7 @@ get_addr_in_range(union ct_addr *min, union ct_addr *max, + } + + static void +-find_addr(const struct conn *conn, union ct_addr *min, ++find_addr(const struct conn_key *key, union ct_addr *min, + union ct_addr *max, union ct_addr *curr, + uint32_t hash, bool ipv4, + const struct nat_action_info_t *nat_info) +@@ -2367,9 +2290,9 @@ find_addr(const struct conn *conn, union ct_addr *min, + /* All-zero case. */ + if (!memcmp(min, &zero_ip, sizeof *min)) { + if (nat_info->nat_action & NAT_ACTION_SRC) { +- *curr = conn->key.src.addr; ++ *curr = key->src.addr; + } else if (nat_info->nat_action & NAT_ACTION_DST) { +- *curr = conn->key.dst.addr; ++ *curr = key->dst.addr; + } + } else { + get_addr_in_range(min, max, curr, hash, ipv4); +@@ -2388,7 +2311,7 @@ store_addr_to_key(union ct_addr *addr, struct conn_key *key, + } + + static bool +-nat_get_unique_l4(struct conntrack *ct, struct conn *nat_conn, ++nat_get_unique_l4(struct conntrack *ct, struct conn_key *rev_key, + ovs_be16 *port, uint16_t curr, uint16_t min, + uint16_t max) + { +@@ -2411,8 +2334,7 @@ another_round: + } + + *port = htons(curr); +- if (!conn_lookup(ct, &nat_conn->rev_key, +- time_msec(), NULL, NULL)) { ++ if (!conn_lookup(ct, rev_key, time_msec(), NULL, NULL)) { + return true; + } + } +@@ -2450,54 +2372,50 @@ another_round: + * + * If none can be found, return exhaustion to the caller. 
*/ + static bool +-nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn, +- struct conn *nat_conn, ++nat_get_unique_tuple(struct conntrack *ct, struct conn *conn, + const struct nat_action_info_t *nat_info) + { +- uint32_t hash = nat_range_hash(conn, ct->hash_basis, nat_info); ++ struct conn_key *fwd_key = &conn->key_node[CT_DIR_FWD].key; ++ struct conn_key *rev_key = &conn->key_node[CT_DIR_REV].key; + union ct_addr min_addr = {0}, max_addr = {0}, addr = {0}; +- bool pat_proto = conn->key.nw_proto == IPPROTO_TCP || +- conn->key.nw_proto == IPPROTO_UDP || +- conn->key.nw_proto == IPPROTO_SCTP; ++ bool pat_proto = fwd_key->nw_proto == IPPROTO_TCP || ++ fwd_key->nw_proto == IPPROTO_UDP || ++ fwd_key->nw_proto == IPPROTO_SCTP; + uint16_t min_dport, max_dport, curr_dport; + uint16_t min_sport, max_sport, curr_sport; ++ uint32_t hash; + ++ hash = nat_range_hash(fwd_key, ct->hash_basis, nat_info); + min_addr = nat_info->min_addr; + max_addr = nat_info->max_addr; + +- find_addr(conn, &min_addr, &max_addr, &addr, hash, +- (conn->key.dl_type == htons(ETH_TYPE_IP)), nat_info); ++ find_addr(fwd_key, &min_addr, &max_addr, &addr, hash, ++ (fwd_key->dl_type == htons(ETH_TYPE_IP)), nat_info); + +- set_sport_range(nat_info, &conn->key, hash, &curr_sport, ++ set_sport_range(nat_info, fwd_key, hash, &curr_sport, + &min_sport, &max_sport); +- set_dport_range(nat_info, &conn->key, hash, &curr_dport, ++ set_dport_range(nat_info, fwd_key, hash, &curr_dport, + &min_dport, &max_dport); + + if (pat_proto) { +- nat_conn->rev_key.src.port = htons(curr_dport); +- nat_conn->rev_key.dst.port = htons(curr_sport); ++ rev_key->src.port = htons(curr_dport); ++ rev_key->dst.port = htons(curr_sport); + } + +- store_addr_to_key(&addr, &nat_conn->rev_key, +- nat_info->nat_action); ++ store_addr_to_key(&addr, rev_key, nat_info->nat_action); + + if (!pat_proto) { +- if (!conn_lookup(ct, &nat_conn->rev_key, +- time_msec(), NULL, NULL)) { +- return true; +- } +- +- return false; ++ return 
!conn_lookup(ct, rev_key, time_msec(), NULL, NULL); + } + + bool found = false; + if (nat_info->nat_action & NAT_ACTION_DST_PORT) { +- found = nat_get_unique_l4(ct, nat_conn, &nat_conn->rev_key.src.port, ++ found = nat_get_unique_l4(ct, rev_key, &rev_key->src.port, + curr_dport, min_dport, max_dport); + } + + if (!found) { +- found = nat_get_unique_l4(ct, nat_conn, &nat_conn->rev_key.dst.port, ++ found = nat_get_unique_l4(ct, rev_key, &rev_key->dst.port, + curr_sport, min_sport, max_sport); + } + +@@ -2513,9 +2431,9 @@ conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt, + struct conn_lookup_ctx *ctx, long long now) + { + ovs_mutex_lock(&conn->lock); ++ uint8_t nw_proto = conn->key_node[CT_DIR_FWD].key.nw_proto; + enum ct_update_res update_res = +- l4_protos[conn->key.nw_proto]->conn_update(ct, conn, pkt, ctx->reply, +- now); ++ l4_protos[nw_proto]->conn_update(ct, conn, pkt, ctx->reply, now); + ovs_mutex_unlock(&conn->lock); + return update_res; + } +@@ -2541,12 +2459,9 @@ conn_expiration(const struct conn *conn) + } + + static bool +-conn_expired(struct conn *conn, long long now) ++conn_expired(const struct conn *conn, long long now) + { +- if (conn->conn_type == CT_CONN_TYPE_DEFAULT) { +- return now >= conn_expiration(conn); +- } +- return false; ++ return now >= conn_expiration(conn); + } + + static bool +@@ -2572,9 +2487,7 @@ delete_conn__(struct conn *conn) + static void + delete_conn(struct conn *conn) + { +- ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT); + ovs_mutex_destroy(&conn->lock); +- free(conn->nat_conn); + delete_conn__(conn); + } + +@@ -2667,15 +2580,18 @@ static void + conn_to_ct_dpif_entry(const struct conn *conn, struct ct_dpif_entry *entry, + long long now) + { ++ const struct conn_key *rev_key = &conn->key_node[CT_DIR_REV].key; ++ const struct conn_key *key = &conn->key_node[CT_DIR_FWD].key; ++ + memset(entry, 0, sizeof *entry); +- conn_key_to_tuple(&conn->key, &entry->tuple_orig); +- 
conn_key_to_tuple(&conn->rev_key, &entry->tuple_reply); ++ conn_key_to_tuple(key, &entry->tuple_orig); ++ conn_key_to_tuple(rev_key, &entry->tuple_reply); + + if (conn->alg_related) { + conn_key_to_tuple(&conn->parent_key, &entry->tuple_parent); + } + +- entry->zone = conn->key.zone; ++ entry->zone = key->zone; + + ovs_mutex_lock(&conn->lock); + entry->mark = conn->mark; +@@ -2683,7 +2599,7 @@ conn_to_ct_dpif_entry(const struct conn *conn, struct ct_dpif_entry *entry, + + long long expiration = conn_expiration(conn) - now; + +- struct ct_l4_proto *class = l4_protos[conn->key.nw_proto]; ++ struct ct_l4_proto *class = l4_protos[key->nw_proto]; + if (class->conn_get_protoinfo) { + class->conn_get_protoinfo(conn, &entry->protoinfo); + } +@@ -2716,30 +2632,29 @@ conntrack_dump_start(struct conntrack *ct, struct conntrack_dump *dump, + + dump->ct = ct; + *ptot_bkts = 1; /* Need to clean up the callers. */ ++ dump->cursor = cmap_cursor_start(&ct->conns); + return 0; + } + + int + conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry) + { +- struct conntrack *ct = dump->ct; + long long now = time_msec(); + +- for (;;) { +- struct cmap_node *cm_node = cmap_next_position(&ct->conns, +- &dump->cm_pos); +- if (!cm_node) { +- break; ++ struct conn_key_node *keyn; ++ struct conn *conn; ++ ++ CMAP_CURSOR_FOR_EACH_CONTINUE (keyn, cm_node, &dump->cursor) { ++ if (keyn->dir != CT_DIR_FWD) { ++ continue; + } +- struct conn *conn; +- INIT_CONTAINER(conn, cm_node, cm_node); + ++ conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]); + if (conn_expired(conn, now)) { + continue; + } + +- if ((!dump->filter_zone || conn->key.zone == dump->zone) && +- (conn->conn_type != CT_CONN_TYPE_UN_NAT)) { ++ if (!dump->filter_zone || keyn->key.zone == dump->zone) { + conn_to_ct_dpif_entry(conn, entry, now); + return 0; + } +@@ -2823,14 +2738,15 @@ conntrack_exp_dump_done(struct conntrack_dump *dump OVS_UNUSED) + int + conntrack_flush(struct conntrack *ct, const uint16_t 
*zone) + { ++ struct conn_key_node *keyn; + struct conn *conn; + +- CMAP_FOR_EACH (conn, cm_node, &ct->conns) { +- if (conn->conn_type != CT_CONN_TYPE_DEFAULT) { ++ CMAP_FOR_EACH (keyn, cm_node, &ct->conns) { ++ if (keyn->dir != CT_DIR_FWD) { + continue; + } +- +- if (!zone || *zone == conn->key.zone) { ++ conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]); ++ if (!zone || *zone == keyn->key.zone) { + conn_clean(ct, conn); + } + } +@@ -2842,18 +2758,18 @@ int + conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple, + uint16_t zone) + { +- int error = 0; + struct conn_key key; + struct conn *conn; ++ int error = 0; + + memset(&key, 0, sizeof(key)); + tuple_to_conn_key(tuple, zone, &key); + conn_lookup(ct, &key, time_msec(), &conn, NULL); + +- if (conn && conn->conn_type == CT_CONN_TYPE_DEFAULT) { ++ if (conn) { + conn_clean(ct, conn); + } else { +- VLOG_WARN("Must flush tuple using the original pre-NATed tuple"); ++ VLOG_WARN("Tuple not found"); + error = ENOENT; + } + +@@ -2996,50 +2912,54 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port, + const struct conn *parent_conn, bool reply, bool src_ip_wc, + bool skip_nat) + { ++ const struct conn_key *pconn_key, *pconn_rev_key; + union ct_addr src_addr; + union ct_addr dst_addr; + union ct_addr alg_nat_repl_addr; + struct alg_exp_node *alg_exp_node = xzalloc(sizeof *alg_exp_node); + ++ pconn_key = &parent_conn->key_node[CT_DIR_FWD].key; ++ pconn_rev_key = &parent_conn->key_node[CT_DIR_REV].key; ++ + if (reply) { +- src_addr = parent_conn->key.src.addr; +- dst_addr = parent_conn->key.dst.addr; ++ src_addr = pconn_key->src.addr; ++ dst_addr = pconn_key->dst.addr; + alg_exp_node->nat_rpl_dst = true; + if (skip_nat) { + alg_nat_repl_addr = dst_addr; + } else if (parent_conn->nat_action & NAT_ACTION_DST) { +- alg_nat_repl_addr = parent_conn->rev_key.src.addr; ++ alg_nat_repl_addr = pconn_rev_key->src.addr; + alg_exp_node->nat_rpl_dst = false; + } else { +- alg_nat_repl_addr = 
parent_conn->rev_key.dst.addr; ++ alg_nat_repl_addr = pconn_rev_key->dst.addr; + } + } else { +- src_addr = parent_conn->rev_key.src.addr; +- dst_addr = parent_conn->rev_key.dst.addr; ++ src_addr = pconn_rev_key->src.addr; ++ dst_addr = pconn_rev_key->dst.addr; + alg_exp_node->nat_rpl_dst = false; + if (skip_nat) { + alg_nat_repl_addr = src_addr; + } else if (parent_conn->nat_action & NAT_ACTION_DST) { +- alg_nat_repl_addr = parent_conn->key.dst.addr; ++ alg_nat_repl_addr = pconn_key->dst.addr; + alg_exp_node->nat_rpl_dst = true; + } else { +- alg_nat_repl_addr = parent_conn->key.src.addr; ++ alg_nat_repl_addr = pconn_key->src.addr; + } + } + if (src_ip_wc) { + memset(&src_addr, 0, sizeof src_addr); + } + +- alg_exp_node->key.dl_type = parent_conn->key.dl_type; +- alg_exp_node->key.nw_proto = parent_conn->key.nw_proto; +- alg_exp_node->key.zone = parent_conn->key.zone; ++ alg_exp_node->key.dl_type = pconn_key->dl_type; ++ alg_exp_node->key.nw_proto = pconn_key->nw_proto; ++ alg_exp_node->key.zone = pconn_key->zone; + alg_exp_node->key.src.addr = src_addr; + alg_exp_node->key.dst.addr = dst_addr; + alg_exp_node->key.src.port = ALG_WC_SRC_PORT; + alg_exp_node->key.dst.port = dst_port; + alg_exp_node->parent_mark = parent_conn->mark; + alg_exp_node->parent_label = parent_conn->label; +- memcpy(&alg_exp_node->parent_key, &parent_conn->key, ++ memcpy(&alg_exp_node->parent_key, pconn_key, + sizeof alg_exp_node->parent_key); + /* Take the write lock here because it is almost 100% + * likely that the lookup will fail and +@@ -3291,12 +3211,16 @@ process_ftp_ctl_v4(struct conntrack *ct, + + switch (mode) { + case CT_FTP_MODE_ACTIVE: +- *v4_addr_rep = conn_for_expectation->rev_key.dst.addr.ipv4; +- conn_ipv4_addr = conn_for_expectation->key.src.addr.ipv4; ++ *v4_addr_rep = ++ conn_for_expectation->key_node[CT_DIR_REV].key.dst.addr.ipv4; ++ conn_ipv4_addr = ++ conn_for_expectation->key_node[CT_DIR_FWD].key.src.addr.ipv4; + break; + case CT_FTP_MODE_PASSIVE: +- *v4_addr_rep = 
conn_for_expectation->key.dst.addr.ipv4; +- conn_ipv4_addr = conn_for_expectation->rev_key.src.addr.ipv4; ++ *v4_addr_rep = ++ conn_for_expectation->key_node[CT_DIR_FWD].key.dst.addr.ipv4; ++ conn_ipv4_addr = ++ conn_for_expectation->key_node[CT_DIR_REV].key.src.addr.ipv4; + break; + case CT_TFTP_MODE: + default: +@@ -3328,7 +3252,7 @@ skip_ipv6_digits(char *str) + static enum ftp_ctl_pkt + process_ftp_ctl_v6(struct conntrack *ct, + struct dp_packet *pkt, +- const struct conn *conn_for_expectation, ++ const struct conn *conn_for_exp, + union ct_addr *v6_addr_rep, char **ftp_data_start, + size_t *addr_offset_from_ftp_data_start, + size_t *addr_size, enum ct_alg_mode *mode) +@@ -3396,24 +3320,25 @@ process_ftp_ctl_v6(struct conntrack *ct, + + switch (*mode) { + case CT_FTP_MODE_ACTIVE: +- *v6_addr_rep = conn_for_expectation->rev_key.dst.addr; ++ *v6_addr_rep = conn_for_exp->key_node[CT_DIR_REV].key.dst.addr; + /* Although most servers will block this exploit, there may be some + * less well managed. 
*/ + if (memcmp(&ip6_addr, &v6_addr_rep->ipv6, sizeof ip6_addr) && +- memcmp(&ip6_addr, &conn_for_expectation->key.src.addr.ipv6, ++ memcmp(&ip6_addr, ++ &conn_for_exp->key_node[CT_DIR_FWD].key.src.addr.ipv6, + sizeof ip6_addr)) { + return CT_FTP_CTL_INVALID; + } + break; + case CT_FTP_MODE_PASSIVE: +- *v6_addr_rep = conn_for_expectation->key.dst.addr; ++ *v6_addr_rep = conn_for_exp->key_node[CT_DIR_FWD].key.dst.addr; + break; + case CT_TFTP_MODE: + default: + OVS_NOT_REACHED(); + } + +- expectation_create(ct, port, conn_for_expectation, ++ expectation_create(ct, port, conn_for_exp, + !!(pkt->md.ct_state & CS_REPLY_DIR), false, false); + return CT_FTP_CTL_INTEREST; + } +@@ -3571,7 +3496,8 @@ handle_tftp_ctl(struct conntrack *ct, + long long now OVS_UNUSED, enum ftp_ctl_pkt ftp_ctl OVS_UNUSED, + bool nat OVS_UNUSED) + { +- expectation_create(ct, conn_for_expectation->key.src.port, ++ expectation_create(ct, ++ conn_for_expectation->key_node[CT_DIR_FWD].key.src.port, + conn_for_expectation, + !!(pkt->md.ct_state & CS_REPLY_DIR), false, false); + } +diff --git a/lib/conntrack.h b/lib/conntrack.h +index 57d5159b61..ecf539b736 100644 +--- a/lib/conntrack.h ++++ b/lib/conntrack.h +@@ -101,8 +101,8 @@ struct conntrack_dump { + struct conntrack *ct; + unsigned bucket; + union { +- struct cmap_position cm_pos; + struct hmap_position hmap_pos; ++ struct cmap_cursor cursor; + }; + bool filter_zone; + uint16_t zone; diff --git a/lib/db-ctl-base.c b/lib/db-ctl-base.c index 5d2635946d..3a8068b12c 100644 --- a/lib/db-ctl-base.c @@ -1691,7 +2689,7 @@ index bdd12f6a7b..ac72a44bce 100644
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
-index 8f1361e21f..55700250df 100644
+index 8f1361e21f..6e30f2cc3b 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1312,6 +1312,16 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
@@ -1719,6 +2717,166 @@ index 8f1361e21f..55700250df 100644
return true;
}
+@@ -2487,6 +2498,35 @@ netdev_dpdk_prep_hwol_batch(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
+ return cnt;
+ }
+
++static void
++netdev_dpdk_mbuf_dump(const char *prefix, const char *message,
++ const struct rte_mbuf *mbuf)
++{
++ static struct vlog_rate_limit dump_rl = VLOG_RATE_LIMIT_INIT(5, 5);
++ char *response = NULL;
++ FILE *stream;
++ size_t size;
++
++ if (VLOG_DROP_DBG(&dump_rl)) {
++ return;
++ }
++
++ stream = open_memstream(&response, &size);
++ if (!stream) {
++ VLOG_ERR("Unable to open memstream for mbuf dump: %s.",
++ ovs_strerror(errno));
++ return;
++ }
++
++ rte_pktmbuf_dump(stream, mbuf, rte_pktmbuf_pkt_len(mbuf));
++
++ fclose(stream);
++
++ VLOG_DBG(prefix ? "%s: %s:\n%s" : "%s%s:\n%s",
++ prefix ? prefix : "", message, response);
++ free(response);
++}
++
+ /* Tries to transmit 'pkts' to txq 'qid' of device 'dev'. Takes ownership of
+ * 'pkts', even in case of failure.
+ *
+@@ -2503,6 +2543,8 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
+ VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
+ "Only %u/%u are valid: %s", netdev_get_name(&dev->up),
+ nb_tx_prep, cnt, rte_strerror(rte_errno));
++ netdev_dpdk_mbuf_dump(netdev_get_name(&dev->up),
++ "First invalid packet", pkts[nb_tx_prep]);
+ }
+
+ while (nb_tx != nb_tx_prep) {
+diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
+index 1a54add87f..e72b2b27ae 100644
+--- a/lib/netdev-dummy.c
++++ b/lib/netdev-dummy.c
+@@ -39,6 +39,7 @@
+ #include "pcap-file.h"
+ #include "openvswitch/poll-loop.h"
+ #include "openvswitch/shash.h"
++#include "ovs-router.h"
+ #include "sset.h"
+ #include "stream.h"
+ #include "unaligned.h"
+@@ -2045,11 +2046,20 @@ netdev_dummy_ip4addr(struct unixctl_conn *conn, int argc OVS_UNUSED,
+
+ if (netdev && is_dummy_class(netdev->netdev_class)) {
+ struct in_addr ip, mask;
++ struct in6_addr ip6;
++ uint32_t plen;
+ char *error;
+
+- error = ip_parse_masked(argv[2], &ip.s_addr, &mask.s_addr);
++ error = ip_parse_cidr(argv[2], &ip.s_addr, &plen);
+ if (!error) {
++ mask.s_addr = be32_prefix_mask(plen);
+ netdev_dummy_add_in4(netdev, ip, mask);
++
++ /* Insert local route entry for the new address. */
++ in6_addr_set_mapped_ipv4(&ip6, ip.s_addr);
++ ovs_router_force_insert(0, &ip6, plen + 96, true, argv[1],
++ &in6addr_any, &ip6);
++
+ unixctl_command_reply(conn, "OK");
+ } else {
+ unixctl_command_reply_error(conn, error);
+@@ -2079,6 +2089,11 @@ netdev_dummy_ip6addr(struct unixctl_conn *conn, int argc OVS_UNUSED,
+
+ mask = ipv6_create_mask(plen);
+ netdev_dummy_add_in6(netdev, &ip6, &mask);
++
++ /* Insert local route entry for the new address. */
++ ovs_router_force_insert(0, &ip6, plen, true, argv[1],
++ &in6addr_any, &ip6);
++
+ unixctl_command_reply(conn, "OK");
+ } else {
+ unixctl_command_reply_error(conn, error);
+diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
+index cca3408797..1ee585b941 100644
+--- a/lib/netdev-linux.c
++++ b/lib/netdev-linux.c
+@@ -2566,16 +2566,11 @@ exit:
+ }
+
+ static int
+-netdev_linux_get_speed(const struct netdev *netdev_, uint32_t *current,
+- uint32_t *max)
++netdev_linux_get_speed_locked(struct netdev_linux *netdev,
++ uint32_t *current, uint32_t *max)
+ {
+- struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+- int error;
+-
+- ovs_mutex_lock(&netdev->mutex);
+ if (netdev_linux_netnsid_is_remote(netdev)) {
+- error = EOPNOTSUPP;
+- goto exit;
++ return EOPNOTSUPP;
+ }
+
+ netdev_linux_read_features(netdev);
+@@ -2585,9 +2580,18 @@ netdev_linux_get_speed(const struct netdev *netdev_, uint32_t *current,
+ *max = MIN(UINT32_MAX,
+ netdev_features_to_bps(netdev->supported, 0) / 1000000ULL);
+ }
+- error = netdev->get_features_error;
++ return netdev->get_features_error;
++}
+
+-exit:
++static int
++netdev_linux_get_speed(const struct netdev *netdev_, uint32_t *current,
++ uint32_t *max)
++{
++ struct netdev_linux *netdev = netdev_linux_cast(netdev_);
++ int error;
++
++ ovs_mutex_lock(&netdev->mutex);
++ error = netdev_linux_get_speed_locked(netdev, current, max);
+ ovs_mutex_unlock(&netdev->mutex);
+ return error;
+ }
+@@ -4800,8 +4804,10 @@ htb_parse_qdisc_details__(struct netdev *netdev, const struct smap *details,
+ hc->max_rate = smap_get_ullong(details, "max-rate", 0) / 8;
+ if (!hc->max_rate) {
+ uint32_t current_speed;
++ uint32_t max_speed OVS_UNUSED;
+
+- netdev_get_speed(netdev, ¤t_speed, NULL);
++ netdev_linux_get_speed_locked(netdev_linux_cast(netdev),
++ ¤t_speed, &max_speed);
+ hc->max_rate = current_speed ? current_speed / 8 * 1000000ULL
+ : NETDEV_DEFAULT_BPS / 8;
+ }
+@@ -5270,8 +5276,10 @@ hfsc_parse_qdisc_details__(struct netdev *netdev, const struct smap *details,
+ uint32_t max_rate = smap_get_ullong(details, "max-rate", 0) / 8;
+ if (!max_rate) {
+ uint32_t current_speed;
++ uint32_t max_speed OVS_UNUSED;
+
+- netdev_get_speed(netdev, ¤t_speed, NULL);
++ netdev_linux_get_speed_locked(netdev_linux_cast(netdev),
++ ¤t_speed, &max_speed);
+ max_rate = current_speed ? current_speed / 8 * 1000000ULL
+ : NETDEV_DEFAULT_BPS / 8;
+ }
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 14bc877719..992627fa23 100644
--- a/lib/netdev-offload-dpdk.c
@@ -2004,6 +3162,47 @@ index ab9ce6b2e0..f140d25feb 100644
#include "ovs-atomic-clang.h"
#elif HAVE_ATOMIC && __cplusplus >= 201103L
#include "ovs-atomic-c++.h"
+diff --git a/lib/ovs-router.c b/lib/ovs-router.c
+index 7c04bb0e6b..809152d29b 100644
+--- a/lib/ovs-router.c
++++ b/lib/ovs-router.c
+@@ -329,6 +329,20 @@ ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst, uint8_t plen,
+ }
+ }
+
++/* The same as 'ovs_router_insert', but it adds the route even if updates
++ * from the system routing table are disabled. Used for unit tests. */
++void
++ovs_router_force_insert(uint32_t mark, const struct in6_addr *ip_dst,
++ uint8_t plen, bool local, const char output_bridge[],
++ const struct in6_addr *gw,
++ const struct in6_addr *prefsrc)
++{
++ uint8_t priority = local ? plen + 64 : plen;
++
++ ovs_router_insert__(mark, priority, local, ip_dst, plen,
++ output_bridge, gw, prefsrc);
++}
++
+ static void
+ rt_entry_delete__(const struct cls_rule *cr)
+ {
+diff --git a/lib/ovs-router.h b/lib/ovs-router.h
+index eb4ff85d9e..d7dc7e55f3 100644
+--- a/lib/ovs-router.h
++++ b/lib/ovs-router.h
+@@ -34,6 +34,11 @@ void ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst,
+ uint8_t plen, bool local,
+ const char output_bridge[], const struct in6_addr *gw,
+ const struct in6_addr *prefsrc);
++void ovs_router_force_insert(uint32_t mark, const struct in6_addr *ip_dst,
++ uint8_t plen, bool local,
++ const char output_bridge[],
++ const struct in6_addr *gw,
++ const struct in6_addr *prefsrc);
+ void ovs_router_flush(void);
+
+ void ovs_router_disable_system_routing_table(void);
diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 634fbb56df..ba720474b6 100644
--- a/lib/ovsdb-idl.c
@@ -2073,6 +3272,30 @@ index 9777efea33..688fe56337 100644
#ifdef __cplusplus
}
#endif
+diff --git a/lib/rstp.c b/lib/rstp.c
+index 2f01966f79..90e8094599 100644
+--- a/lib/rstp.c
++++ b/lib/rstp.c
+@@ -50,7 +50,7 @@
+
+ VLOG_DEFINE_THIS_MODULE(rstp);
+
+-struct ovs_mutex rstp_mutex = OVS_MUTEX_INITIALIZER;
++struct ovs_mutex rstp_mutex;
+
+ static struct ovs_list all_rstps__ = OVS_LIST_INITIALIZER(&all_rstps__);
+ static struct ovs_list *const all_rstps OVS_GUARDED_BY(rstp_mutex) = &all_rstps__;
+@@ -248,6 +248,10 @@ void
+ rstp_init(void)
+ OVS_EXCLUDED(rstp_mutex)
+ {
++ /* We need a recursive mutex because rstp_send_bpdu() could loop back
++ * into the rstp module through a patch port. */
++ ovs_mutex_init_recursive(&rstp_mutex);
++
+ unixctl_command_register("rstp/tcn", "[bridge]", 0, 1, rstp_unixctl_tcn,
+ NULL);
+ unixctl_command_register("rstp/show", "[bridge]", 0, 1, rstp_unixctl_show,
diff --git a/lib/tc.c b/lib/tc.c
index f49048cdab..6b38925c30 100644
--- a/lib/tc.c
@@ -2154,6 +3377,79 @@ index b556762277..e9603432d2 100644
return retval;
}
+diff --git a/ofproto/bond.c b/ofproto/bond.c
+index cfdf44f854..c31869a4c7 100644
+--- a/ofproto/bond.c
++++ b/ofproto/bond.c
+@@ -186,7 +186,7 @@ static struct bond_member *choose_output_member(const struct bond *,
+ struct flow_wildcards *,
+ uint16_t vlan)
+ OVS_REQ_RDLOCK(rwlock);
+-static void update_recirc_rules__(struct bond *);
++static void update_recirc_rules(struct bond *) OVS_REQ_WRLOCK(rwlock);
+ static bool bond_may_recirc(const struct bond *);
+ static void bond_update_post_recirc_rules__(struct bond *, bool force)
+ OVS_REQ_WRLOCK(rwlock);
+@@ -299,7 +299,10 @@ bond_unref(struct bond *bond)
+ }
+ free(bond->hash);
+ bond->hash = NULL;
+- update_recirc_rules__(bond);
++
++ ovs_rwlock_wrlock(&rwlock);
++ update_recirc_rules(bond);
++ ovs_rwlock_unlock(&rwlock);
+
+ hmap_destroy(&bond->pr_rule_ops);
+ free(bond->primary);
+@@ -331,17 +334,8 @@ add_pr_rule(struct bond *bond, const struct match *match,
+ hmap_insert(&bond->pr_rule_ops, &pr_op->hmap_node, hash);
+ }
+
+-/* This function should almost never be called directly.
+- * 'update_recirc_rules()' should be called instead. Since
+- * this function modifies 'bond->pr_rule_ops', it is only
+- * safe when 'rwlock' is held.
+- *
+- * However, when the 'bond' is the only reference in the system,
+- * calling this function avoid acquiring lock only to satisfy
+- * lock annotation. Currently, only 'bond_unref()' calls
+- * this function directly. */
+ static void
+-update_recirc_rules__(struct bond *bond)
++update_recirc_rules(struct bond *bond) OVS_REQ_WRLOCK(rwlock)
+ {
+ struct match match;
+ struct bond_pr_rule_op *pr_op;
+@@ -407,6 +401,15 @@ update_recirc_rules__(struct bond *bond)
+
+ VLOG_ERR("failed to remove post recirculation flow %s", err_s);
+ free(err_s);
++ } else if (bond->hash) {
++ /* If the flow deletion failed, a subsequent call to
++ * ofproto_dpif_add_internal_flow() would just modify the
++ * flow preserving its statistics. Therefore, only reset
++ * the entry's byte counter if it succeeds. */
++ uint32_t hash = pr_op->match.flow.dp_hash & BOND_MASK;
++ struct bond_entry *entry = &bond->hash[hash];
++
++ entry->pr_tx_bytes = 0;
+ }
+
+ hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node);
+@@ -421,12 +424,6 @@ update_recirc_rules__(struct bond *bond)
+ ofpbuf_uninit(&ofpacts);
+ }
+
+-static void
+-update_recirc_rules(struct bond *bond)
+- OVS_REQ_RDLOCK(rwlock)
+-{
+- update_recirc_rules__(bond);
+-}
+
+ /* Updates 'bond''s overall configuration to 's'.
+ *
diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c
index b092e9e04e..f7f7b12799 100644
--- a/ofproto/connmgr.c
@@ -2197,6 +3493,70 @@ index b092e9e04e..f7f7b12799 100644
VLOG_INFO("%s: added %s controller \"%s\"",
mgr->name, ofconn_type_to_string(ofservice->type), target);
+diff --git a/ofproto/ofproto-dpif-monitor.c b/ofproto/ofproto-dpif-monitor.c
+index bb0e490910..5132f9c952 100644
+--- a/ofproto/ofproto-dpif-monitor.c
++++ b/ofproto/ofproto-dpif-monitor.c
+@@ -275,19 +275,16 @@ monitor_mport_run(struct mport *mport, struct dp_packet *packet)
+ long long int lldp_wake_time = LLONG_MAX;
+
+ if (mport->cfm && cfm_should_send_ccm(mport->cfm)) {
+- dp_packet_clear(packet);
+ cfm_compose_ccm(mport->cfm, packet, mport->hw_addr);
+ ofproto_dpif_send_packet(mport->ofport, false, packet);
+ }
+ if (mport->bfd && bfd_should_send_packet(mport->bfd)) {
+ bool oam;
+
+- dp_packet_clear(packet);
+ bfd_put_packet(mport->bfd, packet, mport->hw_addr, &oam);
+ ofproto_dpif_send_packet(mport->ofport, oam, packet);
+ }
+ if (mport->lldp && lldp_should_send_packet(mport->lldp)) {
+- dp_packet_clear(packet);
+ lldp_put_packet(mport->lldp, packet, mport->hw_addr);
+ ofproto_dpif_send_packet(mport->ofport, false, packet);
+ }
+diff --git a/ofproto/ofproto-dpif-trace.c b/ofproto/ofproto-dpif-trace.c
+index 527e2f17ed..4fbe85018e 100644
+--- a/ofproto/ofproto-dpif-trace.c
++++ b/ofproto/ofproto-dpif-trace.c
+@@ -845,17 +845,35 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
+ bool names)
+ {
+ struct ovs_list recirc_queue = OVS_LIST_INITIALIZER(&recirc_queue);
++ int recirculations = 0;
++
+ ofproto_trace__(ofproto, flow, packet, &recirc_queue,
+ ofpacts, ofpacts_len, output, names);
+
+ struct oftrace_recirc_node *recirc_node;
+ LIST_FOR_EACH_POP (recirc_node, node, &recirc_queue) {
++ if (recirculations++ > 4096) {
++ ds_put_cstr(output, "\n\n");
++ ds_put_char_multiple(output, '=', 79);
++ ds_put_cstr(output, "\nTrace reached the recirculation limit."
++ " Sopping the trace here.");
++ ds_put_format(output,
++ "\nQueued but not processed: %"PRIuSIZE
++ " recirculations.",
++ ovs_list_size(&recirc_queue) + 1);
++ oftrace_recirc_node_destroy(recirc_node);
++ break;
++ }
+ ofproto_trace_recirc_node(recirc_node, next_ct_states, output);
+ ofproto_trace__(ofproto, &recirc_node->flow, recirc_node->packet,
+ &recirc_queue, ofpacts, ofpacts_len, output,
+ names);
+ oftrace_recirc_node_destroy(recirc_node);
+ }
++ /* Destroy remaining recirculation nodes, if any. */
++ LIST_FOR_EACH_POP (recirc_node, node, &recirc_queue) {
++ oftrace_recirc_node_destroy(recirc_node);
++ }
+ }
+
+ void
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 04b583f816..292500f215 100644
--- a/ofproto/ofproto-dpif-upcall.c
@@ -2261,7 +3621,7 @@ index 9224ee2e6d..2e1fcb3a6f 100644
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "xcache LEARN action execution failed.");
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
-index 47ea0f47e7..be4bd66576 100644
+index 47ea0f47e7..078d1bd96b 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -1615,7 +1615,8 @@ xlate_lookup_ofproto_(const struct dpif_backer *backer,
@@ -2302,7 +3662,16 @@ index 47ea0f47e7..be4bd66576 100644
xport = xport_lookup(xcfg, tnl_port_should_receive(flow)
? tnl_port_receive(flow)
: odp_port_to_ofport(backer, flow->in_port.odp_port));
-@@ -5700,8 +5710,16 @@ xlate_learn_action(struct xlate_ctx *ctx, const struct ofpact_learn *learn)
+@@ -3768,6 +3778,8 @@ native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport,
+
+ if (flow->tunnel.ip_src) {
+ in6_addr_set_mapped_ipv4(&s_ip6, flow->tunnel.ip_src);
++ } else if (ipv6_addr_is_set(&flow->tunnel.ipv6_src)) {
++ s_ip6 = flow->tunnel.ipv6_src;
+ }
+
+ err = tnl_route_lookup_flow(ctx, flow, &d_ip6, &s_ip6, &out_dev);
+@@ -5700,8 +5712,16 @@ xlate_learn_action(struct xlate_ctx *ctx, const struct ofpact_learn *learn)
if (!error) {
bool success = true;
if (ctx->xin->allow_side_effects) {
@@ -3609,6 +4978,66 @@ index 30028ba7a0..50b9870641 100755
# flake8: noqa: E402
from scapy.all import RandMAC, RandIP, PcapWriter, RandIP6, RandShort, fuzz
+diff --git a/tests/nsh.at b/tests/nsh.at
+index 55296e5593..0040a50b36 100644
+--- a/tests/nsh.at
++++ b/tests/nsh.at
+@@ -521,51 +521,45 @@ AT_CHECK([
+ set interface vxlangpe32 type=vxlan options:exts=gpe options:remote_ip=30.0.0.2 options:packet_type=ptap ofport_request=3020
+
+ ovs-appctl netdev-dummy/ip4addr br-p1 10.0.0.1/24
+- ovs-appctl ovs/route/add 10.0.0.0/24 br-p1
+ ovs-appctl tnl/arp/set br-p1 10.0.0.1 $HWADDR_BRP1
+ ovs-appctl tnl/arp/set br-p1 10.0.0.2 $HWADDR_BRP2
+ ovs-appctl tnl/arp/set br-p1 10.0.0.3 $HWADDR_BRP3
+
+ ovs-appctl netdev-dummy/ip4addr br-p2 20.0.0.2/24
+- ovs-appctl ovs/route/add 20.0.0.0/24 br-p2
+ ovs-appctl tnl/arp/set br-p2 20.0.0.1 $HWADDR_BRP1
+ ovs-appctl tnl/arp/set br-p2 20.0.0.2 $HWADDR_BRP2
+ ovs-appctl tnl/arp/set br-p2 20.0.0.3 $HWADDR_BRP3
+
+ ovs-appctl netdev-dummy/ip4addr br-p3 30.0.0.3/24
+- ovs-appctl ovs/route/add 30.0.0.0/24 br-p3
+ ovs-appctl tnl/arp/set br-p3 30.0.0.1 $HWADDR_BRP1
+ ovs-appctl tnl/arp/set br-p3 30.0.0.2 $HWADDR_BRP2
+ ovs-appctl tnl/arp/set br-p3 30.0.0.3 $HWADDR_BRP3
+ ], [0], [stdout])
+
+ AT_CHECK([
+- ovs-appctl ovs/route/add 10.0.0.0/24 br-p1
+ ovs-appctl tnl/arp/set br-p1 10.0.0.1 $HWADDR_BRP1
+ ovs-appctl tnl/arp/set br-p1 10.0.0.2 $HWADDR_BRP2
+ ovs-appctl tnl/arp/set br-p1 10.0.0.3 $HWADDR_BRP3
+ ], [0], [stdout])
+
+ AT_CHECK([
+- ovs-appctl ovs/route/add 20.0.0.0/24 br-p2
+ ovs-appctl tnl/arp/set br-p2 20.0.0.1 $HWADDR_BRP1
+ ovs-appctl tnl/arp/set br-p2 20.0.0.2 $HWADDR_BRP2
+ ovs-appctl tnl/arp/set br-p2 20.0.0.3 $HWADDR_BRP3
+ ], [0], [stdout])
+
+ AT_CHECK([
+- ovs-appctl ovs/route/add 30.0.0.0/24 br-p3
+ ovs-appctl tnl/arp/set br-p3 30.0.0.1 $HWADDR_BRP1
+ ovs-appctl tnl/arp/set br-p3 30.0.0.2 $HWADDR_BRP2
+ ovs-appctl tnl/arp/set br-p3 30.0.0.3 $HWADDR_BRP3
+ ], [0], [stdout])
+
+ AT_CHECK([
+- ovs-appctl ovs/route/show | grep User:
++ ovs-appctl ovs/route/show | grep Cached: | sort
+ ], [0], [dnl
+-User: 10.0.0.0/24 dev br-p1 SRC 10.0.0.1
+-User: 20.0.0.0/24 dev br-p2 SRC 20.0.0.2
+-User: 30.0.0.0/24 dev br-p3 SRC 30.0.0.3
++Cached: 10.0.0.0/24 dev br-p1 SRC 10.0.0.1 local
++Cached: 20.0.0.0/24 dev br-p2 SRC 20.0.0.2 local
++Cached: 30.0.0.0/24 dev br-p3 SRC 30.0.0.3 local
+ ])
+
+ AT_CHECK([
diff --git a/tests/ofp-print.at b/tests/ofp-print.at
index 14aa554169..6a07e23c64 100644
--- a/tests/ofp-print.at
@@ -3637,10 +5066,34 @@ index 14aa554169..6a07e23c64 100644
+
AT_CLEANUP
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
-index f242f77f31..a39d0d3ae9 100644
+index f242f77f31..c6a7752665 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
-@@ -5854,6 +5854,40 @@ OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0])
+@@ -547,6 +547,23 @@ ovs-appctl time/warp 1000 100
+ ovs-appctl bond/show > bond3.txt
+ AT_CHECK([sed -n '/member p2/,/^$/p' bond3.txt | grep 'hash'], [0], [ignore])
+
++# Check that both ports going down and back up doesn't break statistics.
++AT_CHECK([ovs-appctl netdev-dummy/set-admin-state p1 down], 0, [OK
++])
++AT_CHECK([ovs-appctl netdev-dummy/set-admin-state p2 down], 0, [OK
++])
++ovs-appctl time/warp 1000 100
++AT_CHECK([ovs-appctl netdev-dummy/set-admin-state p1 up], 0, [OK
++])
++AT_CHECK([ovs-appctl netdev-dummy/set-admin-state p2 up], 0, [OK
++])
++ovs-appctl time/warp 1000 100
++
++AT_CHECK([SEND_TCP_BOND_PKTS([p5], [5], [65500])])
++# We sent 49125 KB of data total in 3 batches. No hash should have more
++# than that amount of load. Just checking that it is within 5 digits.
++AT_CHECK([ovs-appctl bond/show | grep -E '[[0-9]]{6}'], [1])
++
+ OVS_VSWITCHD_STOP()
+ AT_CLEANUP
+
+@@ -5854,6 +5871,40 @@ OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0])
OVS_VSWITCHD_STOP
AT_CLEANUP
@@ -3681,6 +5134,40 @@ index f242f77f31..a39d0d3ae9 100644
AT_SETUP([ofproto-dpif - debug_slow action])
OVS_VSWITCHD_START
add_of_ports br0 1 2 3
+@@ -7619,12 +7670,14 @@ dummy@ovs-dummy: hit:0 missed:0
+ vm1 5/3: (dummy: ifindex=2011)
+ ])
+
+-dnl set up route to 1.1.2.92 via br0 and action=normal
++dnl Add 1.1.2.88/24 to br0 and action=normal
+ AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 1.1.2.88/24], [0], [OK
+ ])
+-AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 br0], [0], [OK
+-])
+ AT_CHECK([ovs-ofctl add-flow br0 action=normal])
++dnl Checking that a local route for added IP was successfully installed.
++AT_CHECK([ovs-appctl ovs/route/show | grep Cached], [0], [dnl
++Cached: 1.1.2.0/24 dev br0 SRC 1.1.2.88 local
++])
+
+ dnl Prime ARP Cache for 1.1.2.92
+ AT_CHECK([ovs-appctl netdev-dummy/receive p0 'recirc_id(0),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=1.1.2.92,tip=1.1.2.88,op=2,sha=f8:bc:12:44:34:b6,tha=00:00:00:00:00:00)'])
+@@ -7635,10 +7688,13 @@ ovs-vsctl \
+ --id=@sf create sflow targets=\"127.0.0.1:$SFLOW_PORT\" agent=127.0.0.1 \
+ header=128 sampling=1 polling=0
+
+-dnl set up route to 192.168.1.2 via br0
++dnl Add 192.168.1.1/16 to br0.
+ AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 192.168.1.1/16], [0], [OK
+ ])
+-AT_CHECK([ovs-appctl ovs/route/add 192.168.0.0/16 br0], [0], [OK
++dnl Checking that a local route for added IP was successfully installed.
++AT_CHECK([ovs-appctl ovs/route/show | grep Cached | sort], [0], [dnl
++Cached: 1.1.2.0/24 dev br0 SRC 1.1.2.88 local
++Cached: 192.168.0.0/16 dev br0 SRC 192.168.1.1 local
+ ])
+
+ dnl add rule for int-br to force packet onto tunnel. There is no ifindex
diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at
index d2e6ac768b..6213e6d91c 100644
--- a/tests/ofproto-macros.at
@@ -4011,10 +5498,22 @@ index 12cd2bc319..3e1df18a11 100644
[[[false]]],
[[[true]]]])
diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at
-index d36c3c117e..6eb758e229 100644
+index d36c3c117e..2050bc1736 100644
--- a/tests/ovsdb-server.at
+++ b/tests/ovsdb-server.at
-@@ -1830,9 +1830,14 @@ replication_schema > schema
+@@ -699,8 +699,10 @@ AT_CHECK_UNQUOTED(
+ [ignore])
+ # The error message for being unable to negotiate a shared ciphersuite
+ # is 'sslv3 alert handshake failure'. This is not the clearest message.
++# In openssl 3.2.0 all the error messages were updated to replace 'sslv3'
++# with 'ssl/tls'.
+ AT_CHECK_UNQUOTED(
+- [grep "sslv3 alert handshake failure" output], [0],
++ [grep -E "(sslv3|ssl/tls) alert handshake failure" output], [0],
+ [stdout],
+ [ignore])
+ OVSDB_SERVER_SHUTDOWN(["
+@@ -1830,9 +1832,14 @@ replication_schema > schema
AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore])
@@ -4031,7 +5530,7 @@ index d36c3c117e..6eb758e229 100644
dnl Try to connect without specifying the active server.
AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/connect-active-ovsdb-server], [0],
-@@ -2153,9 +2158,16 @@ AT_CHECK([ovsdb-tool transact db2 \
+@@ -2153,9 +2160,16 @@ AT_CHECK([ovsdb-tool transact db2 \
dnl Start both 'db1' and 'db2'.
on_exit 'kill `cat *.pid`'
@@ -4050,7 +5549,7 @@ index d36c3c117e..6eb758e229 100644
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/sync-status |grep active])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/sync-status |grep active])
-@@ -2382,6 +2394,6 @@ CLEAN_LOG_FILE([2.log], [2.log.clear])
+@@ -2382,6 +2396,6 @@ CLEAN_LOG_FILE([2.log], [2.log.clear])
dnl Checking that databases and logs are equal.
AT_CHECK([diff db.clear ./replay_dir/db.copy.clear])
@@ -4058,6 +5557,82 @@ index d36c3c117e..6eb758e229 100644
+AT_CHECK([diff -u 1.log.clear 2.log.clear])
AT_CLEANUP
+diff --git a/tests/packet-type-aware.at b/tests/packet-type-aware.at
+index 14cebf6efa..d634930fd5 100644
+--- a/tests/packet-type-aware.at
++++ b/tests/packet-type-aware.at
+@@ -142,30 +142,27 @@ AT_CHECK([
+ ### Setup GRE tunnels
+ AT_CHECK([
+ ovs-appctl netdev-dummy/ip4addr br-p1 10.0.0.1/24 &&
+- ovs-appctl ovs/route/add 10.0.0.0/24 br-p1 &&
+ ovs-appctl tnl/arp/set br-p1 10.0.0.1 $HWADDR_BRP1 &&
+ ovs-appctl tnl/arp/set br-p1 10.0.0.2 $HWADDR_BRP2 &&
+ ovs-appctl tnl/arp/set br-p1 10.0.0.3 $HWADDR_BRP3 &&
+
+ ovs-appctl netdev-dummy/ip4addr br-p2 20.0.0.2/24 &&
+- ovs-appctl ovs/route/add 20.0.0.0/24 br-p2 &&
+ ovs-appctl tnl/arp/set br-p2 20.0.0.1 $HWADDR_BRP1 &&
+ ovs-appctl tnl/arp/set br-p2 20.0.0.2 $HWADDR_BRP2 &&
+ ovs-appctl tnl/arp/set br-p2 20.0.0.3 $HWADDR_BRP3 &&
+
+ ovs-appctl netdev-dummy/ip4addr br-p3 30.0.0.3/24 &&
+- ovs-appctl ovs/route/add 30.0.0.0/24 br-p3 &&
+ ovs-appctl tnl/arp/set br-p3 30.0.0.1 $HWADDR_BRP1 &&
+ ovs-appctl tnl/arp/set br-p3 30.0.0.2 $HWADDR_BRP2 &&
+ ovs-appctl tnl/arp/set br-p3 30.0.0.3 $HWADDR_BRP3
+ ], [0], [ignore])
+
+ AT_CHECK([
+- ovs-appctl ovs/route/show | grep User:
++ ovs-appctl ovs/route/show | grep Cached: | sort
+ ], [0], [dnl
+-User: 10.0.0.0/24 dev br-p1 SRC 10.0.0.1
+-User: 20.0.0.0/24 dev br-p2 SRC 20.0.0.2
+-User: 30.0.0.0/24 dev br-p3 SRC 30.0.0.3
++Cached: 10.0.0.0/24 dev br-p1 SRC 10.0.0.1 local
++Cached: 20.0.0.0/24 dev br-p2 SRC 20.0.0.2 local
++Cached: 30.0.0.0/24 dev br-p3 SRC 30.0.0.3 local
+ ])
+
+ AT_CHECK([
+@@ -681,14 +678,13 @@ AT_CHECK([
+
+ AT_CHECK([
+ ovs-appctl netdev-dummy/ip4addr br2 10.0.0.1/24 &&
+- ovs-appctl ovs/route/add 10.0.0.0/24 br2 &&
+ ovs-appctl tnl/arp/set br2 10.0.0.2 de:af:be:ef:ba:be
+ ], [0], [ignore])
+
+ AT_CHECK([
+- ovs-appctl ovs/route/show | grep User:
++ ovs-appctl ovs/route/show | grep Cached:
+ ], [0], [dnl
+-User: 10.0.0.0/24 dev br2 SRC 10.0.0.1
++Cached: 10.0.0.0/24 dev br2 SRC 10.0.0.1 local
+ ])
+
+
+@@ -955,7 +951,6 @@ AT_CHECK([
+
+ AT_CHECK([
+ ovs-appctl netdev-dummy/ip4addr br0 20.0.0.1/24 &&
+- ovs-appctl ovs/route/add 20.0.0.2/24 br0 &&
+ ovs-appctl tnl/neigh/set br0 20.0.0.1 aa:bb:cc:00:00:01 &&
+ ovs-appctl tnl/neigh/set br0 20.0.0.2 aa:bb:cc:00:00:02
+ ], [0], [ignore])
+@@ -963,9 +958,9 @@ AT_CHECK([
+ ovs-appctl time/warp 1000
+
+ AT_CHECK([
+- ovs-appctl ovs/route/show | grep User
++ ovs-appctl ovs/route/show | grep Cached:
+ ],[0], [dnl
+-User: 20.0.0.0/24 dev br0 SRC 20.0.0.1
++Cached: 20.0.0.0/24 dev br0 SRC 20.0.0.1 local
+ ])
+
+ AT_CHECK([
diff --git a/tests/pmd.at b/tests/pmd.at
index 7c333a901b..7bdaca9e71 100644
--- a/tests/pmd.at
@@ -4087,6 +5662,141 @@ index 7c333a901b..7bdaca9e71 100644
AT_CHECK([echo 'table=0,in_port=p1,ip,nw_dst=10.1.0.0/16 actions=p2' | dnl
ovs-ofctl --bundle replace-flows br0 -])
+diff --git a/tests/rstp.at b/tests/rstp.at
+index 600e85dabd..e0d4bed4f0 100644
+--- a/tests/rstp.at
++++ b/tests/rstp.at
+@@ -253,3 +253,60 @@ AT_CHECK([ovs-vsctl del-port br0 p1])
+
+ OVS_VSWITCHD_STOP
+ AT_CLEANUP
++
++AT_SETUP([RSTP - patch ports])
++# Create br0 with interfaces p1 and p7
++# and br1 with interfaces p2 and p8
++# with p1 and p2 being connected patch ports.
++OVS_VSWITCHD_START(
++ [set port br0 other_config:rstp-enable=false -- \
++ set bridge br0 rstp-enable=true
++])
++
++AT_CHECK([add_of_br 1 \
++ set port br1 other_config:rstp-enable=false -- \
++ set bridge br1 rstp-enable=true])
++
++ovs-appctl time/stop
++
++AT_CHECK([ovs-vsctl \
++ add-port br0 p1 -- \
++ set interface p1 type=patch options:peer=p2 ofport_request=1 -- \
++ set port p1 other_config:rstp-enable=true -- \
++ add-port br1 p2 -- \
++ set interface p2 type=patch options:peer=p1 ofport_request=2 -- \
++ set port p2 other_config:rstp-enable=true -- \
++])
++
++AT_CHECK([ovs-vsctl \
++ add-port br0 p7 -- \
++ set interface p7 ofport_request=7 type=dummy -- \
++ set port p7 other_config:rstp-enable=false -- \
++ add-port br1 p8 -- \
++ set interface p8 ofport_request=8 type=dummy -- \
++ set port p8 other_config:rstp-enable=false -- \
++])
++
++AT_CHECK([ovs-ofctl add-flow br0 "in_port=7 icmp actions=1"])
++AT_CHECK([ovs-ofctl add-flow br0 "in_port=1 icmp actions=7"])
++AT_CHECK([ovs-ofctl add-flow br1 "in_port=8 icmp actions=2"])
++AT_CHECK([ovs-ofctl add-flow br1 "in_port=2 icmp actions=8"])
++
++# Give time for RSTP to synchronize.
++ovs-appctl time/warp 5000 500
++
++OVS_WAIT_UNTIL_EQUAL([cat ovs-vswitchd.log | FILTER_STP_TOPOLOGY], [dnl
++port p1: RSTP state changed from Disabled to Discarding
++port p2: RSTP state changed from Disabled to Discarding
++port p2: RSTP state changed from Discarding to Forwarding
++port p1: RSTP state changed from Discarding to Forwarding])
++
++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' | grep Datapath], [0], [dnl
++Datapath actions: 8
++])
++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(8),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' | grep Datapath], [0], [dnl
++Datapath actions: 7
++])
++
++OVS_VSWITCHD_STOP
++AT_CLEANUP
+diff --git a/tests/stp.at b/tests/stp.at
+index a6b6465d12..6239ec379f 100644
+--- a/tests/stp.at
++++ b/tests/stp.at
+@@ -464,6 +464,65 @@ Datapath actions: 2
+
+ AT_CLEANUP
+
++AT_SETUP([STP - patch ports])
++# Create br0 with interfaces p1 and p7
++# and br1 with interfaces p2 and p8
++# with p1 and p2 being connected patch ports.
++OVS_VSWITCHD_START(
++ [set port br0 other_config:stp-enable=false -- \
++ set bridge br0 stp-enable=true
++])
++
++AT_CHECK([add_of_br 1 \
++ set port br1 other_config:stp-enable=false -- \
++ set bridge br1 stp-enable=true])
++
++ovs-appctl time/stop
++
++AT_CHECK([ovs-vsctl \
++ add-port br0 p1 -- \
++ set interface p1 type=patch options:peer=p2 ofport_request=1 -- \
++ set port p1 other_config:stp-enable=true -- \
++ add-port br1 p2 -- \
++ set interface p2 type=patch options:peer=p1 ofport_request=2 -- \
++ set port p2 other_config:stp-enable=true -- \
++])
++
++AT_CHECK([ovs-vsctl \
++ add-port br0 p7 -- \
++ set interface p7 ofport_request=7 type=dummy -- \
++ set port p7 other_config:stp-enable=false -- \
++ add-port br1 p8 -- \
++ set interface p8 ofport_request=8 type=dummy -- \
++ set port p8 other_config:stp-enable=false -- \
++])
++
++AT_CHECK([ovs-ofctl add-flow br0 "in_port=7 icmp actions=1"])
++AT_CHECK([ovs-ofctl add-flow br0 "in_port=1 icmp actions=7"])
++AT_CHECK([ovs-ofctl add-flow br1 "in_port=8 icmp actions=2"])
++AT_CHECK([ovs-ofctl add-flow br1 "in_port=2 icmp actions=8"])
++
++# Give time for STP to synchronize.
++ovs-appctl time/warp 30000 3000
++
++OVS_WAIT_UNTIL_EQUAL([cat ovs-vswitchd.log | FILTER_STP_TOPOLOGY], [dnl
++port <>: STP state changed from disabled to listening
++port <>: STP state changed from disabled to listening
++port <>: STP state changed from listening to learning
++port <>: STP state changed from listening to learning
++port <>: STP state changed from learning to forwarding
++port <>: STP state changed from learning to forwarding])
++
++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' | grep Datapath], [0], [dnl
++Datapath actions: 8
++])
++AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(8),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' | grep Datapath], [0], [dnl
++Datapath actions: 7
++])
++
++OVS_VSWITCHD_STOP
++AT_CLEANUP
++
+ AT_SETUP([STP - flush the fdb and mdb when topology changed])
+ OVS_VSWITCHD_START([])
+
diff --git a/tests/system-afxdp.at b/tests/system-afxdp.at
index 0d09906fb6..88f6605663 100644
--- a/tests/system-afxdp.at
@@ -4178,7 +5888,7 @@ index 07f2b8fd0e..d3d27133b9 100644
])
diff --git a/tests/system-layer3-tunnels.at b/tests/system-layer3-tunnels.at
-index 81123f7309..6fbdedb64f 100644
+index 81123f7309..5dcdd2afae 100644
--- a/tests/system-layer3-tunnels.at
+++ b/tests/system-layer3-tunnels.at
@@ -34,15 +34,15 @@ AT_CHECK([ovs-ofctl add-flow br0 "priority=100 ip,nw_dst=10.1.1.2 action=mod_dl_
@@ -4200,7 +5910,7 @@ index 81123f7309..6fbdedb64f 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -83,15 +83,15 @@ AT_CHECK([ovs-ofctl add-flow br0 "priority=100 ip,nw_dst=10.1.1.2 action=mod_dl_
+@@ -83,76 +83,21 @@ AT_CHECK([ovs-ofctl add-flow br0 "priority=100 ip,nw_dst=10.1.1.2 action=mod_dl_
OVS_WAIT_UNTIL([ip netns exec at_ns0 ping -c 1 10.1.1.2])
dnl First, check the underlay
@@ -4219,7 +5929,68 @@ index 81123f7309..6fbdedb64f 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -191,11 +191,11 @@ AT_CHECK([ovs-vsctl add-port br1 patch1])
+ OVS_TRAFFIC_VSWITCHD_STOP
+ AT_CLEANUP
+
+-AT_SETUP([layer3 - use non-local port as tunnel endpoint])
+-
+-OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1])
+-AT_CHECK([ovs-vsctl add-port br0 vtep0 -- set int vtep0 type=dummy], [0])
+-AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy], [0])
+-AT_CHECK([ovs-vsctl add-port int-br t1 -- set Interface t1 type=gre \
+- options:remote_ip=1.1.2.92 ofport_request=3], [0])
+-
+-AT_CHECK([ovs-appctl dpif/show], [0], [dnl
+-dummy@ovs-dummy: hit:0 missed:0
+- br0:
+- br0 65534/100: (dummy-internal)
+- p0 1/1: (dummy)
+- vtep0 2/2: (dummy)
+- int-br:
+- int-br 65534/3: (dummy-internal)
+- t1 3/4: (gre: remote_ip=1.1.2.92)
+-])
+-
+-AT_CHECK([ovs-appctl netdev-dummy/ip4addr vtep0 1.1.2.88/24], [0], [OK
+-])
+-AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 vtep0], [0], [OK
+-])
+-AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+-AT_CHECK([ovs-ofctl add-flow int-br action=normal])
+-
+-dnl Use arp request and reply to achieve tunnel next hop mac binding
+-dnl By default, vtep0's MAC address is aa:55:aa:55:00:03
+-AT_CHECK([ovs-appctl netdev-dummy/receive vtep0 'recirc_id(0),in_port(2),eth(dst=ff:ff:ff:ff:ff:ff,src=aa:55:aa:55:00:03),eth_type(0x0806),arp(tip=1.1.2.92,sip=1.1.2.88,op=1,sha=aa:55:aa:55:00:03,tha=00:00:00:00:00:00)'])
+-AT_CHECK([ovs-appctl netdev-dummy/receive p0 'recirc_id(0),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:03),eth_type(0x0806),arp(sip=1.1.2.92,tip=1.1.2.88,op=2,sha=f8:bc:12:44:34:b6,tha=aa:55:aa:55:00:03)'])
+-
+-AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl
+-1.1.2.92 f8:bc:12:44:34:b6 br0
+-])
+-
+-AT_CHECK([ovs-appctl ovs/route/show | tail -n+2 | sort], [0], [dnl
+-User: 1.1.2.0/24 dev vtep0 SRC 1.1.2.88
+-])
+-
+-dnl Check GRE tunnel pop
+-AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:03),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+-
+-AT_CHECK([tail -1 stdout], [0],
+- [Datapath actions: tnl_pop(4)
+-])
+-
+-dnl Check GRE tunnel push
+-AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(3),eth(dst=f9:bc:12:44:34:b6,src=af:55:aa:55:00:03),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.92,proto=1,tos=0,ttl=64,frag=no)'], [0], [stdout])
+-AT_CHECK([tail -1 stdout], [0],
+- [Datapath actions: tnl_push(tnl_port(4),header(size=38,type=3,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:03,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x0,proto=0x6558))),out_port(2)),1
+-])
+-
+-OVS_VSWITCHD_STOP
+-AT_CLEANUP
+-
+ AT_SETUP([layer3 - ping over MPLS Bareudp])
+ OVS_CHECK_BAREUDP()
+ OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])])
+@@ -191,11 +136,11 @@ AT_CHECK([ovs-vsctl add-port br1 patch1])
AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br0 flows0.txt])
AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br1 flows1.txt])
@@ -4233,7 +6004,7 @@ index 81123f7309..6fbdedb64f 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
OVS_TRAFFIC_VSWITCHD_STOP
-@@ -239,11 +239,11 @@ AT_CHECK([ovs-vsctl add-port br1 patch1])
+@@ -239,11 +184,11 @@ AT_CHECK([ovs-vsctl add-port br1 patch1])
AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br0 flows0.txt])
AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br1 flows1.txt])
@@ -4510,7 +6281,7 @@ index 871a3bda4f..3d84a53182 100644
])
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
-index 808c492a22..e051c942f0 100644
+index 808c492a22..23404a2799 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -10,13 +10,13 @@ ADD_NAMESPACES(at_ns0, at_ns1)
@@ -5231,7 +7002,78 @@ index 808c492a22..e051c942f0 100644
AT_BANNER([MPLS])
AT_SETUP([mpls - encap header dp-support])
-@@ -2516,6 +2677,7 @@ AT_CLEANUP
+@@ -2322,34 +2483,53 @@ AT_BANNER([QoS])
+
+ AT_SETUP([QoS - basic configuration])
+ AT_SKIP_IF([test $HAVE_TC = no])
++AT_SKIP_IF([test $HAVE_ETHTOOL = "no"])
+ OVS_TRAFFIC_VSWITCHD_START()
+
+-ADD_NAMESPACES(at_ns0, at_ns1)
++AT_CHECK([ip tuntap add ovs-tap0 mode tap])
++on_exit 'ip link del ovs-tap0'
++AT_CHECK([ip tuntap add ovs-tap1 mode tap])
++on_exit 'ip link del ovs-tap1'
+
+-ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+-ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
++dnl Set maximum link speed to 5Gb.
++AT_CHECK([ethtool -s ovs-tap0 speed 5000 duplex full])
++AT_CHECK([ip link set dev ovs-tap0 up])
++AT_CHECK([ethtool -s ovs-tap1 speed 5000 duplex full])
++AT_CHECK([ip link set dev ovs-tap1 up])
+
+-dnl Adding a custom qdisc to ovs-p1, ovs-p0 will have the default qdisc.
+-AT_CHECK([tc qdisc add dev ovs-p1 root noqueue])
+-AT_CHECK([tc qdisc show dev ovs-p1 | grep -q noqueue])
++AT_CHECK([ovs-vsctl add-port br0 ovs-tap0 -- set int ovs-tap0 type=tap])
++AT_CHECK([ovs-vsctl add-port br0 ovs-tap1 -- set int ovs-tap1 type=tap])
+
+-dnl Configure the same QoS for both ports.
+-AT_CHECK([ovs-vsctl set port ovs-p0 qos=@qos -- set port ovs-p1 qos=@qos dnl
+- -- --id=@qos create qos dnl
+- type=linux-htb other-config:max-rate=3000000 queues:0=@queue dnl
+- -- --id=@queue create queue dnl
++dnl Adding a custom qdisc to ovs-tap1, ovs-tap0 will have the default qdisc.
++AT_CHECK([tc qdisc add dev ovs-tap1 root noqueue])
++AT_CHECK([tc qdisc show dev ovs-tap1 | grep -q noqueue])
++
++dnl Configure the same QoS for both ports:
++dnl queue0 uses fixed max-rate.
++dnl queue1 relies on underlying link speed.
++AT_CHECK([ovs-vsctl dnl
++ -- --id=@queue0 create queue dnl
+ other_config:min-rate=2000000 other_config:max-rate=3000000 dnl
+- other_config:burst=3000000],
++ other_config:burst=3000000 dnl
++ -- --id=@queue1 create queue dnl
++ other_config:min-rate=4000000 other_config:burst=4000000 dnl
++ -- --id=@qos create qos dnl
++ type=linux-htb queues:0=@queue0 dnl
++ queues:1=@queue1 -- dnl
++ -- set port ovs-tap0 qos=@qos -- set port ovs-tap1 qos=@qos],
+ [ignore], [ignore])
+
+ dnl Wait for qdiscs to be applied.
+-OVS_WAIT_UNTIL([tc qdisc show dev ovs-p0 | grep -q htb])
+-OVS_WAIT_UNTIL([tc qdisc show dev ovs-p1 | grep -q htb])
++OVS_WAIT_UNTIL([tc qdisc show dev ovs-tap0 | grep -q htb])
++OVS_WAIT_UNTIL([tc qdisc show dev ovs-tap1 | grep -q htb])
+
+ dnl Check the configuration.
+-m4_define([HTB_CONF], [rate 2Mbit ceil 3Mbit burst 375000b cburst 375000b])
+-AT_CHECK([tc class show dev ovs-p0 | grep -q 'class htb .* HTB_CONF'])
+-AT_CHECK([tc class show dev ovs-p1 | grep -q 'class htb .* HTB_CONF'])
++m4_define([HTB_CONF0], [rate 2Mbit ceil 3Mbit burst 375000b cburst 375000b])
++m4_define([HTB_CONF1], [rate 4Mbit ceil 5Gbit burst 500000b cburst 500000b])
++AT_CHECK([tc class show dev ovs-tap0 | grep -q 'class htb .* HTB_CONF0'])
++AT_CHECK([tc class show dev ovs-tap0 | grep -q 'class htb .* HTB_CONF1'])
++AT_CHECK([tc class show dev ovs-tap1 | grep -q 'class htb .* HTB_CONF0'])
++AT_CHECK([tc class show dev ovs-tap1 | grep -q 'class htb .* HTB_CONF1'])
+
+ OVS_TRAFFIC_VSWITCHD_STOP
+ AT_CLEANUP
+@@ -2516,6 +2696,7 @@ AT_CLEANUP
AT_SETUP([conntrack - ct flush])
CHECK_CONNTRACK()
@@ -5239,7 +7081,7 @@ index 808c492a22..e051c942f0 100644
OVS_TRAFFIC_VSWITCHD_START()
ADD_NAMESPACES(at_ns0, at_ns1)
-@@ -2526,10 +2688,8 @@ ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+@@ -2526,10 +2707,8 @@ ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
AT_DATA([flows.txt], [dnl
priority=1,action=drop
priority=10,arp,action=normal
@@ -5252,7 +7094,7 @@ index 808c492a22..e051c942f0 100644
])
AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
-@@ -2564,7 +2724,7 @@ AT_CHECK([FLUSH_CMD zone=5 'ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17
+@@ -2564,7 +2743,7 @@ AT_CHECK([FLUSH_CMD zone=5 'ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0])
dnl Test ICMP traffic
@@ -5261,7 +7103,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -2692,6 +2852,25 @@ udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.
+@@ -2692,6 +2871,25 @@ udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.
AT_CHECK([FLUSH_CMD])
@@ -5287,7 +7129,7 @@ index 808c492a22..e051c942f0 100644
AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
])
-@@ -2745,7 +2924,7 @@ priority=100,in_port=2,icmp,ct_state=+trk+est,action=1
+@@ -2745,7 +2943,7 @@ priority=100,in_port=2,icmp,ct_state=+trk+est,action=1
AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
dnl Pings from ns0->ns1 should work fine.
@@ -5296,7 +7138,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -2786,7 +2965,7 @@ priority=100,in_port=2,icmp,ct_state=+trk+est,action=1
+@@ -2786,7 +2984,7 @@ priority=100,in_port=2,icmp,ct_state=+trk+est,action=1
AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
dnl Pings from ns0->ns1 should work fine.
@@ -5305,7 +7147,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -2886,7 +3065,7 @@ NS_CHECK_EXEC([at_ns1], [ping6 -q -c 3 -i 0.3 -w 2 fc00::1 | FORMAT_PING], [0],
+@@ -2886,7 +3084,7 @@ NS_CHECK_EXEC([at_ns1], [ping6 -q -c 3 -i 0.3 -w 2 fc00::1 | FORMAT_PING], [0],
])
dnl Pings from ns0->ns1 should work fine.
@@ -5314,7 +7156,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -3796,7 +3975,7 @@ table=0,in_port=ovs-p1,ct_state=+trk+rel+rpl,icmp,actions=ovs-p0
+@@ -3796,7 +3994,7 @@ table=0,in_port=ovs-p1,ct_state=+trk+rel+rpl,icmp,actions=ovs-p0
AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
rm p0.pcap
@@ -5323,7 +7165,7 @@ index 808c492a22..e051c942f0 100644
OVS_WAIT_UNTIL([grep "listening" tcpdump0_err])
dnl Send UDP packet from 10.1.1.1:1234 to 10.1.1.240:80
-@@ -3837,12 +4016,12 @@ dnl Modify userspace conntrack fragmentation handling.
+@@ -3837,12 +4035,12 @@ dnl Modify userspace conntrack fragmentation handling.
DPCTL_MODIFY_FRAGMENTATION()
dnl Ipv4 fragmentation connectivity check.
@@ -5338,7 +7180,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -3914,12 +4093,12 @@ dnl Modify userspace conntrack fragmentation handling.
+@@ -3914,12 +4112,12 @@ dnl Modify userspace conntrack fragmentation handling.
DPCTL_MODIFY_FRAGMENTATION()
dnl Ipv4 fragmentation connectivity check.
@@ -5353,7 +7195,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -3960,22 +4139,22 @@ AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+@@ -3960,22 +4158,22 @@ AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
OVS_WAIT_UNTIL([ip netns exec at_ns0 ping -c 1 10.2.2.2])
dnl Ipv4 fragmentation connectivity check.
@@ -5380,7 +7222,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -4134,12 +4313,12 @@ dnl "connect: Cannot assign requested address"
+@@ -4134,12 +4332,12 @@ dnl "connect: Cannot assign requested address"
OVS_WAIT_UNTIL([ip netns exec at_ns0 ping6 -c 1 fc00::2])
dnl Ipv6 fragmentation connectivity check.
@@ -5395,7 +7237,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -4216,12 +4395,12 @@ dnl "connect: Cannot assign requested address"
+@@ -4216,12 +4414,12 @@ dnl "connect: Cannot assign requested address"
OVS_WAIT_UNTIL([ip netns exec at_ns0 ping6 -c 1 fc00::2])
dnl Ipv4 fragmentation connectivity check.
@@ -5410,7 +7252,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -4259,22 +4438,22 @@ AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+@@ -4259,22 +4457,22 @@ AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
OVS_WAIT_UNTIL([ip netns exec at_ns0 ping6 -c 1 fc00:1::4])
dnl Ipv6 fragmentation connectivity check.
@@ -5437,7 +7279,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -4486,18 +4665,18 @@ ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.1.1.1/24],
+@@ -4486,18 +4684,18 @@ ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.1.1.1/24],
[id 0 dstport 4789])
dnl First, check the underlay
@@ -5460,7 +7302,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -4546,18 +4725,18 @@ dnl "connect: Cannot assign requested address"
+@@ -4546,18 +4744,18 @@ dnl "connect: Cannot assign requested address"
OVS_WAIT_UNTIL([ip netns exec at_ns0 ping6 -c 1 fc00::2])
dnl First, check the underlay
@@ -5483,7 +7325,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -4670,7 +4849,7 @@ dnl The default udp_single and icmp_first timeouts are 30 seconds in
+@@ -4670,7 +4868,7 @@ dnl The default udp_single and icmp_first timeouts are 30 seconds in
dnl kernel DP, and 60 seconds in userspace DP.
dnl Send ICMP and UDP traffic
@@ -5492,7 +7334,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
-@@ -4696,7 +4875,7 @@ done
+@@ -4696,7 +4894,7 @@ done
AT_CHECK([ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=5 udp_first=1 udp_single=1 icmp_first=1 icmp_reply=1])
dnl Send ICMP and UDP traffic
@@ -5501,7 +7343,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
-@@ -4714,7 +4893,7 @@ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
+@@ -4714,7 +4912,7 @@ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
])
dnl Re-send ICMP and UDP traffic to test conntrack cache
@@ -5510,7 +7352,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
-@@ -4735,7 +4914,7 @@ dnl Set the timeout policy to default again.
+@@ -4735,7 +4933,7 @@ dnl Set the timeout policy to default again.
AT_CHECK([ovs-vsctl del-zone-tp $DP_TYPE zone=5])
dnl Send ICMP and UDP traffic
@@ -5519,7 +7361,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
-@@ -5001,7 +5180,7 @@ table=2,in_port=1,ip,ct_state=+trk+est,ct_zone=2,action=LOCAL
+@@ -5001,7 +5199,7 @@ table=2,in_port=1,ip,ct_state=+trk+est,ct_zone=2,action=LOCAL
AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
@@ -5528,7 +7370,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -5072,7 +5251,7 @@ table=4,priority=100,ip,action=output:NXM_NX_REG0[[]]
+@@ -5072,7 +5270,7 @@ table=4,priority=100,ip,action=output:NXM_NX_REG0[[]]
AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
@@ -5537,7 +7379,7 @@ index 808c492a22..e051c942f0 100644
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
-@@ -6140,7 +6319,7 @@ table=10 priority=0 action=drop
+@@ -6140,7 +6338,7 @@ table=10 priority=0 action=drop
AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
rm p0.pcap
@@ -5546,7 +7388,7 @@ index 808c492a22..e051c942f0 100644
sleep 1
dnl UDP packets from ns0->ns1 should solicit "destination unreachable" response.
-@@ -6164,7 +6343,7 @@ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sed -e 's/dst=
+@@ -6164,7 +6362,7 @@ AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sed -e 's/dst=
udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=