From f83af5de2b530ce7314812eba5b384df798037b0 Mon Sep 17 00:00:00 2001 From: Open vSwitch CI Date: Jul 17 2021 03:44:16 +0000 Subject: Import openvswitch2.15-2.15.0-27 from Fast DataPath --- diff --git a/SOURCES/openvswitch-2.15.0.patch b/SOURCES/openvswitch-2.15.0.patch index 3b2e7ee..2864451 100644 --- a/SOURCES/openvswitch-2.15.0.patch +++ b/SOURCES/openvswitch-2.15.0.patch @@ -174,12 +174,19 @@ index 103495415a..a98ec672fc 100644 This configuration accomplishes that the high priority traffic has a guaranteed bandwidth egressing the ports at CIR (1000pps), but it can also diff --git a/NEWS b/NEWS -index bc901efdb1..d6a9395be4 100644 +index bc901efdb1..154a299d93 100644 --- a/NEWS +++ b/NEWS -@@ -1,3 +1,14 @@ -+v2.15.1 - xx xxx xxxx +@@ -1,3 +1,21 @@ ++v2.15.2 - xx xxx xxxx +--------------------- ++ - OVS now reports the datapath capability 'ct_zero_snat', which reflects ++ whether the SNAT with all-zero IP address is supported. ++ See ovs-vswitchd.conf.db(5) for details. ++ ++v2.15.1 - 01 Jul 2021 ++--------------------- ++ - Bug fixes + - ovs-ctl: + * New option '--no-record-hostname' to disable hostname configuration + in ovsdb on startup. @@ -211,7 +218,7 @@ index 435685c93d..15a54d636f 100644 AC_CHECK_MEMBERS([struct tcf_t.firstuse], [], [], [#include ]) diff --git a/configure.ac b/configure.ac -index fd82d7d270..9299342960 100644 +index fd82d7d270..bcee218005 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ @@ -219,20 +226,70 @@ index fd82d7d270..9299342960 100644 AC_PREREQ(2.63) -AC_INIT(openvswitch, 2.15.0, bugs@openvswitch.org) -+AC_INIT(openvswitch, 2.15.1, bugs@openvswitch.org) ++AC_INIT(openvswitch, 2.15.2, bugs@openvswitch.org) AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) +diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj +index d50a126b43..18f884f41b 100644 +--- a/datapath-windows/ovsext/ovsext.vcxproj ++++ b/datapath-windows/ovsext/ovsext.vcxproj +@@ -192,22 +192,39 @@ + + + true ++ $(CRT_IncludePath);$(KM_IncludePath); + + + true ++ $(CRT_IncludePath);$(KM_IncludePath); + + + true + ..\misc\DriverRecommendedRules.ruleset + true ++ $(CRT_IncludePath);$(KM_IncludePath); + + + true + ..\misc\DriverRecommendedRules.ruleset ++ $(CRT_IncludePath);$(KM_IncludePath); + + + true + ..\misc\DriverRecommendedRules.ruleset ++ $(CRT_IncludePath);$(KM_IncludePath); ++ ++ ++ $(CRT_IncludePath);$(KM_IncludePath); ++ ++ ++ $(CRT_IncludePath);$(KM_IncludePath); ++ ++ ++ $(CRT_IncludePath);$(KM_IncludePath); ++ ++ ++ $(CRT_IncludePath);$(KM_IncludePath); + + + diff --git a/debian/changelog b/debian/changelog -index 1f2b7a3668..8b5d075840 100644 +index 1f2b7a3668..ed5b127e59 100644 --- a/debian/changelog +++ b/debian/changelog -@@ -1,3 +1,9 @@ +@@ -1,3 +1,15 @@ ++openvswitch (2.15.2-1) unstable; urgency=low ++ [ Open vSwitch team ] ++ * New upstream version ++ ++ -- Open vSwitch team Thu, 01 Jul 2021 20:19:24 +0200 ++ +openvswitch (2.15.1-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + -+ -- Open vSwitch team Mon, 15 Feb 2021 17:35:33 +0100 ++ -- Open vSwitch team Thu, 01 Jul 2021 20:19:24 +0200 + openvswitch (2.15.0-1) unstable; urgency=low @@ -18350,8 +18407,179 @@ index 64111768b3..668507fd37 100755 def unixctl_xfrm_policies(conn, unused_argv, unused_aux): global xfrm policies = xfrm.get_policies() +diff --git a/lib/conntrack.c b/lib/conntrack.c +index feaaec1c3f..15d1cde79d 100644 +--- a/lib/conntrack.c ++++ b/lib/conntrack.c +@@ -46,6 +46,7 @@ VLOG_DEFINE_THIS_MODULE(conntrack); + COVERAGE_DEFINE(conntrack_full); + COVERAGE_DEFINE(conntrack_long_cleanup); + COVERAGE_DEFINE(conntrack_l4csum_err); ++COVERAGE_DEFINE(conntrack_lookup_natted_miss); + + struct conn_lookup_ctx { + struct conn_key key; +@@ -291,6 +292,11 @@ conntrack_init(void) + static struct ovsthread_once setup_l4_once = OVSTHREAD_ONCE_INITIALIZER; + struct conntrack *ct = xzalloc(sizeof *ct); + ++ /* This value can be used during init (e.g. timeout_policy_init()), ++ * set it first to ensure it is available. ++ */ ++ ct->hash_basis = random_uint32(); ++ + ovs_rwlock_init(&ct->resources_lock); + ovs_rwlock_wrlock(&ct->resources_lock); + hmap_init(&ct->alg_expectations); +@@ -308,7 +314,6 @@ conntrack_init(void) + timeout_policy_init(ct); + ovs_mutex_unlock(&ct->ct_lock); + +- ct->hash_basis = random_uint32(); + atomic_count_init(&ct->n_conn, 0); + atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT); + atomic_init(&ct->tcp_seq_chk, true); +@@ -1281,6 +1286,34 @@ process_one_fast(uint16_t zone, const uint32_t *setmark, + } + } + ++static void ++initial_conn_lookup(struct conntrack *ct, struct conn_lookup_ctx *ctx, ++ long long now, bool natted) ++{ ++ if (natted) { ++ /* If the packet has been already natted (e.g. a previous ++ * action took place), retrieve it performing a lookup of its ++ * reverse key. */ ++ conn_key_reverse(&ctx->key); ++ } ++ ++ conn_key_lookup(ct, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply); ++ ++ if (natted) { ++ if (OVS_LIKELY(ctx->conn)) { ++ ctx->reply = !ctx->reply; ++ ctx->key = ctx->reply ? ctx->conn->rev_key : ctx->conn->key; ++ ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); ++ } else { ++ /* A lookup failure does not necessarily imply that an ++ * error occurred, it may simply indicate that a conn got ++ * removed during the recirculation. */ ++ COVERAGE_INC(conntrack_lookup_natted_miss); ++ conn_key_reverse(&ctx->key); ++ } ++ } ++} ++ + static void + process_one(struct conntrack *ct, struct dp_packet *pkt, + struct conn_lookup_ctx *ctx, uint16_t zone, +@@ -1296,7 +1329,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, + } + + bool create_new_conn = false; +- conn_key_lookup(ct, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply); ++ initial_conn_lookup(ct, ctx, now, !!(pkt->md.ct_state & ++ (CS_SRC_NAT | CS_DST_NAT))); + struct conn *conn = ctx->conn; + + /* Delete found entry if in wrong direction. 'force' implies commit. */ +@@ -1669,15 +1703,22 @@ static inline bool + checksum_valid(const struct conn_key *key, const void *data, size_t size, + const void *l3) + { ++ bool valid; ++ + if (key->dl_type == htons(ETH_TYPE_IP)) { + uint32_t csum = packet_csum_pseudoheader(l3); +- return csum_finish(csum_continue(csum, data, size)) == 0; ++ valid = (csum_finish(csum_continue(csum, data, size)) == 0); + } else if (key->dl_type == htons(ETH_TYPE_IPV6)) { +- return packet_csum_upperlayer6(l3, data, key->nw_proto, size) == 0; ++ valid = (packet_csum_upperlayer6(l3, data, key->nw_proto, size) == 0); + } else { ++ valid = false; ++ } ++ ++ if (!valid) { + COVERAGE_INC(conntrack_l4csum_err); +- return false; + } ++ ++ return valid; + } + + static inline bool +@@ -2076,6 +2117,8 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, + ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); + return true; + } ++ } else { ++ COVERAGE_INC(conntrack_l4csum_err); + } + } + +diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c +index 6a5ba052dd..cfc2315e3d 100644 +--- a/lib/ct-dpif.c ++++ b/lib/ct-dpif.c +@@ -889,3 +889,11 @@ ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, + dpif, tp_id, dl_type, nw_proto, tp_name, is_generic) + : EOPNOTSUPP); + } ++ ++int ++ct_dpif_get_features(struct dpif *dpif, enum ct_features *features) ++{ ++ return (dpif->dpif_class->ct_get_features ++ ? dpif->dpif_class->ct_get_features(dpif, features) ++ : EOPNOTSUPP); ++} +diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h +index 88f4c7e28c..b59cba962a 100644 +--- a/lib/ct-dpif.h ++++ b/lib/ct-dpif.h +@@ -271,6 +271,11 @@ struct ct_dpif_timeout_policy { + * timeout attribute values */ + }; + ++/* Conntrack Features. */ ++enum ct_features { ++ CONNTRACK_F_ZERO_SNAT = 1 << 0, /* All-zero SNAT support. */ ++}; ++ + int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **, + const uint16_t *zone, int *); + int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *); +@@ -325,5 +330,6 @@ int ct_dpif_timeout_policy_dump_done(struct dpif *dpif, void *state); + int ct_dpif_get_timeout_policy_name(struct dpif *dpif, uint32_t tp_id, + uint16_t dl_type, uint8_t nw_proto, + char **tp_name, bool *is_generic); ++int ct_dpif_get_features(struct dpif *dpif, enum ct_features *features); + + #endif /* CT_DPIF_H */ +diff --git a/lib/dp-packet.h b/lib/dp-packet.h +index 9e2d06b3dd..cb3f30e5b6 100644 +--- a/lib/dp-packet.h ++++ b/lib/dp-packet.h +@@ -726,7 +726,6 @@ enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */ + struct dp_packet_batch { + size_t count; + bool trunc; /* true if the batch needs truncate. */ +- bool do_not_steal; /* Indicate that the packets should not be stolen. */ + struct dp_packet *packets[NETDEV_MAX_BURST]; + }; + +@@ -735,7 +734,6 @@ dp_packet_batch_init(struct dp_packet_batch *batch) + { + batch->count = 0; + batch->trunc = false; +- batch->do_not_steal = false; + } + + static inline void diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c -index 4381c618f1..650e67ab30 100644 +index 4381c618f1..f18441072a 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -279,8 +279,9 @@ static bool dpcls_lookup(struct dpcls *cls, @@ -18366,7 +18594,128 @@ index 4381c618f1..650e67ab30 100644 uint64_t packet_count; uint64_t byte_count; }; -@@ -3834,6 +3835,15 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) +@@ -1338,19 +1339,21 @@ dpif_netdev_subtable_lookup_get(struct unixctl_conn *conn, int argc OVS_UNUSED, + } + + static void +-dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc, ++dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[], void *aux OVS_UNUSED) + { + /* This function requires 2 parameters (argv[1] and argv[2]) to execute. + * argv[1] is subtable name + * argv[2] is priority +- * argv[3] is the datapath name (optional if only 1 datapath exists) + */ + const char *func_name = argv[1]; + + errno = 0; + char *err_char; + uint32_t new_prio = strtoul(argv[2], &err_char, 10); ++ uint32_t lookup_dpcls_changed = 0; ++ uint32_t lookup_subtable_changed = 0; ++ struct shash_node *node; + if (errno != 0 || new_prio > UINT8_MAX) { + unixctl_command_reply_error(conn, + "error converting priority, use integer in range 0-255\n"); +@@ -1364,58 +1367,42 @@ dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc, + return; + } + +- /* argv[3] is optional datapath instance. If no datapath name is provided +- * and only one datapath exists, the one existing datapath is reprobed. +- */ + ovs_mutex_lock(&dp_netdev_mutex); +- struct dp_netdev *dp = NULL; +- +- if (argc == 4) { +- dp = shash_find_data(&dp_netdevs, argv[3]); +- } else if (shash_count(&dp_netdevs) == 1) { +- dp = shash_first(&dp_netdevs)->data; +- } +- +- if (!dp) { +- ovs_mutex_unlock(&dp_netdev_mutex); +- unixctl_command_reply_error(conn, +- "please specify an existing datapath"); +- return; +- } +- +- /* Get PMD threads list, required to get DPCLS instances. */ +- size_t n; +- uint32_t lookup_dpcls_changed = 0; +- uint32_t lookup_subtable_changed = 0; +- struct dp_netdev_pmd_thread **pmd_list; +- sorted_poll_thread_list(dp, &pmd_list, &n); ++ SHASH_FOR_EACH (node, &dp_netdevs) { ++ struct dp_netdev *dp = node->data; + +- /* take port mutex as HMAP iters over them. */ +- ovs_mutex_lock(&dp->port_mutex); ++ /* Get PMD threads list, required to get DPCLS instances. */ ++ size_t n; ++ struct dp_netdev_pmd_thread **pmd_list; ++ sorted_poll_thread_list(dp, &pmd_list, &n); + +- for (size_t i = 0; i < n; i++) { +- struct dp_netdev_pmd_thread *pmd = pmd_list[i]; +- if (pmd->core_id == NON_PMD_CORE_ID) { +- continue; +- } ++ /* take port mutex as HMAP iters over them. */ ++ ovs_mutex_lock(&dp->port_mutex); + +- struct dp_netdev_port *port = NULL; +- HMAP_FOR_EACH (port, node, &dp->ports) { +- odp_port_t in_port = port->port_no; +- struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port); +- if (!cls) { ++ for (size_t i = 0; i < n; i++) { ++ struct dp_netdev_pmd_thread *pmd = pmd_list[i]; ++ if (pmd->core_id == NON_PMD_CORE_ID) { + continue; + } +- uint32_t subtbl_changes = dpcls_subtable_lookup_reprobe(cls); +- if (subtbl_changes) { +- lookup_dpcls_changed++; +- lookup_subtable_changed += subtbl_changes; ++ ++ struct dp_netdev_port *port = NULL; ++ HMAP_FOR_EACH (port, node, &dp->ports) { ++ odp_port_t in_port = port->port_no; ++ struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port); ++ if (!cls) { ++ continue; ++ } ++ uint32_t subtbl_changes = dpcls_subtable_lookup_reprobe(cls); ++ if (subtbl_changes) { ++ lookup_dpcls_changed++; ++ lookup_subtable_changed += subtbl_changes; ++ } + } + } +- } + +- /* release port mutex before netdev mutex. */ +- ovs_mutex_unlock(&dp->port_mutex); ++ /* release port mutex before netdev mutex. */ ++ ovs_mutex_unlock(&dp->port_mutex); ++ } + ovs_mutex_unlock(&dp_netdev_mutex); + + struct ds reply = DS_EMPTY_INITIALIZER; +@@ -1644,8 +1631,8 @@ dpif_netdev_init(void) + 0, 1, dpif_netdev_bond_show, + NULL); + unixctl_command_register("dpif-netdev/subtable-lookup-prio-set", +- "[lookup_func] [prio] [dp]", +- 2, 3, dpif_netdev_subtable_lookup_set, ++ "[lookup_func] [prio]", ++ 2, 2, dpif_netdev_subtable_lookup_set, + NULL); + unixctl_command_register("dpif-netdev/subtable-lookup-prio-get", "", + 0, 0, dpif_netdev_subtable_lookup_get, +@@ -3834,6 +3821,15 @@ dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) return error; } @@ -18382,7 +18731,15 @@ index 4381c618f1..650e67ab30 100644 if (put->ufid) { ufid = *put->ufid; } else { -@@ -4878,6 +4888,12 @@ struct rr_numa { +@@ -4159,7 +4155,6 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) + } + + dp_packet_batch_init_packet(&pp, execute->packet); +- pp.do_not_steal = true; + dp_netdev_execute_actions(pmd, &pp, false, execute->flow, + execute->actions, execute->actions_len); + dp_netdev_pmd_flush_output_packets(pmd, true); +@@ -4878,6 +4873,12 @@ struct rr_numa { bool idx_inc; }; @@ -18395,7 +18752,7 @@ index 4381c618f1..650e67ab30 100644 static struct rr_numa * rr_numa_list_lookup(struct rr_numa_list *rr, int numa_id) { -@@ -5590,10 +5606,17 @@ get_dry_run_variance(struct dp_netdev *dp, uint32_t *core_list, +@@ -5590,10 +5591,17 @@ get_dry_run_variance(struct dp_netdev *dp, uint32_t *core_list, for (int i = 0; i < n_rxqs; i++) { int numa_id = netdev_get_numa_id(rxqs[i]->port->netdev); numa = rr_numa_list_lookup(&rr, numa_id); @@ -18416,7 +18773,7 @@ index 4381c618f1..650e67ab30 100644 goto cleanup; } -@@ -6203,12 +6226,14 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, +@@ -6203,12 +6211,14 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, /* Update all bands and find the one hit with the highest rate for each * packet (if any). */ for (int m = 0; m < meter->n_bands; ++m) { @@ -18435,7 +18792,7 @@ index 4381c618f1..650e67ab30 100644 } /* Drain the bucket for all the packets, if possible. */ -@@ -6226,8 +6251,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, +@@ -6226,8 +6236,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, * (Only one band will be fired by a packet, and that * can be different for each packet.) */ for (int i = band_exceeded_pkt; i < cnt; i++) { @@ -18446,7 +18803,7 @@ index 4381c618f1..650e67ab30 100644 exceeded_band[i] = m; } } -@@ -6246,8 +6271,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, +@@ -6246,8 +6256,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, /* Update the exceeding band for the exceeding packet. * (Only one band will be fired by a packet, and that * can be different for each packet.) */ @@ -18457,7 +18814,7 @@ index 4381c618f1..650e67ab30 100644 exceeded_band[i] = m; } } -@@ -6329,16 +6354,14 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, +@@ -6329,16 +6339,14 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, config->bands[i].burst_size = config->bands[i].rate; } @@ -18479,8 +18836,16 @@ index 4381c618f1..650e67ab30 100644 if (band_max_delta_t > meter->max_delta_t) { meter->max_delta_t = band_max_delta_t; } +@@ -8493,6 +8501,7 @@ const struct dpif_class dpif_netdev_class = { + NULL, /* ct_timeout_policy_dump_next */ + NULL, /* ct_timeout_policy_dump_done */ + dpif_netdev_ct_get_timeout_policy_name, ++ NULL, /* ct_get_features */ + dpif_netdev_ipf_set_enabled, + dpif_netdev_ipf_set_min_frag, + dpif_netdev_ipf_set_max_nfrags, diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c -index ceb56c6851..73d5608a81 100644 +index ceb56c6851..e6cb3ca7fa 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -2061,6 +2061,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) @@ -18558,6 +18923,68 @@ index ceb56c6851..73d5608a81 100644 nl_msg_put(request, &req_zone_limit, sizeof req_zone_limit); } nl_msg_end_nested(request, opt_offset); +@@ -3161,6 +3165,20 @@ dpif_netlink_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED, + return 0; + } + ++static int ++dpif_netlink_ct_get_features(struct dpif *dpif OVS_UNUSED, ++ enum ct_features *features) ++{ ++ if (features != NULL) { ++#ifndef _WIN32 ++ *features = CONNTRACK_F_ZERO_SNAT; ++#else ++ *features = 0; ++#endif ++ } ++ return 0; ++} ++ + #define CT_DPIF_NL_TP_TCP_MAPPINGS \ + CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_SENT, SYN_SENT) \ + CT_DPIF_NL_TP_MAPPING(TCP, TCP, SYN_RECV, SYN_RECV) \ +@@ -4003,6 +4021,7 @@ const struct dpif_class dpif_netlink_class = { + dpif_netlink_ct_timeout_policy_dump_next, + dpif_netlink_ct_timeout_policy_dump_done, + dpif_netlink_ct_get_timeout_policy_name, ++ dpif_netlink_ct_get_features, + NULL, /* ipf_set_enabled */ + NULL, /* ipf_set_min_frag */ + NULL, /* ipf_set_max_nfrags */ +@@ -4662,7 +4681,7 @@ report_loss(struct dpif_netlink *dpif, struct dpif_channel *ch, uint32_t ch_idx, + time_msec() - ch->last_poll); + } + +- VLOG_WARN("%s: lost packet on port channel %u of handler %u", +- dpif_name(&dpif->dpif), ch_idx, handler_id); ++ VLOG_WARN("%s: lost packet on port channel %u of handler %u%s", ++ dpif_name(&dpif->dpif), ch_idx, handler_id, ds_cstr(&s)); + ds_destroy(&s); + } +diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h +index b817fceac6..59e0a3a9dd 100644 +--- a/lib/dpif-provider.h ++++ b/lib/dpif-provider.h +@@ -81,6 +81,7 @@ struct ct_dpif_dump_state; + struct ct_dpif_entry; + struct ct_dpif_tuple; + struct ct_dpif_timeout_policy; ++enum ct_features; + + /* 'dpif_ipf_proto_status' and 'dpif_ipf_status' are presently in + * sync with 'ipf_proto_status' and 'ipf_status', but more +@@ -562,6 +563,10 @@ struct dpif_class { + uint16_t dl_type, uint8_t nw_proto, + char **tp_name, bool *is_generic); + ++ /* Stores the conntrack features supported by 'dpif' into features. ++ * The value is a bitmap of CONNTRACK_F_* bits. */ ++ int (*ct_get_features)(struct dpif *, enum ct_features *features); ++ + /* IP Fragmentation. */ + + /* Disables or enables conntrack fragment reassembly. The default diff --git a/lib/dpif.c b/lib/dpif.c index 56d0b4a654..26e8bfb7db 100644 --- a/lib/dpif.c @@ -18583,8 +19010,82 @@ index ecda896c78..f9728e6739 100644 const struct flow *flow; /* Flow extracted from 'packet'. */ /* Input, but possibly modified as a side effect of execution. */ +diff --git a/lib/ipf.c b/lib/ipf.c +index c20bcc0b33..9c83f1913a 100644 +--- a/lib/ipf.c ++++ b/lib/ipf.c +@@ -93,7 +93,6 @@ struct ipf_frag { + struct dp_packet *pkt; + uint16_t start_data_byte; + uint16_t end_data_byte; +- bool dnsteal; /* 'do not steal': if true, ipf should not free packet. */ + }; + + /* The key for a collection of fragments potentially making up an unfragmented +@@ -795,8 +794,7 @@ ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx, + static bool + ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, + struct dp_packet *pkt, uint16_t start_data_byte, +- uint16_t end_data_byte, bool ff, bool lf, bool v6, +- bool dnsteal) ++ uint16_t end_data_byte, bool ff, bool lf, bool v6) + OVS_REQUIRES(ipf->ipf_lock) + { + bool duped_frag = ipf_is_frag_duped(ipf_list->frag_list, +@@ -811,10 +809,9 @@ ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, + * recommend not setting the mempool number of buffers too low + * and also clamp the number of fragments. */ + struct ipf_frag *frag = &ipf_list->frag_list[last_inuse_idx + 1]; +- frag->pkt = pkt; ++ frag->pkt = dp_packet_clone(pkt); + frag->start_data_byte = start_data_byte; + frag->end_data_byte = end_data_byte; +- frag->dnsteal = dnsteal; + ipf_list->last_inuse_idx++; + atomic_count_inc(&ipf->nfrag); + ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED); +@@ -851,8 +848,7 @@ ipf_list_init(struct ipf_list *ipf_list, struct ipf_list_key *key, + * to a list of fragemnts. */ + static bool + ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, +- uint16_t zone, long long now, uint32_t hash_basis, +- bool dnsteal) ++ uint16_t zone, long long now, uint32_t hash_basis) + OVS_REQUIRES(ipf->ipf_lock) + { + struct ipf_list_key key; +@@ -921,7 +917,7 @@ ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, + } + + return ipf_process_frag(ipf, ipf_list, pkt, start_data_byte, +- end_data_byte, ff, lf, v6, dnsteal); ++ end_data_byte, ff, lf, v6); + } + + /* Filters out fragments from a batch of fragments and adjust the batch. */ +@@ -942,8 +938,7 @@ ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb, + ipf_is_valid_v6_frag(ipf, pkt)))) { + + ovs_mutex_lock(&ipf->ipf_lock); +- if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis, +- pb->do_not_steal)) { ++ if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis)) { + dp_packet_batch_refill(pb, pkt, pb_idx); + } + ovs_mutex_unlock(&ipf->ipf_lock); +@@ -1338,9 +1333,7 @@ ipf_destroy(struct ipf *ipf) + while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) { + struct dp_packet *pkt + = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt; +- if (!ipf_list->frag_list[ipf_list->last_sent_idx + 1].dnsteal) { +- dp_packet_delete(pkt); +- } ++ dp_packet_delete(pkt); + atomic_count_dec(&ipf->nfrag); + ipf_list->last_sent_idx++; + } diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c -index 6be23dbeed..ee97354db6 100644 +index 6be23dbeed..0ab511b60d 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -672,7 +672,9 @@ netdev_linux_update_lag(struct rtnetlink_change *change) @@ -18634,7 +19135,39 @@ index 6be23dbeed..ee97354db6 100644 } do { -@@ -2572,7 +2574,7 @@ exit: +@@ -1288,14 +1290,28 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu, + for (i = 0; i < retval; i++) { + struct dp_packet *pkt; + +- if (mmsgs[i].msg_len < ETH_HEADER_LEN) { ++ if (mmsgs[i].msg_hdr.msg_flags & MSG_TRUNC ++ || mmsgs[i].msg_len < ETH_HEADER_LEN) { + struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + ++ /* The rx->aux_bufs[i] will be re-used next time. */ + dp_packet_delete(buffers[i]); + netdev->rx_dropped += 1; +- VLOG_WARN_RL(&rl, "%s: Dropped packet: less than ether hdr size", +- netdev_get_name(netdev_)); ++ if (mmsgs[i].msg_hdr.msg_flags & MSG_TRUNC) { ++ /* Data is truncated, so the packet is corrupted, and needs ++ * to be dropped. This can happen if TSO/GRO is enabled in ++ * the kernel, but not in userspace, i.e. there is no dp ++ * buffer to store the full packet. */ ++ VLOG_WARN_RL(&rl, ++ "%s: Dropped packet: Too big. GRO/TSO enabled?", ++ netdev_get_name(netdev_)); ++ } else { ++ VLOG_WARN_RL(&rl, ++ "%s: Dropped packet: less than ether hdr size", ++ netdev_get_name(netdev_)); ++ } ++ + continue; + } + +@@ -2572,7 +2588,7 @@ exit: static struct tc_police tc_matchall_fill_police(uint32_t kbits_rate, uint32_t kbits_burst) { @@ -18644,7 +19177,7 @@ index 6be23dbeed..ee97354db6 100644 struct tc_police police; struct tc_ratespec rate; diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c -index 72b7915052..7656427845 100644 +index 72b7915052..32f52b46f2 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -48,6 +48,7 @@ static struct hmap ufid_to_tc = HMAP_INITIALIZER(&ufid_to_tc); @@ -18683,7 +19216,17 @@ index 72b7915052..7656427845 100644 match_set_ct_state_masked(match, ct_statev, ct_statem); } -@@ -1406,6 +1428,90 @@ flower_match_to_tun_opt(struct tc_flower *flower, const struct flow_tnl *tnl, +@@ -820,8 +842,7 @@ parse_tc_flower_to_match(struct tc_flower *flower, + action->encap.tp_dst); + } + if (!action->encap.no_csum) { +- nl_msg_put_u8(buf, OVS_TUNNEL_KEY_ATTR_CSUM, +- !action->encap.no_csum); ++ nl_msg_put_flag(buf, OVS_TUNNEL_KEY_ATTR_CSUM); + } + + parse_tc_flower_geneve_opts(action, buf); +@@ -1406,6 +1427,90 @@ flower_match_to_tun_opt(struct tc_flower *flower, const struct flow_tnl *tnl, flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len; } @@ -18774,7 +19317,7 @@ index 72b7915052..7656427845 100644 static int netdev_tc_flow_put(struct netdev *netdev, struct match *match, struct nlattr *actions, size_t actions_len, -@@ -1650,54 +1756,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, +@@ -1650,54 +1755,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, } } @@ -18830,7 +19373,7 @@ index 72b7915052..7656427845 100644 /* ignore exact match on skb_mark of 0. */ if (mask->pkt_mark == UINT32_MAX && !key->pkt_mark) { -@@ -1779,6 +1838,10 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, +@@ -1779,6 +1837,10 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, const struct nlattr *ct = nl_attr_get(nla); const size_t ct_len = nl_attr_get_size(nla); @@ -18841,7 +19384,7 @@ index 72b7915052..7656427845 100644 err = parse_put_flow_ct_action(&flower, action, ct, ct_len); if (err) { return err; -@@ -1971,6 +2034,96 @@ out: +@@ -1971,6 +2033,96 @@ out: tc_add_del_qdisc(ifindex, false, block_id, TC_INGRESS); } @@ -18938,7 +19481,7 @@ index 72b7915052..7656427845 100644 static void probe_tc_block_support(int ifindex) { -@@ -2038,6 +2191,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) +@@ -2038,6 +2190,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) block_id = get_block_id_from_netdev(netdev); probe_multi_mask_per_prio(ifindex); @@ -19086,6 +19629,36 @@ index bf0f8af544..737f48047b 100644 ofputil_group_to_string(group_id, name, sizeof name); ds_put_cstr(s, name); +diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml +index a2778de4bc..3894cb3c33 100644 +--- a/lib/ovs-actions.xml ++++ b/lib/ovs-actions.xml +@@ -1666,7 +1666,7 @@ for i in [1,n_members]: + + +

The ct action

+- ct(argument]...) ++ ct([argument]...) + ct(commit[, argument]...) + +

+@@ -1833,6 +1833,16 @@ for i in [1,n_members]: + connection, will behave the same as a bare nat. +

+ ++

++ For SNAT, there is a special case when the src IP ++ address is configured as all 0's, i.e., ++ nat(src=0.0.0.0). In this case, when a source port ++ collision is detected during the commit, the source port will be ++ translated to an ephemeral port. If there is no collision, no SNAT ++ is performed. Note that this is currently only implemented in the ++ Linux kernel datapath. ++

++ +

+ Open vSwitch 2.6 introduced nat. Linux 4.6 was the + earliest upstream kernel that implemented ct support for diff --git a/lib/ovsdb-cs.c b/lib/ovsdb-cs.c index ff8adaefb5..911b71dd4f 100644 --- a/lib/ovsdb-cs.c @@ -19453,6 +20026,215 @@ index 05bb48d66c..d93483245e 100644 const struct ovsdb_idl_row *ovsdb_idl_track_get_first( const struct ovsdb_idl *, const struct ovsdb_idl_table_class *); const struct ovsdb_idl_row *ovsdb_idl_track_get_next(const struct ovsdb_idl_row *); +diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c +index c0b0ae0448..af0bcbde8d 100644 +--- a/lib/tun-metadata.c ++++ b/lib/tun-metadata.c +@@ -828,7 +828,7 @@ tun_metadata_to_geneve_nlattr(const struct flow_tnl *tun, + } else { + tun_metadata_to_geneve_nlattr_mask(key, tun, flow, b); + } +- } else if (flow->metadata.present.len || is_mask) { ++ } else { + nl_msg_put_unspec(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + tun->metadata.opts.gnv, + flow->metadata.present.len); +diff --git a/ofproto/bond.c b/ofproto/bond.c +index 35b9caac01..a4116588f4 100644 +--- a/ofproto/bond.c ++++ b/ofproto/bond.c +@@ -1173,49 +1173,72 @@ bond_shift_load(struct bond_entry *hash, struct bond_member *to) + bond->bond_revalidate = true; + } + +-/* Picks and returns a bond_entry to migrate from 'from' (the most heavily ++/* Picks and returns 'bond_entry's to migrate from 'from' (the most heavily + * loaded bond member) to a bond member that has 'to_tx_bytes' bytes of load, + * given that doing so must decrease the ratio of the load on the two members +- * by at least 0.1. Returns NULL if there is no appropriate entry. ++ * by at least 0.1. Returns number of entries filled in 'to_migrate'. + * +- * The list of entries isn't sorted. I don't know of a reason to prefer to +- * shift away small hashes or large hashes. */ +-static struct bond_entry * +-choose_entry_to_migrate(const struct bond_member *from, uint64_t to_tx_bytes) ++ * The list of entries is sorted in descending order of load. This allows us ++ * to collect subset of entries with accumulated load close to ideal. */ ++static size_t ++choose_entries_to_migrate(const struct bond_member *from, uint64_t to_tx_bytes, ++ struct bond_entry **to_migrate) + OVS_REQ_WRLOCK(rwlock) + { + struct bond_entry *e; ++ /* Note, the ideal traffic is the mid point between 'from' and 'to'. ++ * This value does not change by rebalancing. */ ++ uint64_t ideal_tx_bytes = (from->tx_bytes + to_tx_bytes) / 2; ++ uint64_t ideal_delta = ideal_tx_bytes - to_tx_bytes; ++ uint64_t delta = 0; /* The amount to rebalance. */ ++ uint64_t new_low; /* The lower bandwidth between 'to' and 'from' ++ * after rebalancing. */ ++ uint64_t migration_threshold = ideal_delta / 10; /* 10% */ ++ size_t cnt = 0; + + if (ovs_list_is_short(&from->entries)) { + /* 'from' carries no more than one MAC hash, so shifting load away from + * it would be pointless. */ +- return NULL; ++ return 0; + } + + LIST_FOR_EACH (e, list_node, &from->entries) { +- uint64_t delta = e->tx_bytes; /* The amount to rebalance. */ +- uint64_t ideal_tx_bytes = (from->tx_bytes + to_tx_bytes)/2; +- /* Note, the ideal traffic is the mid point +- * between 'from' and 'to'. This value does +- * not change by rebalancing. */ +- uint64_t new_low; /* The lower bandwidth between 'to' and 'from' +- after rebalancing. */ +- +- new_low = MIN(from->tx_bytes - delta, to_tx_bytes + delta); +- +- if ((new_low > to_tx_bytes) && +- (new_low - to_tx_bytes >= (ideal_tx_bytes - to_tx_bytes) / 10)) { +- /* Only rebalance if the new 'low' is closer to to the mid point, +- * and the improvement exceeds 10% of current traffic +- * deviation from the ideal split. +- * +- * The improvement on the 'high' side is always the same as the +- * 'low' side. Thus consider 'low' side is sufficient. */ +- return e; ++ if (delta + e->tx_bytes <= ideal_delta) { ++ /* Take next entry if amount to rebalance will not exceed ideal. */ ++ to_migrate[cnt++] = e; ++ delta += e->tx_bytes; ++ } ++ if (ideal_delta - delta < migration_threshold) { ++ /* Stop collecting hashes if we're close enough to the ideal value ++ * to avoid frequent moving of light ones. */ ++ break; + } + } + +- return NULL; ++ if (!cnt) { ++ /* There is no entry with load less than or equal to 'ideal_delta'. ++ * Lets try closest one. The closest is the last in sorted list. */ ++ struct bond_entry *closest; ++ ++ ASSIGN_CONTAINER(closest, ovs_list_back(&from->entries), list_node); ++ ++ delta = closest->tx_bytes; ++ to_migrate[cnt++] = closest; ++ } ++ ++ new_low = MIN(from->tx_bytes - delta, to_tx_bytes + delta); ++ if ((new_low > to_tx_bytes) && ++ (new_low - to_tx_bytes >= migration_threshold)) { ++ /* Only rebalance if the new 'low' is closer to to the mid point and the ++ * improvement of traffic deviation from the ideal split exceeds 10% ++ * (migration threshold). ++ * ++ * The improvement on the 'high' side is always the same as the 'low' ++ * side. Thus consider 'low' side is sufficient. */ ++ return cnt; ++ } ++ ++ return 0; + } + + /* Inserts 'member' into 'bals' so that descending order of 'tx_bytes' is +@@ -1242,6 +1265,22 @@ reinsert_bal(struct ovs_list *bals, struct bond_member *member) + insert_bal(bals, member); + } + ++static int ++compare_bond_entries(const void *a_, const void *b_) ++ OVS_REQ_RDLOCK(rwlock) ++{ ++ const struct bond_entry *const *ap = a_; ++ const struct bond_entry *const *bp = b_; ++ const struct bond_entry *a = *ap; ++ const struct bond_entry *b = *bp; ++ ++ if (a->tx_bytes != b->tx_bytes) { ++ return a->tx_bytes > b->tx_bytes ? -1 : 1; ++ } else { ++ return 0; ++ } ++} ++ + /* If 'bond' needs rebalancing, does so. + * + * The caller should have called bond_account() for each active flow, or in case +@@ -1251,8 +1290,8 @@ reinsert_bal(struct ovs_list *bals, struct bond_member *member) + void + bond_rebalance(struct bond *bond) + { ++ struct bond_entry *e, *hashes[BOND_BUCKETS]; + struct bond_member *member; +- struct bond_entry *e; + struct ovs_list bals; + bool rebalanced = false; + bool use_recirc; +@@ -1276,7 +1315,15 @@ bond_rebalance(struct bond *bond) + member->tx_bytes = 0; + ovs_list_init(&member->entries); + } +- for (e = &bond->hash[0]; e <= &bond->hash[BOND_MASK]; e++) { ++ ++ for (int i = 0; i < BOND_BUCKETS; i++) { ++ hashes[i] = &bond->hash[i]; ++ } ++ qsort(hashes, BOND_BUCKETS, sizeof *hashes, compare_bond_entries); ++ ++ /* Iteration over sorted bond hashes will give us sorted 'entries'. */ ++ for (int i = 0; i < BOND_BUCKETS; i++) { ++ e = hashes[i]; + if (e->member && e->tx_bytes) { + e->member->tx_bytes += e->tx_bytes; + ovs_list_push_back(&e->member->entries, &e->list_node); +@@ -1311,15 +1358,23 @@ bond_rebalance(struct bond *bond) + break; + } + +- /* 'from' is carrying significantly more load than 'to'. Pick a hash ++ /* 'from' is carrying significantly more load than 'to'. Pick hashes + * to move from 'from' to 'to'. */ +- e = choose_entry_to_migrate(from, to->tx_bytes); +- if (e) { ++ size_t cnt = choose_entries_to_migrate(from, to->tx_bytes, hashes); ++ if (!cnt) { ++ /* Can't usefully migrate anything away from 'from'. ++ * Don't reconsider it. */ ++ ovs_list_remove(&from->bal_node); ++ continue; ++ } ++ ++ for (size_t i = 0; i < cnt; i++) { ++ e = hashes[i]; + bond_shift_load(e, to); + + /* Delete element from from->entries. + * +- * We don't add the element to to->hashes. That would only allow ++ * We don't add the element to to->entries. That would only allow + * 'e' to be migrated to another member in this rebalancing run, and + * there is no point in doing that. */ + ovs_list_remove(&e->list_node); +@@ -1327,12 +1382,8 @@ bond_rebalance(struct bond *bond) + /* Re-sort 'bals'. */ + reinsert_bal(&bals, from); + reinsert_bal(&bals, to); +- rebalanced = true; +- } else { +- /* Can't usefully migrate anything away from 'from'. +- * Don't reconsider it. */ +- ovs_list_remove(&from->bal_node); + } ++ rebalanced = true; + } + + /* Implement exponentially weighted moving average. A weight of 1/2 causes diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index 9c5c633b41..fa8f6cd0e8 100644 --- a/ofproto/connmgr.c @@ -19477,6 +20259,19 @@ index 9c5c633b41..fa8f6cd0e8 100644 LIST_FOR_EACH (ofconn, connmgr_node, &mgr->conns) { struct rconn_packet_counter *counter = ofconn->monitor_counter; +diff --git a/ofproto/ipfix-gen-entities b/ofproto/ipfix-gen-entities +index d5abe9c2ed..dcecdab212 100755 +--- a/ofproto/ipfix-gen-entities ++++ b/ofproto/ipfix-gen-entities +@@ -7,8 +7,6 @@ + # notice and this notice are preserved. This file is offered as-is, + # without warranty of any kind. + +-from __future__ import print_function +- + import getopt + import re + import sys diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index fdcb9eabbf..864c136b5d 100644 --- a/ofproto/ofproto-dpif-sflow.c @@ -19511,10 +20306,46 @@ index 5fae46adfc..ccf97266c0 100644 ovs_mutex_destroy(&udpif->ukeys[i].mutex); } diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c -index fd0b2fdea0..47db9bb57d 100644 +index fd0b2fdea0..5ce56adfae 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c -@@ -5413,6 +5413,8 @@ ct_add_timeout_policy_to_dpif(struct dpif *dpif, +@@ -1389,6 +1389,24 @@ check_ct_timeout_policy(struct dpif_backer *backer) + return !error; + } + ++/* Tests whether 'backer''s datapath supports the all-zero SNAT case. */ ++static bool ++dpif_supports_ct_zero_snat(struct dpif_backer *backer) ++{ ++ enum ct_features features; ++ bool supported = false; ++ ++ if (!ct_dpif_get_features(backer->dpif, &features)) { ++ if (features & CONNTRACK_F_ZERO_SNAT) { ++ supported = true; ++ } ++ } ++ VLOG_INFO("%s: Datapath %s ct_zero_snat", ++ dpif_name(backer->dpif), (supported) ? "supports" ++ : "does not support"); ++ return supported; ++} ++ + /* Tests whether 'backer''s datapath supports the + * OVS_ACTION_ATTR_CHECK_PKT_LEN action. */ + static bool +@@ -1588,8 +1606,9 @@ check_support(struct dpif_backer *backer) + backer->rt_support.ct_timeout = check_ct_timeout_policy(backer); + backer->rt_support.explicit_drop_action = + dpif_supports_explicit_drop_action(backer->dpif); +- backer->rt_support.lb_output_action= ++ backer->rt_support.lb_output_action = + dpif_supports_lb_output_action(backer->dpif); ++ backer->rt_support.ct_zero_snat = dpif_supports_ct_zero_snat(backer); + + /* Flow fields. */ + backer->rt_support.odp.ct_state = check_ct_state(backer); +@@ -5413,6 +5432,8 @@ ct_add_timeout_policy_to_dpif(struct dpif *dpif, struct ct_dpif_timeout_policy cdtp; struct simap_node *node; @@ -19523,6 +20354,30 @@ index fd0b2fdea0..47db9bb57d 100644 cdtp.id = ct_tp->tp_id; SIMAP_FOR_EACH (node, &ct_tp->tp) { ct_dpif_set_timeout_policy_attr_by_name(&cdtp, node->name, node->data); +@@ -5603,6 +5624,7 @@ get_datapath_cap(const char *datapath_type, struct smap *cap) + smap_add(cap, "explicit_drop_action", + s.explicit_drop_action ? "true" :"false"); + smap_add(cap, "lb_output_action", s.lb_output_action ? "true" : "false"); ++ smap_add(cap, "ct_zero_snat", s.ct_zero_snat ? "true" : "false"); + } + + /* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and +diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h +index b41c3d82ad..191cfcb0df 100644 +--- a/ofproto/ofproto-dpif.h ++++ b/ofproto/ofproto-dpif.h +@@ -204,7 +204,10 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *, + DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action") \ + \ + /* True if the datapath supports balance_tcp optimization */ \ +- DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode") ++ DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode")\ ++ \ ++ /* True if the datapath supports all-zero IP SNAT. */ \ ++ DPIF_SUPPORT_FIELD(bool, ct_zero_snat, "Conntrack all-zero IP SNAT") + + + /* Stores the various features which the corresponding backer supports. */ diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index b91517cd25..80ec2d9ac9 100644 --- a/ofproto/ofproto.c @@ -19576,6 +20431,50 @@ index 72756eb1f2..ba28e36d78 100644 } struct ovsdb_schema *schema2 = fetch_schema(rpc, schema1->name); +diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in +index 5914e08789..61cded16d3 100755 +--- a/ovsdb/ovsdb-idlc.in ++++ b/ovsdb/ovsdb-idlc.in +@@ -1,6 +1,5 @@ + #! @PYTHON3@ + +-from __future__ import print_function + import getopt + import os + import re +diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c +index 29a2bace84..ce6aee3008 100644 +--- a/ovsdb/ovsdb-server.c ++++ b/ovsdb/ovsdb-server.c +@@ -655,8 +655,6 @@ add_db(struct server_config *config, struct db *db) + static struct ovsdb_error * OVS_WARN_UNUSED_RESULT + open_db(struct server_config *config, const char *filename) + { +- struct db *db; +- + /* If we know that the file is already open, return a good error message. + * Otherwise, if the file is open, we'll fail later on with a harder to + * interpret file locking error. */ +@@ -671,9 +669,6 @@ open_db(struct server_config *config, const char *filename) + return error; + } + +- db = xzalloc(sizeof *db); +- db->filename = xstrdup(filename); +- + struct ovsdb_schema *schema; + if (ovsdb_storage_is_clustered(storage)) { + schema = NULL; +@@ -686,6 +681,9 @@ open_db(struct server_config *config, const char *filename) + } + ovs_assert(schema && !txn_json); + } ++ ++ struct db *db = xzalloc(sizeof *db); ++ db->filename = xstrdup(filename); + db->db = ovsdb_create(schema, storage); + ovsdb_jsonrpc_server_add_db(config->jsonrpc, db->db); + diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c index 9042658fa8..e019631e9a 100644 --- a/ovsdb/ovsdb.c @@ -19768,8 +20667,21 @@ index f662e90566..40415fcf62 100644 } return false; +diff --git a/python/ovs/compat/sortedcontainers/sortedlist.py b/python/ovs/compat/sortedcontainers/sortedlist.py +index 8aec6bbac1..ba55566926 100644 +--- a/python/ovs/compat/sortedcontainers/sortedlist.py ++++ b/python/ovs/compat/sortedcontainers/sortedlist.py +@@ -3,8 +3,6 @@ + """ + # pylint: disable=redefined-builtin, ungrouped-imports + +-from __future__ import print_function +- + from bisect import bisect_left, bisect_right, insort + from collections import Sequence, MutableSequence + from functools import wraps diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py -index 5850ac7abf..4226d1cb2f 100644 +index 5850ac7abf..4cf79cf94e 100644 --- a/python/ovs/db/idl.py +++ b/python/ovs/db/idl.py @@ -12,6 +12,7 @@ @@ -19791,7 +20703,60 @@ index 5850ac7abf..4226d1cb2f 100644 class Idl(object): """Open vSwitch Database Interface Definition Language (OVSDB IDL). -@@ -614,6 +619,7 @@ class Idl(object): +@@ -241,6 +246,7 @@ class Idl(object): + i = 0 + while i < 50: + i += 1 ++ previous_change_seqno = self.change_seqno + if not self._session.is_connected(): + break + +@@ -269,7 +275,7 @@ class Idl(object): + if msg.params[0] == str(self.server_monitor_uuid): + self.__parse_update(msg.params[1], OVSDB_UPDATE, + tables=self.server_tables) +- self.change_seqno = initial_change_seqno ++ self.change_seqno = previous_change_seqno + if not self.__check_server_db(): + self.force_reconnect() + break +@@ -312,7 +318,7 @@ class Idl(object): + self.__error() + break + else: +- self.change_seqno = initial_change_seqno ++ self.change_seqno = previous_change_seqno + self.__send_monitor_request() + elif (msg.type == ovs.jsonrpc.Message.T_REPLY + and self._server_monitor_request_id is not None +@@ -322,7 +328,7 @@ class Idl(object): + self._server_monitor_request_id = None + self.__parse_update(msg.result, OVSDB_UPDATE, + tables=self.server_tables) +- self.change_seqno = initial_change_seqno ++ self.change_seqno = previous_change_seqno + if self.__check_server_db(): + self.__send_monitor_request() + self.__send_db_change_aware() +@@ -336,7 +342,7 @@ class Idl(object): + self.__error() + break + else: +- self.change_seqno = initial_change_seqno ++ self.change_seqno = previous_change_seqno + self.__send_monitor_request() + elif (msg.type == ovs.jsonrpc.Message.T_REPLY + and self._db_change_aware_request_id is not None +@@ -372,7 +378,7 @@ class Idl(object): + self.force_reconnect() + break + else: +- self.change_seqno = initial_change_seqno ++ self.change_seqno = previous_change_seqno + self.__send_monitor_request() + elif (msg.type in (ovs.jsonrpc.Message.T_ERROR, + ovs.jsonrpc.Message.T_REPLY) +@@ -614,6 +620,7 @@ class Idl(object): raise error.Error(" is not an object", table_updates) @@ -19799,7 +20764,7 @@ index 5850ac7abf..4226d1cb2f 100644 for table_name, table_update in table_updates.items(): table = tables.get(table_name) if not table: -@@ -639,7 +645,9 @@ class Idl(object): +@@ -639,7 +646,9 @@ class Idl(object): % (table_name, uuid_string)) if version == OVSDB_UPDATE2: @@ -19810,7 +20775,7 @@ index 5850ac7abf..4226d1cb2f 100644 self.change_seqno += 1 continue -@@ -652,17 +660,20 @@ class Idl(object): +@@ -652,17 +661,20 @@ class Idl(object): raise error.Error(' missing "old" and ' '"new" members', row_update) @@ -19835,7 +20800,7 @@ index 5850ac7abf..4226d1cb2f 100644 else: # XXX rate-limit vlog.warn("cannot delete missing row %s from table" -@@ -681,29 +692,27 @@ class Idl(object): +@@ -681,29 +693,27 @@ class Idl(object): changed = self.__row_update(table, row, row_update) table.rows[uuid] = row if changed: @@ -19870,7 +20835,7 @@ index 5850ac7abf..4226d1cb2f 100644 else: # XXX rate-limit vlog.warn("cannot delete missing row %s from table %s" -@@ -723,7 +732,7 @@ class Idl(object): +@@ -723,7 +733,7 @@ class Idl(object): if op == ROW_CREATE: table.rows[uuid] = row if changed: @@ -19879,7 +20844,7 @@ index 5850ac7abf..4226d1cb2f 100644 else: op = ROW_UPDATE if not row: -@@ -737,8 +746,8 @@ class Idl(object): +@@ -737,8 +747,8 @@ class Idl(object): if op == ROW_CREATE: table.rows[uuid] = row if changed: @@ -19890,6 +20855,145 @@ index 5850ac7abf..4226d1cb2f 100644 def __check_server_db(self): """Returns True if this is a valid server database, False otherwise.""" +diff --git a/python/ovstest/rpcserver.py b/python/ovstest/rpcserver.py +index c4aab70207..05b6b1be20 100644 +--- a/python/ovstest/rpcserver.py ++++ b/python/ovstest/rpcserver.py +@@ -18,22 +18,14 @@ rpcserver is an XML RPC server that allows RPC client to initiate tests + + import sys + +-import exceptions +- + import xmlrpc.client + +-import tcp +- + from twisted.internet import reactor + from twisted.internet.error import CannotListenError + from twisted.web import server + from twisted.web import xmlrpc + +-import udp +- +-import util +- +-import vswitch ++from . import tcp, udp, util, vswitch + + + class TestArena(xmlrpc.XMLRPC): +@@ -210,7 +202,7 @@ class TestArena(xmlrpc.XMLRPC): + (_, port) = self.__get_handle_resources(handle) + port.loseConnection() + self.__delete_handle(handle) +- except exceptions.KeyError: ++ except KeyError: + return -1 + return 0 + +@@ -222,7 +214,7 @@ class TestArena(xmlrpc.XMLRPC): + (_, connector) = self.__get_handle_resources(handle) + connector.disconnect() + self.__delete_handle(handle) +- except exceptions.KeyError: ++ except KeyError: + return -1 + return 0 + +diff --git a/python/ovstest/tcp.py b/python/ovstest/tcp.py +index c495717f2f..098c6cba3e 100644 +--- a/python/ovstest/tcp.py ++++ b/python/ovstest/tcp.py +@@ -21,7 +21,7 @@ import time + from twisted.internet import interfaces + from twisted.internet.protocol import ClientFactory, Factory, Protocol + +-from zope.interface import implements ++from zope.interface.declarations import implementer + + + class TcpListenerConnection(Protocol): +@@ -55,8 +55,8 @@ class TcpListenerFactory(Factory): + return str(self.stats) + + ++@implementer(interfaces.IPushProducer) + class Producer(object): +- implements(interfaces.IPushProducer) + """ + This producer class generates infinite byte stream for a specified time + duration +diff --git a/python/ovstest/tests.py b/python/ovstest/tests.py +index 6de3cc3af4..f959f945ef 100644 +--- a/python/ovstest/tests.py ++++ b/python/ovstest/tests.py +@@ -10,8 +10,6 @@ + # See the License for the specific language governing permissions and + # limitations under the License. + +-from __future__ import print_function +- + import math + import time + +diff --git a/python/ovstest/util.py b/python/ovstest/util.py +index 72457158f2..270d6a0376 100644 +--- a/python/ovstest/util.py ++++ b/python/ovstest/util.py +@@ -26,8 +26,6 @@ import socket + import struct + import subprocess + +-import exceptions +- + import xmlrpc.client + + +@@ -88,7 +86,7 @@ def start_process(args): + stderr=subprocess.PIPE) + out, err = p.communicate() + return (p.returncode, out, err) +- except exceptions.OSError: ++ except OSError: + return (-1, None, None) + + +diff --git a/python/ovstest/vswitch.py b/python/ovstest/vswitch.py +index 9d5b5cffd0..45c9587eeb 100644 +--- a/python/ovstest/vswitch.py ++++ b/python/ovstest/vswitch.py +@@ -15,7 +15,7 @@ + """ + vswitch module allows its callers to interact with OVS DB. + """ +-import util ++from . import util + + + def ovs_vsctl_add_bridge(bridge): +diff --git a/python/setup.py b/python/setup.py +index d385d83722..cfe01763f3 100644 +--- a/python/setup.py ++++ b/python/setup.py +@@ -10,8 +10,6 @@ + # See the License for the specific language governing permissions and + # limitations under the License. + +-from __future__ import print_function +- + import sys + + from distutils.command.build_ext import build_ext +@@ -82,8 +80,6 @@ setup_args = dict( + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: System :: Networking', + 'License :: OSI Approved :: Apache Software License', +- 'Programming Language :: Python :: 2', +- 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', diff --git a/tests/atlocal.in b/tests/atlocal.in index 02e2dc57f2..cfca7e1926 100644 --- a/tests/atlocal.in @@ -20137,6 +21241,142 @@ index 199db8ed0f..59093c03c9 100644 # fin_timeout bad_action 'fin_timeout(foo=bar)' "invalid key 'foo' in 'fin_timeout' argument" +diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at +index 31064ed95e..5567eed115 100644 +--- a/tests/ofproto-dpif.at ++++ b/tests/ofproto-dpif.at +@@ -342,6 +342,22 @@ AT_CHECK([test `egrep 'in_port\(6\)' br1_flows.txt |wc -l` -gt 3]) + OVS_VSWITCHD_STOP + AT_CLEANUP + ++# SEND_TCP_BOND_PKTS([p_name], [p_ofport], [packet_len]) ++# ++# Sends 256 packets to port 'p_name' with different TCP destination ports. ++m4_define([SEND_TCP_BOND_PKTS], ++ [ ++ len_cmd="" ++ if test -n "$3"; then ++ len_cmd=" --len $3" ++ fi ++ for i in `seq 0 255`; do ++ pkt="in_port($2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(ack)" ++ ovs-appctl netdev-dummy/receive $1 $pkt$len_cmd ++ done ++ ] ++) ++ + AT_SETUP([ofproto-dpif - balance-tcp bonding]) + # Create br0 with members bond0(p1, p2, p3) and p7, + # and br1 with members bond1(p4, p5, p6) and p8. +@@ -377,13 +393,7 @@ ovs-appctl lacp/show > lacp.txt + ovs-appctl bond/show > bond.txt + # Check that lb_output is not enabled by default. + AT_CHECK([grep -q '^lb_output action: disabled' bond.txt]) +-( +-for i in `seq 0 255` ; +- do +- pkt="in_port(7),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(ack)" +- AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt]) +- done +-) ++AT_CHECK([SEND_TCP_BOND_PKTS([p7], [7])]) + ovs-appctl time/warp 300 100 + AT_CHECK([ovs-appctl dpif/dump-flows br0 |grep tcp > br0_flows.txt]) + AT_CHECK([ovs-appctl dpif/dump-flows br1 |grep tcp > br1_flows.txt]) +@@ -400,13 +410,7 @@ OVS_WAIT_UNTIL([ovs-appctl bond/show | grep -q '^lb_output action: enabled']) + ovs-appctl time/warp 10000 500 + ovs-appctl revalidator/wait + OVS_WAIT_WHILE([ovs-appctl dpif/dump-flows br1 | grep -q tcp]) +-( +-for i in $(seq 256) ; +- do +- pkt="in_port(7),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(ack)" +- AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt]) +- done +-) ++AT_CHECK([SEND_TCP_BOND_PKTS([p7], [7])]) + ovs-appctl time/warp 300 100 + AT_CHECK([ovs-appctl dpif/dump-flows br0 | grep tcp > br0_flows.txt]) + AT_CHECK([ovs-appctl dpif/dump-flows br1 | grep tcp > br1_flows.txt]) +@@ -423,6 +427,78 @@ OVS_WAIT_UNTIL([test -z "$(ovs-appctl dpif-netdev/bond-show)"]) + OVS_VSWITCHD_STOP() + AT_CLEANUP + ++# Make sure that rebalancing works after link state changes. ++AT_SETUP([ofproto-dpif - balance-tcp bonding rebalance after link state changes]) ++# Create br0 with interfaces bond0(p1, p2) and p5, ++# and br1 with interfaces bond1(p3, p4) and p6. ++# bond0 <-> bond1 ++# Send some traffic, set link state down and up for p2, ++# send big amount of traffic to trigger rebalancing and ++# make sure that some hashes rebalanced. ++OVS_VSWITCHD_START( ++ [add-bond br0 bond0 p1 p2 bond_mode=balance-tcp lacp=active \ ++ other-config:lacp-time=fast other-config:bond-rebalance-interval=1000 --\ ++ set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 mtu_request=65535 -- \ ++ set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 mtu_request=65535 -- \ ++ add-port br0 p5 -- set interface p5 ofport_request=5 type=dummy mtu_request=65535 -- \ ++ add-br br1 -- \ ++ set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ ++ set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ ++ fail-mode=secure -- \ ++ add-bond br1 bond1 p3 p4 bond_mode=balance-tcp lacp=active \ ++ other-config:lacp-time=fast other-config:bond-rebalance-interval=1000 --\ ++ set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=3 mtu_request=65535 -- \ ++ set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=4 mtu_request=65535 -- \ ++ add-port br1 p6 -- set interface p6 ofport_request=6 type=dummy mtu_request=65535 --]) ++AT_CHECK([ovs-appctl vlog/set bond:dbg]) ++AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK ++]) ++AT_CHECK([ovs-ofctl add-flow br0 action=normal]) ++AT_CHECK([ovs-ofctl add-flow br1 action=normal]) ++AT_CHECK([ovs-appctl upcall/disable-megaflows], [0], [megaflows disabled ++], []) ++OVS_WAIT_WHILE([ovs-appctl bond/show | grep "may_enable: false"]) ++ ++ovs-appctl time/stop ++ovs-appctl time/warp 2000 200 ++ ++# Send some traffic to distribute all the hashes between ports. ++AT_CHECK([SEND_TCP_BOND_PKTS([p5], [5], [65500])]) ++ ++# Wait for rebalancing for per-hash stats accounting. ++ovs-appctl time/warp 1000 100 ++ ++# Check that p2 handles some hashes. ++ovs-appctl bond/show > bond1.txt ++AT_CHECK([sed -n '/member p2/,/^$/p' bond1.txt | grep 'hash'], [0], [ignore]) ++ ++# Move p2 down to force all hashes move to p1 ++AT_CHECK([ovs-appctl netdev-dummy/set-admin-state p2 down], 0, [OK ++]) ++ ++ovs-appctl time/warp 200 100 ++# Check that all hashes moved form p2 ++ovs-appctl bond/show > bond2.txt ++AT_CHECK([sed -n '/member p2/,/^$/p' bond2.txt | grep 'hash'], [1], [ignore]) ++ ++# Move p2 up ++AT_CHECK([ovs-appctl netdev-dummy/set-admin-state p2 up], 0, [OK ++]) ++ ++# Send some packets to trigger rebalancing. ++AT_CHECK([SEND_TCP_BOND_PKTS([p5], [5], [65500])]) ++ ++# Wait for rebalancing ++ovs-appctl time/warp 1000 100 ++ ++# Check that some hashes was shifted to p2 ++ovs-appctl bond/show > bond3.txt ++AT_CHECK([sed -n '/member p2/,/^$/p' bond3.txt | grep 'hash'], [0], [ignore]) ++ ++OVS_VSWITCHD_STOP() ++AT_CLEANUP ++ ++ + # Makes sure recirculation does not change the way packet is handled. + AT_SETUP([ofproto-dpif - balance-tcp bonding, different recirc flow ]) + OVS_VSWITCHD_START( diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at index 5ddca67e71..604f15c2d1 100644 --- a/tests/ovs-ofctl.at @@ -21380,6 +22620,28 @@ index 4b4791a7da..62181dd4de 100644 +008: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 009: done ]]) +diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at +index 15628a7c6f..86d633ac4f 100644 +--- a/tests/system-kmod-macros.at ++++ b/tests/system-kmod-macros.at +@@ -99,6 +99,17 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP], + # + m4_define([CHECK_CONNTRACK_NAT]) + ++# CHECK_CONNTRACK_ZEROIP_SNAT() ++# ++# Perform requirements checks for running conntrack all-zero IP SNAT tests. ++# The kernel always supports all-zero IP SNAT, so no check is needed. ++# However, the Windows datapath using the same netlink interface does not. ++# ++m4_define([CHECK_CONNTRACK_ZEROIP_SNAT], ++[ ++ AT_SKIP_IF([test "$IS_WIN32" = "yes"]) ++]) ++ + # CHECK_CONNTRACK_TIMEOUT() + # + # Perform requirements checks for running conntrack customized timeout tests. diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at index 4f601ef939..c8e4c68fae 100644 --- a/tests/system-offloads-traffic.at @@ -21438,6 +22700,200 @@ index 4f601ef939..c8e4c68fae 100644 +]) +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP +diff --git a/tests/system-traffic.at b/tests/system-traffic.at +index fb5b9a36d2..ef0c199155 100644 +--- a/tests/system-traffic.at ++++ b/tests/system-traffic.at +@@ -574,6 +574,60 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI + OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + ++AT_SETUP([datapath - ping over geneve tunnel, delete flow regression]) ++OVS_CHECK_GENEVE() ++ ++OVS_TRAFFIC_VSWITCHD_START() ++ADD_BR([br-underlay]) ++ ++AT_DATA([flows.txt], [dnl ++priority=100,icmp actions=resubmit(,10) ++priority=0 actions=NORMAL ++table=10, priority=100, ip, actions=ct(table=20,zone=65520) ++table=20, priority=200, ip, ct_state=-new+trk, actions=resubmit(,30) ++table=20, priority=100, ip, ct_state=+new, actions=resubmit(,30) ++table=20, priority=50, ip, actions=DROP ++table=30, priority=100, ip, actions=ct(commit,table=40,zone=65520) ++table=40, actions=normal ++]) ++ ++AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) ++AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"]) ++ ++ADD_NAMESPACES(at_ns0) ++ ++dnl Set up underlay link from host into the namespace using veth pair. ++ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24") ++AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) ++AT_CHECK([ip link set dev br-underlay up]) ++ ++dnl Set up tunnel endpoints on OVS outside the namespace and with a native ++dnl linux device inside the namespace. ++ADD_OVS_TUNNEL([geneve], [br0], [at_gnv0], [172.31.1.1], [10.1.1.100/24]) ++ADD_NATIVE_TUNNEL([geneve], [ns_gnv0], [at_ns0], [172.31.1.100], [10.1.1.1/24], ++ [vni 0]) ++ ++dnl First, check the underlay ++NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++dnl ping over tunnel should work ++NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++AT_CHECK([ovs-ofctl del-flows br0 "ct_state=+new"]) ++ ++dnl ping should not go through after removal of the flow ++NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl ++7 packets transmitted, 0 received, 100% packet loss, time 0ms ++]) ++ ++OVS_TRAFFIC_VSWITCHD_STOP(["/|ERR|/d ++/|WARN|/d"]) ++AT_CLEANUP ++ + AT_SETUP([datapath - flow resume with geneve tun_metadata]) + OVS_CHECK_GENEVE() + +@@ -4433,6 +4487,52 @@ tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src= + OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + ++ ++AT_SETUP([conntrack - all-zero IP SNAT]) ++AT_SKIP_IF([test $HAVE_NC = no]) ++CHECK_CONNTRACK() ++CHECK_CONNTRACK_ZEROIP_SNAT() ++OVS_TRAFFIC_VSWITCHD_START() ++ ++ADD_NAMESPACES(at_ns0, at_ns1) ++ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") ++ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") ++NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2]) ++ ++OVS_START_L7([at_ns1], [http]) ++ ++AT_DATA([flows.txt], [dnl ++table=0,priority=30,ct_state=-trk,ip,action=ct(table=0) ++table=0,priority=20,ct_state=-rpl,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10) ++table=0,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24,actions=resubmit(,10) ++table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2),table=10) ++table=0,priority=10,arp,action=normal ++table=0,priority=1,action=drop ++table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24 actions=ct(table=20,nat) ++table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20) ++table=20,priority=10,ip,nw_dst=10.1.1.1,action=1 ++table=20,priority=10,ip,nw_dst=10.1.1.2,action=2 ++]) ++AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) ++ ++dnl - Test to make sure src nat is NOT done when not needed ++NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 > nc-1.log]) ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [0], [dnl ++tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WAIT) ++]) ++ ++dnl - Test to make sure src nat is done when needed ++NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 > nc-2.log]) ++NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 > nc-3.log]) ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep "orig=.src=10\.1\.1\.1," | sed -e 's/port=30001/port=/g' -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=/g' | sort], [0], [dnl ++tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=),protoinfo=(state=TIME_WAIT) ++tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=),protoinfo=(state=TIME_WAIT) ++]) ++ ++OVS_TRAFFIC_VSWITCHD_STOP ++AT_CLEANUP ++ ++ + AT_SETUP([conntrack - simple DNAT]) + CHECK_CONNTRACK() + CHECK_CONNTRACK_NAT() +@@ -4488,6 +4588,41 @@ tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src= + OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + ++AT_SETUP([conntrack - DNAT with additional SNAT]) ++CHECK_CONNTRACK() ++OVS_TRAFFIC_VSWITCHD_START() ++ ++ADD_NAMESPACES(at_ns0, at_ns1) ++ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") ++ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") ++NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2]) ++ ++OVS_START_L7([at_ns1], [http]) ++ ++AT_DATA([flows.txt], [dnl ++table=0,priority=30,in_port=1,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2:80),table=1) ++table=0,priority=20,in_port=2,ip,actions=ct(nat),1 ++table=0,priority=10,arp,actions=NORMAL ++table=0,priority=1,actions=drop ++dnl Be sure all ct() actions but src nat are executed ++table=1,ip,actions=ct(commit,nat(src=10.1.1.240),exec(set_field:0xac->ct_mark,set_field:0xac->ct_label),table=2) ++table=2,in_port=1,ip,ct_mark=0xac,ct_label=0xac,actions=2 ++]) ++AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) ++ ++NS_CHECK_EXEC([at_ns0], [wget http://172.1.1.2:8080 -t 5 -T 1 --retry-connrefused -v -o wget0.log]) ++ ++dnl - make sure only dst nat has been performed ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.240)], [0], [dnl ++]) ++ ++AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.1)], [0], [dnl ++tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=,dport=),reply=(src=10.1.1.2,dst=10.1.1.1,sport=,dport=),mark=172,labels=0xac,protoinfo=(state=) ++]) ++ ++OVS_TRAFFIC_VSWITCHD_STOP ++AT_CLEANUP ++ + AT_SETUP([conntrack - more complex DNAT]) + CHECK_CONNTRACK() + CHECK_CONNTRACK_NAT() +diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at +index 34f82cee3d..9f0d38dfb3 100644 +--- a/tests/system-userspace-macros.at ++++ b/tests/system-userspace-macros.at +@@ -96,6 +96,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP]) + # + m4_define([CHECK_CONNTRACK_NAT]) + ++# CHECK_CONNTRACK_ZEROIP_SNAT() ++# ++# Perform requirements checks for running conntrack all-zero IP SNAT tests. ++# The userspace datapath does not support all-zero IP SNAT. ++# ++m4_define([CHECK_CONNTRACK_ZEROIP_SNAT], ++[ ++ AT_SKIP_IF([:]) ++]) ++ + # CHECK_CONNTRACK_TIMEOUT() + # + # Perform requirements checks for running conntrack customized timeout tests. +diff --git a/tests/test-jsonrpc.py b/tests/test-jsonrpc.py +index 3eabcd78d5..1df5afa221 100644 +--- a/tests/test-jsonrpc.py ++++ b/tests/test-jsonrpc.py +@@ -12,8 +12,6 @@ + # See the License for the specific language governing permissions and + # limitations under the License. + +-from __future__ import print_function +- + import argparse + import errno + import os diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c index 15433e3472..a886f971e7 100644 --- a/tests/test-ovsdb.c @@ -22037,6 +23493,43 @@ index a196802743..72a319123e 100644 if updates is None: output += "None" else: +diff --git a/tests/test-reconnect.py b/tests/test-reconnect.py +index f0ad9f9793..cea48eb527 100644 +--- a/tests/test-reconnect.py ++++ b/tests/test-reconnect.py +@@ -12,8 +12,6 @@ + # See the License for the specific language governing permissions and + # limitations under the License. + +-from __future__ import print_function +- + import errno + import sys + +diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py +index bc6bfae15a..ac14da29b1 100755 +--- a/utilities/checkpatch.py ++++ b/utilities/checkpatch.py +@@ -13,7 +13,6 @@ + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. +-from __future__ import print_function + + import email + import getopt +diff --git a/utilities/gdb/ovs_gdb.py b/utilities/gdb/ovs_gdb.py +index 1111f3100d..0b2ecb81be 100644 +--- a/utilities/gdb/ovs_gdb.py ++++ b/utilities/gdb/ovs_gdb.py +@@ -55,7 +55,6 @@ + # ... + # ... + # +-from __future__ import print_function + import gdb + import sys + import uuid diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in index d71c34e691..4156da20ef 100644 --- a/utilities/ovs-ctl.in @@ -22117,6 +23610,39 @@ index d71c34e691..4156da20ef 100644 help) usage ;; +diff --git a/utilities/ovs-l3ping.in b/utilities/ovs-l3ping.in +index 92d32acb3f..1ece06457c 100644 +--- a/utilities/ovs-l3ping.in ++++ b/utilities/ovs-l3ping.in +@@ -19,7 +19,7 @@ achieved by tunneling the control connection inside the tunnel itself. + """ + + import socket +-import xmlrpclib ++import xmlrpc.client + + import ovstest.args as args + import ovstest.tests as tests +@@ -64,13 +64,13 @@ if __name__ == '__main__': + ps = get_packet_sizes(me, he, args.client[0]) + tests.do_direct_tests(me, he, bandwidth, interval, ps) + except KeyboardInterrupt: +- print "Terminating" +- except xmlrpclib.Fault: +- print "Couldn't contact peer" ++ print("Terminating") ++ except xmlrpc.client.Fault: ++ print("Couldn't contact peer") + except socket.error: +- print "Couldn't contact peer" +- except xmlrpclib.ProtocolError: +- print "XMLRPC control channel was abruptly terminated" ++ print("Couldn't contact peer") ++ except xmlrpc.client.ProtocolError: ++ print("XMLRPC control channel was abruptly terminated") + finally: + if local_server is not None: + local_server.terminate() diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 3601890f40..ede7f1e61a 100644 --- a/utilities/ovs-ofctl.c @@ -22162,8 +23688,196 @@ index 3601890f40..ede7f1e61a 100644 { "mod-group", "switch group", 1, 2, ofctl_mod_group, OVS_RW }, { "del-groups", "switch [group]", +diff --git a/utilities/ovs-parse-backtrace.in b/utilities/ovs-parse-backtrace.in +index d5506769a8..f44f05cd1e 100755 +--- a/utilities/ovs-parse-backtrace.in ++++ b/utilities/ovs-parse-backtrace.in +@@ -70,7 +70,7 @@ result. Expected usage is for ovs-appctl backtrace to be piped in.""") + if os.path.exists(debug): + binary = debug + +- print "Binary: %s\n" % binary ++ print("Binary: %s\n" % binary) + + stdin = sys.stdin.read() + +@@ -88,15 +88,15 @@ result. Expected usage is for ovs-appctl backtrace to be piped in.""") + for lines, count in traces: + longest = max(len(l) for l in lines) + +- print "Backtrace Count: %d" % count ++ print("Backtrace Count: %d" % count) + for line in lines: + match = re.search(r'\[(0x.*)]', line) + if match: +- print "%s %s" % (line.ljust(longest), +- addr2line(binary, match.group(1))) ++ print("%s %s" % (line.ljust(longest), ++ addr2line(binary, match.group(1)))) + else: +- print line +- print ++ print(line) ++ print() + + + if __name__ == "__main__": +diff --git a/utilities/ovs-pcap.in b/utilities/ovs-pcap.in +index dddbee4dfb..6b5f63399e 100755 +--- a/utilities/ovs-pcap.in ++++ b/utilities/ovs-pcap.in +@@ -14,8 +14,6 @@ + # See the License for the specific language governing permissions and + # limitations under the License. + +-from __future__ import print_function +- + import binascii + import getopt + import struct +@@ -79,7 +77,7 @@ if __name__ == "__main__": + try: + options, args = getopt.gnu_getopt(sys.argv[1:], 'hV', + ['help', 'version']) +- except getopt.GetoptException as geo: ++ except getopt.GetoptError as geo: + sys.stderr.write("%s: %s\n" % (argv0, geo.msg)) + sys.exit(1) + +diff --git a/utilities/ovs-vlan-test.in b/utilities/ovs-vlan-test.in +index 154573a9b5..de3ae16862 100755 +--- a/utilities/ovs-vlan-test.in ++++ b/utilities/ovs-vlan-test.in +@@ -14,9 +14,9 @@ + # See the License for the specific language governing permissions and + # limitations under the License. + +-import BaseHTTPServer + import getopt +-import httplib ++import http.client ++import http.server + import os + import threading + import time +@@ -84,7 +84,7 @@ class UDPReceiver: + + try: + sock.bind((self.vlan_ip, self.vlan_port)) +- except socket.error, e: ++ except socket.error as e: + print_safe('Failed to bind to %s:%d with error: %s' + % (self.vlan_ip, self.vlan_port, e)) + os._exit(1) #sys.exit only exits the current thread. +@@ -95,7 +95,7 @@ class UDPReceiver: + data, _ = sock.recvfrom(4096) + except socket.timeout: + continue +- except socket.error, e: ++ except socket.error as e: + print_safe('Failed to receive from %s:%d with error: %s' + % (self.vlan_ip, self.vlan_port, e)) + os._exit(1) +@@ -180,7 +180,7 @@ class VlanServer: + for _ in range(send_time * 2): + try: + send_packet(test_id, size, ip, port) +- except socket.error, e: ++ except socket.error as e: + self.set_result(test_id, 'Failure: ' + str(e)) + return + time.sleep(.5) +@@ -194,15 +194,15 @@ class VlanServer: + def run(self): + self.udp_recv.start() + try: +- BaseHTTPServer.HTTPServer((self.server_ip, self.server_port), ++ http.server.HTTPServer((self.server_ip, self.server_port), + VlanServerHandler).serve_forever() +- except socket.error, e: ++ except socket.error as e: + print_safe('Failed to start control server: %s' % e) + self.udp_recv.stop() + + return 1 + +-class VlanServerHandler(BaseHTTPServer.BaseHTTPRequestHandler): ++class VlanServerHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + + #Guarantee three arguments. +@@ -244,7 +244,7 @@ class VlanClient: + self.udp_recv = UDPReceiver(vlan_ip, vlan_port) + + def request(self, resource): +- conn = httplib.HTTPConnection(self.server_ip_port) ++ conn = http.client.HTTPConnection(self.server_ip_port) + conn.request('GET', resource) + return conn + +@@ -256,7 +256,7 @@ class VlanClient: + try: + conn = self.request('/start/recv') + data = conn.getresponse().read() +- except (socket.error, httplib.HTTPException), e: ++ except (socket.error, http.client.HTTPException) as e: + error_msg(e) + return False + +@@ -277,7 +277,7 @@ class VlanClient: + send_packet(test_id, size, ip, port) + resp = self.request('/result/%d' % test_id).getresponse() + data = resp.read() +- except (socket.error, httplib.HTTPException), e: ++ except (socket.error, http.client.HTTPException) as e: + error_msg(e) + return False + +@@ -302,7 +302,7 @@ class VlanClient: + try: + conn = self.request(resource) + test_id = conn.getresponse().read() +- except (socket.error, httplib.HTTPException), e: ++ except (socket.error, http.client.HTTPException) as e: + error_msg(e) + return False + +@@ -335,7 +335,7 @@ class VlanClient: + try: + resp = self.request('/ping').getresponse() + data = resp.read() +- except (socket.error, httplib.HTTPException), e: ++ except (socket.error, http.client.HTTPException) as e: + error_msg(e) + return False + +@@ -383,7 +383,7 @@ def main(): + try: + options, args = getopt.gnu_getopt(sys.argv[1:], 'hVs', + ['help', 'version', 'server']) +- except getopt.GetoptError, geo: ++ except getopt.GetoptError as geo: + print_safe('%s: %s\n' % (sys.argv[0], geo.msg)) + return 1 + +diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c +index 5ed7e82343..ea0630e112 100644 +--- a/vswitchd/bridge.c ++++ b/vswitchd/bridge.c +@@ -3019,9 +3019,9 @@ ofp12_controller_role_to_str(enum ofp12_controller_role role) + case OFPCR12_ROLE_EQUAL: + return "other"; + case OFPCR12_ROLE_PRIMARY: +- return "primary"; ++ return "master"; + case OFPCR12_ROLE_SECONDARY: +- return "secondary"; ++ return "slave"; + case OFPCR12_ROLE_NOCHANGE: + default: + return NULL; diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml -index a2ad84edef..4597a215d9 100644 +index a2ad84edef..d8ea287d5d 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -4660,7 +4660,8 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ @@ -22186,3 +23900,19 @@ index a2ad84edef..4597a215d9 100644 The Excess Burst Size (EBS) is measured in bytes and represents a +@@ -6124,6 +6126,15 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ + True if the datapath supports OVS_ACTION_ATTR_DROP. If false, + explicit drop action will not be sent to the datapath. + ++ ++ True if the datapath supports all-zero SNAT. This is a special case ++ if the src IP address is configured as all 0's, i.e., ++ nat(src=0.0.0.0). In this case, when a source port ++ collision is detected during the commit, the source port will be ++ translated to an ephemeral port. If there is no collision, no SNAT ++ is performed. ++ + + + diff --git a/SPECS/openvswitch2.15.spec b/SPECS/openvswitch2.15.spec index 4a95f35..4001061 100644 --- a/SPECS/openvswitch2.15.spec +++ b/SPECS/openvswitch2.15.spec @@ -57,7 +57,7 @@ Summary: Open vSwitch Group: System Environment/Daemons daemon/database/utilities URL: http://www.openvswitch.org/ Version: 2.15.0 -Release: 24%{?dist} +Release: 27%{?dist} # Nearly all of openvswitch is ASL 2.0. The bugtool is LGPLv2+, and the # lib/sflow*.[ch] files are SISSL @@ -697,6 +697,41 @@ exit 0 %endif %changelog +* Sat Jul 17 2021 Open vSwitch CI - 2.15.0-27 +- Merging upstream branch-2.15 [RH gerrit: abdd952536] + Commit list: + 72132a9403 bond: Fix broken rebalancing after link state changes. + aa84cfe25d dpif-netlink: Fix report_loss() message. + aec05f7cd1 ovsdb-server: Fix memleak when failing to read storage. + 05bdf11fc3 conntrack: Init hash basis first at creation. + 94e3b9d9ce netdev-linux: Ignore TSO packets when TSO is not enabled for userspace. + 842bfb899f conntrack: Handle already natted packets. + ab873c1afe conntrack: Document all-zero IP SNAT behavior and add a test case. + 86d6a9ee14 python: Fix Idl.run change_seqno update. + 1ba0c83655 bridge: Use correct (legacy) role names in database. + 7e5293ea5a Prepare for 2.15.2. + b855bbc326 Set release date for 2.15.1. + 007a4f48fe dpif-netdev: Apply subtable-lookup-prio-set on any datapath. + c93358a563 netlink: removed incorrect optimization + 31626579fa ovs-actions.xml: Add missing bracket. + 30596ec278 netdev-offload-tc: Use nl_msg_put_flag for OVS_TUNNEL_KEY_ATTR_CSUM. + 728980291a conntrack: Increment coverage counter for all bad checksum cases. + + +* Wed Jun 30 2021 Timothy Redaelli - 2.15.0-26 +- Use 10 characters of hash to generate the changelog [RH gerrit: d89d9cd0f1] + This is needed to avoid that the history changes since the default + changes from time to time + + +* Wed Jun 30 2021 Timothy Redaelli - 2.15.0-25 +- Merging 881d71ea22e datapath-windows: Specify external include .. [RH gerrit: 8ad5538601] + Commit list: + 881d71ea22e datapath-windows: Specify external include paths + 934668c295e Remove Python 2 leftovers. + aaa59670556 ipf: Fix a use-after-free error, and remove the 'do_not_steal' flag. + + * Fri Jun 11 2021 Open vSwitch CI - 2.15.0-24 - Merging upstream branch-2.15 [RH gerrit: 78ba3622d9] Commit list: