|
|
773311 |
From 88056d15bffe67c033322de16c01a013e7bc7c7c Mon Sep 17 00:00:00 2001
|
|
|
773311 |
From: Mark Michelson <mmichels@redhat.com>
|
|
|
773311 |
Date: Wed, 6 May 2020 09:49:55 -0400
|
|
|
773311 |
Subject: [PATCH] ofctrl: Split large group_mod messages up.
|
|
|
773311 |
|
|
|
773311 |
Group mod messages have the possibility of growing very large if OVN
|
|
|
773311 |
installs a load balancer with a great many backends. The current
|
|
|
773311 |
approach is to send a single ADD message with the entire group contents.
|
|
|
773311 |
If the size of this message exceeds UINT16_MAX, then OpenFlow cannot
|
|
|
773311 |
properly express the length of the message since the OpenFlow header's
|
|
|
773311 |
length is limited to 16 bits.
|
|
|
773311 |
|
|
|
773311 |
This patch solves the problem by breaking the message into pieces. The
|
|
|
773311 |
first piece is an ADD, and subsequent messages are INSERT_BUCKET
|
|
|
773311 |
messages. This way, we end up being able to express the entire size of
|
|
|
773311 |
the group through multiple OpenFlow messages.
|
|
|
773311 |
|
|
|
773311 |
Signed-off-by: Mark Michelson <mmichels@redhat.com>
|
|
|
773311 |
Acked-by: Numan Siddique <numans@ovn.org>
|
|
|
773311 |
---
|
|
|
773311 |
controller/ofctrl.c | 70 ++++++++++++++++++++++++++++++++++++++++++---
|
|
|
773311 |
tests/ovn.at | 29 +++++++++++++++++++
|
|
|
773311 |
2 files changed, 95 insertions(+), 4 deletions(-)
|
|
|
773311 |
|
|
|
773311 |
diff --git a/controller/ofctrl.c b/controller/ofctrl.c
|
|
|
773311 |
index 4b51cd86e..073e076c7 100644
|
|
|
773311 |
--- a/controller/ofctrl.c
|
|
|
773311 |
+++ b/controller/ofctrl.c
|
|
|
773311 |
@@ -930,10 +930,72 @@ encode_group_mod(const struct ofputil_group_mod *gm)
|
|
|
773311 |
}
|
|
|
773311 |
|
|
|
773311 |
static void
|
|
|
773311 |
-add_group_mod(const struct ofputil_group_mod *gm, struct ovs_list *msgs)
|
|
|
773311 |
+add_group_mod(struct ofputil_group_mod *gm, struct ovs_list *msgs)
|
|
|
773311 |
{
|
|
|
773311 |
struct ofpbuf *msg = encode_group_mod(gm);
|
|
|
773311 |
- ovs_list_push_back(msgs, &msg->list_node);
|
|
|
773311 |
+ if (msg->size <= UINT16_MAX) {
|
|
|
773311 |
+ ovs_list_push_back(msgs, &msg->list_node);
|
|
|
773311 |
+ return;
|
|
|
773311 |
+ }
|
|
|
773311 |
+ /* This group mod request is too large to fit in a single OF message
|
|
|
773311 |
+ * since the header can only specify a 16-bit size. We need to break
|
|
|
773311 |
+ * this into multiple group_mod requests.
|
|
|
773311 |
+ */
|
|
|
773311 |
+
|
|
|
773311 |
+ /* Pull the first bucket. All buckets are approximately the same length
|
|
|
773311 |
+ * since they contain near-identical actions. Using its length can give
|
|
|
773311 |
+ * us a good approximation of how many buckets we can fit in a single
|
|
|
773311 |
+ * OF message.
|
|
|
773311 |
+ */
|
|
|
773311 |
+ ofpraw_pull_assert(msg);
|
|
|
773311 |
+ struct ofp15_group_mod *ogm = ofpbuf_pull(msg, sizeof(*ogm));
|
|
|
773311 |
+ struct ofp15_bucket *of_bucket = ofpbuf_pull(msg, sizeof(*of_bucket));
|
|
|
773311 |
+ uint16_t bucket_size = ntohs(of_bucket->len);
|
|
|
773311 |
+
|
|
|
773311 |
+ ofpbuf_delete(msg);
|
|
|
773311 |
+
|
|
|
773311 |
+ /* Dividing by 2 here ensures that just in case there are variations in
|
|
|
773311 |
+ * the size of the buckets, we will not put too many in our new group_mod
|
|
|
773311 |
+ * message.
|
|
|
773311 |
+ */
|
|
|
773311 |
+ size_t max_buckets = ((UINT16_MAX - sizeof *ogm) / bucket_size) / 2;
|
|
|
773311 |
+
|
|
|
773311 |
+ ovs_assert(max_buckets < ovs_list_size(&gm->buckets));
|
|
|
773311 |
+
|
|
|
773311 |
+ uint16_t command = OFPGC15_INSERT_BUCKET;
|
|
|
773311 |
+ if (gm->command == OFPGC15_DELETE ||
|
|
|
773311 |
+ gm->command == OFPGC15_REMOVE_BUCKET) {
|
|
|
773311 |
+ command = OFPGC15_REMOVE_BUCKET;
|
|
|
773311 |
+ }
|
|
|
773311 |
+ struct ofputil_group_mod split = {
|
|
|
773311 |
+ .command = command,
|
|
|
773311 |
+ .type = gm->type,
|
|
|
773311 |
+ .group_id = gm->group_id,
|
|
|
773311 |
+ .command_bucket_id = OFPG15_BUCKET_LAST,
|
|
|
773311 |
+ };
|
|
|
773311 |
+ ovs_list_init(&split.buckets);
|
|
|
773311 |
+
|
|
|
773311 |
+ size_t i = 0;
|
|
|
773311 |
+ struct ofputil_bucket *bucket;
|
|
|
773311 |
+ LIST_FOR_EACH (bucket, list_node, &gm->buckets) {
|
|
|
773311 |
+ if (i++ < max_buckets) {
|
|
|
773311 |
+ continue;
|
|
|
773311 |
+ }
|
|
|
773311 |
+ break;
|
|
|
773311 |
+ }
|
|
|
773311 |
+
|
|
|
773311 |
+ ovs_list_splice(&split.buckets, &bucket->list_node, &gm->buckets);
|
|
|
773311 |
+
|
|
|
773311 |
+ struct ofpbuf *orig = encode_group_mod(gm);
|
|
|
773311 |
+ ovs_list_push_back(msgs, &orig->list_node);
|
|
|
773311 |
+
|
|
|
773311 |
+ /* We call this recursively just in case our new
|
|
|
773311 |
+ * INSERT_BUCKET/REMOVE_BUCKET group_mod is still too
|
|
|
773311 |
+ * large for an OF message. This will allow for it to
|
|
|
773311 |
+ * be broken into pieces, too.
|
|
|
773311 |
+ */
|
|
|
773311 |
+ add_group_mod(&split, msgs);
|
|
|
773311 |
+ ofputil_uninit_group_mod(&split);
|
|
|
773311 |
}
|
|
|
773311 |
|
|
|
773311 |
|
|
|
773311 |
@@ -1124,7 +1186,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table,
|
|
|
773311 |
char *group_string = xasprintf("group_id=%"PRIu32",%s",
|
|
|
773311 |
desired->table_id,
|
|
|
773311 |
desired->name);
|
|
|
773311 |
- char *error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD, group_string,
|
|
|
773311 |
+ char *error = parse_ofp_group_mod_str(&gm, OFPGC15_ADD, group_string,
|
|
|
773311 |
NULL, NULL, &usable_protocols);
|
|
|
773311 |
if (!error) {
|
|
|
773311 |
add_group_mod(&gm, &msgs);
|
|
|
773311 |
@@ -1243,7 +1305,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table,
|
|
|
773311 |
enum ofputil_protocol usable_protocols;
|
|
|
773311 |
char *group_string = xasprintf("group_id=%"PRIu32"",
|
|
|
773311 |
installed->table_id);
|
|
|
773311 |
- char *error = parse_ofp_group_mod_str(&gm, OFPGC11_DELETE,
|
|
|
773311 |
+ char *error = parse_ofp_group_mod_str(&gm, OFPGC15_DELETE,
|
|
|
773311 |
group_string, NULL, NULL,
|
|
|
773311 |
&usable_protocols);
|
|
|
773311 |
if (!error) {
|
|
|
773311 |
diff --git a/tests/ovn.at b/tests/ovn.at
|
|
|
773311 |
index 52d994972..f39fda2e4 100644
|
|
|
773311 |
--- a/tests/ovn.at
|
|
|
773311 |
+++ b/tests/ovn.at
|
|
|
773311 |
@@ -19179,3 +19179,32 @@ OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected])
|
|
|
773311 |
|
|
|
773311 |
OVN_CLEANUP([hv1])
|
|
|
773311 |
AT_CLEANUP
|
|
|
773311 |
+
|
|
|
773311 |
+AT_SETUP([ovn -- Big Load Balancer])
|
|
|
773311 |
+ovn_start
|
|
|
773311 |
+
|
|
|
773311 |
+ovn-nbctl ls-add ls1
|
|
|
773311 |
+ovn-nbctl lsp-add ls1 lsp1
|
|
|
773311 |
+
|
|
|
773311 |
+net_add n1
|
|
|
773311 |
+sim_add hv1
|
|
|
773311 |
+
|
|
|
773311 |
+as hv1
|
|
|
773311 |
+ovs-vsctl add-br br-phys
|
|
|
773311 |
+ovn_attach n1 br-phys 192.168.0.1
|
|
|
773311 |
+ovs-vsctl add-port br-int p1 -- set Interface p1 external-ids:iface-id=lsp1
|
|
|
773311 |
+
|
|
|
773311 |
+IPS=192.169.0.1:80
|
|
|
773311 |
+for i in `seq 1 9` ; do
|
|
|
773311 |
+ for j in `seq 1 254` ; do
|
|
|
773311 |
+ IPS=${IPS},192.169.$i.$j:80
|
|
|
773311 |
+ done
|
|
|
773311 |
+done
|
|
|
773311 |
+
|
|
|
773311 |
+ovn-nbctl lb-add lb0 172.172.0.1:8080 "${IPS}"
|
|
|
773311 |
+ovn-nbctl --wait=hv ls-lb-add ls1 lb0
|
|
|
773311 |
+
|
|
|
773311 |
+AT_CHECK([test 2287 = `ovs-ofctl dump-group-stats br-int | grep -o bucket | wc -l`])
|
|
|
773311 |
+
|
|
|
773311 |
+OVN_CLEANUP([hv1])
|
|
|
773311 |
+AT_CLEANUP
|
|
|
773311 |
--
|
|
|
773311 |
2.25.4
|
|
|
773311 |
|