Blob Blame History Raw
From 88056d15bffe67c033322de16c01a013e7bc7c7c Mon Sep 17 00:00:00 2001
From: Mark Michelson <mmichels@redhat.com>
Date: Wed, 6 May 2020 09:49:55 -0400
Subject: [PATCH] ofctrl: Split large group_mod messages up.

Group mod messages have the possibility of growing very large if OVN
installs a load balancer with a great many backends. The current
approach is to send a single ADD message with the entire group contents.
If the size of this message exceeds UINT16_MAX, then OpenFlow cannot
properly express the length of the message since the OpenFlow header's
length is limited to 16 bits.

This patch solves the problem by breaking the message into pieces. The
first piece is an ADD, and subsequent messages are INSERT_BUCKET
messages. This way, we end up being able to express the entire size of
the group through multiple OpenFlow messages.

Signed-off-by: Mark Michelson <mmichels@redhat.com>
Acked-by: Numan Siddique <numans@ovn.org>
---
 controller/ofctrl.c | 70 ++++++++++++++++++++++++++++++++++++++++++---
 tests/ovn.at        | 29 +++++++++++++++++++
 2 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/controller/ofctrl.c b/controller/ofctrl.c
index 4b51cd86e..073e076c7 100644
--- a/controller/ofctrl.c
+++ b/controller/ofctrl.c
@@ -930,10 +930,72 @@ encode_group_mod(const struct ofputil_group_mod *gm)
 }
 
 static void
-add_group_mod(const struct ofputil_group_mod *gm, struct ovs_list *msgs)
+add_group_mod(struct ofputil_group_mod *gm, struct ovs_list *msgs)
 {
     struct ofpbuf *msg = encode_group_mod(gm);
-    ovs_list_push_back(msgs, &msg->list_node);
+    if (msg->size <= UINT16_MAX) {
+        ovs_list_push_back(msgs, &msg->list_node);
+        return;
+    }
+    /* This group mod request is too large to fit in a single OF message
+     * since the header can only specify a 16-bit size. We need to break
+     * this into multiple group_mod requests.
+     */
+
+    /* Pull the first bucket. All buckets are approximately the same length
+     * since they contain near-identical actions. Using its length can give
+     * us a good approximation of how many buckets we can fit in a single
+     * OF message.
+     */
+    ofpraw_pull_assert(msg);
+    struct ofp15_group_mod *ogm = ofpbuf_pull(msg, sizeof(*ogm));
+    struct ofp15_bucket *of_bucket = ofpbuf_pull(msg, sizeof(*of_bucket));
+    uint16_t bucket_size = ntohs(of_bucket->len);
+
+    ofpbuf_delete(msg);
+
+    /* Dividing by 2 here ensures that just in case there are variations in
+     * the size of the buckets, we will not put too many in our new group_mod
+     * message.
+     */
+    size_t max_buckets = ((UINT16_MAX - sizeof *ogm) / bucket_size) / 2;
+
+    ovs_assert(max_buckets < ovs_list_size(&gm->buckets));
+
+    uint16_t command = OFPGC15_INSERT_BUCKET;
+    if (gm->command == OFPGC15_DELETE ||
+        gm->command == OFPGC15_REMOVE_BUCKET) {
+        command = OFPGC15_REMOVE_BUCKET;
+    }
+    struct ofputil_group_mod split = {
+        .command = command,
+        .type = gm->type,
+        .group_id = gm->group_id,
+        .command_bucket_id = OFPG15_BUCKET_LAST,
+    };
+    ovs_list_init(&split.buckets);
+
+    size_t i = 0;
+    struct ofputil_bucket *bucket;
+    LIST_FOR_EACH (bucket, list_node, &gm->buckets) {
+        if (i++ < max_buckets) {
+            continue;
+        }
+        break;
+    }
+
+    ovs_list_splice(&split.buckets, &bucket->list_node, &gm->buckets);
+
+    struct ofpbuf *orig = encode_group_mod(gm);
+    ovs_list_push_back(msgs, &orig->list_node);
+
+    /* We call this recursively just in case our new
+     * INSERT_BUCKET/REMOVE_BUCKET group_mod is still too
+     * large for an OF message. This will allow for it to
+     * be broken into pieces, too.
+     */
+    add_group_mod(&split, msgs);
+    ofputil_uninit_group_mod(&split);
 }
 
 
@@ -1124,7 +1186,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table,
         char *group_string = xasprintf("group_id=%"PRIu32",%s",
                                        desired->table_id,
                                        desired->name);
-        char *error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD, group_string,
+        char *error = parse_ofp_group_mod_str(&gm, OFPGC15_ADD, group_string,
                                               NULL, NULL, &usable_protocols);
         if (!error) {
             add_group_mod(&gm, &msgs);
@@ -1243,7 +1305,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table,
         enum ofputil_protocol usable_protocols;
         char *group_string = xasprintf("group_id=%"PRIu32"",
                                        installed->table_id);
-        char *error = parse_ofp_group_mod_str(&gm, OFPGC11_DELETE,
+        char *error = parse_ofp_group_mod_str(&gm, OFPGC15_DELETE,
                                               group_string, NULL, NULL,
                                               &usable_protocols);
         if (!error) {
diff --git a/tests/ovn.at b/tests/ovn.at
index 52d994972..f39fda2e4 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -19179,3 +19179,32 @@ OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected])
 
 OVN_CLEANUP([hv1])
 AT_CLEANUP
+
+AT_SETUP([ovn -- Big Load Balancer])
+ovn_start
+
+ovn-nbctl ls-add ls1
+ovn-nbctl lsp-add ls1 lsp1
+
+net_add n1
+sim_add hv1
+
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl add-port br-int p1 -- set Interface p1 external-ids:iface-id=lsp1
+
+IPS=192.169.0.1:80
+for i in `seq 1 9` ; do
+    for j in `seq 1 254` ; do
+        IPS=${IPS},192.169.$i.$j:80
+    done
+done
+
+ovn-nbctl lb-add lb0 172.172.0.1:8080 "${IPS}"
+ovn-nbctl --wait=hv ls-lb-add ls1 lb0
+
+AT_CHECK([test 2287 = `ovs-ofctl dump-group-stats br-int | grep -o bucket | wc -l`])
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
-- 
2.25.4