773311
From 88056d15bffe67c033322de16c01a013e7bc7c7c Mon Sep 17 00:00:00 2001
773311
From: Mark Michelson <mmichels@redhat.com>
773311
Date: Wed, 6 May 2020 09:49:55 -0400
773311
Subject: [PATCH] ofctrl: Split large group_mod messages up.
773311
773311
Group mod messages have the possibility of growing very large if OVN
773311
installs a load balancer with a great many backends. The current
773311
approach is to send a single ADD message with the entire group contents.
773311
If the size of this message exceeds UINT16_MAX, then OpenFlow cannot
773311
properly express the length of the message since the OpenFlow header's
773311
length is limited to 16 bits.
773311
773311
This patch solves the problem by breaking the message into pieces. The
773311
first piece is an ADD, and subsequent messages are INSERT_BUCKET
773311
messages. This way, we end up being able to express the entire size of
773311
the group through multiple OpenFlow messages.
773311
773311
Signed-off-by: Mark Michelson <mmichels@redhat.com>
773311
Acked-by: Numan Siddique <numans@ovn.org>
773311
---
773311
 controller/ofctrl.c | 70 ++++++++++++++++++++++++++++++++++++++++++---
773311
 tests/ovn.at        | 29 +++++++++++++++++++
773311
 2 files changed, 95 insertions(+), 4 deletions(-)
773311
773311
diff --git a/controller/ofctrl.c b/controller/ofctrl.c
773311
index 4b51cd86e..073e076c7 100644
773311
--- a/controller/ofctrl.c
773311
+++ b/controller/ofctrl.c
773311
@@ -930,10 +930,72 @@ encode_group_mod(const struct ofputil_group_mod *gm)
773311
 }
773311
 
773311
 static void
773311
-add_group_mod(const struct ofputil_group_mod *gm, struct ovs_list *msgs)
773311
+add_group_mod(struct ofputil_group_mod *gm, struct ovs_list *msgs)
773311
 {
773311
     struct ofpbuf *msg = encode_group_mod(gm);
773311
-    ovs_list_push_back(msgs, &msg->list_node);
773311
+    if (msg->size <= UINT16_MAX) {
773311
+        ovs_list_push_back(msgs, &msg->list_node);
773311
+        return;
773311
+    }
773311
+    /* This group mod request is too large to fit in a single OF message
773311
+     * since the header can only specify a 16-bit size. We need to break
773311
+     * this into multiple group_mod requests.
773311
+     */
773311
+
773311
+    /* Pull the first bucket. All buckets are approximately the same length
773311
+     * since they contain near-identical actions. Using its length can give
773311
+     * us a good approximation of how many buckets we can fit in a single
773311
+     * OF message.
773311
+     */
773311
+    ofpraw_pull_assert(msg);
773311
+    struct ofp15_group_mod *ogm = ofpbuf_pull(msg, sizeof(*ogm));
773311
+    struct ofp15_bucket *of_bucket = ofpbuf_pull(msg, sizeof(*of_bucket));
773311
+    uint16_t bucket_size = ntohs(of_bucket->len);
773311
+
773311
+    ofpbuf_delete(msg);
773311
+
773311
+    /* Dividing by 2 here ensures that just in case there are variations in
773311
+     * the size of the buckets, we will not put too many in our new group_mod
773311
+     * message.
773311
+     */
773311
+    size_t max_buckets = ((UINT16_MAX - sizeof *ogm) / bucket_size) / 2;
773311
+
773311
+    ovs_assert(max_buckets < ovs_list_size(&gm->buckets));
773311
+
773311
+    uint16_t command = OFPGC15_INSERT_BUCKET;
773311
+    if (gm->command == OFPGC15_DELETE ||
773311
+        gm->command == OFPGC15_REMOVE_BUCKET) {
773311
+        command = OFPGC15_REMOVE_BUCKET;
773311
+    }
773311
+    struct ofputil_group_mod split = {
773311
+        .command = command,
773311
+        .type = gm->type,
773311
+        .group_id = gm->group_id,
773311
+        .command_bucket_id = OFPG15_BUCKET_LAST,
773311
+    };
773311
+    ovs_list_init(&split.buckets);
773311
+
773311
+    size_t i = 0;
773311
+    struct ofputil_bucket *bucket;
773311
+    LIST_FOR_EACH (bucket, list_node, &gm->buckets) {
773311
+        if (i++ < max_buckets) {
773311
+            continue;
773311
+        }
773311
+        break;
773311
+    }
773311
+
773311
+    ovs_list_splice(&split.buckets, &bucket->list_node, &gm->buckets);
773311
+
773311
+    struct ofpbuf *orig = encode_group_mod(gm);
773311
+    ovs_list_push_back(msgs, &orig->list_node);
773311
+
773311
+    /* We call this recursively just in case our new
773311
+     * INSERT_BUCKET/REMOVE_BUCKET group_mod is still too
773311
+     * large for an OF message. This will allow for it to
773311
+     * be broken into pieces, too.
773311
+     */
773311
+    add_group_mod(&split, msgs);
773311
+    ofputil_uninit_group_mod(&split);
773311
 }
773311
 
773311
 
773311
@@ -1124,7 +1186,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table,
773311
         char *group_string = xasprintf("group_id=%"PRIu32",%s",
773311
                                        desired->table_id,
773311
                                        desired->name);
773311
-        char *error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD, group_string,
773311
+        char *error = parse_ofp_group_mod_str(&gm, OFPGC15_ADD, group_string,
773311
                                               NULL, NULL, &usable_protocols);
773311
         if (!error) {
773311
             add_group_mod(&gm, &msgs);
773311
@@ -1243,7 +1305,7 @@ ofctrl_put(struct ovn_desired_flow_table *flow_table,
773311
         enum ofputil_protocol usable_protocols;
773311
         char *group_string = xasprintf("group_id=%"PRIu32"",
773311
                                        installed->table_id);
773311
-        char *error = parse_ofp_group_mod_str(&gm, OFPGC11_DELETE,
773311
+        char *error = parse_ofp_group_mod_str(&gm, OFPGC15_DELETE,
773311
                                               group_string, NULL, NULL,
773311
                                               &usable_protocols);
773311
         if (!error) {
773311
diff --git a/tests/ovn.at b/tests/ovn.at
773311
index 52d994972..f39fda2e4 100644
773311
--- a/tests/ovn.at
773311
+++ b/tests/ovn.at
773311
@@ -19179,3 +19179,32 @@ OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected])
773311
 
773311
 OVN_CLEANUP([hv1])
773311
 AT_CLEANUP
773311
+
773311
+AT_SETUP([ovn -- Big Load Balancer])
773311
+ovn_start
773311
+
773311
+ovn-nbctl ls-add ls1
773311
+ovn-nbctl lsp-add ls1 lsp1
773311
+
773311
+net_add n1
773311
+sim_add hv1
773311
+
773311
+as hv1
773311
+ovs-vsctl add-br br-phys
773311
+ovn_attach n1 br-phys 192.168.0.1
773311
+ovs-vsctl add-port br-int p1 -- set Interface p1 external-ids:iface-id=lsp1
773311
+
773311
+IPS=192.169.0.1:80
773311
+for i in `seq 1 9` ; do
773311
+    for j in `seq 1 254` ; do
773311
+        IPS=${IPS},192.169.$i.$j:80
773311
+    done
773311
+done
773311
+
773311
+ovn-nbctl lb-add lb0 172.172.0.1:8080 "${IPS}"
773311
+ovn-nbctl --wait=hv ls-lb-add ls1 lb0
773311
+
773311
+AT_CHECK([test 2287 = `ovs-ofctl dump-group-stats br-int | grep -o bucket | wc -l`])
773311
+
773311
+OVN_CLEANUP([hv1])
773311
+AT_CLEANUP
773311
-- 
773311
2.25.4
773311