From 5e165afe5eca8e093879df006a6d61be2cbdda53 Mon Sep 17 00:00:00 2001
From: Dumitru Ceara <dceara@redhat.com>
Date: Fri, 16 Aug 2019 14:20:45 +0200
Subject: [PATCH 2/4] ovn-northd: Add IGMP Relay support

Add a new configuration option 'mcast_relay' to the Logical_Router:options
in the OVN Northbound database.

If a router is configured with 'mcast_relay' enabled, ovn-northd
installs Logical_Flows that allow IP multicast traffic to be routed
between Logical_Switches. The logical router aggregates all IGMP
groups from attached logical switches and modifies the routing pipeline
in the following way:
- Table S_ROUTER_IN_IP_INPUT: add a flow allowing IP multicast traffic
  if mcast_relay is enabled on the datapath.
- Table S_ROUTER_IN_IP_ROUTING: add a flow matching the group address,
  decrementing the TTL and setting outport to the Multicast_Group
  associated with the IGMP group. Continue to the next table.
- Table S_ROUTER_IN_ARP_RESOLVE: bypass ARP resolution for IP multicast
  traffic and continue to the next table.
- Table S_ROUTER_OUT_DELIVERY: add a flow matching IP multicast traffic
  that sets eth.src to the MAC address of the logical router port on
  which the traffic is forwarded.
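
A minimal usage sketch (the switch and router names are illustrative,
mirroring the test topology added below, and not part of the option
itself):

  ovn-nbctl set Logical_Router rtr options:mcast_relay="true"
  ovn-nbctl set Logical_Switch sw1 other_config:mcast_snoop="true"
  ovn-nbctl set Logical_Switch sw2 other_config:mcast_snoop="true"

Once a group, e.g. 239.0.1.68, is learnt on an attached switch, the
router's S_ROUTER_IN_IP_ROUTING stage gets a priority-500 logical flow
along the lines of:

  match : ip4 && ip4.dst == 239.0.1.68
  action: outport = "239.0.1.68"; ip.ttl--; next;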

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Numan Siddique <nusiddiq@redhat.com>

(cherry-picked from ovn commit 5d1527b11e9420a36adc2410f78be6b6674c098e)
---
 NEWS                        |   1 +
 ovn/lib/logical-fields.c    |   2 +
 ovn/lib/mcast-group-index.h |  13 +-
 ovn/northd/ovn-northd.8.xml |  79 +++++-
 ovn/northd/ovn-northd.c     | 499 +++++++++++++++++++++++++++---------
 ovn/ovn-nb.xml              |   6 +
 ovn/ovn-sb.xml              |   2 +
 tests/ovn.at                | 199 ++++++++++++--
 8 files changed, 650 insertions(+), 151 deletions(-)

diff --git a/NEWS b/NEWS
index c81890b7e..754ac2e71 100644
--- a/NEWS
+++ b/NEWS
@@ -64,6 +64,7 @@ v2.12.0 - 03 Sep 2019
        members of the same transport zone(s).
      * Support for IGMP Snooping and IGMP Querier.
      * Support for new logical switch port type - 'virtual'.
+     * Support for IGMP Relay.
    - New QoS type "linux-netem" on Linux.
    - Added support for TLS Server Name Indication (SNI).
    - Linux datapath:
diff --git a/ovn/lib/logical-fields.c b/ovn/lib/logical-fields.c
index 4ad5bf481..8fb591c0a 100644
--- a/ovn/lib/logical-fields.c
+++ b/ovn/lib/logical-fields.c
@@ -156,6 +156,8 @@ ovn_init_symtab(struct shash *symtab)
 
     expr_symtab_add_field(symtab, "ip4.src", MFF_IPV4_SRC, "ip4", false);
     expr_symtab_add_field(symtab, "ip4.dst", MFF_IPV4_DST, "ip4", false);
+    expr_symtab_add_predicate(symtab, "ip4.src_mcast",
+                              "ip4.src[28..31] == 0xe");
     expr_symtab_add_predicate(symtab, "ip4.mcast", "ip4.dst[28..31] == 0xe");
 
     expr_symtab_add_predicate(symtab, "icmp4", "ip4 && ip.proto == 1");
diff --git a/ovn/lib/mcast-group-index.h b/ovn/lib/mcast-group-index.h
index 859e6a72f..6249cac99 100644
--- a/ovn/lib/mcast-group-index.h
+++ b/ovn/lib/mcast-group-index.h
@@ -20,8 +20,17 @@ struct ovsdb_idl;
 
 struct sbrec_datapath_binding;
 
-#define OVN_MCAST_FLOOD_TUNNEL_KEY   65535
-#define OVN_MCAST_UNKNOWN_TUNNEL_KEY (OVN_MCAST_FLOOD_TUNNEL_KEY - 1)
+#define OVN_MIN_MULTICAST 32768
+#define OVN_MAX_MULTICAST 65535
+
+enum ovn_mcast_tunnel_keys {
+
+    OVN_MCAST_FLOOD_TUNNEL_KEY = OVN_MIN_MULTICAST,
+    OVN_MCAST_UNKNOWN_TUNNEL_KEY,
+    OVN_MCAST_MROUTER_FLOOD_TUNNEL_KEY,
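+    /* Keys in the range [OVN_MIN_IP_MULTICAST, OVN_MAX_IP_MULTICAST] are
+     * allocated dynamically, one per IGMP group learnt on a datapath. */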
+    OVN_MIN_IP_MULTICAST,
+    OVN_MAX_IP_MULTICAST = OVN_MAX_MULTICAST,
+};
 
 struct ovsdb_idl_index *mcast_group_index_create(struct ovsdb_idl *);
 const struct sbrec_multicast_group *
diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
index 0435dae26..ec2b6454c 100644
--- a/ovn/northd/ovn-northd.8.xml
+++ b/ovn/northd/ovn-northd.8.xml
@@ -952,10 +952,40 @@ output;
 
     <ul>
       <li>
-        A priority-100 flow that outputs all packets with an Ethernet broadcast
+        A priority-100 flow that punts all IGMP packets to
+        <code>ovn-controller</code> if IGMP snooping is enabled on the
+        logical switch.
+      </li>
+
+      <li>
+        Priority-90 flows that forward registered IP multicast traffic to
+        the corresponding multicast group, which <code>ovn-northd</code>
+        creates based on learnt <ref table="IGMP_Group" db="OVN_Southbound"/>
+        entries.  The flows also forward packets to the
+        <code>MC_MROUTER_FLOOD</code> multicast group, which
+        <code>ovn-northd</code> populates with all the logical ports that
+        are connected to logical routers with
+        <ref column="options" table="Logical_Router"/>:mcast_relay='true'.
+      </li>
+
+      <li>
+        A priority-85 flow that forwards all IP multicast traffic destined to
+        224.0.0.X to the <code>MC_FLOOD</code> multicast group, which
+        <code>ovn-northd</code> populates with all enabled logical ports.
+      </li>
+
+      <li>
+        A priority-80 flow that forwards all unregistered IP multicast traffic
+        to the <code>MC_MROUTER_FLOOD</code> multicast group, if any.
+        Otherwise the flow drops all unregistered IP multicast packets.  This
+        flow is added only if <ref column="other_config"
+        table="Logical_Switch"/>:mcast_flood_unregistered='false'.
+      </li>
+
+      <li>
+        A priority-70 flow that outputs all packets with an Ethernet broadcast
         or multicast <code>eth.dst</code> to the <code>MC_FLOOD</code>
-        multicast group, which <code>ovn-northd</code> populates with all
-        enabled logical ports.
+        multicast group.
       </li>
 
       <li>
@@ -1236,6 +1266,14 @@ output;
         </ul>
       </li>
 
+      <li>
+        <p>
+          A priority-95 flow that allows IP multicast traffic if
+          <ref column="options" table="Logical_Router"/>:mcast_relay='true'
+          and drops it otherwise.
+        </p>
+      </li>
+
       <li>
         <p>
           ICMP echo reply.  These flows reply to ICMP echo requests received
@@ -1982,6 +2020,16 @@ output;
     </p>
 
     <ul>
+      <li>
+        <p>
+          Priority-500 flows that match IP multicast traffic destined to
+          groups registered on any of the attached switches and set
+          <code>outport</code> to the associated multicast group that will
+          eventually flood the traffic to all interested attached logical
+          switches. The flows also decrement TTL.
+        </p>
+      </li>
+
       <li>
         <p>
           For distributed logical routers where one of the logical router
@@ -2115,6 +2163,15 @@ next;
     </p>
 
     <ul>
+      <li>
+        <p>
+          A priority-500 flow that matches IP multicast traffic that was
+          allowed in the routing pipeline. For this kind of traffic the
+          <code>outport</code> was already set, so the flow just advances to
+          the next table.
+        </p>
+      </li>
+
       <li>
         <p>
           For distributed logical routers where one of the logical router
@@ -2684,9 +2741,19 @@ clone {
     <h3>Egress Table 3: Delivery</h3>
 
     <p>
-      Packets that reach this table are ready for delivery.  It contains
-      priority-100 logical flows that match packets on each enabled logical
-      router port, with action <code>output;</code>.
+      Packets that reach this table are ready for delivery.  It contains:
+      <ul>
+        <li>
+          Priority-110 logical flows that match IP multicast packets on each
+          enabled logical router port, set the Ethernet source address of
+          the packets to the Ethernet address of the port, and then execute
+          action <code>output;</code>.
+        </li>
+        <li>
+          Priority-100 logical flows that match packets on each enabled
+          logical router port, with action <code>output;</code>.
+        </li>
+      </ul>
     </p>
 
 </manpage>
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index d81cfd893..83a7ec14f 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -440,32 +440,52 @@ struct ipam_info {
     bool mac_only;
 };
 
-#define OVN_MIN_MULTICAST 32768
-#define OVN_MAX_MULTICAST OVN_MCAST_FLOOD_TUNNEL_KEY
-BUILD_ASSERT_DECL(OVN_MIN_MULTICAST < OVN_MAX_MULTICAST);
-
-#define OVN_MIN_IP_MULTICAST OVN_MIN_MULTICAST
-#define OVN_MAX_IP_MULTICAST (OVN_MCAST_UNKNOWN_TUNNEL_KEY - 1)
-BUILD_ASSERT_DECL(OVN_MAX_IP_MULTICAST >= OVN_MIN_MULTICAST);
-
 /*
  * Multicast snooping and querier per datapath configuration.
  */
+struct mcast_switch_info {
+
+    bool enabled;               /* True if snooping enabled. */
+    bool querier;               /* True if querier enabled. */
+    bool flood_unregistered;    /* True if unregistered multicast should be
+                                 * flooded.
+                                 */
+    bool flood_relay;           /* True if the switch is connected to a
+                                 * multicast router and unregistered multicast
+                                 * should be flooded to the mrouter. Only
+                                 * applicable if flood_unregistered == false.
+                                 */
+
+    int64_t table_size;         /* Max number of IP multicast groups. */
+    int64_t idle_timeout;       /* Timeout after which an idle group is
+                                 * flushed.
+                                 */
+    int64_t query_interval;     /* Interval between multicast queries. */
+    char *eth_src;              /* ETH src address of the multicast queries. */
+    char *ipv4_src;             /* IP src address of the multicast queries. */
+    int64_t query_max_response; /* Expected time after which reports should
+                                 * be received for queries that were sent out.
+                                 */
+
+    uint32_t active_flows;      /* Current number of active IP multicast
+                                 * flows.
+                                 */
+};
+
+struct mcast_router_info {
+    bool relay; /* True if the router should relay IP multicast. */
+};
+
 struct mcast_info {
-    bool enabled;
-    bool querier;
-    bool flood_unregistered;
-
-    int64_t table_size;
-    int64_t idle_timeout;
-    int64_t query_interval;
-    char *eth_src;
-    char *ipv4_src;
-    int64_t  query_max_response;
-
-    struct hmap group_tnlids;
-    uint32_t group_tnlid_hint;
-    uint32_t active_flows;
+
+    struct hmap group_tnlids;  /* Group tunnel IDs in use on this DP. */
+    uint32_t group_tnlid_hint; /* Hint for allocating next group tunnel ID. */
+    struct ovs_list groups;    /* List of groups learnt on this DP. */
+
+    union {
+        struct mcast_switch_info sw;  /* Switch specific multicast info. */
+        struct mcast_router_info rtr; /* Router specific multicast info. */
+    };
 };
 
 static uint32_t
@@ -566,6 +586,7 @@ ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
 }
 
 static void ovn_ls_port_group_destroy(struct hmap *nb_pgs);
+static void destroy_mcast_info_for_datapath(struct ovn_datapath *od);
 
 static void
 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
@@ -579,12 +600,7 @@ ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
         bitmap_free(od->ipam_info.allocated_ipv4s);
         free(od->router_ports);
         ovn_ls_port_group_destroy(&od->nb_pgs);
-
-        if (od->nbs) {
-            free(od->mcast_info.eth_src);
-            free(od->mcast_info.ipv4_src);
-            destroy_tnlids(&od->mcast_info.group_tnlids);
-        }
+        destroy_mcast_info_for_datapath(od);
 
         free(od);
     }
@@ -721,23 +737,28 @@ init_ipam_info_for_datapath(struct ovn_datapath *od)
 }
 
 static void
-init_mcast_info_for_datapath(struct ovn_datapath *od)
+init_mcast_info_for_router_datapath(struct ovn_datapath *od)
 {
-    if (!od->nbs) {
-        return;
-    }
+    struct mcast_router_info *mcast_rtr_info = &od->mcast_info.rtr;
 
-    struct mcast_info *mcast_info = &od->mcast_info;
+    mcast_rtr_info->relay = smap_get_bool(&od->nbr->options, "mcast_relay",
+                                          false);
+}
 
-    mcast_info->enabled =
+static void
+init_mcast_info_for_switch_datapath(struct ovn_datapath *od)
+{
+    struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw;
+
+    mcast_sw_info->enabled =
         smap_get_bool(&od->nbs->other_config, "mcast_snoop", false);
-    mcast_info->querier =
+    mcast_sw_info->querier =
         smap_get_bool(&od->nbs->other_config, "mcast_querier", true);
-    mcast_info->flood_unregistered =
+    mcast_sw_info->flood_unregistered =
         smap_get_bool(&od->nbs->other_config, "mcast_flood_unregistered",
                       false);
 
-    mcast_info->table_size =
+    mcast_sw_info->table_size =
         smap_get_ullong(&od->nbs->other_config, "mcast_table_size",
                         OVN_MCAST_DEFAULT_MAX_ENTRIES);
 
@@ -749,54 +770,94 @@ init_mcast_info_for_datapath(struct ovn_datapath *od)
     } else if (idle_timeout > OVN_MCAST_MAX_IDLE_TIMEOUT_S) {
         idle_timeout = OVN_MCAST_MAX_IDLE_TIMEOUT_S;
     }
-    mcast_info->idle_timeout = idle_timeout;
+    mcast_sw_info->idle_timeout = idle_timeout;
 
     uint32_t query_interval =
         smap_get_ullong(&od->nbs->other_config, "mcast_query_interval",
-                        mcast_info->idle_timeout / 2);
+                        mcast_sw_info->idle_timeout / 2);
     if (query_interval < OVN_MCAST_MIN_QUERY_INTERVAL_S) {
         query_interval = OVN_MCAST_MIN_QUERY_INTERVAL_S;
     } else if (query_interval > OVN_MCAST_MAX_QUERY_INTERVAL_S) {
         query_interval = OVN_MCAST_MAX_QUERY_INTERVAL_S;
     }
-    mcast_info->query_interval = query_interval;
+    mcast_sw_info->query_interval = query_interval;
 
-    mcast_info->eth_src =
+    mcast_sw_info->eth_src =
         nullable_xstrdup(smap_get(&od->nbs->other_config, "mcast_eth_src"));
-    mcast_info->ipv4_src =
+    mcast_sw_info->ipv4_src =
         nullable_xstrdup(smap_get(&od->nbs->other_config, "mcast_ip4_src"));
 
-    mcast_info->query_max_response =
+    mcast_sw_info->query_max_response =
         smap_get_ullong(&od->nbs->other_config, "mcast_query_max_response",
                         OVN_MCAST_DEFAULT_QUERY_MAX_RESPONSE_S);
 
-    hmap_init(&mcast_info->group_tnlids);
-    mcast_info->group_tnlid_hint = OVN_MIN_IP_MULTICAST;
-    mcast_info->active_flows = 0;
+    mcast_sw_info->active_flows = 0;
+}
+
+static void
+init_mcast_info_for_datapath(struct ovn_datapath *od)
+{
+    if (!od->nbr && !od->nbs) {
+        return;
+    }
+
+    hmap_init(&od->mcast_info.group_tnlids);
+    od->mcast_info.group_tnlid_hint = OVN_MIN_IP_MULTICAST;
+    ovs_list_init(&od->mcast_info.groups);
+
+    if (od->nbs) {
+        init_mcast_info_for_switch_datapath(od);
+    } else {
+        init_mcast_info_for_router_datapath(od);
+    }
+}
+
+static void
+destroy_mcast_info_for_switch_datapath(struct ovn_datapath *od)
+{
+    struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw;
+
+    free(mcast_sw_info->eth_src);
+    free(mcast_sw_info->ipv4_src);
+}
+
+static void
+destroy_mcast_info_for_datapath(struct ovn_datapath *od)
+{
+    if (!od->nbr && !od->nbs) {
+        return;
+    }
+
+    if (od->nbs) {
+        destroy_mcast_info_for_switch_datapath(od);
+    }
+
+    destroy_tnlids(&od->mcast_info.group_tnlids);
 }
 
 static void
-store_mcast_info_for_datapath(const struct sbrec_ip_multicast *sb,
-                              struct ovn_datapath *od)
+store_mcast_info_for_switch_datapath(const struct sbrec_ip_multicast *sb,
+                                     struct ovn_datapath *od)
 {
-    struct mcast_info *mcast_info = &od->mcast_info;
+    struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw;
 
     sbrec_ip_multicast_set_datapath(sb, od->sb);
-    sbrec_ip_multicast_set_enabled(sb, &mcast_info->enabled, 1);
-    sbrec_ip_multicast_set_querier(sb, &mcast_info->querier, 1);
-    sbrec_ip_multicast_set_table_size(sb, &mcast_info->table_size, 1);
-    sbrec_ip_multicast_set_idle_timeout(sb, &mcast_info->idle_timeout, 1);
+    sbrec_ip_multicast_set_enabled(sb, &mcast_sw_info->enabled, 1);
+    sbrec_ip_multicast_set_querier(sb, &mcast_sw_info->querier, 1);
+    sbrec_ip_multicast_set_table_size(sb, &mcast_sw_info->table_size, 1);
+    sbrec_ip_multicast_set_idle_timeout(sb, &mcast_sw_info->idle_timeout, 1);
     sbrec_ip_multicast_set_query_interval(sb,
-                                          &mcast_info->query_interval, 1);
+                                          &mcast_sw_info->query_interval, 1);
     sbrec_ip_multicast_set_query_max_resp(sb,
-                                          &mcast_info->query_max_response, 1);
+                                          &mcast_sw_info->query_max_response,
+                                          1);
 
-    if (mcast_info->eth_src) {
-        sbrec_ip_multicast_set_eth_src(sb, mcast_info->eth_src);
+    if (mcast_sw_info->eth_src) {
+        sbrec_ip_multicast_set_eth_src(sb, mcast_sw_info->eth_src);
     }
 
-    if (mcast_info->ipv4_src) {
-        sbrec_ip_multicast_set_ip4_src(sb, mcast_info->ipv4_src);
+    if (mcast_sw_info->ipv4_src) {
+        sbrec_ip_multicast_set_ip4_src(sb, mcast_sw_info->ipv4_src);
     }
 }
 
@@ -913,6 +974,7 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
                                      NULL, nbr, NULL);
             ovs_list_push_back(nb_only, &od->list);
         }
+        init_mcast_info_for_datapath(od);
         ovs_list_push_back(lr_list, &od->lr_list);
     }
 }
@@ -2006,6 +2068,13 @@ join_logical_ports(struct northd_context *ctx,
                     break;
                 }
             }
+
+            /* If the peer router has multicast relay enabled then set
+             * flood_relay on the switch datapath.
+             */
+            if (peer->od && peer->od->mcast_info.rtr.relay) {
+                op->od->mcast_info.sw.flood_relay = true;
+            }
         } else if (op->nbrp && op->nbrp->peer && !op->derived) {
             struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
             if (peer) {
@@ -2861,6 +2930,10 @@ struct multicast_group {
 static const struct multicast_group mc_flood =
     { MC_FLOOD, OVN_MCAST_FLOOD_TUNNEL_KEY };
 
+#define MC_MROUTER_FLOOD "_MC_mrouter_flood"
+static const struct multicast_group mc_mrouter_flood =
+    { MC_MROUTER_FLOOD, OVN_MCAST_MROUTER_FLOOD_TUNNEL_KEY };
+
 #define MC_UNKNOWN "_MC_unknown"
 static const struct multicast_group mc_unknown =
     { MC_UNKNOWN, OVN_MCAST_UNKNOWN_TUNNEL_KEY };
@@ -2970,7 +3043,8 @@ ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
  */
 struct ovn_igmp_group_entry {
     struct ovs_list list_node; /* Linkage in the list of entries. */
-    const struct sbrec_igmp_group *sb;
+    size_t n_ports;
+    struct ovn_port **ports;
 };
 
 /*
@@ -2979,12 +3053,13 @@ struct ovn_igmp_group_entry {
  */
 struct ovn_igmp_group {
     struct hmap_node hmap_node; /* Index on 'datapath' and 'address'. */
+    struct ovs_list list_node;  /* Linkage in the per-dp igmp group list. */
 
     struct ovn_datapath *datapath;
     struct in6_addr address; /* Multicast IPv6-mapped-IPv4 or IPv4 address. */
     struct multicast_group mcgroup;
 
-    struct ovs_list sb_entries; /* List of SB entries for this group. */
+    struct ovs_list entries; /* List of SB entries for this group. */
 };
 
 static uint32_t
@@ -3012,77 +3087,120 @@ ovn_igmp_group_find(struct hmap *igmp_groups,
     return NULL;
 }
 
-static void
+static struct ovn_igmp_group *
 ovn_igmp_group_add(struct northd_context *ctx, struct hmap *igmp_groups,
                    struct ovn_datapath *datapath,
-                   const struct sbrec_igmp_group *sb_igmp_group)
+                   const struct in6_addr *address,
+                   const char *address_s)
 {
-    struct in6_addr group_address;
-    ovs_be32 ipv4;
-
-    if (ip_parse(sb_igmp_group->address, &ipv4)) {
-        group_address = in6_addr_mapped_ipv4(ipv4);
-    } else if (!ipv6_parse(sb_igmp_group->address, &group_address)) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-        VLOG_WARN_RL(&rl, "invalid IGMP group address: %s",
-                     sb_igmp_group->address);
-        return;
-    }
-
     struct ovn_igmp_group *igmp_group =
-        ovn_igmp_group_find(igmp_groups, datapath, &group_address);
+        ovn_igmp_group_find(igmp_groups, datapath, address);
 
     if (!igmp_group) {
         igmp_group = xmalloc(sizeof *igmp_group);
 
         const struct sbrec_multicast_group *mcgroup =
-            mcast_group_lookup(ctx->sbrec_mcast_group_by_name_dp,
-                               sb_igmp_group->address, datapath->sb);
+            mcast_group_lookup(ctx->sbrec_mcast_group_by_name_dp, address_s,
+                               datapath->sb);
 
         igmp_group->datapath = datapath;
-        igmp_group->address = group_address;
+        igmp_group->address = *address;
         if (mcgroup) {
             igmp_group->mcgroup.key = mcgroup->tunnel_key;
             add_tnlid(&datapath->mcast_info.group_tnlids, mcgroup->tunnel_key);
         } else {
             igmp_group->mcgroup.key = 0;
         }
-        igmp_group->mcgroup.name = sb_igmp_group->address;
-        ovs_list_init(&igmp_group->sb_entries);
+        igmp_group->mcgroup.name = address_s;
+        ovs_list_init(&igmp_group->entries);
 
         hmap_insert(igmp_groups, &igmp_group->hmap_node,
-                    ovn_igmp_group_hash(datapath, &group_address));
+                    ovn_igmp_group_hash(datapath, address));
+        ovs_list_push_back(&datapath->mcast_info.groups,
+                           &igmp_group->list_node);
     }
 
+    return igmp_group;
+}
+
+static bool
+ovn_igmp_group_get_address(const struct sbrec_igmp_group *sb_igmp_group,
+                           struct in6_addr *address)
+{
+    ovs_be32 ipv4;
+
+    if (ip_parse(sb_igmp_group->address, &ipv4)) {
+        *address = in6_addr_mapped_ipv4(ipv4);
+        return true;
+    }
+    if (!ipv6_parse(sb_igmp_group->address, address)) {
+        return false;
+    }
+    return true;
+}
+
+static struct ovn_port **
+ovn_igmp_group_get_ports(const struct sbrec_igmp_group *sb_igmp_group,
+                         size_t *n_ports, struct hmap *ovn_ports)
+{
+    struct ovn_port **ports = xmalloc(sb_igmp_group->n_ports * sizeof *ports);
+
+    *n_ports = 0;
+    for (size_t i = 0; i < sb_igmp_group->n_ports; i++) {
+        ports[(*n_ports)] =
+            ovn_port_find(ovn_ports, sb_igmp_group->ports[i]->logical_port);
+        if (ports[(*n_ports)]) {
+            (*n_ports)++;
+        }
+    }
+
+    return ports;
+}
+
+static void
+ovn_igmp_group_add_entry(struct ovn_igmp_group *igmp_group,
+                         struct ovn_port **ports, size_t n_ports)
+{
     struct ovn_igmp_group_entry *entry = xmalloc(sizeof *entry);
 
-    entry->sb = sb_igmp_group;
-    ovs_list_push_back(&igmp_group->sb_entries , &entry->list_node);
+    entry->ports = ports;
+    entry->n_ports = n_ports;
+    ovs_list_push_back(&igmp_group->entries, &entry->list_node);
+}
+
+static void
+ovn_igmp_group_destroy_entry(struct ovn_igmp_group_entry *entry)
+{
+    free(entry->ports);
+}
+
+static bool
+ovn_igmp_group_allocate_id(struct ovn_igmp_group *igmp_group)
+{
+    if (igmp_group->mcgroup.key == 0) {
+        struct mcast_info *mcast_info = &igmp_group->datapath->mcast_info;
+        igmp_group->mcgroup.key = ovn_mcast_group_allocate_key(mcast_info);
+    }
+
+    if (igmp_group->mcgroup.key == 0) {
+        return false;
+    }
+
+    return true;
 }
 
 static void
 ovn_igmp_group_aggregate_ports(struct ovn_igmp_group *igmp_group,
-                               struct hmap *ovn_ports,
                                struct hmap *mcast_groups)
 {
     struct ovn_igmp_group_entry *entry;
 
-    LIST_FOR_EACH_POP (entry, list_node, &igmp_group->sb_entries) {
-        size_t n_oports = 0;
-        struct ovn_port **oports =
-            xmalloc(entry->sb->n_ports * sizeof *oports);
-
-        for (size_t i = 0; i < entry->sb->n_ports; i++) {
-            oports[n_oports] =
-                ovn_port_find(ovn_ports, entry->sb->ports[i]->logical_port);
-            if (oports[n_oports]) {
-                n_oports++;
-            }
-        }
-
+    LIST_FOR_EACH_POP (entry, list_node, &igmp_group->entries) {
         ovn_multicast_add_ports(mcast_groups, igmp_group->datapath,
-                                &igmp_group->mcgroup, oports, n_oports);
-        free(oports);
+                                &igmp_group->mcgroup, entry->ports,
+                                entry->n_ports);
+
+        ovn_igmp_group_destroy_entry(entry);
         free(entry);
     }
 }
@@ -3094,10 +3212,12 @@ ovn_igmp_group_destroy(struct hmap *igmp_groups,
     if (igmp_group) {
         struct ovn_igmp_group_entry *entry;
 
-        LIST_FOR_EACH_POP (entry, list_node, &igmp_group->sb_entries) {
+        LIST_FOR_EACH_POP (entry, list_node, &igmp_group->entries) {
+            ovn_igmp_group_destroy_entry(entry);
             free(entry);
         }
         hmap_remove(igmp_groups, &igmp_group->hmap_node);
+        ovs_list_remove(&igmp_group->list_node);
         free(igmp_group);
     }
 }
@@ -5404,7 +5524,9 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
             continue;
         }
 
-        if (od->mcast_info.enabled) {
+        struct mcast_switch_info *mcast_sw_info = &od->mcast_info.sw;
+
+        if (mcast_sw_info->enabled) {
             /* Punt IGMP traffic to controller. */
             ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100,
                           "ip4 && ip.proto == 2", "igmp;");
@@ -5417,9 +5539,16 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                           "outport = \""MC_FLOOD"\"; output;");
 
             /* Drop unregistered IP multicast if not allowed. */
-            if (!od->mcast_info.flood_unregistered) {
-                ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 80,
-                              "ip4 && ip4.mcast", "drop;");
+            if (!mcast_sw_info->flood_unregistered) {
+                /* Forward unregistered IP multicast to mrouter (if any). */
+                if (mcast_sw_info->flood_relay) {
+                    ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 80,
+                                  "ip4 && ip4.mcast",
+                                  "outport = \""MC_MROUTER_FLOOD"\"; output;");
+                } else {
+                    ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 80,
+                                  "ip4 && ip4.mcast", "drop;");
+                }
             }
         }
 
@@ -5436,18 +5565,26 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
             continue;
         }
 
-        struct mcast_info *mcast_info = &igmp_group->datapath->mcast_info;
+        struct mcast_switch_info *mcast_sw_info =
+            &igmp_group->datapath->mcast_info.sw;
 
-        if (mcast_info->active_flows >= mcast_info->table_size) {
+        if (mcast_sw_info->active_flows >= mcast_sw_info->table_size) {
             continue;
         }
-        mcast_info->active_flows++;
+        mcast_sw_info->active_flows++;
 
         ds_clear(&match);
         ds_clear(&actions);
 
         ds_put_format(&match, "eth.mcast && ip4 && ip4.dst == %s ",
                       igmp_group->mcgroup.name);
+        /* Also flood traffic to all multicast routers with relay enabled. */
+        if (mcast_sw_info->flood_relay) {
+            ds_put_cstr(&actions,
+                        "clone { "
+                            "outport = \""MC_MROUTER_FLOOD "\"; output; "
+                        "};");
+        }
         ds_put_format(&actions, "outport = \"%s\"; output; ",
                       igmp_group->mcgroup.name);
 
@@ -6293,7 +6430,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
          * source or destination, and zero network source or destination
          * (priority 100). */
         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
-                      "ip4.mcast || "
+                      "ip4.src_mcast ||"
                       "ip4.src == 255.255.255.255 || "
                       "ip4.src == 127.0.0.0/8 || "
                       "ip4.dst == 127.0.0.0/8 || "
@@ -6301,6 +6438,10 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                       "ip4.dst == 0.0.0.0/8",
                       "drop;");
 
+        /* Allow multicast if relay enabled (priority 95). */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 95, "ip4.mcast",
+                      od->mcast_info.rtr.relay ? "next;" : "drop;");
+
         /* ARP reply handling.  Use ARP replies to populate the logical
          * router's ARP table. */
         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
@@ -7608,6 +7749,27 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         }
     }
 
+    /* IP Multicast lookup. Here we set the output port, decrement TTL and
+     * advance to the next table (priority 500).
+     */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr || !od->mcast_info.rtr.relay) {
+            continue;
+        }
+        struct ovn_igmp_group *igmp_group;
+
+        LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
+            ds_clear(&match);
+            ds_clear(&actions);
+            ds_put_format(&match, "ip4 && ip4.dst == %s ",
+                          igmp_group->mcgroup.name);
+            ds_put_format(&actions, "outport = \"%s\"; ip.ttl--; next;",
+                          igmp_group->mcgroup.name);
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
+                          ds_cstr(&match), ds_cstr(&actions));
+        }
+    }
+
     /* Logical router ingress table 8: Policy.
      *
      * A packet that arrives at this table is an IP packet that should be
@@ -7638,10 +7800,24 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
 
     /* Local router ingress table 9: ARP Resolution.
      *
-     * Any packet that reaches this table is an IP packet whose next-hop IP
-     * address is in reg0. (ip4.dst is the final destination.) This table
-     * resolves the IP address in reg0 into an output port in outport and an
-     * Ethernet address in eth.dst. */
+     * Multicast packets already have the outport set, so just advance to the
+     * next table (priority 500). */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500,
+                      "ip4.mcast", "next;");
+    }
+
+    /* Logical router ingress table 9: ARP Resolution.
+     *
+     * Any unicast packet that reaches this table is an IP packet whose
+     * next-hop IP address is in reg0. (ip4.dst is the final destination.)
+     * This table resolves the IP address in reg0 into an output port in
+     * outport and an Ethernet address in eth.dst.
+     */
     HMAP_FOR_EACH (op, key_node, ports) {
         if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
             continue;
@@ -8123,9 +8299,13 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
     }
 
-    /* Logical router egress table 1: Delivery (priority 100).
+    /* Logical router egress table 1: Delivery (priority 100-110).
      *
-     * Priority 100 rules deliver packets to enabled logical ports. */
+     * Priority 100 rules deliver packets to enabled logical ports.
+     * Priority 110 rules match multicast packets and update the source
+     * MAC before delivering to enabled logical ports. IP multicast traffic
+     * bypasses S_ROUTER_IN_IP_ROUTING route lookups.
+     */
     HMAP_FOR_EACH (op, key_node, ports) {
         if (!op->nbrp) {
             continue;
@@ -8145,6 +8325,20 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
             continue;
         }
 
+        /* If multicast relay is enabled then also adjust the source MAC for
+         * IP multicast traffic.
+         */
+        if (op->od->mcast_info.rtr.relay) {
+            ds_clear(&match);
+            ds_clear(&actions);
+            ds_put_format(&match, "ip4.mcast && outport == %s",
+                          op->json_key);
+            ds_put_format(&actions, "eth.src = %s; output;",
+                          op->lrp_networks.ea_s);
+            ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110,
+                          ds_cstr(&match), ds_cstr(&actions));
+        }
+
         ds_clear(&match);
         ds_put_format(&match, "outport == %s", op->json_key);
         ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
@@ -8696,7 +8890,7 @@ build_ip_mcast(struct northd_context *ctx, struct hmap *datapaths)
         if (!ip_mcast) {
             ip_mcast = sbrec_ip_multicast_insert(ctx->ovnsb_txn);
         }
-        store_mcast_info_for_datapath(ip_mcast, od);
+        store_mcast_info_for_switch_datapath(ip_mcast, od);
     }
 
     /* Delete southbound records without northbound matches. */
@@ -8728,6 +8922,14 @@ build_mcast_groups(struct northd_context *ctx,
 
         if (lsp_is_enabled(op->nbsp)) {
             ovn_multicast_add(mcast_groups, &mc_flood, op);
+
+            /* If this port is connected to a multicast router then add it
+             * to the MC_MROUTER_FLOOD group.
+             */
+            if (op->od->mcast_info.sw.flood_relay && op->peer &&
+                    op->peer->od && op->peer->od->mcast_info.rtr.relay) {
+                ovn_multicast_add(mcast_groups, &mc_mrouter_flood, op);
+            }
         }
     }
 
@@ -8750,10 +8952,61 @@ build_mcast_groups(struct northd_context *ctx,
             continue;
         }
 
+        struct in6_addr group_address;
+        if (!ovn_igmp_group_get_address(sb_igmp, &group_address)) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+            VLOG_WARN_RL(&rl, "invalid IGMP group address: %s",
+                         sb_igmp->address);
+            continue;
+        }
+
         /* Add the IGMP group entry. Will also try to allocate an ID for it
          * if the multicast group already exists.
          */
-        ovn_igmp_group_add(ctx, igmp_groups, od, sb_igmp);
+        struct ovn_igmp_group *igmp_group =
+            ovn_igmp_group_add(ctx, igmp_groups, od, &group_address,
+                               sb_igmp->address);
+
+        /* Extract the IGMP group ports from the SB entry and store them
+         * in the IGMP group.
+         */
+        size_t n_igmp_ports;
+        struct ovn_port **igmp_ports =
+            ovn_igmp_group_get_ports(sb_igmp, &n_igmp_ports, ports);
+        ovn_igmp_group_add_entry(igmp_group, igmp_ports, n_igmp_ports);
+    }
+
+    /* Build IGMP groups for multicast routers with relay enabled. The router
+     * IGMP groups are based on the groups learnt by their multicast enabled
+     * peers.
+     */
+    struct ovn_datapath *od;
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+
+        if (ovs_list_is_empty(&od->mcast_info.groups)) {
+            continue;
+        }
+
+        for (size_t i = 0; i < od->n_router_ports; i++) {
+            struct ovn_port *router_port = od->router_ports[i]->peer;
+
+            if (!router_port || !router_port->od ||
+                    !router_port->od->mcast_info.rtr.relay) {
+                continue;
+            }
+
+            struct ovn_igmp_group *igmp_group;
+            LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
+                struct ovn_igmp_group *igmp_group_rtr =
+                    ovn_igmp_group_add(ctx, igmp_groups, router_port->od,
+                                       &igmp_group->address,
+                                       igmp_group->mcgroup.name);
+                struct ovn_port **router_igmp_ports =
+                    xmalloc(sizeof *router_igmp_ports);
+                router_igmp_ports[0] = router_port;
+                ovn_igmp_group_add_entry(igmp_group_rtr, router_igmp_ports, 1);
+            }
+        }
     }
 
     /* Walk the aggregated IGMP groups and allocate IDs for new entries.
@@ -8761,21 +9014,17 @@ build_mcast_groups(struct northd_context *ctx,
      */
     struct ovn_igmp_group *igmp_group, *igmp_group_next;
     HMAP_FOR_EACH_SAFE (igmp_group, igmp_group_next, hmap_node, igmp_groups) {
-        if (igmp_group->mcgroup.key == 0) {
-            struct mcast_info *mcast_info = &igmp_group->datapath->mcast_info;
-            igmp_group->mcgroup.key = ovn_mcast_group_allocate_key(mcast_info);
-        }
 
-        /* If we ran out of keys just destroy the entry. */
-        if (igmp_group->mcgroup.key == 0) {
+        if (!ovn_igmp_group_allocate_id(igmp_group)) {
+            /* If we ran out of keys just destroy the entry. */
             ovn_igmp_group_destroy(igmp_groups, igmp_group);
             continue;
         }
 
-        /* Aggregate the ports from all SB entries corresponding to this
+        /* Aggregate the ports from all entries corresponding to this
          * group.
          */
-        ovn_igmp_group_aggregate_ports(igmp_group, ports, mcast_groups);
+        ovn_igmp_group_aggregate_ports(igmp_group, mcast_groups);
     }
 }
 
diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
index b457b6efc..c2472a04a 100644
--- a/ovn/ovn-nb.xml
+++ b/ovn/ovn-nb.xml
@@ -1535,6 +1535,12 @@
           address.
         </p>
       </column>
+      <column name="options" key="mcast_relay" type='{"type": "boolean"}'>
+        <p>
+          Enables/disables IP multicast relay between logical switches
+          connected to the logical router. Default: False.
+        </p>
+      </column>
     </group>
 
     <group title="Common Columns">
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 71bd08665..477e7bc7a 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -1017,6 +1017,8 @@
         <li><code>eth.mcast</code> expands to <code>eth.dst[40]</code></li>
         <li><code>vlan.present</code> expands to <code>vlan.tci[12]</code></li>
         <li><code>ip4</code> expands to <code>eth.type == 0x800</code></li>
+        <li><code>ip4.src_mcast</code> expands to
+        <code>ip4.src[28..31] == 0xe</code></li>
         <li><code>ip4.mcast</code> expands to <code>ip4.dst[28..31] == 0xe</code></li>
         <li><code>ip6</code> expands to <code>eth.type == 0x86dd</code></li>
         <li><code>ip</code> expands to <code>ip4 || ip6</code></li>
diff --git a/tests/ovn.at b/tests/ovn.at
index 3f7e06cf5..df41a7549 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -15900,12 +15900,12 @@ AT_CHECK([ovn-sbctl get controller_event $uuid event_info:vip], [0], [dnl
 OVN_CLEANUP([hv1], [hv2])
 AT_CLEANUP
 
-AT_SETUP([ovn -- IGMP snoop/querier])
+AT_SETUP([ovn -- IGMP snoop/querier/relay])
 AT_SKIP_IF([test $HAVE_PYTHON = no])
 ovn_start
 
 # Logical network:
-# Two independent logical switches (sw1 and sw2).
+# Three logical switches (sw1-sw3) connected to a logical router (rtr).
 # sw1:
 #   - subnet 10.0.0.0/8
 #   - 2 ports bound on hv1 (sw1-p11, sw1-p12)
@@ -15915,6 +15915,10 @@ ovn_start
 #   - 1 port bound on hv1 (sw2-p1)
 #   - 1 port bound on hv2 (sw2-p2)
 #   - IGMP Querier from 20.0.0.254
+# sw3:
+#   - subnet 30.0.0.0/8
+#   - 1 port bound on hv1 (sw3-p1)
+#   - 1 port bound on hv2 (sw3-p2)
 
 reset_pcap_file() {
     local iface=$1
@@ -15991,29 +15995,47 @@ store_igmp_v3_query() {
 }
 
 #
-# send_ip_multicast_pkt INPORT HV ETH_SRC ETH_DST IP_SRC IP_DST IP_LEN
-#    IP_PROTO DATA OUTFILE
+# send_ip_multicast_pkt INPORT HV ETH_SRC ETH_DST IP_SRC IP_DST IP_LEN TTL
+#    IP_CHKSUM IP_PROTO DATA
 #
 # This shell function causes an IP multicast packet to be received on INPORT
 # of HV.
-# The hexdump of the packet is stored in OUTFILE.
 #
 send_ip_multicast_pkt() {
-    local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ip_src=$5 ip_dst=$6
-    local ip_len=$7 ip_chksum=$8 proto=$9 data=${10} outfile=${11}
-
-    local ip_ttl=20
+    local inport=$1 hv=$2 eth_src=$3 eth_dst=$4
+    local ip_src=$5 ip_dst=$6 ip_len=$7 ip_ttl=$8 ip_chksum=$9 proto=${10}
+    local data=${11}
 
     local eth=${eth_dst}${eth_src}0800
     local ip=450000${ip_len}95f14000${ip_ttl}${proto}${ip_chksum}${ip_src}${ip_dst}
     local packet=${eth}${ip}${data}
 
     as $hv ovs-appctl netdev-dummy/receive ${inport} ${packet}
+}
+
+#
+# store_ip_multicast_pkt ETH_SRC ETH_DST IP_SRC IP_DST IP_LEN TTL
+#    IP_CHKSUM IP_PROTO DATA OUTFILE
+#
+# This shell function builds an IP multicast packet and stores the hexdump of
+# the packet in OUTFILE.
+#
+store_ip_multicast_pkt() {
+    local eth_src=$1 eth_dst=$2
+    local ip_src=$3 ip_dst=$4 ip_len=$5 ip_ttl=$6 ip_chksum=$7 proto=$8
+    local data=$9 outfile=${10}
+
+    local eth=${eth_dst}${eth_src}0800
+    local ip=450000${ip_len}95f14000${ip_ttl}${proto}${ip_chksum}${ip_src}${ip_dst}
+    local packet=${eth}${ip}${data}
+
     echo ${packet} >> ${outfile}
 }
 
 ovn-nbctl ls-add sw1
 ovn-nbctl ls-add sw2
+ovn-nbctl ls-add sw3
 
 ovn-nbctl lsp-add sw1 sw1-p11
 ovn-nbctl lsp-add sw1 sw1-p12
@@ -16021,6 +16043,26 @@ ovn-nbctl lsp-add sw1 sw1-p21
 ovn-nbctl lsp-add sw1 sw1-p22
 ovn-nbctl lsp-add sw2 sw2-p1
 ovn-nbctl lsp-add sw2 sw2-p2
+ovn-nbctl lsp-add sw3 sw3-p1
+ovn-nbctl lsp-add sw3 sw3-p2
+
+ovn-nbctl lr-add rtr
+ovn-nbctl lrp-add rtr rtr-sw1 00:00:00:00:01:00 10.0.0.254/24
+ovn-nbctl lrp-add rtr rtr-sw2 00:00:00:00:02:00 20.0.0.254/24
+ovn-nbctl lrp-add rtr rtr-sw3 00:00:00:00:03:00 30.0.0.254/24
+
+ovn-nbctl lsp-add sw1 sw1-rtr                      \
+    -- lsp-set-type sw1-rtr router                 \
+    -- lsp-set-addresses sw1-rtr 00:00:00:00:01:00 \
+    -- lsp-set-options sw1-rtr router-port=rtr-sw1
+ovn-nbctl lsp-add sw2 sw2-rtr                      \
+    -- lsp-set-type sw2-rtr router                 \
+    -- lsp-set-addresses sw2-rtr 00:00:00:00:02:00 \
+    -- lsp-set-options sw2-rtr router-port=rtr-sw2
+ovn-nbctl lsp-add sw3 sw3-rtr                      \
+    -- lsp-set-type sw3-rtr router                 \
+    -- lsp-set-addresses sw3-rtr 00:00:00:00:03:00 \
+    -- lsp-set-options sw3-rtr router-port=rtr-sw3
 
 net_add n1
 sim_add hv1
@@ -16042,6 +16084,11 @@ ovs-vsctl -- add-port br-int hv1-vif3 -- \
     options:tx_pcap=hv1/vif3-tx.pcap \
     options:rxq_pcap=hv1/vif3-rx.pcap \
     ofport-request=1
+ovs-vsctl -- add-port br-int hv1-vif4 -- \
+    set interface hv1-vif4 external-ids:iface-id=sw3-p1 \
+    options:tx_pcap=hv1/vif4-tx.pcap \
+    options:rxq_pcap=hv1/vif4-rx.pcap \
+    ofport-request=1
 
 sim_add hv2
 as hv2
@@ -16062,12 +16109,18 @@ ovs-vsctl -- add-port br-int hv2-vif3 -- \
     options:tx_pcap=hv2/vif3-tx.pcap \
     options:rxq_pcap=hv2/vif3-rx.pcap \
     ofport-request=1
+ovs-vsctl -- add-port br-int hv2-vif4 -- \
+    set interface hv2-vif4 external-ids:iface-id=sw3-p2 \
+    options:tx_pcap=hv2/vif4-tx.pcap \
+    options:rxq_pcap=hv2/vif4-rx.pcap \
+    ofport-request=1
 
 OVN_POPULATE_ARP
 
 # Enable IGMP snooping on sw1.
-ovn-nbctl set Logical_Switch sw1 other_config:mcast_querier="false"
-ovn-nbctl set Logical_Switch sw1 other_config:mcast_snoop="true"
+ovn-nbctl set Logical_Switch sw1       \
+    other_config:mcast_querier="false" \
+    other_config:mcast_snoop="true"
 
 # No IGMP query should be generated by sw1 (mcast_querier="false").
 truncate -s 0 expected
@@ -16100,9 +16153,12 @@ truncate -s 0 expected
 truncate -s 0 expected_empty
 send_ip_multicast_pkt hv1-vif2 hv1 \
     000000000001 01005e000144 \
-    $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e ca70 11 \
-    e518e518000a3b3a0000 \
-    expected
+    $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \
+    e518e518000a3b3a0000
+store_ip_multicast_pkt \
+    000000000001 01005e000144 \
+    $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \
+    e518e518000a3b3a0000 expected
 
 OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected])
 OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected])
@@ -16123,17 +16179,19 @@ OVS_WAIT_UNTIL([
     test "${total_entries}" = "1"
 ])
 
-# Send traffic traffic and make sure it gets forwarded only on the port that
-# joined.
+# Send traffic and make sure it gets forwarded only on the port that joined.
 as hv1 reset_pcap_file hv1-vif1 hv1/vif1
 as hv2 reset_pcap_file hv2-vif1 hv2/vif1
 truncate -s 0 expected
 truncate -s 0 expected_empty
 send_ip_multicast_pkt hv1-vif2 hv1 \
     000000000001 01005e000144 \
-    $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e ca70 11 \
-    e518e518000a3b3a0000 \
-    expected
+    $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \
+    e518e518000a3b3a0000
+store_ip_multicast_pkt \
+    000000000001 01005e000144 \
+    $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \
+    e518e518000a3b3a0000 expected
 
 OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected_empty])
 OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected])
@@ -16167,5 +16225,110 @@ sleep 1
 OVN_CHECK_PACKETS([hv1/vif3-tx.pcap], [expected])
 OVN_CHECK_PACKETS([hv2/vif3-tx.pcap], [expected])
 
+# Disable IGMP querier on sw2.
+ovn-nbctl set Logical_Switch sw2 \
+    other_config:mcast_querier="false"
+
+# Enable IGMP snooping on sw3.
+ovn-nbctl set Logical_Switch sw3       \
+    other_config:mcast_querier="false" \
+    other_config:mcast_snoop="true"
+
+# Send traffic from sw3 and make sure rtr doesn't relay it.
+truncate -s 0 expected_empty
+
+as hv1 reset_pcap_file hv1-vif1 hv1/vif1
+as hv1 reset_pcap_file hv1-vif2 hv1/vif2
+as hv1 reset_pcap_file hv1-vif3 hv1/vif3
+as hv1 reset_pcap_file hv1-vif4 hv1/vif4
+as hv2 reset_pcap_file hv2-vif1 hv2/vif1
+as hv2 reset_pcap_file hv2-vif2 hv2/vif2
+as hv2 reset_pcap_file hv2-vif3 hv2/vif3
+as hv2 reset_pcap_file hv2-vif4 hv2/vif4
+
+send_ip_multicast_pkt hv2-vif4 hv2 \
+    000000000001 01005e000144 \
+    $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \
+    e518e518000a3b3a0000
+
+# Sleep a bit to make sure no traffic is received and then check.
+sleep 1
+OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected_empty])
+OVN_CHECK_PACKETS([hv2/vif3-tx.pcap], [expected_empty])
+OVN_CHECK_PACKETS([hv1/vif4-tx.pcap], [expected_empty])
+OVN_CHECK_PACKETS([hv1/vif2-tx.pcap], [expected_empty])
+OVN_CHECK_PACKETS([hv1/vif3-tx.pcap], [expected_empty])
+OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected_empty])
+OVN_CHECK_PACKETS([hv2/vif2-tx.pcap], [expected_empty])
+OVN_CHECK_PACKETS([hv2/vif4-tx.pcap], [expected_empty])
+
+# Enable IGMP relay on rtr
+ovn-nbctl set logical_router rtr \
+    options:mcast_relay="true"
+
+# Inject IGMP Join for 239.0.1.68 on sw1-p11.
+send_igmp_v3_report hv1-vif1 hv1 \
+    000000000001 $(ip_to_hex 10 0 0 1) f9f8 \
+    $(ip_to_hex 239 0 1 68) 04 e9b9 \
+    /dev/null
+# Inject IGMP Join for 239.0.1.68 on sw2-p2.
+send_igmp_v3_report hv2-vif3 hv2 \
+    000000000001 $(ip_to_hex 10 0 0 1) f9f8 \
+    $(ip_to_hex 239 0 1 68) 04 e9b9 \
+    /dev/null
+# Inject IGMP Join for 239.0.1.68 on sw3-p1.
+send_igmp_v3_report hv1-vif4 hv1 \
+    000000000001 $(ip_to_hex 10 0 0 1) f9f8 \
+    $(ip_to_hex 239 0 1 68) 04 e9b9 \
+    /dev/null
+
+# Check that the IGMP Group is learned by all switches.
+OVS_WAIT_UNTIL([
+    total_entries=`ovn-sbctl find IGMP_Group | grep "239.0.1.68" | wc -l`
+    test "${total_entries}" = "3"
+])
+
+# Send traffic from sw3 and make sure it is relayed by rtr to the switches
+# and ports that joined.
+truncate -s 0 expected_routed_sw1
+truncate -s 0 expected_routed_sw2
+truncate -s 0 expected_switched
+truncate -s 0 expected_empty
+
+as hv1 reset_pcap_file hv1-vif1 hv1/vif1
+as hv1 reset_pcap_file hv1-vif2 hv1/vif2
+as hv1 reset_pcap_file hv1-vif3 hv1/vif3
+as hv1 reset_pcap_file hv1-vif4 hv1/vif4
+as hv2 reset_pcap_file hv2-vif1 hv2/vif1
+as hv2 reset_pcap_file hv2-vif2 hv2/vif2
+as hv2 reset_pcap_file hv2-vif3 hv2/vif3
+as hv2 reset_pcap_file hv2-vif4 hv2/vif4
+
+send_ip_multicast_pkt hv2-vif4 hv2 \
+    000000000001 01005e000144 \
+    $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \
+    e518e518000a3b3a0000
+store_ip_multicast_pkt \
+    000000000100 01005e000144 \
+    $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 1f cb70 11 \
+    e518e518000a3b3a0000 expected_routed_sw1
+store_ip_multicast_pkt \
+    000000000200 01005e000144 \
+    $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 1f cb70 11 \
+    e518e518000a3b3a0000 expected_routed_sw2
+store_ip_multicast_pkt \
+    000000000001 01005e000144 \
+    $(ip_to_hex 10 0 0 42) $(ip_to_hex 239 0 1 68) 1e 20 ca70 11 \
+    e518e518000a3b3a0000 expected_switched
+
+OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [expected_routed_sw1])
+OVN_CHECK_PACKETS([hv2/vif3-tx.pcap], [expected_routed_sw2])
+OVN_CHECK_PACKETS([hv1/vif4-tx.pcap], [expected_switched])
+OVN_CHECK_PACKETS([hv1/vif2-tx.pcap], [expected_empty])
+OVN_CHECK_PACKETS([hv1/vif3-tx.pcap], [expected_empty])
+OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected_empty])
+OVN_CHECK_PACKETS([hv2/vif2-tx.pcap], [expected_empty])
+OVN_CHECK_PACKETS([hv2/vif4-tx.pcap], [expected_empty])
+
 OVN_CLEANUP([hv1], [hv2])
 AT_CLEANUP
-- 
2.21.0