0e847a
From 5010c42c47b7b5a3d68d83369d6c17ed0bc11cff Mon Sep 17 00:00:00 2001
0e847a
From: Petr Mensik <pemensik@redhat.com>
0e847a
Date: Wed, 17 Feb 2021 11:47:28 +0100
0e847a
Subject: [PATCH] Correct occasional --bind-dynamic synchronization break
0e847a
0e847a
Request only one re-read of addresses and/or routes
0e847a
0e847a
Previous implementation re-reads systemd addresses exactly the same
0e847a
number of time equal number of notifications received.
0e847a
This is not necessary, we need just notification of change, then re-read
0e847a
the current state and adapt listeners. Repeated re-reading slows netlink
0e847a
processing and highers CPU usage on mass interface changes.
0e847a
0e847a
Continue reading multicast events from netlink, even when ENOBUFS
0e847a
arrive. Broadcasts are not trusted anyway and refresh would be done in
0e847a
iface_enumerate. Save queued events sent again.
0e847a
0e847a
Remove sleeping on netlink ENOBUFS
0e847a
0e847a
With reduced number of written events netlink should receive ENOBUFS
0e847a
rarely. It does not make sense to wait if it is received. It is just a
0e847a
signal some packets got missing. Fast reading all pending packets is required,
0e847a
seq checking ensures it already. Finishes changes by
0e847a
commit 1d07667ac77c55b9de56b1b2c385167e0e0ec27a.
0e847a
0e847a
Move restart from iface_enumerate to enumerate_interfaces
0e847a
0e847a
When ENOBUFS is received, restart of reading addresses is done. But
0e847a
previously found addresses might not have been found this time. In order
0e847a
to catch this, restart both IPv4 and IPv6 enumeration with clearing
0e847a
found interfaces first. It should deliver up-to-date state also after
0e847a
ENOBUFS.
0e847a
0e847a
Read all netlink messages before netlink restart
0e847a
0e847a
Before writing again into netlink socket, try fetching all pending
0e847a
messages. They would be ignored, only might trigger new address
0e847a
synchronization. Should ensure new try has better chance to succeed.
0e847a
0e847a
Request sending ENOBUFS again
0e847a
0e847a
ENOBUFS error handling was improved. Netlink is correctly drained before
0e847a
sending a new request again. It seems ENOBUFS supression is no longer
0e847a
necessary or wanted. Let kernel tell us when it failed and handle it a
0e847a
good way.
0e847a
---
0e847a
 src/netlink.c | 67 ++++++++++++++++++++++++++++++++++++---------------
0e847a
 src/network.c | 11 +++++++--
0e847a
 2 files changed, 57 insertions(+), 21 deletions(-)
0e847a
0e847a
diff --git a/src/netlink.c b/src/netlink.c
0e847a
index ac1a1c5..f95f3e8 100644
0e847a
--- a/src/netlink.c
0e847a
+++ b/src/netlink.c
0e847a
@@ -32,13 +32,21 @@
0e847a
 
0e847a
 #ifndef NDA_RTA
0e847a
 #  define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) 
0e847a
-#endif 
0e847a
+#endif
0e847a
+
0e847a
+/* Used to request refresh of addresses or routes just once,
0e847a
+ * when multiple changes might be announced. */
0e847a
+enum async_states {
0e847a
+  STATE_NEWADDR = (1 << 0),
0e847a
+  STATE_NEWROUTE = (1 << 1),
0e847a
+};
0e847a
 
0e847a
 
0e847a
 static struct iovec iov;
0e847a
 static u32 netlink_pid;
0e847a
 
0e847a
-static void nl_async(struct nlmsghdr *h);
0e847a
+static unsigned nl_async(struct nlmsghdr *h, unsigned state);
0e847a
+static void nl_multicast_state(unsigned state);
0e847a
 
0e847a
 void netlink_init(void)
0e847a
 {
0e847a
@@ -135,7 +143,9 @@ static ssize_t netlink_recv(void)
0e847a
   
0e847a
 
0e847a
 /* family = AF_UNSPEC finds ARP table entries.
0e847a
-   family = AF_LOCAL finds MAC addresses. */
0e847a
+   family = AF_LOCAL finds MAC addresses.
0e847a
+   returns 0 on failure, 1 on success, -1 when restart is required
0e847a
+*/
0e847a
 int iface_enumerate(int family, void *parm, int (*callback)())
0e847a
 {
0e847a
   struct sockaddr_nl addr;
0e847a
@@ -143,6 +153,7 @@ int iface_enumerate(int family, void *parm, int (*callback)())
0e847a
   ssize_t len;
0e847a
   static unsigned int seq = 0;
0e847a
   int callback_ok = 1;
0e847a
+  unsigned state = 0;
0e847a
 
0e847a
   struct {
0e847a
     struct nlmsghdr nlh;
0e847a
@@ -154,7 +165,6 @@ int iface_enumerate(int family, void *parm, int (*callback)())
0e847a
   addr.nl_groups = 0;
0e847a
   addr.nl_pid = 0; /* address to kernel */
0e847a
  
0e847a
- again: 
0e847a
   if (family == AF_UNSPEC)
0e847a
     req.nlh.nlmsg_type = RTM_GETNEIGH;
0e847a
   else if (family == AF_LOCAL)
0e847a
@@ -181,8 +191,8 @@ int iface_enumerate(int family, void *parm, int (*callback)())
0e847a
 	{
0e847a
 	  if (errno == ENOBUFS)
0e847a
 	    {
0e847a
-	      sleep(1);
0e847a
-	      goto again;
0e847a
+	      nl_multicast_state(state);
0e847a
+	      return -1;
0e847a
 	    }
0e847a
 	  return 0;
0e847a
 	}
0e847a
@@ -191,7 +201,7 @@ int iface_enumerate(int family, void *parm, int (*callback)())
0e847a
 	if (h->nlmsg_pid != netlink_pid || h->nlmsg_type == NLMSG_ERROR)
0e847a
 	  {
0e847a
 	    /* May be multicast arriving async */
0e847a
-	    nl_async(h);
0e847a
+	    state = nl_async(h, state);
0e847a
 	  }
0e847a
 	else if (h->nlmsg_seq != seq)
0e847a
 	  {
0e847a
@@ -327,26 +337,36 @@ int iface_enumerate(int family, void *parm, int (*callback)())
0e847a
     }
0e847a
 }
0e847a
 
0e847a
-void netlink_multicast(void)
0e847a
+static void nl_multicast_state(unsigned state)
0e847a
 {
0e847a
   ssize_t len;
0e847a
   struct nlmsghdr *h;
0e847a
   int flags;
0e847a
-  
0e847a
-  /* don't risk blocking reading netlink messages here. */
0e847a
+
0e847a
   if ((flags = fcntl(daemon->netlinkfd, F_GETFL)) == -1 ||
0e847a
       fcntl(daemon->netlinkfd, F_SETFL, flags | O_NONBLOCK) == -1) 
0e847a
     return;
0e847a
+
0e847a
+  do {
0e847a
+    /* don't risk blocking reading netlink messages here. */
0e847a
+    while ((len = netlink_recv()) != -1)
0e847a
   
0e847a
-  if ((len = netlink_recv()) != -1)
0e847a
-    for (h = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(h, (size_t)len); h = NLMSG_NEXT(h, len))
0e847a
-      nl_async(h);
0e847a
-  
0e847a
+      for (h = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(h, (size_t)len); h = NLMSG_NEXT(h, len))
0e847a
+	state = nl_async(h, state);
0e847a
+  } while (errno == ENOBUFS);
0e847a
+
0e847a
   /* restore non-blocking status */
0e847a
   fcntl(daemon->netlinkfd, F_SETFL, flags);
0e847a
 }
0e847a
 
0e847a
-static void nl_async(struct nlmsghdr *h)
0e847a
+void netlink_multicast(void)
0e847a
+{
0e847a
+  unsigned state = 0;
0e847a
+  nl_multicast_state(state);
0e847a
+}
0e847a
+
0e847a
+
0e847a
+static unsigned nl_async(struct nlmsghdr *h, unsigned state)
0e847a
 {
0e847a
   if (h->nlmsg_type == NLMSG_ERROR)
0e847a
     {
0e847a
@@ -354,7 +374,8 @@ static void nl_async(struct nlmsghdr *h)
0e847a
       if (err->error != 0)
0e847a
 	my_syslog(LOG_ERR, _("netlink returns error: %s"), strerror(-(err->error)));
0e847a
     }
0e847a
-  else if (h->nlmsg_pid == 0 && h->nlmsg_type == RTM_NEWROUTE) 
0e847a
+  else if (h->nlmsg_pid == 0 && h->nlmsg_type == RTM_NEWROUTE &&
0e847a
+	   (state & STATE_NEWROUTE)==0)
0e847a
     {
0e847a
       /* We arrange to receive netlink multicast messages whenever the network route is added.
0e847a
 	 If this happens and we still have a DNS packet in the buffer, we re-send it.
0e847a
@@ -366,10 +387,18 @@ static void nl_async(struct nlmsghdr *h)
0e847a
       if (rtm->rtm_type == RTN_UNICAST && rtm->rtm_scope == RT_SCOPE_LINK &&
0e847a
 	  (rtm->rtm_table == RT_TABLE_MAIN ||
0e847a
 	   rtm->rtm_table == RT_TABLE_LOCAL))
0e847a
-	queue_event(EVENT_NEWROUTE);
0e847a
+	{
0e847a
+	  queue_event(EVENT_NEWROUTE);
0e847a
+	  state |= STATE_NEWROUTE;
0e847a
+	}
0e847a
+    }
0e847a
+  else if ((h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) &&
0e847a
+	   (state & STATE_NEWADDR)==0)
0e847a
+    {
0e847a
+      queue_event(EVENT_NEWADDR);
0e847a
+      state |= STATE_NEWADDR;
0e847a
     }
0e847a
-  else if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR) 
0e847a
-    queue_event(EVENT_NEWADDR);
0e847a
+  return state;
0e847a
 }
0e847a
 #endif
0e847a
 
0e847a
diff --git a/src/network.c b/src/network.c
0e847a
index c6e7d89..47caf38 100644
0e847a
--- a/src/network.c
0e847a
+++ b/src/network.c
0e847a
@@ -656,7 +656,8 @@ int enumerate_interfaces(int reset)
0e847a
 
0e847a
   if ((param.fd = socket(PF_INET, SOCK_DGRAM, 0)) == -1)
0e847a
     return 0;
0e847a
- 
0e847a
+
0e847a
+again:
0e847a
   /* Mark interfaces for garbage collection */
0e847a
   for (iface = daemon->interfaces; iface; iface = iface->next) 
0e847a
     iface->found = 0;
0e847a
@@ -709,10 +710,16 @@ int enumerate_interfaces(int reset)
0e847a
   
0e847a
 #ifdef HAVE_IPV6
0e847a
   ret = iface_enumerate(AF_INET6, &param, iface_allowed_v6);
0e847a
+  if (ret < 0)
0e847a
+    goto again;
0e847a
 #endif
0e847a
 
0e847a
   if (ret)
0e847a
-    ret = iface_enumerate(AF_INET, &param, iface_allowed_v4); 
0e847a
+    {
0e847a
+      ret = iface_enumerate(AF_INET, &param, iface_allowed_v4);
0e847a
+      if (ret < 0)
0e847a
+	goto again;
0e847a
+    }
0e847a
  
0e847a
   errsave = errno;
0e847a
   close(param.fd);
0e847a
-- 
0e847a
2.26.2
0e847a