Blob Blame History Raw
From d88dc5e696f1b8b95e416890ac831eb0c26250ff Mon Sep 17 00:00:00 2001
From: Simon Kelley <simon@thekelleys.org.uk>
Date: Mon, 15 Mar 2021 21:59:51 +0000
Subject: [PATCH] Use random source ports where possible if source
 addresses/interfaces in use.

CVE-2021-3448 applies.

It's possible to specify the source address or interface to be
used when contacting upstream nameservers: server=8.8.8.8@1.2.3.4
or server=8.8.8.8@1.2.3.4#66 or server=8.8.8.8@eth0, and all of
these have, until now, used a single socket, bound to a fixed
port. This was originally done to allow an error (non-existent
interface, or non-local address) to be detected at start-up. This
means that any upstream servers specified in such a way don't use
random source ports, and are more susceptible to cache-poisoning
attacks.

We now use random ports where possible, even when the
source is specified, so server=8.8.8.8@1.2.3.4 or
server=8.8.8.8@eth0 will use random source
ports. server=8.8.8.8@1.2.3.4#66 or any use of --query-port will
use the explicitly configured port, and should only be done with
understanding of the security implications.
Note that this change turns non-existent interface, or non-local
source address errors from fatal into run-time errors. The error will
be logged and communication with the server will not be possible.
---
 man/dnsmasq.8 |   4 +-
 src/dnsmasq.c |  31 +++--
 src/dnsmasq.h |  28 ++--
 src/forward.c | 373 +++++++++++++++++++++++++++++++-------------------
 src/loop.c    |  20 +--
 src/network.c | 100 ++++----------
 src/option.c  |   3 +-
 src/tftp.c    |   6 +-
 src/util.c    |   2 +-
 9 files changed, 310 insertions(+), 257 deletions(-)

diff --git a/man/dnsmasq.8 b/man/dnsmasq.8
index 45d2273..7f4c62e 100644
--- a/man/dnsmasq.8
+++ b/man/dnsmasq.8
@@ -419,7 +419,7 @@ Tells dnsmasq to never forward A or AAAA queries for plain names, without dots
 or domain parts, to upstream nameservers. If the name is not known
 from /etc/hosts or DHCP then a "not found" answer is returned.
 .TP
-.B \-S, --local, --server=[/[<domain>]/[domain/]][<ipaddr>[#<port>][@<source-ip>|<interface>[#<port>]]
+.B \-S, --local, --server=[/[<domain>]/[domain/]][<ipaddr>[#<port>]][@<interface>][@<source-ip>[#<port>]]
 Specify IP address of upstream servers directly. Setting this flag does
 not suppress reading of /etc/resolv.conf, use -R to do that. If one or
 more 
@@ -481,7 +481,7 @@ source address specified but the port may be specified directly as
 part of the source address. Forcing queries to an interface is not
 implemented on all platforms supported by dnsmasq.
 .TP
-.B --rev-server=<ip-address>/<prefix-len>,<ipaddr>[#<port>][@<source-ip>|<interface>[#<port>]]
+.B --rev-server=<ip-address>/<prefix-len>,<ipaddr>[#<port>][@<source-ip>|@<interface>[#<port>]]
 This is functionally the same as 
 .B --server, 
 but provides some syntactic sugar to make specifying address-to-name queries easier. For example
diff --git a/src/dnsmasq.c b/src/dnsmasq.c
index b7f0a29..3a1f65e 100644
--- a/src/dnsmasq.c
+++ b/src/dnsmasq.c
@@ -1538,6 +1538,7 @@ static int set_dns_listeners(time_t now)
 {
   struct serverfd *serverfdp;
   struct listener *listener;
+  struct randfd_list *rfl;
   int wait = 0, i;
   
 #ifdef HAVE_TFTP
@@ -1557,11 +1558,14 @@ static int set_dns_listeners(time_t now)
   for (serverfdp = daemon->sfds; serverfdp; serverfdp = serverfdp->next)
     poll_listen(serverfdp->fd, POLLIN);
     
-  if (daemon->port != 0 && !daemon->osport)
-    for (i = 0; i < RANDOM_SOCKS; i++)
-      if (daemon->randomsocks[i].refcount != 0)
-	poll_listen(daemon->randomsocks[i].fd, POLLIN);
-	  
+  for (i = 0; i < RANDOM_SOCKS; i++)
+    if (daemon->randomsocks[i].refcount != 0)
+      poll_listen(daemon->randomsocks[i].fd, POLLIN);
+
+  /* Check overflow random sockets too. */
+  for (rfl = daemon->rfl_poll; rfl; rfl = rfl->next)
+    poll_listen(rfl->rfd->fd, POLLIN);
+  
   for (listener = daemon->listeners; listener; listener = listener->next)
     {
       /* only listen for queries if we have resources */
@@ -1592,17 +1596,22 @@ static void check_dns_listeners(time_t now)
 {
   struct serverfd *serverfdp;
   struct listener *listener;
+  struct randfd_list *rfl;
   int i;
 
   for (serverfdp = daemon->sfds; serverfdp; serverfdp = serverfdp->next)
     if (poll_check(serverfdp->fd, POLLIN))
-      reply_query(serverfdp->fd, serverfdp->source_addr.sa.sa_family, now);
+      reply_query(serverfdp->fd, now);
   
-  if (daemon->port != 0 && !daemon->osport)
-    for (i = 0; i < RANDOM_SOCKS; i++)
-      if (daemon->randomsocks[i].refcount != 0 && 
-	  poll_check(daemon->randomsocks[i].fd, POLLIN))
-	reply_query(daemon->randomsocks[i].fd, daemon->randomsocks[i].family, now);
+  for (i = 0; i < RANDOM_SOCKS; i++)
+    if (daemon->randomsocks[i].refcount != 0 && 
+	poll_check(daemon->randomsocks[i].fd, POLLIN))
+      reply_query(daemon->randomsocks[i].fd, now);
+
+  /* Check overflow random sockets too. */
+  for (rfl = daemon->rfl_poll; rfl; rfl = rfl->next)
+    if (poll_check(rfl->rfd->fd, POLLIN))
+      reply_query(rfl->rfd->fd, now);
   
   for (listener = daemon->listeners; listener; listener = listener->next)
     {
diff --git a/src/dnsmasq.h b/src/dnsmasq.h
index 221f788..4beef35 100644
--- a/src/dnsmasq.h
+++ b/src/dnsmasq.h
@@ -521,13 +521,20 @@ struct serverfd {
 };
 
 struct randfd {
+  struct server *serv;
   int fd;
-  unsigned short refcount, family;
+  unsigned short refcount; /* refcount == 0xffff means overflow record. */
 };
-  
+
+struct randfd_list {
+  struct randfd *rfd;
+  struct randfd_list *next;
+};
+
 struct server {
   union mysockaddr addr, source_addr;
   char interface[IF_NAMESIZE+1];
+  unsigned int ifindex; /* corresponding to interface, above */
   struct serverfd *sfd; 
   char *domain; /* set if this server only handles a domain. */ 
   int flags, tcpfd, edns_pktsz;
@@ -640,10 +647,7 @@ struct frec {
     struct frec_src *next;
   } frec_src;
   struct server *sentto; /* NULL means free */
-  struct randfd *rfd4;
-#ifdef HAVE_IPV6
-  struct randfd *rfd6;
-#endif
+  struct randfd_list *rfds;
   unsigned short new_id;
   int forwardall, flags;
   time_t time;
@@ -1062,9 +1066,10 @@ extern struct daemon {
   int forwardcount;
   struct server *srv_save; /* Used for resend on DoD */
   size_t packet_len;       /*      "        "        */
-  struct randfd *rfd_save; /*      "        "        */
+  int    fd_save;          /*      "        "        */
   pid_t tcp_pids[MAX_PROCS];
   struct randfd randomsocks[RANDOM_SOCKS];
+  struct randfd_list *rfl_spare, *rfl_poll;
   int v6pktinfo; 
   struct addrlist *interface_addrs; /* list of all addresses/prefix lengths associated with all local interfaces */
   int log_id, log_display_id; /* ids of transactions for logging */
@@ -1227,7 +1232,7 @@ void safe_strncpy(char *dest, const char *src, size_t size);
 void safe_pipe(int *fd, int read_noblock);
 void *whine_malloc(size_t size);
 int sa_len(union mysockaddr *addr);
-int sockaddr_isequal(union mysockaddr *s1, union mysockaddr *s2);
+int sockaddr_isequal(const union mysockaddr *s1, const union mysockaddr *s2);
 int hostname_isequal(const char *a, const char *b);
 time_t dnsmasq_time(void);
 int netmask_length(struct in_addr mask);
@@ -1276,7 +1281,7 @@ char *parse_server(char *arg, union mysockaddr *addr,
 int option_read_dynfile(char *file, int flags);
 
 /* forward.c */
-void reply_query(int fd, int family, time_t now);
+void reply_query(int fd, time_t now);
 void receive_query(struct listener *listen, time_t now);
 unsigned char *tcp_request(int confd, time_t now,
 			   union mysockaddr *local_addr, struct in_addr netmask, int auth_dns);
@@ -1286,13 +1291,12 @@ int send_from(int fd, int nowild, char *packet, size_t len,
 	       union mysockaddr *to, struct all_addr *source,
 	       unsigned int iface);
 void resend_query(void);
-struct randfd *allocate_rfd(int family);
-void free_rfd(struct randfd *rfd);
+int allocate_rfd(struct randfd_list **fdlp, struct server *serv);
+void free_rfds(struct randfd_list **fdlp);
 
 /* network.c */
 int indextoname(int fd, int index, char *name);
 int local_bind(int fd, union mysockaddr *addr, char *intname, unsigned int ifindex, int is_tcp);
-int random_sock(int family);
 void pre_allocate_sfds(void);
 int reload_servers(char *fname);
 void mark_servers(int flag);
diff --git a/src/forward.c b/src/forward.c
index 82dd850..11e0310 100644
--- a/src/forward.c
+++ b/src/forward.c
@@ -16,7 +16,7 @@
 
 #include "dnsmasq.h"
 
-static struct frec *lookup_frec(unsigned short id, int fd, int family, void *hash);
+static struct frec *lookup_frec(unsigned short id, int fd, void *hash);
 static struct frec *lookup_frec_by_sender(unsigned short id,
 					  union mysockaddr *addr,
 					  void *hash);
@@ -291,29 +291,19 @@ static int forward_query(int udpfd, union mysockaddr *udpaddr,
 	  if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
 	    PUTSHORT(SAFE_PKTSZ, pheader);
 	  
-	  if (forward->sentto->addr.sa.sa_family == AF_INET) 
-	    log_query(F_NOEXTRA | F_DNSSEC | F_IPV4, "retry", (struct all_addr *)&forward->sentto->addr.in.sin_addr, "dnssec");
-#ifdef HAVE_IPV6
-	  else
-	    log_query(F_NOEXTRA | F_DNSSEC | F_IPV6, "retry", (struct all_addr *)&forward->sentto->addr.in6.sin6_addr, "dnssec");
-#endif
-  
-	  if (forward->sentto->sfd)
-	    fd = forward->sentto->sfd->fd;
-	  else
+	  if ((fd = allocate_rfd(&forward->rfds, forward->sentto)) != -1)
 	    {
+	      if (forward->sentto->addr.sa.sa_family == AF_INET) 
+		log_query(F_NOEXTRA | F_DNSSEC | F_IPV4, "retry", (struct all_addr *)&forward->sentto->addr.in.sin_addr, "dnssec");
 #ifdef HAVE_IPV6
-	      if (forward->sentto->addr.sa.sa_family == AF_INET6)
-		fd = forward->rfd6->fd;
 	      else
+		log_query(F_NOEXTRA | F_DNSSEC | F_IPV6, "retry", (struct all_addr *)&forward->sentto->addr.in6.sin6_addr, "dnssec");
 #endif
-		fd = forward->rfd4->fd;
+	      while (retry_send(sendto(fd, (char *)header, plen, 0,
+				       &forward->sentto->addr.sa,
+				       sa_len(&forward->sentto->addr))));
 	    }
-	  
-	  while (retry_send( sendto(fd, (char *)header, plen, 0,
-				    &forward->sentto->addr.sa,
-				    sa_len(&forward->sentto->addr))));
-	  
+
 	  return 1;
 	}
 #endif
@@ -490,50 +480,26 @@ static int forward_query(int udpfd, union mysockaddr *udpaddr,
       
       while (1)
 	{ 
+	  int fd;
+
 	  /* only send to servers dealing with our domain.
 	     domain may be NULL, in which case server->domain 
 	     must be NULL also. */
 	  
 	  if (type == (start->flags & SERV_TYPE) &&
 	      (type != SERV_HAS_DOMAIN || hostname_isequal(domain, start->domain)) &&
-	      !(start->flags & (SERV_LITERAL_ADDRESS | SERV_LOOP)))
+	      !(start->flags & (SERV_LITERAL_ADDRESS | SERV_LOOP)) &&
+	      ((fd = allocate_rfd(&forward->rfds, start)) != -1))
 	    {
-	      int fd;
-
-	      /* find server socket to use, may need to get random one. */
-	      if (start->sfd)
-		fd = start->sfd->fd;
-	      else 
-		{
-#ifdef HAVE_IPV6
-		  if (start->addr.sa.sa_family == AF_INET6)
-		    {
-		      if (!forward->rfd6 &&
-			  !(forward->rfd6 = allocate_rfd(AF_INET6)))
-			break;
-		      daemon->rfd_save = forward->rfd6;
-		      fd = forward->rfd6->fd;
-		    }
-		  else
-#endif
-		    {
-		      if (!forward->rfd4 &&
-			  !(forward->rfd4 = allocate_rfd(AF_INET)))
-			break;
-		      daemon->rfd_save = forward->rfd4;
-		      fd = forward->rfd4->fd;
-		    }
-
 #ifdef HAVE_CONNTRACK
-		  /* Copy connection mark of incoming query to outgoing connection. */
-		  if (option_bool(OPT_CONNTRACK))
-		    {
-		      unsigned int mark;
-		      if (get_incoming_mark(&forward->source, &forward->dest, 0, &mark))
-			setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
-		    }
-#endif
+	      /* Copy connection mark of incoming query to outgoing connection. */
+	      if (option_bool(OPT_CONNTRACK))
+		{
+		  unsigned int mark;
+		  if (get_incoming_mark(&forward->frec_src.source, &forward->frec_src.dest, 0, &mark))
+		    setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
 		}
+#endif
 	      
 #ifdef HAVE_DNSSEC
 	      if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER))
@@ -561,6 +527,7 @@ static int forward_query(int udpfd, union mysockaddr *udpaddr,
 		  /* Keep info in case we want to re-send this packet */
 		  daemon->srv_save = start;
 		  daemon->packet_len = plen;
+		  daemon->fd_save = fd;
 		  
 		  if (!gotname)
 		    strcpy(daemon->namebuff, "query");
@@ -579,7 +546,7 @@ static int forward_query(int udpfd, union mysockaddr *udpaddr,
 		    break;
 		  forward->forwardall++;
 		}
-	    } 
+	    }
 	  
 	  if (!(start = start->next))
  	    start = daemon->servers;
@@ -779,7 +746,7 @@ static size_t process_reply(struct dns_header *header, time_t now, struct server
 }
 
 /* sets new last_server */
-void reply_query(int fd, int family, time_t now)
+void reply_query(int fd, time_t now)
 {
   /* packet from peer server, extract data for cache, and send to
      original requester */
@@ -794,9 +761,8 @@ void reply_query(int fd, int family, time_t now)
 
   /* packet buffer overwritten */
   daemon->srv_save = NULL;
-  
+
   /* Determine the address of the server replying  so that we can mark that as good */
-  serveraddr.sa.sa_family = family;
 #ifdef HAVE_IPV6
   if (serveraddr.sa.sa_family == AF_INET6)
     serveraddr.in6.sin6_flowinfo = 0;
@@ -822,7 +788,7 @@ void reply_query(int fd, int family, time_t now)
 
   hash = hash_questions(header, n, daemon->namebuff);
   
-  if (!(forward = lookup_frec(ntohs(header->id), fd, family, hash)))
+  if (!(forward = lookup_frec(ntohs(header->id), fd, hash)))
     return;
   
   /* log_query gets called indirectly all over the place, so 
@@ -1027,9 +993,8 @@ void reply_query(int fd, int family, time_t now)
 			}
 		      
 		      new->sentto = server;
-		      new->rfd4 = NULL;
+		      new->rfds = NULL;
 #ifdef HAVE_IPV6
-		      new->rfd6 = NULL;
 #endif
 		      new->frec_src.next = NULL;
 		      new->flags &= ~(FREC_DNSKEY_QUERY | FREC_DS_QUERY);
@@ -1059,26 +1024,7 @@ void reply_query(int fd, int family, time_t now)
 		      /* Don't resend this. */
 		      daemon->srv_save = NULL;
 		      
-		      if (server->sfd)
-			fd = server->sfd->fd;
-		      else
-			{
-			  fd = -1;
-#ifdef HAVE_IPV6
-			  if (server->addr.sa.sa_family == AF_INET6)
-			    {
-			      if (new->rfd6 || (new->rfd6 = allocate_rfd(AF_INET6)))
-				fd = new->rfd6->fd;
-			    }
-			  else
-#endif
-			    {
-			      if (new->rfd4 || (new->rfd4 = allocate_rfd(AF_INET)))
-				fd = new->rfd4->fd;
-			    }
-			}
-		      
-		      if (fd != -1)
+		      if ((fd = allocate_rfd(&new->rfds, server)) != -1)
 			{
 #ifdef HAVE_CONNTRACK
 			  /* Copy connection mark of incoming query to outgoing connection. */
@@ -1234,7 +1180,7 @@ void receive_query(struct listener *listen, time_t now)
 
   /* packet buffer overwritten */
   daemon->srv_save = NULL;
-  
+
   dst_addr_4.s_addr = dst_addr.addr.addr4.s_addr = 0;
   netmask.s_addr = 0;
   
@@ -2066,10 +2012,9 @@ static struct frec *allocate_frec(time_t now)
       f->next = daemon->frec_list;
       f->time = now;
       f->sentto = NULL;
-      f->rfd4 = NULL;
+      f->rfds = NULL;
       f->flags = 0;
 #ifdef HAVE_IPV6
-      f->rfd6 = NULL;
 #endif
 #ifdef HAVE_DNSSEC
       f->dependent = NULL;
@@ -2082,46 +2027,192 @@ static struct frec *allocate_frec(time_t now)
   return f;
 }
 
-struct randfd *allocate_rfd(int family)
+/* return a UDP socket bound to a random port, have to cope with straying into
+   occupied port nos and reserved ones. */
+static int random_sock(struct server *s)
 {
-  static int finger = 0;
-  int i;
+  int fd;
+
+  if ((fd = socket(s->source_addr.sa.sa_family, SOCK_DGRAM, 0)) != -1)
+    {
+      if (local_bind(fd, &s->source_addr, s->interface, s->ifindex, 0))
+	return fd;
 
+      if (s->interface[0] == 0)
+	(void)prettyprint_addr(&s->source_addr, daemon->namebuff);
+      else
+	strcpy(daemon->namebuff, s->interface);
+
+      my_syslog(LOG_ERR, _("failed to bind server socket to %s: %s"),
+		daemon->namebuff, strerror(errno));
+      close(fd);
+    }
+  
+  return -1;
+}
+
+/* compare source addresses and interface, serv2 can be null. */
+static int server_isequal(const struct server *serv1,
+			 const struct server *serv2)
+{
+  return (serv2 &&
+    serv2->ifindex == serv1->ifindex &&
+    sockaddr_isequal(&serv2->source_addr, &serv1->source_addr) &&
+    strncmp(serv2->interface, serv1->interface, IF_NAMESIZE) == 0);
+}
+
+/* fdlp points to chain of randomfds already in use by transaction.
+   If there's already a suitable one, return it, else allocate a 
+   new one and add it to the list. 
+
+   Not leaking any resources in the face of allocation failures
+   is rather convoluted here.
+   
+   Note that rfd->serv may be NULL, when a server goes away.
+*/
+int allocate_rfd(struct randfd_list **fdlp, struct server *serv)
+{
+  static int finger = 0;
+  int i, j = 0;
+  struct randfd_list *rfl;
+  struct randfd *rfd = NULL;
+  int fd = 0;
+  
+  /* If server has a pre-allocated fd, use that. */
+  if (serv->sfd)
+    return serv->sfd->fd;
+  
+  /* existing suitable random port socket linked to this transaction? */
+  for (rfl = *fdlp; rfl; rfl = rfl->next)
+    if (server_isequal(serv, rfl->rfd->serv))
+      return rfl->rfd->fd;
+
+  /* No. need new link. */
+  if ((rfl = daemon->rfl_spare))
+    daemon->rfl_spare = rfl->next;
+  else if (!(rfl = whine_malloc(sizeof(struct randfd_list))))
+    return -1;
+   
   /* limit the number of sockets we have open to avoid starvation of 
      (eg) TFTP. Once we have a reasonable number, randomness should be OK */
-
   for (i = 0; i < RANDOM_SOCKS; i++)
     if (daemon->randomsocks[i].refcount == 0)
       {
-	if ((daemon->randomsocks[i].fd = random_sock(family)) == -1)
-	  break;
-      
-	daemon->randomsocks[i].refcount = 1;
-	daemon->randomsocks[i].family = family;
-	return &daemon->randomsocks[i];
+	if ((fd = random_sock(serv)) != -1)
+    	  {
+	    rfd = &daemon->randomsocks[i];
+	    rfd->serv = serv;
+	    rfd->fd = fd;
+	    rfd->refcount = 1;
+	  }
+	break;
       }
-
+  
   /* No free ones or cannot get new socket, grab an existing one */
-  for (i = 0; i < RANDOM_SOCKS; i++)
+  if (!rfd)
+    for (j = 0; j < RANDOM_SOCKS; j++)
+      {
+	i = (j + finger) % RANDOM_SOCKS;
+	if (daemon->randomsocks[i].refcount != 0 &&
+	    server_isequal(serv, daemon->randomsocks[i].serv) &&
+	    daemon->randomsocks[i].refcount != 0xfffe)
+	  {
+	    finger = i + 1;
+	    rfd = &daemon->randomsocks[i];
+	    rfd->refcount++;
+	    break;
+	  }
+      }
+
+  if (j == RANDOM_SOCKS)
     {
-      int j = (i+finger) % RANDOM_SOCKS;
-      if (daemon->randomsocks[j].refcount != 0 &&
-	  daemon->randomsocks[j].family == family && 
-	  daemon->randomsocks[j].refcount != 0xffff)
+      struct randfd_list *rfl_poll;
+
+      /* there are no free slots, and none with the same parameters we can piggy-back on. 
+	 We're going to have to allocate a new temporary record, distinguished by
+	 refcount == 0xffff. This will exist in the frec randfd list, never be shared,
+	 and be freed when no longer in use. It will also be held on 
+	 the daemon->rfl_poll list so the poll system can find it. */
+
+      if ((rfl_poll = daemon->rfl_spare))
+	daemon->rfl_spare = rfl_poll->next;
+      else
+	rfl_poll = whine_malloc(sizeof(struct randfd_list));
+      
+      if (!rfl_poll ||
+	  !(rfd = whine_malloc(sizeof(struct randfd))) ||
+	  (fd = random_sock(serv)) == -1)
 	{
-	  finger = j;
-	  daemon->randomsocks[j].refcount++;
-	  return &daemon->randomsocks[j];
+	  
+	  /* Don't leak anything we may already have */
+	  rfl->next = daemon->rfl_spare;
+	  daemon->rfl_spare = rfl;
+
+	  if (rfl_poll)
+	    {
+	      rfl_poll->next = daemon->rfl_spare;
+	      daemon->rfl_spare = rfl_poll;
+	    }
+	  
+	  if (rfd)
+	    free(rfd);
+	  
+	  return -1; /* doom */
 	}
-    }
 
-  return NULL; /* doom */
+      /* Note rfd->serv not set here, since it's not reused */
+      rfd->fd = fd;
+      rfd->refcount = 0xffff; /* marker for temp record */
+
+      rfl_poll->rfd = rfd;
+      rfl_poll->next = daemon->rfl_poll;
+      daemon->rfl_poll = rfl_poll;
+    }
+  
+  rfl->rfd = rfd;
+  rfl->next = *fdlp;
+  *fdlp = rfl;
+  
+  return rfl->rfd->fd;
 }
 
-void free_rfd(struct randfd *rfd)
+void free_rfds(struct randfd_list **fdlp)
 {
-  if (rfd && --(rfd->refcount) == 0)
-    close(rfd->fd);
+  struct randfd_list *tmp, *rfl, *poll, *next, **up;
+  
+  for (rfl = *fdlp; rfl; rfl = tmp)
+    {
+      if (rfl->rfd->refcount == 0xffff || --(rfl->rfd->refcount) == 0)
+	close(rfl->rfd->fd);
+
+      /* temporary overflow record */
+      if (rfl->rfd->refcount == 0xffff)
+	{
+	  free(rfl->rfd);
+	  
+	  /* go through the link of all these by steam to delete.
+	     This list is expected to be almost always empty. */
+	  for (poll = daemon->rfl_poll, up = &daemon->rfl_poll; poll; poll = next)
+	    {
+	      next = poll->next;
+	      
+	      if (poll->rfd == rfl->rfd)
+		{
+		  *up = poll->next;
+		  poll->next = daemon->rfl_spare;
+		  daemon->rfl_spare = poll;
+		}
+	      else
+		up = &poll->next;
+	    }
+	}
+
+      tmp = rfl->next;
+      rfl->next = daemon->rfl_spare;
+      daemon->rfl_spare = rfl;
+    }
+
+  *fdlp = NULL;
 }
 
 static void free_frec(struct frec *f)
@@ -2137,14 +2228,11 @@ static void free_frec(struct frec *f)
     }
     
   f->frec_src.next = NULL;    
-  free_rfd(f->rfd4);
-  f->rfd4 = NULL;
+  free_rfds(&f->rfds);
   f->sentto = NULL;
   f->flags = 0;
   
 #ifdef HAVE_IPV6
-  free_rfd(f->rfd6);
-  f->rfd6 = NULL;
 #endif
 
 #ifdef HAVE_DNSSEC
@@ -2252,26 +2340,39 @@ struct frec *get_new_frec(time_t now, int *wait, int force)
 }
 
 /* crc is all-ones if not known. */
-static struct frec *lookup_frec(unsigned short id, int fd, int family, void *hash)
+static struct frec *lookup_frec(unsigned short id, int fd, void *hash)
 {
   struct frec *f;
-
+  struct server *s;
+  int type;
+  struct randfd_list *fdl;
+  
   for(f = daemon->frec_list; f; f = f->next)
     if (f->sentto && f->new_id == id && 
 	(memcmp(hash, f->hash, HASH_SIZE) == 0))
       {
 	/* sent from random port */
-	if (family == AF_INET && f->rfd4 && f->rfd4->fd == fd)
-	  return f;
-
-	if (family == AF_INET6 && f->rfd6 && f->rfd6->fd == fd)
-	  return f;
-
-	/* sent to upstream from bound socket. */
-	if (f->sentto->sfd && f->sentto->sfd->fd == fd)
+	for (fdl = f->rfds; fdl; fdl = fdl->next)
+	  if (fdl->rfd->fd == fd)
 	  return f;
+	
+	/* Sent to upstream from socket associated with a server. 
+	   Note we have to iterate over all the possible servers, since they may
+	   have different bound sockets. */
+	type = f->sentto->flags & SERV_TYPE;
+	s = f->sentto;
+	do {
+	  if ((type == (s->flags & SERV_TYPE)) &&
+	      (type != SERV_HAS_DOMAIN ||
+	       (s->domain && hostname_isequal(f->sentto->domain, s->domain))) &&
+	      !(s->flags & (SERV_LITERAL_ADDRESS | SERV_LOOP)) &&
+	      s->sfd && s->sfd->fd == fd)
+	    return f;
+	  
+	  s = s->next ? s->next : daemon->servers;
+	} while (s != f->sentto);
       }
-      
+
   return NULL;
 }
 
@@ -2317,31 +2418,27 @@ static struct frec *lookup_frec_by_query(void *hash, unsigned int flags)
 void resend_query()
 {
   if (daemon->srv_save)
-    {
-      int fd;
-      
-      if (daemon->srv_save->sfd)
-	fd = daemon->srv_save->sfd->fd;
-      else if (daemon->rfd_save && daemon->rfd_save->refcount != 0)
-	fd = daemon->rfd_save->fd;
-      else
-	return;
-      
-      while(retry_send(sendto(fd, daemon->packet, daemon->packet_len, 0,
-			      &daemon->srv_save->addr.sa, 
-			      sa_len(&daemon->srv_save->addr)))); 
-    }
+    while(retry_send(sendto(daemon->fd_save, daemon->packet, daemon->packet_len, 0,
+			    &daemon->srv_save->addr.sa, 
+			    sa_len(&daemon->srv_save->addr)))); 
 }
 
 /* A server record is going away, remove references to it */
 void server_gone(struct server *server)
 {
   struct frec *f;
+  int i;
   
   for (f = daemon->frec_list; f; f = f->next)
     if (f->sentto && f->sentto == server)
       free_frec(f);
-  
+
+  /* If any random socket refers to this server, NULL the reference.
+     No more references to the socket will be created in the future. */
+  for (i = 0; i < RANDOM_SOCKS; i++)
+    if (daemon->randomsocks[i].refcount != 0 && daemon->randomsocks[i].serv == server)
+      daemon->randomsocks[i].serv = NULL;
+
   if (daemon->last_server == server)
     daemon->last_server = NULL;
 
diff --git a/src/loop.c b/src/loop.c
index 0b47a2f..98d0b9e 100644
--- a/src/loop.c
+++ b/src/loop.c
@@ -22,6 +22,7 @@ static ssize_t loop_make_probe(u32 uid);
 void loop_send_probes()
 {
    struct server *serv;
+   struct randfd_list *rfds = NULL;
    
    if (!option_bool(OPT_LOOP_DETECT))
      return;
@@ -34,29 +35,22 @@ void loop_send_probes()
        {
 	 ssize_t len = loop_make_probe(serv->uid);
 	 int fd;
-	 struct randfd *rfd = NULL;
 	 
-	 if (serv->sfd)
-	   fd = serv->sfd->fd;
-	 else 
-	   {
-	     if (!(rfd = allocate_rfd(serv->addr.sa.sa_family)))
-	       continue;
-	     fd = rfd->fd;
-	   }
-
+	 if ((fd = allocate_rfd(&rfds, serv)) == -1)
+	   continue;
+	 
 	 while (retry_send(sendto(fd, daemon->packet, len, 0, 
 				  &serv->addr.sa, sa_len(&serv->addr))));
-	 
-	 free_rfd(rfd);
        }
+
+   free_rfds(&rfds);
 }
   
 static ssize_t loop_make_probe(u32 uid)
 {
   struct dns_header *header = (struct dns_header *)daemon->packet;
   unsigned char *p = (unsigned char *)(header+1);
-
+  
   /* packet buffer overwritten */
   daemon->srv_save = NULL;
   
diff --git a/src/network.c b/src/network.c
index 47caf38..4eda1fd 100644
--- a/src/network.c
+++ b/src/network.c
@@ -639,7 +639,8 @@ int enumerate_interfaces(int reset)
 #ifdef HAVE_AUTH
   struct auth_zone *zone;
 #endif
-
+  struct server *serv;
+  
   /* Do this max once per select cycle  - also inhibits netlink socket use
    in TCP child processes. */
 
@@ -657,6 +658,13 @@ int enumerate_interfaces(int reset)
   if ((param.fd = socket(PF_INET, SOCK_DGRAM, 0)) == -1)
     return 0;
 
+  /* iface indexes can change when interfaces are created/destroyed.
+     We use them in the main forwarding control path, when the path
+     to a server is specified by an interface, so cache them.
+     Update the cache here. */
+  for (serv = daemon->servers; serv; serv = serv->next)
+    serv->ifindex = if_nametoindex(serv->interface);
+
 again:
   /* Mark interfaces for garbage collection */
   for (iface = daemon->interfaces; iface; iface = iface->next) 
@@ -754,7 +762,7 @@ again:
 
   errno = errsave;
   spare = param.spare;
-    
+  
   return ret;
 }
 
@@ -893,10 +901,10 @@ int tcp_interface(int fd, int af)
   /* use mshdr so that the CMSDG_* macros are available */
   msg.msg_control = daemon->packet;
   msg.msg_controllen = len = daemon->packet_buff_sz;
-  
+
   /* we overwrote the buffer... */
-  daemon->srv_save = NULL;
-  
+  daemon->srv_save = NULL; 
+
   if (af == AF_INET)
     {
       if (setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &opt, sizeof(opt)) != -1 &&
@@ -1228,61 +1236,6 @@ void join_multicast(int dienow)
 }
 #endif
 
-/* return a UDP socket bound to a random port, have to cope with straying into
-   occupied port nos and reserved ones. */
-int random_sock(int family)
-{
-  int fd;
-
-  if ((fd = socket(family, SOCK_DGRAM, 0)) != -1)
-    {
-      union mysockaddr addr;
-      unsigned int ports_avail = ((unsigned short)daemon->max_port - (unsigned short)daemon->min_port) + 1;
-      int tries = ports_avail < 30 ? 3 * ports_avail : 100;
-
-      memset(&addr, 0, sizeof(addr));
-      addr.sa.sa_family = family;
-
-      /* don't loop forever if all ports in use. */
-
-      if (fix_fd(fd))
-	while(tries--)
-	  {
-	    unsigned short port = htons(daemon->min_port + (rand16() % ((unsigned short)ports_avail)));
-	    
-	    if (family == AF_INET) 
-	      {
-		addr.in.sin_addr.s_addr = INADDR_ANY;
-		addr.in.sin_port = port;
-#ifdef HAVE_SOCKADDR_SA_LEN
-		addr.in.sin_len = sizeof(struct sockaddr_in);
-#endif
-	      }
-#ifdef HAVE_IPV6
-	    else
-	      {
-		addr.in6.sin6_addr = in6addr_any; 
-		addr.in6.sin6_port = port;
-#ifdef HAVE_SOCKADDR_SA_LEN
-		addr.in6.sin6_len = sizeof(struct sockaddr_in6);
-#endif
-	      }
-#endif
-	    
-	    if (bind(fd, (struct sockaddr *)&addr, sa_len(&addr)) == 0)
-	      return fd;
-	    
-	    if (errno != EADDRINUSE && errno != EACCES)
-	      break;
-	  }
-
-      close(fd);
-    }
-
-  return -1; 
-}
-  
-
 int local_bind(int fd, union mysockaddr *addr, char *intname, unsigned int ifindex, int is_tcp)
 {
   union mysockaddr addr_copy = *addr;
@@ -1328,39 +1281,34 @@ int local_bind(int fd, union mysockaddr *addr, char *intname, unsigned int ifind
   return 1;
 }
 
-static struct serverfd *allocate_sfd(union mysockaddr *addr, char *intname)
+static struct serverfd *allocate_sfd(union mysockaddr *addr, char *intname, unsigned int ifindex)
 {
   struct serverfd *sfd;
-  unsigned int ifindex = 0;
   int errsave;
 
   /* when using random ports, servers which would otherwise use
-     the INADDR_ANY/port0 socket have sfd set to NULL */
-  if (!daemon->osport && intname[0] == 0)
+     the INADDR_ANY/port0 socket have sfd set to NULL, this is 
+     anything without an explicitly set source port. */
+  if (!daemon->osport)
     {
       errno = 0;
       
       if (addr->sa.sa_family == AF_INET &&
-	  addr->in.sin_addr.s_addr == INADDR_ANY &&
 	  addr->in.sin_port == htons(0)) 
 	return NULL;
 
 #ifdef HAVE_IPV6
       if (addr->sa.sa_family == AF_INET6 &&
-	  memcmp(&addr->in6.sin6_addr, &in6addr_any, sizeof(in6addr_any)) == 0 &&
 	  addr->in6.sin6_port == htons(0)) 
 	return NULL;
 #endif
     }
 
-  if (intname && strlen(intname) != 0)
-    ifindex = if_nametoindex(intname); /* index == 0 when not binding to an interface */
-      
   /* may have a suitable one already */
   for (sfd = daemon->sfds; sfd; sfd = sfd->next )
-    if (sockaddr_isequal(&sfd->source_addr, addr) &&
-	strcmp(intname, sfd->interface) == 0 &&
-	ifindex == sfd->ifindex) 
+    if (ifindex == sfd->ifindex &&
+	sockaddr_isequal(&sfd->source_addr, addr) &&
+	strcmp(intname, sfd->interface) == 0)
       return sfd;
   
   /* need to make a new one. */
@@ -1408,7 +1356,7 @@ void pre_allocate_sfds(void)
 #ifdef HAVE_SOCKADDR_SA_LEN
       addr.in.sin_len = sizeof(struct sockaddr_in);
 #endif
-      allocate_sfd(&addr, "");
+      allocate_sfd(&addr, "", 0);
 #ifdef HAVE_IPV6
       memset(&addr, 0, sizeof(addr));
       addr.in6.sin6_family = AF_INET6;
@@ -1417,13 +1365,13 @@ void pre_allocate_sfds(void)
 #ifdef HAVE_SOCKADDR_SA_LEN
       addr.in6.sin6_len = sizeof(struct sockaddr_in6);
 #endif
-      allocate_sfd(&addr, "");
+      allocate_sfd(&addr, "", 0);
 #endif
     }
   
   for (srv = daemon->servers; srv; srv = srv->next)
     if (!(srv->flags & (SERV_LITERAL_ADDRESS | SERV_NO_ADDR | SERV_USE_RESOLV | SERV_NO_REBIND)) &&
-	!allocate_sfd(&srv->source_addr, srv->interface) &&
+	!allocate_sfd(&srv->source_addr, srv->interface, srv->ifindex) &&
 	errno != 0 &&
 	option_bool(OPT_NOWILD))
       {
@@ -1631,7 +1579,7 @@ void check_servers(void)
 	  
 	  /* Do we need a socket set? */
 	  if (!serv->sfd && 
-	      !(serv->sfd = allocate_sfd(&serv->source_addr, serv->interface)) &&
+	      !(serv->sfd = allocate_sfd(&serv->source_addr, serv->interface, serv->ifindex)) &&
 	      errno != 0)
 	    {
 	      my_syslog(LOG_WARNING, 
diff --git a/src/option.c b/src/option.c
index 79122df..abc5a48 100644
--- a/src/option.c
+++ b/src/option.c
@@ -795,7 +795,8 @@ char *parse_server(char *arg, union mysockaddr *addr, union mysockaddr *source_a
     if (interface_opt)
       {
 #if defined(SO_BINDTODEVICE)
-	safe_strncpy(interface, interface_opt, IF_NAMESIZE);
+	safe_strncpy(interface, source, IF_NAMESIZE);
+	source = interface_opt;
 #else
 	return _("interface binding not supported");
 #endif
diff --git a/src/tftp.c b/src/tftp.c
index f2eccbc..ba9833e 100644
--- a/src/tftp.c
+++ b/src/tftp.c
@@ -96,7 +96,7 @@ void tftp_request(struct listener *listen, time_t now)
 
   if ((len = recvmsg(listen->tftpfd, &msg, 0)) < 2)
     return;
-
+  
   /* Can always get recvd interface for IPv6 */
   if (!check_dest)
     {
@@ -566,7 +566,7 @@ void check_tftp_listeners(time_t now)
 	{
 	  /* we overwrote the buffer... */
 	  daemon->srv_save = NULL;
-	  
+
 	  if ((len = recv(transfer->sockfd, daemon->packet, daemon->packet_buff_sz, 0)) >= (ssize_t)sizeof(struct ack))
 	    {
 	      if (ntohs(mess->op) == OP_ACK && ntohs(mess->block) == (unsigned short)transfer->block) 
@@ -609,7 +609,7 @@ void check_tftp_listeners(time_t now)
 	  	  
 	  /* we overwrote the buffer... */
 	  daemon->srv_save = NULL;
-	 
+
 	  if ((len = get_block(daemon->packet, transfer)) == -1)
 	    {
 	      len = tftp_err_oops(daemon->packet, transfer->file->filename);
diff --git a/src/util.c b/src/util.c
index 6287529..d016db6 100644
--- a/src/util.c
+++ b/src/util.c
@@ -311,7 +311,7 @@ void *whine_malloc(size_t size)
   return ret;
 }
 
-int sockaddr_isequal(union mysockaddr *s1, union mysockaddr *s2)
+int sockaddr_isequal(const union mysockaddr *s1, const union mysockaddr *s2)
 {
   if (s1->sa.sa_family == s2->sa.sa_family)
     { 
-- 
2.26.2