db4030
From a6331686a8e3a5a2b0d1313de84978cd6d9ef65c Mon Sep 17 00:00:00 2001
9abc64
From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= <michal@isc.org>
9abc64
Date: Thu, 31 Oct 2019 08:48:35 +0100
9abc64
Subject: [PATCH] Prevent query loops for misbehaving servers
9abc64
9abc64
If a TCP connection fails while attempting to send a query to a server,
9abc64
the fetch context will be restarted without marking the target server as
9abc64
a bad one.  If this happens for a server which:
9abc64
9abc64
  - was already marked with the DNS_FETCHOPT_EDNS512 flag,
9abc64
  - responds to EDNS queries with the UDP payload size set to 512 bytes,
9abc64
  - does not send response packets larger than 512 bytes,
9abc64
9abc64
and the response for the query being sent is larger than 512 byes, then
9abc64
named will pointlessly alternate between sending UDP queries with EDNS
9abc64
UDP payload size set to 512 bytes (which are responded to with truncated
9abc64
answers) and TCP connections until the fetch context retry limit is
9abc64
reached.  Prevent such query loops by marking the server as bad for a
9abc64
given fetch context if the advertised EDNS UDP payload size for that
9abc64
server gets reduced to 512 bytes and it is impossible to reach it using
9abc64
TCP.
9abc64
9abc64
(cherry picked from commit 6cd115994e0d10631172c56a7dab1ace83e946b4)
9abc64
---
9abc64
 bin/tests/system/legacy/tests.sh | 11 +++++++++++
9abc64
 lib/dns/resolver.c               | 13 +++++++++++++
9abc64
 2 files changed, 24 insertions(+)
9abc64
9abc64
diff --git a/bin/tests/system/legacy/tests.sh b/bin/tests/system/legacy/tests.sh
db4030
index c4356f2..7c30dcb 100755
9abc64
--- a/bin/tests/system/legacy/tests.sh
9abc64
+++ b/bin/tests/system/legacy/tests.sh
9abc64
@@ -142,6 +142,17 @@ grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
9abc64
 if [ $ret != 0 ]; then echo_i "failed"; fi
9abc64
 status=`expr $status + $ret`
9abc64
 
9abc64
+n=`expr $n + 1`
9abc64
+echo_i "checking recursive lookup to edns 512 + no tcp server does not cause query loops ($n)"
9abc64
+ret=0
9abc64
+sent=`grep -c -F "sending packet to 10.53.0.7" ns1/named.run`
9abc64
+if [ $sent -ge 10 ]; then
9abc64
+	echo_i "ns1 sent $sent queries to ns7, expected less than 10"
9abc64
+	ret=1
9abc64
+fi
9abc64
+if [ $ret != 0 ]; then echo_i "failed"; fi
9abc64
+status=`expr $status + $ret`
9abc64
+
9abc64
 if $SHELL ../testcrypto.sh > /dev/null 2>&1
9abc64
 then
9abc64
     $PERL $SYSTEMTESTTOP/stop.pl . ns1
9abc64
diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c
db4030
index 0384b15..36f6b6c 100644
9abc64
--- a/lib/dns/resolver.c
9abc64
+++ b/lib/dns/resolver.c
db4030
@@ -2744,10 +2744,22 @@ resquery_connected(isc_task_t *task, isc_event_t *event) {
9abc64
 			 * No route to remote.
9abc64
 			 */
9abc64
 			isc_socket_detach(&query->tcpsocket);
9abc64
+			/*
9abc64
+			 * Do not query this server again in this fetch context
9abc64
+			 * if we already tried reducing the advertised EDNS UDP
9abc64
+			 * payload size to 512 bytes and the server is
9abc64
+			 * unavailable over TCP.  This prevents query loops
9abc64
+			 * lasting until the fetch context restart limit is
9abc64
+			 * reached when attempting to get answers whose size
9abc64
+			 * exceeds 512 bytes from broken servers.
9abc64
+			 */
9abc64
+			if ((query->options & DNS_FETCHOPT_EDNS512) != 0) {
9abc64
+				add_bad(fctx, query->addrinfo, sevent->result,
9abc64
+					badns_unreachable);
9abc64
+			}
9abc64
 			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE, ISC_FALSE);
9abc64
 			retry = ISC_TRUE;
9abc64
 			break;
db4030
-
db4030
 		default:
db4030
 			FCTXTRACE3("query canceled in connected() due to "
db4030
 				   "unexpected event result; responding",