Blob Blame History Raw
From f7882fef99e90bb2106f0458b407c7914bc87034 Mon Sep 17 00:00:00 2001
From: Thomas Haller <thaller@redhat.com>
Date: Wed, 30 Nov 2016 15:09:41 +0100
Subject: [PATCH 1/1] lib: use MSG_PEEK by default for nl_recvmsgs()

The MSG_PEEK API of recvmsg() should be avoided because it requires an
additional syscall. However, it is worse to choose a buffer size that is too
small and fail to receive the message.

A user who is aware of the issue can make the choice explicit by calling
nl_socket_disable_msg_peek()/nl_socket_enable_msg_peek(), or can avoid the
default MSG_PEEK by setting a non-zero buffer size via
nl_socket_set_msg_buf_size().

By default however we now use MSG_PEEK. This is more important since commit
90c6ebec9bd7a where the link dump request can be rather large.

Signed-off-by: Thomas Haller <thaller@redhat.com>
(cherry picked from commit 55ea6e6b6cd805f441b410971c9dd7575e783ef4)
---
 include/netlink-private/types.h |  3 ++-
 include/netlink/utils.h         | 15 +++++++++++++++
 lib/nl.c                        |  5 +++--
 lib/socket.c                    | 20 +++++++++++++++++++-
 lib/utils.c                     |  2 +-
 5 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/include/netlink-private/types.h b/include/netlink-private/types.h
index 0f67ddd..f1467cc 100644
--- a/include/netlink-private/types.h
+++ b/include/netlink-private/types.h
@@ -27,7 +27,8 @@
 #define NL_SOCK_PASSCRED	(1<<1)
 #define NL_OWN_PORT		(1<<2)
 #define NL_MSG_PEEK		(1<<3)
-#define NL_NO_AUTO_ACK		(1<<4)
+#define NL_MSG_PEEK_EXPLICIT	(1<<4)
+#define NL_NO_AUTO_ACK		(1<<5)
 
 #define NL_MSG_CRED_PRESENT 1
 
diff --git a/include/netlink/utils.h b/include/netlink/utils.h
index 1115bb4..2273835 100644
--- a/include/netlink/utils.h
+++ b/include/netlink/utils.h
@@ -224,6 +224,21 @@ enum {
 	NL_CAPABILITY_RTNL_ADDR_PEER_ID_FIX = 20,
 #define NL_CAPABILITY_RTNL_ADDR_PEER_ID_FIX NL_CAPABILITY_RTNL_ADDR_PEER_ID_FIX
 
+	/* Older versions of libnl3 would not use MSG_PEEK for nl_recvmsgs() unless calling
+	 * nl_socket_enable_msg_peek(). Instead, the user had to specify the buffer size via
+	 * nl_socket_set_msg_buf_size(), which in turn would default to 4*getpagesize().
+	 *
+	 * The default value might not be large enough, so users who were not aware of the
+	 * problem easily ended up using a too small receive buffer. Usually, one wants to
+	 * avoid MSG_PEEK for recvmsg() because it requires an additional syscall.
+	 *
+	 * Now, as indicated by this capability, nl_recvmsgs() would use MSG_PEEK by default. The
+	 * user still can explicitly disable MSG_PEEK by calling nl_socket_disable_msg_peek() or
+	 * by setting the nl_socket_set_msg_buf_size() to a non-zero value.
+	 */
+	NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT = 24,
+#define NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT
+
 	__NL_CAPABILITY_MAX,
 	NL_CAPABILITY_MAX = (__NL_CAPABILITY_MAX - 1),
 #define NL_CAPABILITY_MAX NL_CAPABILITY_MAX
diff --git a/lib/nl.c b/lib/nl.c
index 2d1ce81..8df08e6 100644
--- a/lib/nl.c
+++ b/lib/nl.c
@@ -676,7 +676,8 @@ int nl_recv(struct nl_sock *sk, struct sockaddr_nl *nla,
 	if (!buf || !nla)
 		return -NLE_INVAL;
 
-	if (sk->s_flags & NL_MSG_PEEK)
+	if (   (sk->s_flags & NL_MSG_PEEK)
+	    || (!(sk->s_flags & NL_MSG_PEEK_EXPLICIT) && sk->s_bufsize == 0))
 		flags |= MSG_PEEK | MSG_TRUNC;
 
 	if (page_size == 0)
@@ -742,7 +743,7 @@ retry:
 		void *tmp;
 
 		/* respond with error to an incomplete message */
-		if (!(sk->s_flags & NL_MSG_PEEK)) {
+		if (flags == 0) {
 			retval = -NLE_MSG_TRUNC;
 			goto abort;
 		}
diff --git a/lib/socket.c b/lib/socket.c
index 109c416..99e1a1b 100644
--- a/lib/socket.c
+++ b/lib/socket.c
@@ -722,18 +722,23 @@ int nl_socket_set_nonblocking(const struct nl_sock *sk)
 /**
  * Enable use of MSG_PEEK when reading from socket
  * @arg sk		Netlink socket.
+ *
+ * See also NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT capability
  */
 void nl_socket_enable_msg_peek(struct nl_sock *sk)
 {
-	sk->s_flags |= NL_MSG_PEEK;
+	sk->s_flags |= (NL_MSG_PEEK | NL_MSG_PEEK_EXPLICIT);
 }
 
 /**
  * Disable use of MSG_PEEK when reading from socket
  * @arg sk		Netlink socket.
+ *
+ * See also NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT capability
  */
 void nl_socket_disable_msg_peek(struct nl_sock *sk)
 {
+	sk->s_flags |= NL_MSG_PEEK_EXPLICIT;
 	sk->s_flags &= ~NL_MSG_PEEK;
 }
 
@@ -853,6 +858,19 @@ int nl_socket_set_buffer_size(struct nl_sock *sk, int rxbuf, int txbuf)
  * socket will be able to receive. It is generally recommneded to specify
  * a buffer size no less than the size of a memory page.
  *
+ * Setting the @bufsize to zero means to use a default of 4 times getpagesize().
+ *
+ * When MSG_PEEK is enabled, the buffer size is used for the initial choice
+ * of the buffer while peeking. It still makes sense to choose an optimal value
+ * to avoid realloc().
+ *
+ * When MSG_PEEK is disabled, the buffer size is important because a too small
+ * size will lead to failure of receiving the message via nl_recvmsgs().
+ *
+ * By default, MSG_PEEK is enabled unless the user calls either nl_socket_disable_msg_peek()/
+ * nl_socket_enable_msg_peek() or sets the message buffer size to a positive value.
+ * See capability NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT for that.
+ *
  * @return 0 on success or a negative error code.
  */
 int nl_socket_set_msg_buf_size(struct nl_sock *sk, size_t bufsize)
diff --git a/lib/utils.c b/lib/utils.c
index 0f2a252..d4c0413 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -1168,7 +1168,7 @@ int nl_has_capability (int capability)
 			0,
 			0,
 			0,
-			0),
+			NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT),
 		/* IMPORTANT: these capability numbers are intended to be universal and stable
 		 * for libnl3. Don't allocate new numbers on your own that differ from upstream
 		 * libnl3.
-- 
2.9.3