|
|
bc8210 |
From f7882fef99e90bb2106f0458b407c7914bc87034 Mon Sep 17 00:00:00 2001
|
|
|
bc8210 |
From: Thomas Haller <thaller@redhat.com>
|
|
|
bc8210 |
Date: Wed, 30 Nov 2016 15:09:41 +0100
|
|
|
bc8210 |
Subject: [PATCH 1/1] lib: use MSG_PEEK by default for nl_recvmsgs()
|
|
|
bc8210 |
|
|
|
bc8210 |
The MSG_PEEK API of recvmsg() should be avoided because it requires an additional
|
|
|
bc8210 |
syscall. But worse is to choose a too small buffer size and fail to receive
|
|
|
bc8210 |
the message.
|
|
|
bc8210 |
|
|
|
bc8210 |
A user who is aware of the issue can avoid MSG_PEEK by either calling
|
|
|
bc8210 |
nl_socket_disable_msg_peek()/nl_socket_enable_msg_peek() or by setting
|
|
|
bc8210 |
a buffer size via nl_socket_set_msg_buf_size().
|
|
|
bc8210 |
|
|
|
bc8210 |
By default however we now use MSG_PEEK. This is more important since commit
|
|
|
bc8210 |
90c6ebec9bd7a where the link dump request can be rather large.
|
|
|
bc8210 |
|
|
|
bc8210 |
Signed-off-by: Thomas Haller <thaller@redhat.com>
|
|
|
bc8210 |
(cherry picked from commit 55ea6e6b6cd805f441b410971c9dd7575e783ef4)
|
|
|
bc8210 |
---
|
|
|
bc8210 |
include/netlink-private/types.h | 3 ++-
|
|
|
bc8210 |
include/netlink/utils.h | 15 +++++++++++++++
|
|
|
bc8210 |
lib/nl.c | 5 +++--
|
|
|
bc8210 |
lib/socket.c | 20 +++++++++++++++++++-
|
|
|
bc8210 |
lib/utils.c | 2 +-
|
|
|
bc8210 |
5 files changed, 40 insertions(+), 5 deletions(-)
|
|
|
bc8210 |
|
|
|
bc8210 |
diff --git a/include/netlink-private/types.h b/include/netlink-private/types.h
|
|
|
bc8210 |
index 0f67ddd..f1467cc 100644
|
|
|
bc8210 |
--- a/include/netlink-private/types.h
|
|
|
bc8210 |
+++ b/include/netlink-private/types.h
|
|
|
bc8210 |
@@ -27,7 +27,8 @@
|
|
|
bc8210 |
#define NL_SOCK_PASSCRED (1<<1)
|
|
|
bc8210 |
#define NL_OWN_PORT (1<<2)
|
|
|
bc8210 |
#define NL_MSG_PEEK (1<<3)
|
|
|
bc8210 |
-#define NL_NO_AUTO_ACK (1<<4)
|
|
|
bc8210 |
+#define NL_MSG_PEEK_EXPLICIT (1<<4)
|
|
|
bc8210 |
+#define NL_NO_AUTO_ACK (1<<5)
|
|
|
bc8210 |
|
|
|
bc8210 |
#define NL_MSG_CRED_PRESENT 1
|
|
|
bc8210 |
|
|
|
bc8210 |
diff --git a/include/netlink/utils.h b/include/netlink/utils.h
|
|
|
bc8210 |
index 1115bb4..2273835 100644
|
|
|
bc8210 |
--- a/include/netlink/utils.h
|
|
|
bc8210 |
+++ b/include/netlink/utils.h
|
|
|
bc8210 |
@@ -224,6 +224,21 @@ enum {
|
|
|
bc8210 |
NL_CAPABILITY_RTNL_ADDR_PEER_ID_FIX = 20,
|
|
|
bc8210 |
#define NL_CAPABILITY_RTNL_ADDR_PEER_ID_FIX NL_CAPABILITY_RTNL_ADDR_PEER_ID_FIX
|
|
|
bc8210 |
|
|
|
bc8210 |
+ /* Older versions of libnl3 would not use MSG_PEEK for nl_recvmsgs() unless calling
|
|
|
bc8210 |
+ * nl_socket_enable_msg_peek(). Instead, the user had to specify the buffer size via
|
|
|
bc8210 |
+ * nl_socket_set_msg_buf_size(), which in turn would default to 4*getpagesize().
|
|
|
bc8210 |
+ *
|
|
|
bc8210 |
+ * The default value might not be large enough, so users who were not aware of the
|
|
|
bc8210 |
+ * problem easily ended up using a too small receive buffer. Usually, one wants to
|
|
|
bc8210 |
+ * avoid MSG_PEEK for recvmsg() because it requires an additional syscall.
|
|
|
bc8210 |
+ *
|
|
|
bc8210 |
+ * Now, as indicated by this capability, nl_recvmsgs() would use MSG_PEEK by default. The
|
|
|
bc8210 |
+ * user still can explicitly disable MSG_PEEK by calling nl_socket_disable_msg_peek() or
|
|
|
bc8210 |
+ * by setting the nl_socket_set_msg_buf_size() to a non-zero value.
|
|
|
bc8210 |
+ */
|
|
|
bc8210 |
+ NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT = 24,
|
|
|
bc8210 |
+#define NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT
|
|
|
bc8210 |
+
|
|
|
bc8210 |
__NL_CAPABILITY_MAX,
|
|
|
bc8210 |
NL_CAPABILITY_MAX = (__NL_CAPABILITY_MAX - 1),
|
|
|
bc8210 |
#define NL_CAPABILITY_MAX NL_CAPABILITY_MAX
|
|
|
bc8210 |
diff --git a/lib/nl.c b/lib/nl.c
|
|
|
bc8210 |
index 2d1ce81..8df08e6 100644
|
|
|
bc8210 |
--- a/lib/nl.c
|
|
|
bc8210 |
+++ b/lib/nl.c
|
|
|
bc8210 |
@@ -676,7 +676,8 @@ int nl_recv(struct nl_sock *sk, struct sockaddr_nl *nla,
|
|
|
bc8210 |
if (!buf || !nla)
|
|
|
bc8210 |
return -NLE_INVAL;
|
|
|
bc8210 |
|
|
|
bc8210 |
- if (sk->s_flags & NL_MSG_PEEK)
|
|
|
bc8210 |
+ if ( (sk->s_flags & NL_MSG_PEEK)
|
|
|
bc8210 |
+ || (!(sk->s_flags & NL_MSG_PEEK_EXPLICIT) && sk->s_bufsize == 0))
|
|
|
bc8210 |
flags |= MSG_PEEK | MSG_TRUNC;
|
|
|
bc8210 |
|
|
|
bc8210 |
if (page_size == 0)
|
|
|
bc8210 |
@@ -742,7 +743,7 @@ retry:
|
|
|
bc8210 |
void *tmp;
|
|
|
bc8210 |
|
|
|
bc8210 |
/* respond with error to an incomplete message */
|
|
|
bc8210 |
- if (!(sk->s_flags & NL_MSG_PEEK)) {
|
|
|
bc8210 |
+ if (flags == 0) {
|
|
|
bc8210 |
retval = -NLE_MSG_TRUNC;
|
|
|
bc8210 |
goto abort;
|
|
|
bc8210 |
}
|
|
|
bc8210 |
diff --git a/lib/socket.c b/lib/socket.c
|
|
|
bc8210 |
index 109c416..99e1a1b 100644
|
|
|
bc8210 |
--- a/lib/socket.c
|
|
|
bc8210 |
+++ b/lib/socket.c
|
|
|
bc8210 |
@@ -722,18 +722,23 @@ int nl_socket_set_nonblocking(const struct nl_sock *sk)
|
|
|
bc8210 |
/**
|
|
|
bc8210 |
* Enable use of MSG_PEEK when reading from socket
|
|
|
bc8210 |
* @arg sk Netlink socket.
|
|
|
bc8210 |
+ *
|
|
|
bc8210 |
+ * See also NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT capability
|
|
|
bc8210 |
*/
|
|
|
bc8210 |
void nl_socket_enable_msg_peek(struct nl_sock *sk)
|
|
|
bc8210 |
{
|
|
|
bc8210 |
- sk->s_flags |= NL_MSG_PEEK;
|
|
|
bc8210 |
+ sk->s_flags |= (NL_MSG_PEEK | NL_MSG_PEEK_EXPLICIT);
|
|
|
bc8210 |
}
|
|
|
bc8210 |
|
|
|
bc8210 |
/**
|
|
|
bc8210 |
* Disable use of MSG_PEEK when reading from socket
|
|
|
bc8210 |
* @arg sk Netlink socket.
|
|
|
bc8210 |
+ *
|
|
|
bc8210 |
+ * See also NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT capability
|
|
|
bc8210 |
*/
|
|
|
bc8210 |
void nl_socket_disable_msg_peek(struct nl_sock *sk)
|
|
|
bc8210 |
{
|
|
|
bc8210 |
+ sk->s_flags |= NL_MSG_PEEK_EXPLICIT;
|
|
|
bc8210 |
sk->s_flags &= ~NL_MSG_PEEK;
|
|
|
bc8210 |
}
|
|
|
bc8210 |
|
|
|
bc8210 |
@@ -853,6 +858,19 @@ int nl_socket_set_buffer_size(struct nl_sock *sk, int rxbuf, int txbuf)
|
|
|
bc8210 |
* socket will be able to receive. It is generally recommneded to specify
|
|
|
bc8210 |
* a buffer size no less than the size of a memory page.
|
|
|
bc8210 |
*
|
|
|
bc8210 |
+ * Setting the @bufsize to zero means to use a default of 4 times getpagesize().
|
|
|
bc8210 |
+ *
|
|
|
bc8210 |
+ * When MSG_PEEK is enabled, the buffer size is used for the initial choice
|
|
|
bc8210 |
+ * of the buffer while peeking. It still makes sense to choose an optimal value
|
|
|
bc8210 |
+ * to avoid realloc().
|
|
|
bc8210 |
+ *
|
|
|
bc8210 |
+ * When MSG_PEEK is disabled, the buffer size is important because a too small
|
|
|
bc8210 |
+ * size will lead to failure of receiving the message via nl_recvmsgs().
|
|
|
bc8210 |
+ *
|
|
|
bc8210 |
+ * By default, MSG_PEEK is enabled unless the user calls either nl_socket_disable_msg_peek()/
|
|
|
bc8210 |
+ * nl_socket_enable_msg_peek() or sets the message buffer size to a positive value.
|
|
|
bc8210 |
+ * See capability NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT for that.
|
|
|
bc8210 |
+ *
|
|
|
bc8210 |
* @return 0 on success or a negative error code.
|
|
|
bc8210 |
*/
|
|
|
bc8210 |
int nl_socket_set_msg_buf_size(struct nl_sock *sk, size_t bufsize)
|
|
|
bc8210 |
diff --git a/lib/utils.c b/lib/utils.c
|
|
|
bc8210 |
index 0f2a252..d4c0413 100644
|
|
|
bc8210 |
--- a/lib/utils.c
|
|
|
bc8210 |
+++ b/lib/utils.c
|
|
|
bc8210 |
@@ -1168,7 +1168,7 @@ int nl_has_capability (int capability)
|
|
|
bc8210 |
0,
|
|
|
bc8210 |
0,
|
|
|
bc8210 |
0,
|
|
|
bc8210 |
- 0),
|
|
|
bc8210 |
+ NL_CAPABILITY_NL_RECVMSGS_PEEK_BY_DEFAULT),
|
|
|
bc8210 |
/* IMPORTANT: these capability numbers are intended to be universal and stable
|
|
|
bc8210 |
* for libnl3. Don't allocate new numbers on your own that differ from upstream
|
|
|
bc8210 |
* libnl3.
|
|
|
bc8210 |
--
|
|
|
bc8210 |
2.9.3
|
|
|
bc8210 |
|