|
|
7dd887 |
From ad7e94b35988c8cd03866d47aa6fb21841cfae7c Mon Sep 17 00:00:00 2001
|
|
|
7dd887 |
From: =?UTF-8?q?Nikola=20Forr=C3=B3?= <nforro@redhat.com>
|
|
|
7dd887 |
Date: Tue, 28 Mar 2017 15:04:36 +0200
|
|
|
7dd887 |
Subject: [PATCH 6/6] packet.7: add missing socket options
|
|
|
7dd887 |
|
|
|
7dd887 |
---
|
|
|
7dd887 |
man-pages/man7/packet.7 | 218 ++++++++++++++++++++++++++++++++++++++++++++++--
|
|
|
7dd887 |
1 file changed, 209 insertions(+), 9 deletions(-)
|
|
|
7dd887 |
|
|
|
7dd887 |
diff --git a/man-pages/man7/packet.7 b/man-pages/man7/packet.7
|
|
|
7dd887 |
index f5d990b..b217e5e 100644
|
|
|
7dd887 |
--- a/man-pages/man7/packet.7
|
|
|
7dd887 |
+++ b/man-pages/man7/packet.7
|
|
|
7dd887 |
@@ -177,19 +177,24 @@ and
|
|
|
7dd887 |
.I sll_ifindex
|
|
|
7dd887 |
are used.
|
|
|
7dd887 |
.SS Socket options
|
|
|
7dd887 |
+Packet socket options are configured by calling
|
|
|
7dd887 |
+.BR setsockopt (2)
|
|
|
7dd887 |
+with level
|
|
|
7dd887 |
+.BR SOL_PACKET .
|
|
|
7dd887 |
+.TP
|
|
|
7dd887 |
+.BR PACKET_ADD_MEMBERSHIP
|
|
|
7dd887 |
+.PD 0
|
|
|
7dd887 |
+.TP
|
|
|
7dd887 |
+.BR PACKET_DROP_MEMBERSHIP
|
|
|
7dd887 |
+.PD
|
|
|
7dd887 |
Packet sockets can be used to configure physical layer multicasting
|
|
|
7dd887 |
and promiscuous mode.
|
|
|
7dd887 |
-It works by calling
|
|
|
7dd887 |
-.BR setsockopt (2)
|
|
|
7dd887 |
-on a packet socket for
|
|
|
7dd887 |
-.B SOL_PACKET
|
|
|
7dd887 |
-and one of the options
|
|
|
7dd887 |
.B PACKET_ADD_MEMBERSHIP
|
|
|
7dd887 |
-to add a binding or
|
|
|
7dd887 |
+adds a binding and
|
|
|
7dd887 |
.B PACKET_DROP_MEMBERSHIP
|
|
|
7dd887 |
-to drop it.
|
|
|
7dd887 |
+drops it.
|
|
|
7dd887 |
They both expect a
|
|
|
7dd887 |
-.B packet_mreq
|
|
|
7dd887 |
+.I packet_mreq
|
|
|
7dd887 |
structure as argument:
|
|
|
7dd887 |
|
|
|
7dd887 |
.in +4n
|
|
|
7dd887 |
@@ -222,11 +227,206 @@ and
|
|
|
7dd887 |
sets the socket up to receive all multicast packets arriving at
|
|
|
7dd887 |
the interface.
|
|
|
7dd887 |
|
|
|
7dd887 |
-In addition the traditional ioctls
|
|
|
7dd887 |
+In addition, the traditional ioctls
|
|
|
7dd887 |
.BR SIOCSIFFLAGS ,
|
|
|
7dd887 |
.BR SIOCADDMULTI ,
|
|
|
7dd887 |
.B SIOCDELMULTI
|
|
|
7dd887 |
can be used for the same purpose.
|
|
|
7dd887 |
+.TP
|
|
|
7dd887 |
+.BR PACKET_AUXDATA " (since Linux 2.6.21)"
|
|
|
7dd887 |
+.\" commit 8dc4194474159660d7f37c495e3fc3f10d0db8cc
|
|
|
7dd887 |
+If this binary option is enabled, the packet socket passes a metadata
|
|
|
7dd887 |
+structure along with each packet in the
|
|
|
7dd887 |
+.BR recvmsg (2)
|
|
|
7dd887 |
+control field.
|
|
|
7dd887 |
+The structure can be read with
|
|
|
7dd887 |
+.BR cmsg (3).
|
|
|
7dd887 |
+It is defined as
|
|
|
7dd887 |
+
|
|
|
7dd887 |
+.in +4n
|
|
|
7dd887 |
+.nf
|
|
|
7dd887 |
+struct tpacket_auxdata {
|
|
|
7dd887 |
+ __u32 tp_status;
|
|
|
7dd887 |
+ __u32 tp_len; /* packet length */
|
|
|
7dd887 |
+ __u32 tp_snaplen; /* captured length */
|
|
|
7dd887 |
+ __u16 tp_mac;
|
|
|
7dd887 |
+ __u16 tp_net;
|
|
|
7dd887 |
+ __u16 tp_vlan_tci;
|
|
|
7dd887 |
+ __u16 tp_padding;
|
|
|
7dd887 |
+};
|
|
|
7dd887 |
+.fi
|
|
|
7dd887 |
+.in
|
|
|
7dd887 |
+.TP
|
|
|
7dd887 |
+.BR PACKET_FANOUT " (since Linux 3.1)"
|
|
|
7dd887 |
+.\" commit dc99f600698dcac69b8f56dda9a8a00d645c5ffc
|
|
|
7dd887 |
+To scale processing across threads, packet sockets can form a fanout
|
|
|
7dd887 |
+group.
|
|
|
7dd887 |
+In this mode, each matching packet is enqueued onto only one
|
|
|
7dd887 |
+socket in the group.
|
|
|
7dd887 |
+A socket joins a fanout group by calling
|
|
|
7dd887 |
+.BR setsockopt (2)
|
|
|
7dd887 |
+with level
|
|
|
7dd887 |
+.B SOL_PACKET
|
|
|
7dd887 |
+and option
|
|
|
7dd887 |
+.BR PACKET_FANOUT .
|
|
|
7dd887 |
+Each network namespace can have up to 65536 independent groups.
|
|
|
7dd887 |
+A socket selects a group by encoding the ID in the first 16 bits of
|
|
|
7dd887 |
+the integer option value.
|
|
|
7dd887 |
+The first packet socket to join a group implicitly creates it.
|
|
|
7dd887 |
+To successfully join an existing group, subsequent packet sockets
|
|
|
7dd887 |
+must have the same protocol, device settings, fanout mode and
|
|
|
7dd887 |
+flags (see below).
|
|
|
7dd887 |
+Packet sockets can leave a fanout group only by closing the socket.
|
|
|
7dd887 |
+The group is deleted when the last socket is closed.
|
|
|
7dd887 |
+
|
|
|
7dd887 |
+Fanout supports multiple algorithms to spread traffic between sockets.
|
|
|
7dd887 |
+The default mode,
|
|
|
7dd887 |
+.BR PACKET_FANOUT_HASH ,
|
|
|
7dd887 |
+sends packets from the same flow to the same socket to maintain
|
|
|
7dd887 |
+per-flow ordering.
|
|
|
7dd887 |
+For each packet, it chooses a socket by taking the packet flow hash
|
|
|
7dd887 |
+modulo the number of sockets in the group, where a flow hash is a hash
|
|
|
7dd887 |
+over network-layer address and optional transport-layer port fields.
|
|
|
7dd887 |
+The load-balance mode
|
|
|
7dd887 |
+.BR PACKET_FANOUT_LB
|
|
|
7dd887 |
+implements a round-robin algorithm.
|
|
|
7dd887 |
+.BR PACKET_FANOUT_CPU
|
|
|
7dd887 |
+selects the socket based on the CPU that the packet arrived on.
|
|
|
7dd887 |
+.BR PACKET_FANOUT_ROLLOVER
|
|
|
7dd887 |
+processes all data on a single socket, moving to the next when one
|
|
|
7dd887 |
+becomes backlogged.
|
|
|
7dd887 |
+.BR PACKET_FANOUT_RND
|
|
|
7dd887 |
+selects the socket using a pseudo-random number generator.
|
|
|
7dd887 |
+
|
|
|
7dd887 |
+Fanout modes can take additional options.
|
|
|
7dd887 |
+IP fragmentation causes packets from the same flow to have different
|
|
|
7dd887 |
+flow hashes.
|
|
|
7dd887 |
+The flag
|
|
|
7dd887 |
+.BR PACKET_FANOUT_FLAG_DEFRAG ,
|
|
|
7dd887 |
+if set, causes packets to be defragmented before fanout is applied, to
|
|
|
7dd887 |
+preserve order even in this case.
|
|
|
7dd887 |
+Fanout mode and options are communicated in the second 16 bits of the
|
|
|
7dd887 |
+integer option value.
|
|
|
7dd887 |
+The flag
|
|
|
7dd887 |
+.BR PACKET_FANOUT_FLAG_ROLLOVER
|
|
|
7dd887 |
+enables the rollover mechanism as a backup strategy: if the
|
|
|
7dd887 |
+original fanout algorithm selects a backlogged socket, the packet
|
|
|
7dd887 |
+rolls over to the next available one.
|
|
|
7dd887 |
+.TP
|
|
|
7dd887 |
+.BR PACKET_LOSS " (with " PACKET_TX_RING )
|
|
|
7dd887 |
+If set, do not silently drop a packet on transmission error, but
|
|
|
7dd887 |
+return it with status set to
|
|
|
7dd887 |
+.BR TP_STATUS_WRONG_FORMAT .
|
|
|
7dd887 |
+.TP
|
|
|
7dd887 |
+.BR PACKET_RESERVE " (with " PACKET_RX_RING )
|
|
|
7dd887 |
+By default, a packet receive ring writes packets immediately following the
|
|
|
7dd887 |
+metadata structure and alignment padding.
|
|
|
7dd887 |
+This integer option reserves additional headroom.
|
|
|
7dd887 |
+.TP
|
|
|
7dd887 |
+.BR PACKET_RX_RING
|
|
|
7dd887 |
+Create a memory-mapped ring buffer for asynchronous packet reception.
|
|
|
7dd887 |
+The packet socket reserves a contiguous region of application address
|
|
|
7dd887 |
+space, lays it out into an array of packet slots and copies packets
|
|
|
7dd887 |
+(up to
|
|
|
7dd887 |
+.IR tp_snaplen )
|
|
|
7dd887 |
+into subsequent slots.
|
|
|
7dd887 |
+Each packet is preceded by a metadata structure similar to
|
|
|
7dd887 |
+.IR tpacket_auxdata .
|
|
|
7dd887 |
+The protocol fields encode the offset to the data
|
|
|
7dd887 |
+from the start of the metadata header.
|
|
|
7dd887 |
+.I tp_net
|
|
|
7dd887 |
+stores the offset to the network layer.
|
|
|
7dd887 |
+If the packet socket is of type
|
|
|
7dd887 |
+.BR SOCK_DGRAM ,
|
|
|
7dd887 |
+then
|
|
|
7dd887 |
+.I tp_mac
|
|
|
7dd887 |
+is the same.
|
|
|
7dd887 |
+If it is of type
|
|
|
7dd887 |
+.BR SOCK_RAW ,
|
|
|
7dd887 |
+then that field stores the offset to the link-layer frame.
|
|
|
7dd887 |
+Packet socket and application communicate the head and tail of the ring
|
|
|
7dd887 |
+through the
|
|
|
7dd887 |
+.I tp_status
|
|
|
7dd887 |
+field.
|
|
|
7dd887 |
+The packet socket owns all slots with status
|
|
|
7dd887 |
+.BR TP_STATUS_KERNEL .
|
|
|
7dd887 |
+After filling a slot, it changes the status of the slot to transfer
|
|
|
7dd887 |
+ownership to the application.
|
|
|
7dd887 |
+During normal operation, the new status is
|
|
|
7dd887 |
+.BR TP_STATUS_USER ,
|
|
|
7dd887 |
+to signal that a correctly received packet has been stored.
|
|
|
7dd887 |
+When the application has finished processing a packet, it transfers
|
|
|
7dd887 |
+ownership of the slot back to the socket by setting the status to
|
|
|
7dd887 |
+.BR TP_STATUS_KERNEL .
|
|
|
7dd887 |
+Packet sockets implement multiple variants of the packet ring.
|
|
|
7dd887 |
+The implementation details are described in
|
|
|
7dd887 |
+.IR Documentation/networking/packet_mmap.txt
|
|
|
7dd887 |
+in the Linux kernel source tree.
|
|
|
7dd887 |
+.TP
|
|
|
7dd887 |
+.BR PACKET_STATISTICS
|
|
|
7dd887 |
+Retrieve packet socket statistics in the form of a structure:
|
|
|
7dd887 |
+
|
|
|
7dd887 |
+.in +4n
|
|
|
7dd887 |
+.nf
|
|
|
7dd887 |
+struct tpacket_stats {
|
|
|
7dd887 |
+ unsigned int tp_packets; /* Total packet count */
|
|
|
7dd887 |
+ unsigned int tp_drops; /* Dropped packet count */
|
|
|
7dd887 |
+};
|
|
|
7dd887 |
+.fi
|
|
|
7dd887 |
+.in
|
|
|
7dd887 |
+
|
|
|
7dd887 |
+Receiving statistics resets the internal counters.
|
|
|
7dd887 |
+The statistics structure differs when using a ring of variant
|
|
|
7dd887 |
+.BR TPACKET_V3 .
|
|
|
7dd887 |
+.TP
|
|
|
7dd887 |
+.BR PACKET_TIMESTAMP " (with " PACKET_RX_RING "; since Linux 2.6.36)"
|
|
|
7dd887 |
+.\" commit 614f60fa9d73a9e8fdff3df83381907fea7c5649
|
|
|
7dd887 |
+The packet receive ring always stores a timestamp in the metadata header.
|
|
|
7dd887 |
+By default, this is a software timestamp generated when the
|
|
|
7dd887 |
+packet is copied into the ring.
|
|
|
7dd887 |
+This integer option selects the type of timestamp.
|
|
|
7dd887 |
+Besides the default, it supports the two hardware formats described in
|
|
|
7dd887 |
+.IR Documentation/networking/timestamping.txt
|
|
|
7dd887 |
+in the Linux kernel source tree.
|
|
|
7dd887 |
+.TP
|
|
|
7dd887 |
+.BR PACKET_TX_RING " (since Linux 2.6.31)"
|
|
|
7dd887 |
+.\" commit 69e3c75f4d541a6eb151b3ef91f34033cb3ad6e1
|
|
|
7dd887 |
+Create a memory-mapped ring buffer for packet transmission.
|
|
|
7dd887 |
+This option is similar to
|
|
|
7dd887 |
+.BR PACKET_RX_RING
|
|
|
7dd887 |
+and takes the same arguments.
|
|
|
7dd887 |
+The application writes packets into slots with status
|
|
|
7dd887 |
+.BR TP_STATUS_AVAILABLE
|
|
|
7dd887 |
+and schedules them for transmission by changing the status to
|
|
|
7dd887 |
+.BR TP_STATUS_SEND_REQUEST .
|
|
|
7dd887 |
+When packets are ready to be transmitted, the application calls
|
|
|
7dd887 |
+.BR send (2)
|
|
|
7dd887 |
+or a variant thereof.
|
|
|
7dd887 |
+The
|
|
|
7dd887 |
+.I buf
|
|
|
7dd887 |
+and
|
|
|
7dd887 |
+.I len
|
|
|
7dd887 |
+fields of this call are ignored.
|
|
|
7dd887 |
+If an address is passed using
|
|
|
7dd887 |
+.BR sendto (2)
|
|
|
7dd887 |
+or
|
|
|
7dd887 |
+.BR sendmsg (2),
|
|
|
7dd887 |
+then that overrides the socket default.
|
|
|
7dd887 |
+On successful transmission, the socket resets the slot to
|
|
|
7dd887 |
+.BR TP_STATUS_AVAILABLE .
|
|
|
7dd887 |
+It discards packets silently on error unless
|
|
|
7dd887 |
+.BR PACKET_LOSS
|
|
|
7dd887 |
+is set.
|
|
|
7dd887 |
+.TP
|
|
|
7dd887 |
+.BR PACKET_VERSION " (with " PACKET_RX_RING "; since Linux 2.6.27)"
|
|
|
7dd887 |
+.\" commit bbd6ef87c544d88c30e4b762b1b61ef267a7d279
|
|
|
7dd887 |
+By default,
|
|
|
7dd887 |
+.BR PACKET_RX_RING
|
|
|
7dd887 |
+creates a packet receive ring of variant
|
|
|
7dd887 |
+.BR TPACKET_V1 .
|
|
|
7dd887 |
+To create another variant, configure the desired variant by setting this
|
|
|
7dd887 |
+integer option before creating the ring.
|
|
|
7dd887 |
+
|
|
|
7dd887 |
.SS Ioctls
|
|
|
7dd887 |
.B SIOCGSTAMP
|
|
|
7dd887 |
can be used to receive the timestamp of the last received packet.
|
|
|
7dd887 |
--
|
|
|
7dd887 |
2.7.4
|
|
|
7dd887 |
|