naccyde / rpms / iproute

Forked from rpms/iproute 5 months ago
Clone

Blame SOURCES/0047-tc-add-a-man-page-for-u32-filter.patch

049c96
From ccf0d2713af94b388d8220d30d8ca82be3c913fc Mon Sep 17 00:00:00 2001
049c96
From: Phil Sutter <psutter@redhat.com>
049c96
Date: Thu, 18 Feb 2016 15:45:21 +0100
049c96
Subject: [PATCH] tc: add a man page for u32 filter
049c96
049c96
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1286711
049c96
Upstream Status: iproute2.git commit f15a23966fff3
049c96
049c96
commit f15a23966fff35e484812ec1d733d9438f658644
049c96
Author: Phil Sutter <phil@nwl.cc>
049c96
Date:   Fri Oct 23 19:47:15 2015 +0200
049c96
049c96
    tc: add a man page for u32 filter
049c96
049c96
    Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
049c96
    Signed-off-by: Phil Sutter <phil@nwl.cc>
049c96
---
049c96
 man/man8/tc-u32.8 | 663 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
049c96
 1 file changed, 663 insertions(+)
049c96
 create mode 100644 man/man8/tc-u32.8
049c96
049c96
diff --git a/man/man8/tc-u32.8 b/man/man8/tc-u32.8
049c96
new file mode 100644
049c96
index 0000000..47c8f2d
049c96
--- /dev/null
049c96
+++ b/man/man8/tc-u32.8
049c96
@@ -0,0 +1,663 @@
049c96
+.TH "Universal 32bit classifier in tc" 8 "25 Sep 2015" "iproute2" "Linux"
049c96
+
049c96
+.SH NAME
049c96
+u32 \- universal 32bit traffic control filter
049c96
+.SH SYNOPSIS
049c96
+.in +8
049c96
+.ti -8
049c96
+.BR tc " " filter " ... [ " handle
049c96
+.IR HANDLE " ] "
049c96
+.B u32
049c96
+.IR OPTION_LIST " [ "
049c96
+.B offset
049c96
+.IR OFFSET " ] [ "
049c96
+.B hashkey
049c96
+.IR HASHKEY " ] [ "
049c96
+.B classid
049c96
+.IR CLASSID " ] [ "
049c96
+.B divisor
049c96
+.IR uint_value " ] [ "
049c96
+.B order
049c96
+.IR u32_value " ] [ "
049c96
+.B ht
049c96
+.IR HANDLE " ] [ "
049c96
+.B sample
049c96
+.IR SELECTOR " [ "
049c96
+.B divisor
049c96
+.IR uint_value " ] ] [ "
049c96
+.B link
049c96
+.IR HANDLE " ] [ "
049c96
+.B indev
049c96
+.IR ifname " ] [ "
049c96
+.BR help " ]"
049c96
+
049c96
+.ti -8
049c96
+.IR HANDLE " := { "
049c96
+\fIu12_hex_htid\fB:\fR[\fIu8_hex_hash\fB:\fR[\fIu12_hex_nodeid\fR] | \fB0x\fIu32_hex_value\fR }
049c96
+
049c96
+.ti -8
049c96
+.IR OPTION_LIST " := [ " OPTION_LIST " ] " OPTION
049c96
+
049c96
+.ti -8
049c96
+.IR HASHKEY " := [ "
049c96
+.B mask
049c96
+.IR u32_hex_value " ] [ "
049c96
+.B at
049c96
+.IR 4*int_value " ]"
049c96
+
049c96
+.ti -8
049c96
+.IR CLASSID " := { "
049c96
+.BR root " | "
049c96
+.BR none " | "
049c96
+[\fIu16_major\fR]\fB:\fIu16_minor\fR | \fIu32_hex_value\fR }
049c96
+
049c96
+.ti -8
049c96
+.IR OFFSET " := [ "
049c96
+.B plus
049c96
+.IR int_value " ] [ "
049c96
+.B at
049c96
+.IR 2*int_value " ] [ "
049c96
+.B mask
049c96
+.IR u16_hex_value " ] [ "
049c96
+.B shift
049c96
+.IR int_value " ] [ "
049c96
+.BR eat " ]"
049c96
+
049c96
+.ti -8
049c96
+.IR OPTION " := { "
049c96
+.B match
049c96
+.IR SELECTOR " | "
049c96
+.B action
049c96
+.IR ACTION " } "
049c96
+
049c96
+.ti -8
049c96
+.IR SELECTOR " := { "
049c96
+.B u32
049c96
+.IR VAL_MASK_32 " | "
049c96
+.B u16
049c96
+.IR VAL_MASK_16 " | "
049c96
+.B u8
049c96
+.IR VAL_MASK_8 " | "
049c96
+.B ip
049c96
+.IR IP " | "
049c96
+.B ip6
049c96
+.IR IP6 " | { "
049c96
+.BR tcp " | " udp " } "
049c96
+.IR TCPUDP " | "
049c96
+.B icmp
049c96
+.IR ICMP " | "
049c96
+.B mark
049c96
+.IR VAL_MASK_32 " | "
049c96
+.B ether
049c96
+.IR ETHER " }"
049c96
+
049c96
+.ti -8
049c96
+.IR IP " := { { "
049c96
+.BR src " | " dst " } { " default " | " any " | " all " | "
049c96
+.IR ip_address " [ "
049c96
+.BR / " { "
049c96
+.IR prefixlen " | " netmask " } ] } " AT " | { "
049c96
+.BR dsfield " | " ihl " | " protocol " | " precedence " | "
049c96
+.BR icmp_type " | " icmp_code " } "
049c96
+.IR VAL_MASK_8 " | { "
049c96
+.BR sport " | " dport " } "
049c96
+.IR VAL_MASK_16 " | "
049c96
+.BR nofrag " | " firstfrag " | " df " | " mf " }"
049c96
+
049c96
+.ti -8
049c96
+.IR IP6 " := { { "
049c96
+.BR src " | " dst " } { " default " | " any " | " all " | "
049c96
+.IR ip6_address " [/" prefixlen " ] } " AT " | "
049c96
+.B priority
049c96
+.IR VAL_MASK_8 " | { "
049c96
+.BR protocol " | " icmp_type " | " icmp_code " } "
049c96
+.IR VAL_MASK_8 " | "
049c96
+.B flowlabel
049c96
+.IR VAL_MASK_32 " | { "
049c96
+.BR sport " | " dport " } "
049c96
+.IR VAL_MASK_16 " }"
049c96
+
049c96
+.ti -8
049c96
+.IR TCPUDP " := { "
049c96
+.BR src " | " dst " } "
049c96
+.I VAL_MASK_16
049c96
+
049c96
+.ti -8
049c96
+.IR ICMP " := { "
049c96
+.B type
049c96
+.IR VAL_MASK_8 " | "
049c96
+.B code
049c96
+.IR VAL_MASK_8 " }"
049c96
+
049c96
+.ti -8
049c96
+.IR ETHER " := { "
049c96
+.BR src " | " dst " } "
049c96
+.IR ether_address " " AT
049c96
+
049c96
+.ti -8
049c96
+.IR VAL_MASK_32 " := " u32_value " " u32_hex_mask " [ " AT " ]"
049c96
+
049c96
+.ti -8
049c96
+.IR VAL_MASK_16 " := " u16_value " " u16_hex_mask " [ " AT " ]"
049c96
+
049c96
+.ti -8
049c96
+.IR VAL_MASK_8 " := " u8_value " " u8_hex_mask " [ " AT " ]"
049c96
+
049c96
+.ti -8
049c96
+.IR AT " := [ "
049c96
+.BR at " [ " nexthdr+ " ] "
049c96
+.IR int_value " ]"
049c96
+.SH DESCRIPTION
049c96
+The Universal/Ugly 32bit filter allows to match arbitrary bitfields in the
049c96
+packet. Due to breaking everything down to values, masks and offsets, it is
049c96
+equally powerful and hard to use. Luckily many abstracting directives are
049c96
+present which allow defining rules on a higher level and therefore free the
049c96
+user from having to fiddle with bits and masks in many cases.
049c96
+
049c96
+There are two general modes of invocation: The first mode creates a new filter
049c96
+to delegate packets to different destinations. Apart from the obvious ones,
049c96
+namely classifying the packet by specifying a
049c96
+.I CLASSID
049c96
+or calling an
049c96
+.BR action ,
049c96
+one may
049c96
+.B link
049c96
+one filter to another one (or even a list of them), effectively organizing
049c96
+filters into a tree-like hierarchy.
049c96
+
049c96
+Typically filter delegation is done by means of a hash table, which leads to the
049c96
+second mode of invocation: it merely serves to set up these hash tables. Filters
049c96
+can select a hash table and provide a key selector from which a hash is to be
049c96
+computed and used as key to lookup the table's bucket which contains filters for
049c96
+further processing. This is useful if a high number of filters is in use, as the
049c96
+overhead of performing the hash operation and table lookup becomes negligible in
049c96
+that case. Using hashtables with
049c96
+.B u32
049c96
+basically involves the following pattern:
049c96
+.IP (1) 4
049c96
+Creating a new hash table, specifying its size using the
049c96
+.B divisor
049c96
+parameter and ideally a handle by which the table can be identified. If the
049c96
+latter is not given, the kernel chooses one on its own, which has to be
049c96
+guessed later.
049c96
+.IP (2) 4
049c96
+Creating filters which link to the created table in
049c96
+.I (1)
049c96
+using the
049c96
+.B link
049c96
+parameter and defining the packet data which the kernel will use to calculate
049c96
+the
049c96
+.BR hashkey .
049c96
+.IP (3) 4
049c96
+Adding filters to buckets in the hash table from
049c96
+.IR (1) .
049c96
+In order to avoid having to know how exactly the kernel creates the hash key,
049c96
+there is the
049c96
+.B sample
049c96
+parameter, which gives sample data to hash and thereby define the table bucket
049c96
+the filter should be added to.
049c96
+
049c96
+.RE
049c96
+In fact, even if not explicitly requested
049c96
+.B u32
049c96
+creates a hash table for every
049c96
+.B priority
049c96
+a filter is being added with. The table's size is 1 though, so it is in fact
049c96
+merely a linked list.
049c96
+.SH VALUES
049c96
+Options and selectors require values to be specified in a specific format, which
049c96
+is often non-intuitive. Therefore the terminals in
049c96
+.I SYNOPSIS
049c96
+have been given descriptive names to indicate the required format and/or maximum
049c96
+allowed numeric value: Prefixes
049c96
+.IR u32 ", " u16 " and " u8
049c96
+indicate four, two and single byte unsigned values. E.g.
049c96
+.I u16
049c96
+indicates a two byte-sized value in range between 0 and 65535 (0xFFFF)
049c96
+inclusive. A prefix of
049c96
+.I int
049c96
+indicates a four byte signed value. A middle part of
049c96
+.I _hex_
049c96
+indicates that the value is parsed in hexadecimal format. Otherwise, the
049c96
+value's base is automatically detected, i.e. values prefixed with
049c96
+.I 0x
049c96
+are considered hexadecimal, a leading
049c96
+.I 0
049c96
+indicates octal format and decimal format otherwise. There are some values with
049c96
+special formatting as well:
049c96
+.IR ip_address " and " netmask
049c96
+are in dotted-quad formatting as usual for IPv4 addresses. An
049c96
+.I ip6_address
049c96
+is specified in common, colon-separated hexadecimal format. Finally,
049c96
+.I prefixlen
049c96
+is an unsigned, decimal integer value in range from 0 to the address width in
049c96
+bits (32 for IPv4 and 128 for IPv6).
049c96
+
049c96
+Sometimes values need to be divisible by a certain number. In that case a name
049c96
+of the form
049c96
+.I N*val
049c96
+was chosen, indicating that
049c96
+.I val
049c96
+must be divisible by
049c96
+.IR N .
049c96
+Or the other way around: the resulting value must be a multiple of
049c96
+.IR N .
049c96
+.SH OPTIONS
049c96
+.B U32
049c96
+recognizes the following options:
049c96
+.TP
049c96
+.BI handle " HANDLE"
049c96
+The handle is used to reference a filter and therefore must be unique. It
049c96
+consists of a hash table identifier
049c96
+.B htid
049c96
+and optional
049c96
+.B hash
049c96
+(which identifies the hash table's bucket) and
049c96
+.BR nodeid .
049c96
+All these values are parsed as unsigned, hexadecimal numbers with length 12bits
049c96
+(
049c96
+.BR htid " and " nodeid )
049c96
+or 8bits (
049c96
+.BR hash ).
049c96
+Alternatively one may specify a single, 32bit long hex number which contains
049c96
+the three fields' bits in concatenated form. Other than the fields themselves, it
049c96
+has to be prefixed by
049c96
+.BR 0x .
049c96
+.TP
049c96
+.BI offset " OFFSET"
049c96
+Set an offset which defines where matches of subsequent filters are applied to.
049c96
+Therefore this option is useful only when combined with
049c96
+.BR link " or a combination of " ht " and " sample .
049c96
+The offset may be given explicitly by using the
049c96
+.B plus
049c96
+keyword, or extracted from the packet data with
049c96
+.BR at .
049c96
+It is possible to mangle the latter using
049c96
+.BR mask " and/or " shift
049c96
+keywords. By default, this offset is recorded but not implicitly applied. It is
049c96
+used only to substitute the
049c96
+.B nexthdr+
049c96
+statement. Using the keyword
049c96
+.B eat
049c96
+though inverts this behaviour: the offset is applied always, and
049c96
+.B nexthdr+
049c96
+will fall back to zero.
049c96
+.TP
049c96
+.BI hashkey " HASHKEY"
049c96
+Specify what packet data to use to calculate a hash key for bucket lookup. The
049c96
+kernel adjusts the value according to the hash table's size. For this to work,
049c96
+the option
049c96
+.B link
049c96
+must be given.
049c96
+.TP
049c96
+.BI classid " CLASSID"
049c96
+Classify matching packets into the given
049c96
+.IR CLASSID ,
049c96
+which consists of either 16bit
049c96
+.BR major " and " minor
049c96
+numbers or a single 32bit value combining both.
049c96
+.TP
049c96
+.BI divisor " u32_value"
049c96
+Specify a modulo value. Used when creating hash tables to define their size or
049c96
+for declaring a
049c96
+.B sample
049c96
+to calculate hash table keys from. Must be a power of two with exponent not
049c96
+exceeding eight.
049c96
+.TP
049c96
+.BI order " u32_value"
049c96
+A value to order filters by, ascending. Conflicts with
049c96
+.B handle
049c96
+which serves the same purpose.
049c96
+.TP
049c96
+.BI sample " SELECTOR"
049c96
+Used together with
049c96
+.B ht
049c96
+to specify which bucket to add this filter to. This allows one to avoid having
049c96
+to know how exactly the kernel calculates hashes. The additional
049c96
+.B divisor
049c96
+defaults to 256, so must be given for hash tables of different size.
049c96
+.TP
049c96
+.BI link " HANDLE"
049c96
+Delegate matching packets to filters in a hash table.
049c96
+.I HANDLE
049c96
+is used to only specify the hash table, so only
049c96
+.BR htid " may be given, " hash " and " nodeid
049c96
+have to be omitted. By default, bucket number 0 will be used and can be
049c96
+overridden by the
049c96
+.B hashkey
049c96
+option.
049c96
+.TP
049c96
+.BI indev " ifname"
049c96
+Filter on the incoming interface of the packet. Obviously works only for
049c96
+forwarded traffic.
049c96
+.TP
049c96
+.BI help
049c96
+Print a brief help text about possible options.
049c96
+.SH SELECTORS
049c96
+Basically the only real selector is
049c96
+.BR u32 .
049c96
+All others merely provide a higher level syntax and are internally translated
049c96
+into
049c96
+.BR u32 .
049c96
+.TP
049c96
+.BI u32 " VAL_MASK_32"
049c96
+.TQ
049c96
+.BI u16 " VAL_MASK_16"
049c96
+.TQ
049c96
+.BI u8 " VAL_MASK_8"
049c96
+Match packet data to a given value. The selector name defines the sample length
049c96
+to extract (32bits for
049c96
+.BR u32 ,
049c96
+16bits for
049c96
+.B u16
049c96
+and 8bits for
049c96
+.BR u8 ).
049c96
+Before comparing, the sample is binary AND'ed with the given mask. This way
049c96
+uninteresting bits can be cleared before comparison. The position of the sample
049c96
+is defined by the offset specified in
049c96
+.IR AT .
049c96
+.TP
049c96
+.BI ip " IP"
049c96
+.TQ
049c96
+.BI ip6 " IP6"
049c96
+Assume packet starts with an IPv4 (
049c96
+.BR ip )
049c96
+or IPv6 (
049c96
+.BR ip6 )
049c96
+header.
049c96
+.IR IP / IP6
049c96
+then allows to match various header fields:
049c96
+.RS
049c96
+.TP
049c96
+.BI src " ADDR"
049c96
+.BI dst " ADDR"
049c96
+Compare Source or Destination Address fields against the value of
049c96
+.IR ADDR .
049c96
+The reserved words
049c96
+.BR default ", " any " and " all
049c96
+effectively match any address. Otherwise an IP address of the particular
049c96
+protocol is expected, optionally suffixed by a prefix length to match whole
049c96
+subnets. In case of IPv4 a netmask may also be given.
049c96
+.TP
049c96
+.BI dsfield " VAL_MASK_8"
049c96
+IPv4 only. Match the packet header's DSCP/ECN field. Synonyms to this are
049c96
+.BR tos " and " precedence .
049c96
+.TP
049c96
+.BI ihl " VAL_MASK_8"
049c96
+IPv4 only. Match the Internet Header Length field. Note that the value's unit is
049c96
+32bits, so to match a packet with 24byte header length
049c96
+.I u8_value
049c96
+has to be 6.
049c96
+.TP
049c96
+.BI protocol " VAL_MASK_8"
049c96
+Match the Protocol (IPv4) or Next Header (IPv6) field value, e.g. 6 for TCP.
049c96
+.TP
049c96
+.BI icmp_type " VAL_MASK_8"
049c96
+.TQ
049c96
+.BI icmp_code " VAL_MASK_8"
049c96
+Assume a next-header protocol of icmp or ipv6-icmp and match Type or Code
049c96
+field values. This is dangerous, as the code assumes minimal header size for
049c96
+IPv4 and lack of extension headers for IPv6.
049c96
+.TP
049c96
+.BI sport " VAL_MASK_16"
049c96
+.TQ
049c96
+.BI dport " VAL_MASK_16"
049c96
+Match layer four source or destination ports. This is dangerous as well, as it
049c96
+assumes a suitable layer four protocol is present (which has Source and
049c96
+Destination Port fields right at the start of the header and 16bit in size).
049c96
+Also minimal header size for IPv4 and lack of IPv6 extension headers is assumed.
049c96
+.TP
049c96
+.B nofrag
049c96
+.TQ
049c96
+.B firstfrag
049c96
+.TQ
049c96
+.B df
049c96
+.TQ
049c96
+.B mf
049c96
+IPv4 only, check certain flags and fragment offset values. Match if the packet
049c96
+is not a fragment
049c96
+.RB ( nofrag ),
049c96
+the first fragment
049c96
+.RB ( firstfrag ),
049c96
+if Don't Fragment
049c96
+.RB ( df )
049c96
+or More Fragments
049c96
+.RB ( mf )
049c96
+bits are set.
049c96
+.TP
049c96
+.BI priority " VAL_MASK_8"
049c96
+IPv6 only. Match the header's Traffic Class field, which has the same purpose
049c96
+and semantics of IPv4's ToS field since RFC 3168: upper six bits are DSCP, the
049c96
+lower two ECN.
049c96
+.TP
049c96
+.BI flowlabel " VAL_MASK_32"
049c96
+IPv6 only. Match the Flow Label field's value. Note that Flow Label itself is
049c96
+only 20bits long, which are the least significant ones here. The remaining
049c96
+upper 12bits match Version and Traffic Class fields.
049c96
+.RE
049c96
+.TP
049c96
+.BI tcp " TCPUDP"
049c96
+.TQ
049c96
+.BI udp " TCPUDP"
049c96
+Match fields of next header of protocol TCP or UDP. The possible values for
049c96
+.I TCPUDP
049c96
+are:
049c96
+.RS
049c96
+.TP
049c96
+.BI src " VAL_MASK_16"
049c96
+Match on Source Port field value.
049c96
+.TP
049c96
+.BI dst " VAL_MASK_16"
049c96
+Match on Destination Port field value.
049c96
+.RE
049c96
+.TP
049c96
+.BI icmp " ICMP"
049c96
+Match fields of next header of protocol ICMP. The possible values for
049c96
+.I ICMP
049c96
+are:
049c96
+.RS
049c96
+.TP
049c96
+.BI type " VAL_MASK_8"
049c96
+Match on ICMP Type field.
049c96
+.TP
049c96
+.BI code " VAL_MASK_8"
049c96
+Match on ICMP Code field.
049c96
+.RE
049c96
+.TP
049c96
+.BI mark " VAL_MASK_32"
049c96
+Match on netfilter fwmark value.
049c96
+.TP
049c96
+.BI ether " ETHER"
049c96
+Match on ethernet header fields. Possible values for
049c96
+.I ETHER
049c96
+are:
049c96
+.RS
049c96
+.TP
049c96
+.BI src " ether_address" " " AT
049c96
+.TQ
049c96
+.BI dst " ether_address" " " AT
049c96
+Match on source or destination ethernet address. This is dangerous: It assumes
049c96
+an ethernet header is present at the start of the packet. This will probably
049c96
+lead to unexpected things if used with layer three interfaces like e.g. tun or
049c96
+ppp.
049c96
+.SH EXAMPLES
049c96
+.RS
049c96
+.EX
049c96
+tc filter add dev eth0 parent 999:0 prio 99 protocol ip u32 \\
049c96
+        match ip src 192.168.8.0/24 classid 1:1
049c96
+.EE
049c96
+.RE
049c96
+
049c96
+This attaches a filter to the qdisc identified by
049c96
+.BR 999:0 .
049c96
+Its priority is
049c96
+.BR 99 ,
049c96
+which affects in which order multiple filters attached to the same
049c96
+.B parent
049c96
+are consulted (the lower the earlier). The filter handles packets of
049c96
+.B protocol
049c96
+type
049c96
+.BR ip ,
049c96
+and
049c96
+.BR match es
049c96
+if the IP header's source address is within the
049c96
+.B 192.168.8.0/24
049c96
+subnet. Matching packets are classified into class
049c96
+.BR 1:1 .
049c96
+The effect of this command might be surprising at first glance:
049c96
+
049c96
+.RS
049c96
+.EX
049c96
+filter parent 1: protocol ip pref 99 u32
049c96
+filter parent 1: protocol ip pref 99 u32 \\
049c96
+        fh 800: ht divisor 1
049c96
+filter parent 1: protocol ip pref 99 u32 \\
049c96
+        fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 \\
049c96
+        match c0a80800/ffffff00 at 12
049c96
+.EE
049c96
+.RE
049c96
+
049c96
+So parent
049c96
+.B 1:
049c96
+is assigned a new
049c96
+.B u32
049c96
+filter, which contains a hash table of size 1 (as the
049c96
+.B divisor
049c96
+indicates). The table ID is
049c96
+.BR 800 .
049c96
+The third line then shows the actual filter which was added above: it sits in
049c96
+table
049c96
+.B 800
049c96
+and bucket
049c96
+.BR 0 ,
049c96
+classifies packets into class ID
049c96
+.B 1:1
049c96
+and matches the upper three bytes of the four byte value at offset
049c96
+.B 12
049c96
+to be
049c96
+.BR 0xc0a808 ,
049c96
+which is 192, 168 and 8.
049c96
+
049c96
+Now for something more complicated, namely creating a custom hash table:
049c96
+
049c96
+.RS
049c96
+.EX
049c96
+tc filter add dev eth0 prio 99 handle 1: u32 divisor 256
049c96
+.EE
049c96
+.RE
049c96
+
049c96
+This creates a table of size 256 with handle
049c96
+.B 1:
049c96
+in priority
049c96
+.BR 99 .
049c96
+The effect is as follows:
049c96
+
049c96
+.RS
049c96
+.EX
049c96
+filter parent 1: protocol all pref 99 u32
049c96
+filter parent 1: protocol all pref 99 u32 fh 1: ht divisor 256
049c96
+filter parent 1: protocol all pref 99 u32 fh 800: ht divisor 1
049c96
+.EE
049c96
+.RE
049c96
+
049c96
+So along with the requested hash table (handle
049c96
+.BR 1: ),
049c96
+the kernel has created its own table of size 1 to hold other filters of the same
049c96
+priority.
049c96
+
049c96
+The next step is to create a filter which links to the created hash table:
049c96
+
049c96
+.RS
049c96
+.EX
049c96
+tc filter add dev eth0 parent 1: prio 1 u32 \\
049c96
+        link 1: hashkey mask 0x0000ff00 at 12 \\
049c96
+        match ip src 192.168.0.0/16
049c96
+.EE
049c96
+.RE
049c96
+
049c96
+The filter is given a lower priority than the hash table itself so
049c96
+.B u32
049c96
+consults it before manually traversing the hash table. The options
049c96
+.BR link " and " hashkey
049c96
+determine which table and bucket to redirect to. In this case the hash key
049c96
+should be constructed out of the second byte at offset 12, which corresponds to
049c96
+an IP packet's third byte of the source address field. Along with the
049c96
+.B match
049c96
+statement, this effectively maps all class C networks below 192.168.0.0/16 to
049c96
+different buckets of the hash table.
049c96
+
049c96
+Filters for certain subnets can be created like so:
049c96
+
049c96
+.RS
049c96
+.EX
049c96
+tc filter add dev eth0 parent 1: prio 99 u32 \\
049c96
+        ht 1: sample u32 0x00000800 0x0000ff00 at 12 \\
049c96
+        match ip src 192.168.8.0/24 classid 1:1
049c96
+.EE
049c96
+.RE
049c96
+
049c96
+The bucket is defined using the
049c96
+.B sample
049c96
+option: In this case, the second byte at offset 12 must be 0x08, exactly. In
049c96
+this case, the resulting bucket ID is obviously 8, but as soon as
049c96
+.B sample
049c96
+selects an amount of data which could exceed the
049c96
+.BR divisor ,
049c96
+one would have to know the kernel-internal algorithm to deduce the destination
049c96
+bucket. This filter's
049c96
+.B match
049c96
+statement is redundant in this case, as the entropy for the hash key does not
049c96
+exceed the table size and therefore no collisions can occur. Otherwise it's
049c96
+necessary to prevent matching unwanted packets.
049c96
+
049c96
+Matching upper layer fields is problematic since IPv4 header length is variable
049c96
+and IPv6 supports extension headers which affect upper layer header offset. To
049c96
+overcome this, there is the possibility to specify
049c96
+.B nexthdr+
049c96
+when giving an offset, and to make things easier there are the
049c96
+.BR tcp " and " udp
049c96
+matches which use
049c96
+.B nexthdr+
049c96
+implicitly. This offset has to be calculated beforehand though, and the only
049c96
+way to achieve that is by doing it in a separate filter which then links to the
049c96
+filter which wants to use it. Here is an example of doing so:
049c96
+
049c96
+.RS
049c96
+.EX
049c96
+tc filter add dev eth0 parent 1:0 protocol ip handle 1: \\
049c96
+        u32 divisor 1
049c96
+tc filter add dev eth0 parent 1:0 protocol ip \\
049c96
+        u32 ht 1: \\
049c96
+        match tcp src 22 FFFF \\
049c96
+        classid 1:2
049c96
+tc filter add dev eth0 parent 1:0 protocol ip \\
049c96
+        u32 ht 800: \\
049c96
+        match ip protocol 6 FF \\
049c96
+        match ip firstfrag \\
049c96
+        offset at 0 mask 0f00 shift 6 \\
049c96
+        link 1:
049c96
+.EE
049c96
+.RE
049c96
+
049c96
+This is what is being done: In the first call, a single element sized hash table
049c96
+is created so there is a place to hold the linked to filter and a known handle
049c96
+.RB ( 1: )
049c96
+to reference to it. The second call then adds the actual filter, which pushes
049c96
+packets with TCP source port 22 into class
049c96
+.BR 1:2 .
049c96
+Using
049c96
+.BR ht ,
049c96
+it is moved into the hash table created by the first call. The third call then
049c96
+does the actual magic: It matches IPv4 packets with next layer protocol 6 (TCP),
049c96
+only if it's the first fragment (usually TCP sets DF bit, but if it doesn't and
049c96
+the packet is fragmented, only the first one contains the TCP header), and then
049c96
+sets the offset based on the IP header's IHL field (right-shifting by 6
049c96
+eliminates the offset of the field and at the same time converts the value into
049c96
+byte unit). Finally, using
049c96
+.BR link ,
049c96
+the hash table from first call is referenced which holds the filter from second
049c96
+call.
049c96
+.SH SEE ALSO
049c96
+.BR tc (8),
049c96
+.br
049c96
+.BR cls_u32.txt " at " http://linux-tc-notes.sourceforge.net/
049c96
-- 
049c96
1.8.3.1
049c96